diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index 6666636a09..4351c40d75 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -440,13 +440,8 @@ if(PKG_MSCG OR PKG_ATC OR PKG_AWPMD OR PKG_ML-QUIP OR PKG_ML-POD OR PKG_LATTE OR find_package(BLAS) endif() if(NOT LAPACK_FOUND OR NOT BLAS_FOUND OR USE_INTERNAL_LINALG) - include(CheckGeneratorSupport) - if(NOT CMAKE_GENERATOR_SUPPORT_FORTRAN) - status(FATAL_ERROR "Cannot build internal linear algebra library as CMake build tool lacks Fortran support") - endif() - enable_language(Fortran) - file(GLOB LAPACK_SOURCES ${LAMMPS_LIB_SOURCE_DIR}/linalg/[^.]*.[fF]) - add_library(linalg STATIC ${LAPACK_SOURCES}) + file(GLOB LINALG_SOURCES ${LAMMPS_LIB_SOURCE_DIR}/linalg/[^.]*.cpp) + add_library(linalg STATIC ${LINALG_SOURCES}) set_target_properties(linalg PROPERTIES OUTPUT_NAME lammps_linalg${LAMMPS_MACHINE}) set(BLAS_LIBRARIES "$") set(LAPACK_LIBRARIES "$") diff --git a/lib/linalg/Makefile.gfortran b/lib/linalg/Makefile.g++ similarity index 65% rename from lib/linalg/Makefile.gfortran rename to lib/linalg/Makefile.g++ index 89ac3bbe6d..b1e18e8fd3 100644 --- a/lib/linalg/Makefile.gfortran +++ b/lib/linalg/Makefile.g++ @@ -6,20 +6,19 @@ SHELL = /bin/sh # ------ FILES ------ -SRC = $(wildcard *.f) +SRC = $(wildcard *.cpp) FILES = $(SRC) Makefile.* README # ------ DEFINITIONS ------ LIB = liblinalg.a -OBJ = $(SRC:.f=.o) +OBJ = $(SRC:.cpp=.o) # ------ SETTINGS ------ -FC = gfortran -FFLAGS = -O3 -fPIC -ffast-math -fstrict-aliasing -fno-second-underscore -FFLAGS0 = -O0 -fPIC -fno-second-underscore +CXX = g++ -std=c++11 +CCFLAGS = -O3 -fPIC -ffast-math -fstrict-aliasing ARCHIVE = ar AR = ar ARCHFLAG = -rcs @@ -33,16 +32,13 @@ lib: $(OBJ) # ------ COMPILE RULES ------ -%.o:%.f - $(FC) $(FFLAGS) -c $< - -dlamch.o: dlamch.f - $(FC) $(FFLAGS0) -c $< +%.o:%.cpp + $(CC) $(CCFLAGS) -c $< # ------ CLEAN ------ clean: - -rm -f *.o *.mod *~ $(LIB) + -rm -f *.o *~ $(LIB) tar: -tar -czvf ../linalg.tar.gz $(FILES) diff --git a/lib/linalg/Makefile.mpi b/lib/linalg/Makefile.mpi index 74de6cdf3d..567653cebc 100644 --- a/lib/linalg/Makefile.mpi +++ b/lib/linalg/Makefile.mpi @@ -6,20 +6,19 @@ SHELL = /bin/sh # ------ FILES ------ -SRC = $(wildcard *.f) +SRC = $(wildcard *.cpp) FILES = $(SRC) Makefile.* README # ------ DEFINITIONS ------ LIB = liblinalg.a -OBJ = $(SRC:.f=.o) +OBJ = $(SRC:.cpp=.o) # ------ SETTINGS ------ -FC = mpifort -FFLAGS = -O3 -fPIC -FFLAGS0 = -O0 -fPIC +CC = mpicxx +CCFLAGS = -O3 -fPIC ARCHIVE = ar AR = ar ARCHFLAG = -rcs @@ -33,16 +32,13 @@ lib: $(OBJ) # ------ COMPILE RULES ------ -%.o:%.f - $(FC) $(FFLAGS) -c $< - -dlamch.o: dlamch.f - $(FC) $(FFLAGS0) -c $< +%.o:%.cpp + $(CC) $(CCFLAGS) -c $< # ------ CLEAN ------ clean: - -rm -f *.o *.mod *~ $(LIB) + -rm -f *.o *~ $(LIB) tar: -tar -czvf ../linalg.tar.gz $(FILES) diff --git a/lib/linalg/Makefile.serial b/lib/linalg/Makefile.serial index c52fbcb986..9d7bb000f9 120000 --- a/lib/linalg/Makefile.serial +++ b/lib/linalg/Makefile.serial @@ -1 +1 @@ -Makefile.gfortran \ No newline at end of file +Makefile.g++ \ No newline at end of file diff --git a/lib/linalg/convert.sh b/lib/linalg/convert.sh new file mode 100755 index 0000000000..c73e0d3393 --- /dev/null +++ b/lib/linalg/convert.sh @@ -0,0 +1,44 @@ +#!/bin/bash + +has_f2c=$(type f2c > /dev/null 2>&1 && echo 1 || echo 0) +if test ${has_f2c} -eq 0 +then + echo "Must have f2c installed to run this script" + exit 1 +fi + +# cleanup +rm -f *.c *.cpp *.P *~ *.orig *.bak *.rej + +# translate files directly, skip those for which we have replacements. +for f in fortran/*.f +do \ + b=$(basename $f .f) + if test $b == dgetrf2 || test $b == disnan || test $b == dlaisnan || \ + test $b == dlamch || test $b == dlarft || test $b == dpotrf2 || \ + test $b == lsame || test $b == xerbla || test $b == zlarft + then + echo Skipping $b + else + f2c -C++ -a -f $f && mv $b.c $b.cpp || exit 2 + # silence c++ compiler warnings about string constants + sed -i -e 's/\("[^"]\+"\)/(char *)\1/g' -e 's/^extern.*"C"/extern "C"/' \ + -e 's/^#include.*"f2c.h"/#include "lmp_f2c.h"/' $b.cpp + fi +done + +# translate modified versions +for f in static/*.f +do \ + b=$(basename $f .f) + f2c -C++ -a -f $f && mv $b.c $b.cpp || exit 2 + # silence c++ compiler warnings about string constants + sed -i -e 's/\("[^"]\+"\)/(char *)\1/g' -e 's/^extern.*"C"/extern "C"/' \ + -e 's/^#include.*"f2c.h"/#include "lmp_f2c.h"/' $b.cpp +done + +# copy direct C++ alternatives +for c in static/*.cpp +do \ + cp -v $c . +done diff --git a/lib/linalg/dasum.cpp b/lib/linalg/dasum.cpp new file mode 100644 index 0000000000..0119bed6fd --- /dev/null +++ b/lib/linalg/dasum.cpp @@ -0,0 +1,168 @@ +/* fortran/dasum.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* > \brief \b DASUM */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* Definition: */ +/* =========== */ + +/* DOUBLE PRECISION FUNCTION DASUM(N,DX,INCX) */ + +/* .. Scalar Arguments .. */ +/* INTEGER INCX,N */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION DX(*) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DASUM takes the sum of the absolute values. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > number of elements in input vector(s) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DX */ +/* > \verbatim */ +/* > DX is DOUBLE PRECISION array, dimension ( 1 + ( N - 1 )*abs( INCX ) ) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] INCX */ +/* > \verbatim */ +/* > INCX is INTEGER */ +/* > storage spacing between elements of DX */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup double_blas_level1 */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > jack dongarra, linpack, 3/11/78. */ +/* > modified 3/93 to return if incx .le. 0. */ +/* > modified 12/3/93, array(1) declarations changed to array(*) */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +doublereal dasum_(integer *n, doublereal *dx, integer *incx) +{ + /* System generated locals */ + integer i__1, i__2; + doublereal ret_val, d__1, d__2, d__3, d__4, d__5, d__6; + + /* Local variables */ + integer i__, m, mp1; + doublereal dtemp; + integer nincx; + + +/* -- Reference BLAS level1 routine -- */ +/* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ + /* Parameter adjustments */ + --dx; + + /* Function Body */ + ret_val = 0.; + dtemp = 0.; + if (*n <= 0 || *incx <= 0) { + return ret_val; + } + if (*incx == 1) { +/* code for increment equal to 1 */ + + +/* clean-up loop */ + + m = *n % 6; + if (m != 0) { + i__1 = m; + for (i__ = 1; i__ <= i__1; ++i__) { + dtemp += (d__1 = dx[i__], abs(d__1)); + } + if (*n < 6) { + ret_val = dtemp; + return ret_val; + } + } + mp1 = m + 1; + i__1 = *n; + for (i__ = mp1; i__ <= i__1; i__ += 6) { + dtemp = dtemp + (d__1 = dx[i__], abs(d__1)) + (d__2 = dx[i__ + 1], + abs(d__2)) + (d__3 = dx[i__ + 2], abs(d__3)) + (d__4 = + dx[i__ + 3], abs(d__4)) + (d__5 = dx[i__ + 4], abs(d__5)) + + (d__6 = dx[i__ + 5], abs(d__6)); + } + } else { + +/* code for increment not equal to 1 */ + + nincx = *n * *incx; + i__1 = nincx; + i__2 = *incx; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { + dtemp += (d__1 = dx[i__], abs(d__1)); + } + } + ret_val = dtemp; + return ret_val; + +/* End of DASUM */ + +} /* dasum_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/daxpy.cpp b/lib/linalg/daxpy.cpp new file mode 100644 index 0000000000..d89cd6e9c4 --- /dev/null +++ b/lib/linalg/daxpy.cpp @@ -0,0 +1,194 @@ +/* fortran/daxpy.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* > \brief \b DAXPY */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DAXPY(N,DA,DX,INCX,DY,INCY) */ + +/* .. Scalar Arguments .. */ +/* DOUBLE PRECISION DA */ +/* INTEGER INCX,INCY,N */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION DX(*),DY(*) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DAXPY constant times a vector plus a vector. */ +/* > uses unrolled loops for increments equal to one. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > number of elements in input vector(s) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DA */ +/* > \verbatim */ +/* > DA is DOUBLE PRECISION */ +/* > On entry, DA specifies the scalar alpha. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DX */ +/* > \verbatim */ +/* > DX is DOUBLE PRECISION array, dimension ( 1 + ( N - 1 )*abs( INCX ) ) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] INCX */ +/* > \verbatim */ +/* > INCX is INTEGER */ +/* > storage spacing between elements of DX */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] DY */ +/* > \verbatim */ +/* > DY is DOUBLE PRECISION array, dimension ( 1 + ( N - 1 )*abs( INCY ) ) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] INCY */ +/* > \verbatim */ +/* > INCY is INTEGER */ +/* > storage spacing between elements of DY */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup double_blas_level1 */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > jack dongarra, linpack, 3/11/78. */ +/* > modified 12/3/93, array(1) declarations changed to array(*) */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int daxpy_(integer *n, doublereal *da, doublereal *dx, + integer *incx, doublereal *dy, integer *incy) +{ + /* System generated locals */ + integer i__1; + + /* Local variables */ + integer i__, m, ix, iy, mp1; + + +/* -- Reference BLAS level1 routine -- */ +/* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ + /* Parameter adjustments */ + --dy; + --dx; + + /* Function Body */ + if (*n <= 0) { + return 0; + } + if (*da == 0.) { + return 0; + } + if (*incx == 1 && *incy == 1) { + +/* code for both increments equal to 1 */ + + +/* clean-up loop */ + + m = *n % 4; + if (m != 0) { + i__1 = m; + for (i__ = 1; i__ <= i__1; ++i__) { + dy[i__] += *da * dx[i__]; + } + } + if (*n < 4) { + return 0; + } + mp1 = m + 1; + i__1 = *n; + for (i__ = mp1; i__ <= i__1; i__ += 4) { + dy[i__] += *da * dx[i__]; + dy[i__ + 1] += *da * dx[i__ + 1]; + dy[i__ + 2] += *da * dx[i__ + 2]; + dy[i__ + 3] += *da * dx[i__ + 3]; + } + } else { + +/* code for unequal increments or equal increments */ +/* not equal to 1 */ + + ix = 1; + iy = 1; + if (*incx < 0) { + ix = (-(*n) + 1) * *incx + 1; + } + if (*incy < 0) { + iy = (-(*n) + 1) * *incy + 1; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + dy[iy] += *da * dx[ix]; + ix += *incx; + iy += *incy; + } + } + return 0; + +/* End of DAXPY */ + +} /* daxpy_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dbdsqr.cpp b/lib/linalg/dbdsqr.cpp new file mode 100644 index 0000000000..78c996dc2b --- /dev/null +++ b/lib/linalg/dbdsqr.cpp @@ -0,0 +1,1053 @@ +/* fortran/dbdsqr.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static doublereal c_b15 = -.125; +static integer c__1 = 1; +static doublereal c_b49 = 1.; +static doublereal c_b72 = -1.; + +/* > \brief \b DBDSQR */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DBDSQR + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DBDSQR( UPLO, N, NCVT, NRU, NCC, D, E, VT, LDVT, U, */ +/* LDU, C, LDC, WORK, INFO ) */ + +/* .. Scalar Arguments .. */ +/* CHARACTER UPLO */ +/* INTEGER INFO, LDC, LDU, LDVT, N, NCC, NCVT, NRU */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION C( LDC, * ), D( * ), E( * ), U( LDU, * ), */ +/* $ VT( LDVT, * ), WORK( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DBDSQR computes the singular values and, optionally, the right and/or */ +/* > left singular vectors from the singular value decomposition (SVD) of */ +/* > a real N-by-N (upper or lower) bidiagonal matrix B using the implicit */ +/* > zero-shift QR algorithm. The SVD of B has the form */ +/* > */ +/* > B = Q * S * P**T */ +/* > */ +/* > where S is the diagonal matrix of singular values, Q is an orthogonal */ +/* > matrix of left singular vectors, and P is an orthogonal matrix of */ +/* > right singular vectors. If left singular vectors are requested, this */ +/* > subroutine actually returns U*Q instead of Q, and, if right singular */ +/* > vectors are requested, this subroutine returns P**T*VT instead of */ +/* > P**T, for given real input matrices U and VT. When U and VT are the */ +/* > orthogonal matrices that reduce a general matrix A to bidiagonal */ +/* > form: A = U*B*VT, as computed by DGEBRD, then */ +/* > */ +/* > A = (U*Q) * S * (P**T*VT) */ +/* > */ +/* > is the SVD of A. Optionally, the subroutine may also compute Q**T*C */ +/* > for a given real input matrix C. */ +/* > */ +/* > See "Computing Small Singular Values of Bidiagonal Matrices With */ +/* > Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan, */ +/* > LAPACK Working Note #3 (or SIAM J. Sci. Statist. Comput. vol. 11, */ +/* > no. 5, pp. 873-912, Sept 1990) and */ +/* > (char *)"Accurate singular values and differential qd algorithms," by */ +/* > B. Parlett and V. Fernando, Technical Report CPAM-554, Mathematics */ +/* > Department, University of California at Berkeley, July 1992 */ +/* > for a detailed description of the algorithm. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': B is upper bidiagonal; */ +/* > = 'L': B is lower bidiagonal. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix B. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NCVT */ +/* > \verbatim */ +/* > NCVT is INTEGER */ +/* > The number of columns of the matrix VT. NCVT >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRU */ +/* > \verbatim */ +/* > NRU is INTEGER */ +/* > The number of rows of the matrix U. NRU >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NCC */ +/* > \verbatim */ +/* > NCC is INTEGER */ +/* > The number of columns of the matrix C. NCC >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] D */ +/* > \verbatim */ +/* > D is DOUBLE PRECISION array, dimension (N) */ +/* > On entry, the n diagonal elements of the bidiagonal matrix B. */ +/* > On exit, if INFO=0, the singular values of B in decreasing */ +/* > order. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] E */ +/* > \verbatim */ +/* > E is DOUBLE PRECISION array, dimension (N-1) */ +/* > On entry, the N-1 offdiagonal elements of the bidiagonal */ +/* > matrix B. */ +/* > On exit, if INFO = 0, E is destroyed; if INFO > 0, D and E */ +/* > will contain the diagonal and superdiagonal elements of a */ +/* > bidiagonal matrix orthogonally equivalent to the one given */ +/* > as input. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] VT */ +/* > \verbatim */ +/* > VT is DOUBLE PRECISION array, dimension (LDVT, NCVT) */ +/* > On entry, an N-by-NCVT matrix VT. */ +/* > On exit, VT is overwritten by P**T * VT. */ +/* > Not referenced if NCVT = 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDVT */ +/* > \verbatim */ +/* > LDVT is INTEGER */ +/* > The leading dimension of the array VT. */ +/* > LDVT >= max(1,N) if NCVT > 0; LDVT >= 1 if NCVT = 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] U */ +/* > \verbatim */ +/* > U is DOUBLE PRECISION array, dimension (LDU, N) */ +/* > On entry, an NRU-by-N matrix U. */ +/* > On exit, U is overwritten by U * Q. */ +/* > Not referenced if NRU = 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDU */ +/* > \verbatim */ +/* > LDU is INTEGER */ +/* > The leading dimension of the array U. LDU >= max(1,NRU). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] C */ +/* > \verbatim */ +/* > C is DOUBLE PRECISION array, dimension (LDC, NCC) */ +/* > On entry, an N-by-NCC matrix C. */ +/* > On exit, C is overwritten by Q**T * C. */ +/* > Not referenced if NCC = 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDC */ +/* > \verbatim */ +/* > LDC is INTEGER */ +/* > The leading dimension of the array C. */ +/* > LDC >= max(1,N) if NCC > 0; LDC >=1 if NCC = 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (4*(N-1)) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: If INFO = -i, the i-th argument had an illegal value */ +/* > > 0: */ +/* > if NCVT = NRU = NCC = 0, */ +/* > = 1, a split was marked by a positive value in E */ +/* > = 2, current block of Z not diagonalized after 30*N */ +/* > iterations (in inner while loop) */ +/* > = 3, termination criterion of outer while loop not met */ +/* > (program created more than N unreduced blocks) */ +/* > else NCVT = NRU = NCC = 0, */ +/* > the algorithm did not converge; D and E contain the */ +/* > elements of a bidiagonal matrix which is orthogonally */ +/* > similar to the input matrix B; if INFO = i, i */ +/* > elements of E have not converged to zero. */ +/* > \endverbatim */ + +/* > \par Internal Parameters: */ +/* ========================= */ +/* > */ +/* > \verbatim */ +/* > TOLMUL DOUBLE PRECISION, default = max(10,min(100,EPS**(-1/8))) */ +/* > TOLMUL controls the convergence criterion of the QR loop. */ +/* > If it is positive, TOLMUL*EPS is the desired relative */ +/* > precision in the computed singular values. */ +/* > If it is negative, abs(TOLMUL*EPS*sigma_max) is the */ +/* > desired absolute accuracy in the computed singular */ +/* > values (corresponds to relative accuracy */ +/* > abs(TOLMUL*EPS) in the largest singular value. */ +/* > abs(TOLMUL) should be between 1 and 1/EPS, and preferably */ +/* > between 10 (for fast convergence) and .1/EPS */ +/* > (for there to be some accuracy in the results). */ +/* > Default is to lose at either one eighth or 2 of the */ +/* > available decimal digits in each computed singular value */ +/* > (whichever is smaller). */ +/* > */ +/* > MAXITR INTEGER, default = 6 */ +/* > MAXITR controls the maximum number of passes of the */ +/* > algorithm through its inner loop. The algorithms stops */ +/* > (and so fails to converge) if the number of passes */ +/* > through the inner loop exceeds MAXITR*N**2. */ +/* > */ +/* > \endverbatim */ + +/* > \par Note: */ +/* =========== */ +/* > */ +/* > \verbatim */ +/* > Bug report from Cezary Dendek. */ +/* > On March 23rd 2017, the INTEGER variable MAXIT = MAXITR*N**2 is */ +/* > removed since it can overflow pretty easily (for N larger or equal */ +/* > than 18,919). We instead use MAXITDIVN = MAXITR*N. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup auxOTHERcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int dbdsqr_(char *uplo, integer *n, integer *ncvt, integer * + nru, integer *ncc, doublereal *d__, doublereal *e, doublereal *vt, + integer *ldvt, doublereal *u, integer *ldu, doublereal *c__, integer * + ldc, doublereal *work, integer *info, ftnlen uplo_len) +{ + /* System generated locals */ + integer c_dim1, c_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1, + i__2; + doublereal d__1, d__2, d__3, d__4; + + /* Builtin functions */ + double pow_dd(doublereal *, doublereal *), sqrt(doublereal), d_sign( + doublereal *, doublereal *); + + /* Local variables */ + integer iterdivn; + doublereal f, g, h__; + integer i__, j, m; + doublereal r__; + integer maxitdivn; + doublereal cs; + integer ll; + doublereal sn, mu; + integer nm1, nm12, nm13, lll; + doublereal eps, sll, tol, abse; + integer idir; + doublereal abss; + integer oldm; + doublereal cosl; + integer isub, iter; + doublereal unfl, sinl, cosr, smin, smax, sinr; + extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *), dlas2_( + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *), dscal_(integer *, doublereal *, doublereal *, + integer *); + extern logical lsame_(char *, char *, ftnlen, ftnlen); + doublereal oldcs; + extern /* Subroutine */ int dlasr_(char *, char *, char *, integer *, + integer *, doublereal *, doublereal *, doublereal *, integer *, + ftnlen, ftnlen, ftnlen); + integer oldll; + doublereal shift, sigmn, oldsn; + extern /* Subroutine */ int dswap_(integer *, doublereal *, integer *, + doublereal *, integer *); + doublereal sminl, sigmx; + logical lower; + extern /* Subroutine */ int dlasq1_(integer *, doublereal *, doublereal *, + doublereal *, integer *), dlasv2_(doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *); + extern doublereal dlamch_(char *, ftnlen); + extern /* Subroutine */ int dlartg_(doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *), xerbla_(char *, + integer *, ftnlen); + doublereal sminoa, thresh; + logical rotate; + doublereal tolmul; + + +/* -- LAPACK computational routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + --d__; + --e; + vt_dim1 = *ldvt; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + lower = lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1); + if (! lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1) && ! lower) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*ncvt < 0) { + *info = -3; + } else if (*nru < 0) { + *info = -4; + } else if (*ncc < 0) { + *info = -5; + } else if (*ncvt == 0 && *ldvt < 1 || *ncvt > 0 && *ldvt < max(1,*n)) { + *info = -9; + } else if (*ldu < max(1,*nru)) { + *info = -11; + } else if (*ncc == 0 && *ldc < 1 || *ncc > 0 && *ldc < max(1,*n)) { + *info = -13; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DBDSQR", &i__1, (ftnlen)6); + return 0; + } + if (*n == 0) { + return 0; + } + if (*n == 1) { + goto L160; + } + +/* ROTATE is true if any singular vectors desired, false otherwise */ + + rotate = *ncvt > 0 || *nru > 0 || *ncc > 0; + +/* If no singular vectors desired, use qd algorithm */ + + if (! rotate) { + dlasq1_(n, &d__[1], &e[1], &work[1], info); + +/* If INFO equals 2, dqds didn't finish, try to finish */ + + if (*info != 2) { + return 0; + } + *info = 0; + } + + nm1 = *n - 1; + nm12 = nm1 + nm1; + nm13 = nm12 + nm1; + idir = 0; + +/* Get machine constants */ + + eps = dlamch_((char *)"Epsilon", (ftnlen)7); + unfl = dlamch_((char *)"Safe minimum", (ftnlen)12); + +/* If matrix lower bidiagonal, rotate to be upper bidiagonal */ +/* by applying Givens rotations on the left */ + + if (lower) { + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + dlartg_(&d__[i__], &e[i__], &cs, &sn, &r__); + d__[i__] = r__; + e[i__] = sn * d__[i__ + 1]; + d__[i__ + 1] = cs * d__[i__ + 1]; + work[i__] = cs; + work[nm1 + i__] = sn; +/* L10: */ + } + +/* Update singular vectors if desired */ + + if (*nru > 0) { + dlasr_((char *)"R", (char *)"V", (char *)"F", nru, n, &work[1], &work[*n], &u[u_offset], + ldu, (ftnlen)1, (ftnlen)1, (ftnlen)1); + } + if (*ncc > 0) { + dlasr_((char *)"L", (char *)"V", (char *)"F", n, ncc, &work[1], &work[*n], &c__[c_offset], + ldc, (ftnlen)1, (ftnlen)1, (ftnlen)1); + } + } + +/* Compute singular values to relative accuracy TOL */ +/* (By setting TOL to be negative, algorithm will compute */ +/* singular values to absolute accuracy ABS(TOL)*norm(input matrix)) */ + +/* Computing MAX */ +/* Computing MIN */ + d__3 = 100., d__4 = pow_dd(&eps, &c_b15); + d__1 = 10., d__2 = min(d__3,d__4); + tolmul = max(d__1,d__2); + tol = tolmul * eps; + +/* Compute approximate maximum, minimum singular values */ + + smax = 0.; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MAX */ + d__2 = smax, d__3 = (d__1 = d__[i__], abs(d__1)); + smax = max(d__2,d__3); +/* L20: */ + } + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MAX */ + d__2 = smax, d__3 = (d__1 = e[i__], abs(d__1)); + smax = max(d__2,d__3); +/* L30: */ + } + sminl = 0.; + if (tol >= 0.) { + +/* Relative accuracy desired */ + + sminoa = abs(d__[1]); + if (sminoa == 0.) { + goto L50; + } + mu = sminoa; + i__1 = *n; + for (i__ = 2; i__ <= i__1; ++i__) { + mu = (d__2 = d__[i__], abs(d__2)) * (mu / (mu + (d__1 = e[i__ - 1] + , abs(d__1)))); + sminoa = min(sminoa,mu); + if (sminoa == 0.) { + goto L50; + } +/* L40: */ + } +L50: + sminoa /= sqrt((doublereal) (*n)); +/* Computing MAX */ + d__1 = tol * sminoa, d__2 = *n * (*n * unfl) * 6; + thresh = max(d__1,d__2); + } else { + +/* Absolute accuracy desired */ + +/* Computing MAX */ + d__1 = abs(tol) * smax, d__2 = *n * (*n * unfl) * 6; + thresh = max(d__1,d__2); + } + +/* Prepare for main iteration loop for the singular values */ +/* (MAXIT is the maximum number of passes through the inner */ +/* loop permitted before nonconvergence signalled.) */ + + maxitdivn = *n * 6; + iterdivn = 0; + iter = -1; + oldll = -1; + oldm = -1; + +/* M points to last element of unconverged part of matrix */ + + m = *n; + +/* Begin main iteration loop */ + +L60: + +/* Check for convergence or exceeding iteration count */ + + if (m <= 1) { + goto L160; + } + + if (iter >= *n) { + iter -= *n; + ++iterdivn; + if (iterdivn >= maxitdivn) { + goto L200; + } + } + +/* Find diagonal block of matrix to work on */ + + if (tol < 0. && (d__1 = d__[m], abs(d__1)) <= thresh) { + d__[m] = 0.; + } + smax = (d__1 = d__[m], abs(d__1)); + smin = smax; + i__1 = m - 1; + for (lll = 1; lll <= i__1; ++lll) { + ll = m - lll; + abss = (d__1 = d__[ll], abs(d__1)); + abse = (d__1 = e[ll], abs(d__1)); + if (tol < 0. && abss <= thresh) { + d__[ll] = 0.; + } + if (abse <= thresh) { + goto L80; + } + smin = min(smin,abss); +/* Computing MAX */ + d__1 = max(smax,abss); + smax = max(d__1,abse); +/* L70: */ + } + ll = 0; + goto L90; +L80: + e[ll] = 0.; + +/* Matrix splits since E(LL) = 0 */ + + if (ll == m - 1) { + +/* Convergence of bottom singular value, return to top of loop */ + + --m; + goto L60; + } +L90: + ++ll; + +/* E(LL) through E(M-1) are nonzero, E(LL-1) is zero */ + + if (ll == m - 1) { + +/* 2 by 2 block, handle separately */ + + dlasv2_(&d__[m - 1], &e[m - 1], &d__[m], &sigmn, &sigmx, &sinr, &cosr, + &sinl, &cosl); + d__[m - 1] = sigmx; + e[m - 1] = 0.; + d__[m] = sigmn; + +/* Compute singular vectors, if desired */ + + if (*ncvt > 0) { + drot_(ncvt, &vt[m - 1 + vt_dim1], ldvt, &vt[m + vt_dim1], ldvt, & + cosr, &sinr); + } + if (*nru > 0) { + drot_(nru, &u[(m - 1) * u_dim1 + 1], &c__1, &u[m * u_dim1 + 1], & + c__1, &cosl, &sinl); + } + if (*ncc > 0) { + drot_(ncc, &c__[m - 1 + c_dim1], ldc, &c__[m + c_dim1], ldc, & + cosl, &sinl); + } + m += -2; + goto L60; + } + +/* If working on new submatrix, choose shift direction */ +/* (from larger end diagonal element towards smaller) */ + + if (ll > oldm || m < oldll) { + if ((d__1 = d__[ll], abs(d__1)) >= (d__2 = d__[m], abs(d__2))) { + +/* Chase bulge from top (big end) to bottom (small end) */ + + idir = 1; + } else { + +/* Chase bulge from bottom (big end) to top (small end) */ + + idir = 2; + } + } + +/* Apply convergence tests */ + + if (idir == 1) { + +/* Run convergence test in forward direction */ +/* First apply standard test to bottom of matrix */ + + if ((d__2 = e[m - 1], abs(d__2)) <= abs(tol) * (d__1 = d__[m], abs( + d__1)) || tol < 0. && (d__3 = e[m - 1], abs(d__3)) <= thresh) + { + e[m - 1] = 0.; + goto L60; + } + + if (tol >= 0.) { + +/* If relative accuracy desired, */ +/* apply convergence criterion forward */ + + mu = (d__1 = d__[ll], abs(d__1)); + sminl = mu; + i__1 = m - 1; + for (lll = ll; lll <= i__1; ++lll) { + if ((d__1 = e[lll], abs(d__1)) <= tol * mu) { + e[lll] = 0.; + goto L60; + } + mu = (d__2 = d__[lll + 1], abs(d__2)) * (mu / (mu + (d__1 = e[ + lll], abs(d__1)))); + sminl = min(sminl,mu); +/* L100: */ + } + } + + } else { + +/* Run convergence test in backward direction */ +/* First apply standard test to top of matrix */ + + if ((d__2 = e[ll], abs(d__2)) <= abs(tol) * (d__1 = d__[ll], abs(d__1) + ) || tol < 0. && (d__3 = e[ll], abs(d__3)) <= thresh) { + e[ll] = 0.; + goto L60; + } + + if (tol >= 0.) { + +/* If relative accuracy desired, */ +/* apply convergence criterion backward */ + + mu = (d__1 = d__[m], abs(d__1)); + sminl = mu; + i__1 = ll; + for (lll = m - 1; lll >= i__1; --lll) { + if ((d__1 = e[lll], abs(d__1)) <= tol * mu) { + e[lll] = 0.; + goto L60; + } + mu = (d__2 = d__[lll], abs(d__2)) * (mu / (mu + (d__1 = e[lll] + , abs(d__1)))); + sminl = min(sminl,mu); +/* L110: */ + } + } + } + oldll = ll; + oldm = m; + +/* Compute shift. First, test if shifting would ruin relative */ +/* accuracy, and if so set the shift to zero. */ + +/* Computing MAX */ + d__1 = eps, d__2 = tol * .01; + if (tol >= 0. && *n * tol * (sminl / smax) <= max(d__1,d__2)) { + +/* Use a zero shift to avoid loss of relative accuracy */ + + shift = 0.; + } else { + +/* Compute the shift from 2-by-2 block at end of matrix */ + + if (idir == 1) { + sll = (d__1 = d__[ll], abs(d__1)); + dlas2_(&d__[m - 1], &e[m - 1], &d__[m], &shift, &r__); + } else { + sll = (d__1 = d__[m], abs(d__1)); + dlas2_(&d__[ll], &e[ll], &d__[ll + 1], &shift, &r__); + } + +/* Test if shift negligible, and if so set to zero */ + + if (sll > 0.) { +/* Computing 2nd power */ + d__1 = shift / sll; + if (d__1 * d__1 < eps) { + shift = 0.; + } + } + } + +/* Increment iteration count */ + + iter = iter + m - ll; + +/* If SHIFT = 0, do simplified QR iteration */ + + if (shift == 0.) { + if (idir == 1) { + +/* Chase bulge from top to bottom */ +/* Save cosines and sines for later singular vector updates */ + + cs = 1.; + oldcs = 1.; + i__1 = m - 1; + for (i__ = ll; i__ <= i__1; ++i__) { + d__1 = d__[i__] * cs; + dlartg_(&d__1, &e[i__], &cs, &sn, &r__); + if (i__ > ll) { + e[i__ - 1] = oldsn * r__; + } + d__1 = oldcs * r__; + d__2 = d__[i__ + 1] * sn; + dlartg_(&d__1, &d__2, &oldcs, &oldsn, &d__[i__]); + work[i__ - ll + 1] = cs; + work[i__ - ll + 1 + nm1] = sn; + work[i__ - ll + 1 + nm12] = oldcs; + work[i__ - ll + 1 + nm13] = oldsn; +/* L120: */ + } + h__ = d__[m] * cs; + d__[m] = h__ * oldcs; + e[m - 1] = h__ * oldsn; + +/* Update singular vectors */ + + if (*ncvt > 0) { + i__1 = m - ll + 1; + dlasr_((char *)"L", (char *)"V", (char *)"F", &i__1, ncvt, &work[1], &work[*n], &vt[ + ll + vt_dim1], ldvt, (ftnlen)1, (ftnlen)1, (ftnlen)1); + } + if (*nru > 0) { + i__1 = m - ll + 1; + dlasr_((char *)"R", (char *)"V", (char *)"F", nru, &i__1, &work[nm12 + 1], &work[nm13 + + 1], &u[ll * u_dim1 + 1], ldu, (ftnlen)1, (ftnlen)1, + (ftnlen)1); + } + if (*ncc > 0) { + i__1 = m - ll + 1; + dlasr_((char *)"L", (char *)"V", (char *)"F", &i__1, ncc, &work[nm12 + 1], &work[nm13 + + 1], &c__[ll + c_dim1], ldc, (ftnlen)1, (ftnlen)1, ( + ftnlen)1); + } + +/* Test convergence */ + + if ((d__1 = e[m - 1], abs(d__1)) <= thresh) { + e[m - 1] = 0.; + } + + } else { + +/* Chase bulge from bottom to top */ +/* Save cosines and sines for later singular vector updates */ + + cs = 1.; + oldcs = 1.; + i__1 = ll + 1; + for (i__ = m; i__ >= i__1; --i__) { + d__1 = d__[i__] * cs; + dlartg_(&d__1, &e[i__ - 1], &cs, &sn, &r__); + if (i__ < m) { + e[i__] = oldsn * r__; + } + d__1 = oldcs * r__; + d__2 = d__[i__ - 1] * sn; + dlartg_(&d__1, &d__2, &oldcs, &oldsn, &d__[i__]); + work[i__ - ll] = cs; + work[i__ - ll + nm1] = -sn; + work[i__ - ll + nm12] = oldcs; + work[i__ - ll + nm13] = -oldsn; +/* L130: */ + } + h__ = d__[ll] * cs; + d__[ll] = h__ * oldcs; + e[ll] = h__ * oldsn; + +/* Update singular vectors */ + + if (*ncvt > 0) { + i__1 = m - ll + 1; + dlasr_((char *)"L", (char *)"V", (char *)"B", &i__1, ncvt, &work[nm12 + 1], &work[ + nm13 + 1], &vt[ll + vt_dim1], ldvt, (ftnlen)1, ( + ftnlen)1, (ftnlen)1); + } + if (*nru > 0) { + i__1 = m - ll + 1; + dlasr_((char *)"R", (char *)"V", (char *)"B", nru, &i__1, &work[1], &work[*n], &u[ll * + u_dim1 + 1], ldu, (ftnlen)1, (ftnlen)1, (ftnlen)1); + } + if (*ncc > 0) { + i__1 = m - ll + 1; + dlasr_((char *)"L", (char *)"V", (char *)"B", &i__1, ncc, &work[1], &work[*n], &c__[ + ll + c_dim1], ldc, (ftnlen)1, (ftnlen)1, (ftnlen)1); + } + +/* Test convergence */ + + if ((d__1 = e[ll], abs(d__1)) <= thresh) { + e[ll] = 0.; + } + } + } else { + +/* Use nonzero shift */ + + if (idir == 1) { + +/* Chase bulge from top to bottom */ +/* Save cosines and sines for later singular vector updates */ + + f = ((d__1 = d__[ll], abs(d__1)) - shift) * (d_sign(&c_b49, &d__[ + ll]) + shift / d__[ll]); + g = e[ll]; + i__1 = m - 1; + for (i__ = ll; i__ <= i__1; ++i__) { + dlartg_(&f, &g, &cosr, &sinr, &r__); + if (i__ > ll) { + e[i__ - 1] = r__; + } + f = cosr * d__[i__] + sinr * e[i__]; + e[i__] = cosr * e[i__] - sinr * d__[i__]; + g = sinr * d__[i__ + 1]; + d__[i__ + 1] = cosr * d__[i__ + 1]; + dlartg_(&f, &g, &cosl, &sinl, &r__); + d__[i__] = r__; + f = cosl * e[i__] + sinl * d__[i__ + 1]; + d__[i__ + 1] = cosl * d__[i__ + 1] - sinl * e[i__]; + if (i__ < m - 1) { + g = sinl * e[i__ + 1]; + e[i__ + 1] = cosl * e[i__ + 1]; + } + work[i__ - ll + 1] = cosr; + work[i__ - ll + 1 + nm1] = sinr; + work[i__ - ll + 1 + nm12] = cosl; + work[i__ - ll + 1 + nm13] = sinl; +/* L140: */ + } + e[m - 1] = f; + +/* Update singular vectors */ + + if (*ncvt > 0) { + i__1 = m - ll + 1; + dlasr_((char *)"L", (char *)"V", (char *)"F", &i__1, ncvt, &work[1], &work[*n], &vt[ + ll + vt_dim1], ldvt, (ftnlen)1, (ftnlen)1, (ftnlen)1); + } + if (*nru > 0) { + i__1 = m - ll + 1; + dlasr_((char *)"R", (char *)"V", (char *)"F", nru, &i__1, &work[nm12 + 1], &work[nm13 + + 1], &u[ll * u_dim1 + 1], ldu, (ftnlen)1, (ftnlen)1, + (ftnlen)1); + } + if (*ncc > 0) { + i__1 = m - ll + 1; + dlasr_((char *)"L", (char *)"V", (char *)"F", &i__1, ncc, &work[nm12 + 1], &work[nm13 + + 1], &c__[ll + c_dim1], ldc, (ftnlen)1, (ftnlen)1, ( + ftnlen)1); + } + +/* Test convergence */ + + if ((d__1 = e[m - 1], abs(d__1)) <= thresh) { + e[m - 1] = 0.; + } + + } else { + +/* Chase bulge from bottom to top */ +/* Save cosines and sines for later singular vector updates */ + + f = ((d__1 = d__[m], abs(d__1)) - shift) * (d_sign(&c_b49, &d__[m] + ) + shift / d__[m]); + g = e[m - 1]; + i__1 = ll + 1; + for (i__ = m; i__ >= i__1; --i__) { + dlartg_(&f, &g, &cosr, &sinr, &r__); + if (i__ < m) { + e[i__] = r__; + } + f = cosr * d__[i__] + sinr * e[i__ - 1]; + e[i__ - 1] = cosr * e[i__ - 1] - sinr * d__[i__]; + g = sinr * d__[i__ - 1]; + d__[i__ - 1] = cosr * d__[i__ - 1]; + dlartg_(&f, &g, &cosl, &sinl, &r__); + d__[i__] = r__; + f = cosl * e[i__ - 1] + sinl * d__[i__ - 1]; + d__[i__ - 1] = cosl * d__[i__ - 1] - sinl * e[i__ - 1]; + if (i__ > ll + 1) { + g = sinl * e[i__ - 2]; + e[i__ - 2] = cosl * e[i__ - 2]; + } + work[i__ - ll] = cosr; + work[i__ - ll + nm1] = -sinr; + work[i__ - ll + nm12] = cosl; + work[i__ - ll + nm13] = -sinl; +/* L150: */ + } + e[ll] = f; + +/* Test convergence */ + + if ((d__1 = e[ll], abs(d__1)) <= thresh) { + e[ll] = 0.; + } + +/* Update singular vectors if desired */ + + if (*ncvt > 0) { + i__1 = m - ll + 1; + dlasr_((char *)"L", (char *)"V", (char *)"B", &i__1, ncvt, &work[nm12 + 1], &work[ + nm13 + 1], &vt[ll + vt_dim1], ldvt, (ftnlen)1, ( + ftnlen)1, (ftnlen)1); + } + if (*nru > 0) { + i__1 = m - ll + 1; + dlasr_((char *)"R", (char *)"V", (char *)"B", nru, &i__1, &work[1], &work[*n], &u[ll * + u_dim1 + 1], ldu, (ftnlen)1, (ftnlen)1, (ftnlen)1); + } + if (*ncc > 0) { + i__1 = m - ll + 1; + dlasr_((char *)"L", (char *)"V", (char *)"B", &i__1, ncc, &work[1], &work[*n], &c__[ + ll + c_dim1], ldc, (ftnlen)1, (ftnlen)1, (ftnlen)1); + } + } + } + +/* QR iteration finished, go back and check convergence */ + + goto L60; + +/* All singular values converged, so make them positive */ + +L160: + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if (d__[i__] < 0.) { + d__[i__] = -d__[i__]; + +/* Change sign of singular vectors, if desired */ + + if (*ncvt > 0) { + dscal_(ncvt, &c_b72, &vt[i__ + vt_dim1], ldvt); + } + } +/* L170: */ + } + +/* Sort the singular values into decreasing order (insertion sort on */ +/* singular values, but only one transposition per singular vector) */ + + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Scan for smallest D(I) */ + + isub = 1; + smin = d__[1]; + i__2 = *n + 1 - i__; + for (j = 2; j <= i__2; ++j) { + if (d__[j] <= smin) { + isub = j; + smin = d__[j]; + } +/* L180: */ + } + if (isub != *n + 1 - i__) { + +/* Swap singular values and vectors */ + + d__[isub] = d__[*n + 1 - i__]; + d__[*n + 1 - i__] = smin; + if (*ncvt > 0) { + dswap_(ncvt, &vt[isub + vt_dim1], ldvt, &vt[*n + 1 - i__ + + vt_dim1], ldvt); + } + if (*nru > 0) { + dswap_(nru, &u[isub * u_dim1 + 1], &c__1, &u[(*n + 1 - i__) * + u_dim1 + 1], &c__1); + } + if (*ncc > 0) { + dswap_(ncc, &c__[isub + c_dim1], ldc, &c__[*n + 1 - i__ + + c_dim1], ldc); + } + } +/* L190: */ + } + goto L220; + +/* Maximum number of iterations exceeded, failure to converge */ + +L200: + *info = 0; + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + if (e[i__] != 0.) { + ++(*info); + } +/* L210: */ + } +L220: + return 0; + +/* End of DBDSQR */ + +} /* dbdsqr_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dcabs1.cpp b/lib/linalg/dcabs1.cpp new file mode 100644 index 0000000000..2733be3c74 --- /dev/null +++ b/lib/linalg/dcabs1.cpp @@ -0,0 +1,92 @@ +/* fortran/dcabs1.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* > \brief \b DCABS1 */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* Definition: */ +/* =========== */ + +/* DOUBLE PRECISION FUNCTION DCABS1(Z) */ + +/* .. Scalar Arguments .. */ +/* COMPLEX*16 Z */ +/* .. */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DCABS1 computes |Re(.)| + |Im(.)| of a double complex number */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] Z */ +/* > \verbatim */ +/* > Z is COMPLEX*16 */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup double_blas_level1 */ + +/* ===================================================================== */ +doublereal dcabs1_(doublecomplex *z__) +{ + /* System generated locals */ + doublereal ret_val, d__1, d__2; + + /* Builtin functions */ + double d_imag(doublecomplex *); + + +/* -- Reference BLAS level1 routine -- */ +/* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. */ +/* ===================================================================== */ + +/* .. Intrinsic Functions .. */ + + ret_val = (d__1 = z__->r, abs(d__1)) + (d__2 = d_imag(z__), abs(d__2)); + return ret_val; + +/* End of DCABS1 */ + +} /* dcabs1_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dcopy.cpp b/lib/linalg/dcopy.cpp new file mode 100644 index 0000000000..52fe214e90 --- /dev/null +++ b/lib/linalg/dcopy.cpp @@ -0,0 +1,187 @@ +/* fortran/dcopy.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* > \brief \b DCOPY */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DCOPY(N,DX,INCX,DY,INCY) */ + +/* .. Scalar Arguments .. */ +/* INTEGER INCX,INCY,N */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION DX(*),DY(*) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DCOPY copies a vector, x, to a vector, y. */ +/* > uses unrolled loops for increments equal to 1. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > number of elements in input vector(s) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DX */ +/* > \verbatim */ +/* > DX is DOUBLE PRECISION array, dimension ( 1 + ( N - 1 )*abs( INCX ) ) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] INCX */ +/* > \verbatim */ +/* > INCX is INTEGER */ +/* > storage spacing between elements of DX */ +/* > \endverbatim */ +/* > */ +/* > \param[out] DY */ +/* > \verbatim */ +/* > DY is DOUBLE PRECISION array, dimension ( 1 + ( N - 1 )*abs( INCY ) ) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] INCY */ +/* > \verbatim */ +/* > INCY is INTEGER */ +/* > storage spacing between elements of DY */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup double_blas_level1 */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > jack dongarra, linpack, 3/11/78. */ +/* > modified 12/3/93, array(1) declarations changed to array(*) */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int dcopy_(integer *n, doublereal *dx, integer *incx, + doublereal *dy, integer *incy) +{ + /* System generated locals */ + integer i__1; + + /* Local variables */ + integer i__, m, ix, iy, mp1; + + +/* -- Reference BLAS level1 routine -- */ +/* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ + /* Parameter adjustments */ + --dy; + --dx; + + /* Function Body */ + if (*n <= 0) { + return 0; + } + if (*incx == 1 && *incy == 1) { + +/* code for both increments equal to 1 */ + + +/* clean-up loop */ + + m = *n % 7; + if (m != 0) { + i__1 = m; + for (i__ = 1; i__ <= i__1; ++i__) { + dy[i__] = dx[i__]; + } + if (*n < 7) { + return 0; + } + } + mp1 = m + 1; + i__1 = *n; + for (i__ = mp1; i__ <= i__1; i__ += 7) { + dy[i__] = dx[i__]; + dy[i__ + 1] = dx[i__ + 1]; + dy[i__ + 2] = dx[i__ + 2]; + dy[i__ + 3] = dx[i__ + 3]; + dy[i__ + 4] = dx[i__ + 4]; + dy[i__ + 5] = dx[i__ + 5]; + dy[i__ + 6] = dx[i__ + 6]; + } + } else { + +/* code for unequal increments or equal increments */ +/* not equal to 1 */ + + ix = 1; + iy = 1; + if (*incx < 0) { + ix = (-(*n) + 1) * *incx + 1; + } + if (*incy < 0) { + iy = (-(*n) + 1) * *incy + 1; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + dy[iy] = dx[ix]; + ix += *incx; + iy += *incy; + } + } + return 0; + +/* End of DCOPY */ + +} /* dcopy_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/ddot.cpp b/lib/linalg/ddot.cpp new file mode 100644 index 0000000000..3f57f45459 --- /dev/null +++ b/lib/linalg/ddot.cpp @@ -0,0 +1,189 @@ +/* fortran/ddot.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* > \brief \b DDOT */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* Definition: */ +/* =========== */ + +/* DOUBLE PRECISION FUNCTION DDOT(N,DX,INCX,DY,INCY) */ + +/* .. Scalar Arguments .. */ +/* INTEGER INCX,INCY,N */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION DX(*),DY(*) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DDOT forms the dot product of two vectors. */ +/* > uses unrolled loops for increments equal to one. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > number of elements in input vector(s) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DX */ +/* > \verbatim */ +/* > DX is DOUBLE PRECISION array, dimension ( 1 + ( N - 1 )*abs( INCX ) ) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] INCX */ +/* > \verbatim */ +/* > INCX is INTEGER */ +/* > storage spacing between elements of DX */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DY */ +/* > \verbatim */ +/* > DY is DOUBLE PRECISION array, dimension ( 1 + ( N - 1 )*abs( INCY ) ) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] INCY */ +/* > \verbatim */ +/* > INCY is INTEGER */ +/* > storage spacing between elements of DY */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup double_blas_level1 */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > jack dongarra, linpack, 3/11/78. */ +/* > modified 12/3/93, array(1) declarations changed to array(*) */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +doublereal ddot_(integer *n, doublereal *dx, integer *incx, doublereal *dy, + integer *incy) +{ + /* System generated locals */ + integer i__1; + doublereal ret_val; + + /* Local variables */ + integer i__, m, ix, iy, mp1; + doublereal dtemp; + + +/* -- Reference BLAS level1 routine -- */ +/* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ + /* Parameter adjustments */ + --dy; + --dx; + + /* Function Body */ + ret_val = 0.; + dtemp = 0.; + if (*n <= 0) { + return ret_val; + } + if (*incx == 1 && *incy == 1) { + +/* code for both increments equal to 1 */ + + +/* clean-up loop */ + + m = *n % 5; + if (m != 0) { + i__1 = m; + for (i__ = 1; i__ <= i__1; ++i__) { + dtemp += dx[i__] * dy[i__]; + } + if (*n < 5) { + ret_val = dtemp; + return ret_val; + } + } + mp1 = m + 1; + i__1 = *n; + for (i__ = mp1; i__ <= i__1; i__ += 5) { + dtemp = dtemp + dx[i__] * dy[i__] + dx[i__ + 1] * dy[i__ + 1] + + dx[i__ + 2] * dy[i__ + 2] + dx[i__ + 3] * dy[i__ + 3] + + dx[i__ + 4] * dy[i__ + 4]; + } + } else { + +/* code for unequal increments or equal increments */ +/* not equal to 1 */ + + ix = 1; + iy = 1; + if (*incx < 0) { + ix = (-(*n) + 1) * *incx + 1; + } + if (*incy < 0) { + iy = (-(*n) + 1) * *incy + 1; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + dtemp += dx[ix] * dy[iy]; + ix += *incx; + iy += *incy; + } + } + ret_val = dtemp; + return ret_val; + +/* End of DDOT */ + +} /* ddot_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dgebd2.cpp b/lib/linalg/dgebd2.cpp new file mode 100644 index 0000000000..399cd3ca6e --- /dev/null +++ b/lib/linalg/dgebd2.cpp @@ -0,0 +1,393 @@ +/* fortran/dgebd2.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +/* > \brief \b DGEBD2 reduces a general matrix to bidiagonal form using an unblocked algorithm. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DGEBD2 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DGEBD2( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, INFO ) */ + +/* .. Scalar Arguments .. */ +/* INTEGER INFO, LDA, M, N */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION A( LDA, * ), D( * ), E( * ), TAUP( * ), */ +/* $ TAUQ( * ), WORK( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGEBD2 reduces a real general m by n matrix A to upper or lower */ +/* > bidiagonal form B by an orthogonal transformation: Q**T * A * P = B. */ +/* > */ +/* > If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows in the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns in the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > On entry, the m by n general matrix to be reduced. */ +/* > On exit, */ +/* > if m >= n, the diagonal and the first superdiagonal are */ +/* > overwritten with the upper bidiagonal matrix B; the */ +/* > elements below the diagonal, with the array TAUQ, represent */ +/* > the orthogonal matrix Q as a product of elementary */ +/* > reflectors, and the elements above the first superdiagonal, */ +/* > with the array TAUP, represent the orthogonal matrix P as */ +/* > a product of elementary reflectors; */ +/* > if m < n, the diagonal and the first subdiagonal are */ +/* > overwritten with the lower bidiagonal matrix B; the */ +/* > elements below the first subdiagonal, with the array TAUQ, */ +/* > represent the orthogonal matrix Q as a product of */ +/* > elementary reflectors, and the elements above the diagonal, */ +/* > with the array TAUP, represent the orthogonal matrix P as */ +/* > a product of elementary reflectors. */ +/* > See Further Details. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= max(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] D */ +/* > \verbatim */ +/* > D is DOUBLE PRECISION array, dimension (min(M,N)) */ +/* > The diagonal elements of the bidiagonal matrix B: */ +/* > D(i) = A(i,i). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] E */ +/* > \verbatim */ +/* > E is DOUBLE PRECISION array, dimension (min(M,N)-1) */ +/* > The off-diagonal elements of the bidiagonal matrix B: */ +/* > if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1; */ +/* > if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] TAUQ */ +/* > \verbatim */ +/* > TAUQ is DOUBLE PRECISION array, dimension (min(M,N)) */ +/* > The scalar factors of the elementary reflectors which */ +/* > represent the orthogonal matrix Q. See Further Details. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] TAUP */ +/* > \verbatim */ +/* > TAUP is DOUBLE PRECISION array, dimension (min(M,N)) */ +/* > The scalar factors of the elementary reflectors which */ +/* > represent the orthogonal matrix P. See Further Details. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (max(M,N)) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit. */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup doubleGEcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The matrices Q and P are represented as products of elementary */ +/* > reflectors: */ +/* > */ +/* > If m >= n, */ +/* > */ +/* > Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . G(n-1) */ +/* > */ +/* > Each H(i) and G(i) has the form: */ +/* > */ +/* > H(i) = I - tauq * v * v**T and G(i) = I - taup * u * u**T */ +/* > */ +/* > where tauq and taup are real scalars, and v and u are real vectors; */ +/* > v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in A(i+1:m,i); */ +/* > u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in A(i,i+2:n); */ +/* > tauq is stored in TAUQ(i) and taup in TAUP(i). */ +/* > */ +/* > If m < n, */ +/* > */ +/* > Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . G(m) */ +/* > */ +/* > Each H(i) and G(i) has the form: */ +/* > */ +/* > H(i) = I - tauq * v * v**T and G(i) = I - taup * u * u**T */ +/* > */ +/* > where tauq and taup are real scalars, and v and u are real vectors; */ +/* > v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i); */ +/* > u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n); */ +/* > tauq is stored in TAUQ(i) and taup in TAUP(i). */ +/* > */ +/* > The contents of A on exit are illustrated by the following examples: */ +/* > */ +/* > m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): */ +/* > */ +/* > ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 ) */ +/* > ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 ) */ +/* > ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 ) */ +/* > ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 ) */ +/* > ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 ) */ +/* > ( v1 v2 v3 v4 v5 ) */ +/* > */ +/* > where d and e denote diagonal and off-diagonal elements of B, vi */ +/* > denotes an element of the vector defining H(i), and ui an element of */ +/* > the vector defining G(i). */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int dgebd2_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *d__, doublereal *e, doublereal *tauq, doublereal * + taup, doublereal *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + integer i__; + extern /* Subroutine */ int dlarf_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *, ftnlen), dlarfg_(integer *, doublereal *, + doublereal *, integer *, doublereal *), xerbla_(char *, integer *, + ftnlen); + + +/* -- LAPACK computational routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --d__; + --e; + --tauq; + --taup; + --work; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } + if (*info < 0) { + i__1 = -(*info); + xerbla_((char *)"DGEBD2", &i__1, (ftnlen)6); + return 0; + } + + if (*m >= *n) { + +/* Reduce to upper bidiagonal form */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Generate elementary reflector H(i) to annihilate A(i+1:m,i) */ + + i__2 = *m - i__ + 1; +/* Computing MIN */ + i__3 = i__ + 1; + dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[min(i__3,*m) + i__ * + a_dim1], &c__1, &tauq[i__]); + d__[i__] = a[i__ + i__ * a_dim1]; + a[i__ + i__ * a_dim1] = 1.; + +/* Apply H(i) to A(i:m,i+1:n) from the left */ + + if (i__ < *n) { + i__2 = *m - i__ + 1; + i__3 = *n - i__; + dlarf_((char *)"Left", &i__2, &i__3, &a[i__ + i__ * a_dim1], &c__1, & + tauq[i__], &a[i__ + (i__ + 1) * a_dim1], lda, &work[1] + , (ftnlen)4); + } + a[i__ + i__ * a_dim1] = d__[i__]; + + if (i__ < *n) { + +/* Generate elementary reflector G(i) to annihilate */ +/* A(i,i+2:n) */ + + i__2 = *n - i__; +/* Computing MIN */ + i__3 = i__ + 2; + dlarfg_(&i__2, &a[i__ + (i__ + 1) * a_dim1], &a[i__ + min( + i__3,*n) * a_dim1], lda, &taup[i__]); + e[i__] = a[i__ + (i__ + 1) * a_dim1]; + a[i__ + (i__ + 1) * a_dim1] = 1.; + +/* Apply G(i) to A(i+1:m,i+1:n) from the right */ + + i__2 = *m - i__; + i__3 = *n - i__; + dlarf_((char *)"Right", &i__2, &i__3, &a[i__ + (i__ + 1) * a_dim1], + lda, &taup[i__], &a[i__ + 1 + (i__ + 1) * a_dim1], + lda, &work[1], (ftnlen)5); + a[i__ + (i__ + 1) * a_dim1] = e[i__]; + } else { + taup[i__] = 0.; + } +/* L10: */ + } + } else { + +/* Reduce to lower bidiagonal form */ + + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Generate elementary reflector G(i) to annihilate A(i,i+1:n) */ + + i__2 = *n - i__ + 1; +/* Computing MIN */ + i__3 = i__ + 1; + dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[i__ + min(i__3,*n) * + a_dim1], lda, &taup[i__]); + d__[i__] = a[i__ + i__ * a_dim1]; + a[i__ + i__ * a_dim1] = 1.; + +/* Apply G(i) to A(i+1:m,i:n) from the right */ + + if (i__ < *m) { + i__2 = *m - i__; + i__3 = *n - i__ + 1; + dlarf_((char *)"Right", &i__2, &i__3, &a[i__ + i__ * a_dim1], lda, & + taup[i__], &a[i__ + 1 + i__ * a_dim1], lda, &work[1], + (ftnlen)5); + } + a[i__ + i__ * a_dim1] = d__[i__]; + + if (i__ < *m) { + +/* Generate elementary reflector H(i) to annihilate */ +/* A(i+2:m,i) */ + + i__2 = *m - i__; +/* Computing MIN */ + i__3 = i__ + 2; + dlarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3,*m) + + i__ * a_dim1], &c__1, &tauq[i__]); + e[i__] = a[i__ + 1 + i__ * a_dim1]; + a[i__ + 1 + i__ * a_dim1] = 1.; + +/* Apply H(i) to A(i+1:m,i+1:n) from the left */ + + i__2 = *m - i__; + i__3 = *n - i__; + dlarf_((char *)"Left", &i__2, &i__3, &a[i__ + 1 + i__ * a_dim1], & + c__1, &tauq[i__], &a[i__ + 1 + (i__ + 1) * a_dim1], + lda, &work[1], (ftnlen)4); + a[i__ + 1 + i__ * a_dim1] = e[i__]; + } else { + tauq[i__] = 0.; + } +/* L20: */ + } + } + return 0; + +/* End of DGEBD2 */ + +} /* dgebd2_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dgebrd.cpp b/lib/linalg/dgebrd.cpp new file mode 100644 index 0000000000..84827afcfd --- /dev/null +++ b/lib/linalg/dgebrd.cpp @@ -0,0 +1,431 @@ +/* fortran/dgebrd.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c_n1 = -1; +static integer c__3 = 3; +static integer c__2 = 2; +static doublereal c_b21 = -1.; +static doublereal c_b22 = 1.; + +/* > \brief \b DGEBRD */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DGEBRD + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DGEBRD( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, LWORK, */ +/* INFO ) */ + +/* .. Scalar Arguments .. */ +/* INTEGER INFO, LDA, LWORK, M, N */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION A( LDA, * ), D( * ), E( * ), TAUP( * ), */ +/* $ TAUQ( * ), WORK( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGEBRD reduces a general real M-by-N matrix A to upper or lower */ +/* > bidiagonal form B by an orthogonal transformation: Q**T * A * P = B. */ +/* > */ +/* > If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows in the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns in the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > On entry, the M-by-N general matrix to be reduced. */ +/* > On exit, */ +/* > if m >= n, the diagonal and the first superdiagonal are */ +/* > overwritten with the upper bidiagonal matrix B; the */ +/* > elements below the diagonal, with the array TAUQ, represent */ +/* > the orthogonal matrix Q as a product of elementary */ +/* > reflectors, and the elements above the first superdiagonal, */ +/* > with the array TAUP, represent the orthogonal matrix P as */ +/* > a product of elementary reflectors; */ +/* > if m < n, the diagonal and the first subdiagonal are */ +/* > overwritten with the lower bidiagonal matrix B; the */ +/* > elements below the first subdiagonal, with the array TAUQ, */ +/* > represent the orthogonal matrix Q as a product of */ +/* > elementary reflectors, and the elements above the diagonal, */ +/* > with the array TAUP, represent the orthogonal matrix P as */ +/* > a product of elementary reflectors. */ +/* > See Further Details. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= max(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] D */ +/* > \verbatim */ +/* > D is DOUBLE PRECISION array, dimension (min(M,N)) */ +/* > The diagonal elements of the bidiagonal matrix B: */ +/* > D(i) = A(i,i). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] E */ +/* > \verbatim */ +/* > E is DOUBLE PRECISION array, dimension (min(M,N)-1) */ +/* > The off-diagonal elements of the bidiagonal matrix B: */ +/* > if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1; */ +/* > if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] TAUQ */ +/* > \verbatim */ +/* > TAUQ is DOUBLE PRECISION array, dimension (min(M,N)) */ +/* > The scalar factors of the elementary reflectors which */ +/* > represent the orthogonal matrix Q. See Further Details. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] TAUP */ +/* > \verbatim */ +/* > TAUP is DOUBLE PRECISION array, dimension (min(M,N)) */ +/* > The scalar factors of the elementary reflectors which */ +/* > represent the orthogonal matrix P. See Further Details. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The length of the array WORK. LWORK >= max(1,M,N). */ +/* > For optimum performance LWORK >= (M+N)*NB, where NB */ +/* > is the optimal blocksize. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup doubleGEcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The matrices Q and P are represented as products of elementary */ +/* > reflectors: */ +/* > */ +/* > If m >= n, */ +/* > */ +/* > Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . G(n-1) */ +/* > */ +/* > Each H(i) and G(i) has the form: */ +/* > */ +/* > H(i) = I - tauq * v * v**T and G(i) = I - taup * u * u**T */ +/* > */ +/* > where tauq and taup are real scalars, and v and u are real vectors; */ +/* > v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in A(i+1:m,i); */ +/* > u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in A(i,i+2:n); */ +/* > tauq is stored in TAUQ(i) and taup in TAUP(i). */ +/* > */ +/* > If m < n, */ +/* > */ +/* > Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . G(m) */ +/* > */ +/* > Each H(i) and G(i) has the form: */ +/* > */ +/* > H(i) = I - tauq * v * v**T and G(i) = I - taup * u * u**T */ +/* > */ +/* > where tauq and taup are real scalars, and v and u are real vectors; */ +/* > v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i); */ +/* > u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n); */ +/* > tauq is stored in TAUQ(i) and taup in TAUP(i). */ +/* > */ +/* > The contents of A on exit are illustrated by the following examples: */ +/* > */ +/* > m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): */ +/* > */ +/* > ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 ) */ +/* > ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 ) */ +/* > ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 ) */ +/* > ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 ) */ +/* > ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 ) */ +/* > ( v1 v2 v3 v4 v5 ) */ +/* > */ +/* > where d and e denote diagonal and off-diagonal elements of B, vi */ +/* > denotes an element of the vector defining H(i), and ui an element of */ +/* > the vector defining G(i). */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int dgebrd_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *d__, doublereal *e, doublereal *tauq, doublereal * + taup, doublereal *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + integer i__, j, nb, nx, ws; + extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, ftnlen, ftnlen); + integer nbmin, iinfo, minmn; + extern /* Subroutine */ int dgebd2_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, integer *), dlabrd_(integer *, integer *, integer * + , doublereal *, integer *, doublereal *, doublereal *, doublereal + *, doublereal *, doublereal *, integer *, doublereal *, integer *) + , xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + integer ldwrkx, ldwrky, lwkopt; + logical lquery; + + +/* -- LAPACK computational routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --d__; + --e; + --tauq; + --taup; + --work; + + /* Function Body */ + *info = 0; +/* Computing MAX */ + i__1 = 1, i__2 = ilaenv_(&c__1, (char *)"DGEBRD", (char *)" ", m, n, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); + nb = max(i__1,i__2); + lwkopt = (*m + *n) * nb; + work[1] = (doublereal) lwkopt; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = max(1,*m); + if (*lwork < max(i__1,*n) && ! lquery) { + *info = -10; + } + } + if (*info < 0) { + i__1 = -(*info); + xerbla_((char *)"DGEBRD", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + minmn = min(*m,*n); + if (minmn == 0) { + work[1] = 1.; + return 0; + } + + ws = max(*m,*n); + ldwrkx = *m; + ldwrky = *n; + + if (nb > 1 && nb < minmn) { + +/* Set the crossover point NX. */ + +/* Computing MAX */ + i__1 = nb, i__2 = ilaenv_(&c__3, (char *)"DGEBRD", (char *)" ", m, n, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); + nx = max(i__1,i__2); + +/* Determine when to switch from blocked to unblocked code. */ + + if (nx < minmn) { + ws = (*m + *n) * nb; + if (*lwork < ws) { + +/* Not enough work space for the optimal NB, consider using */ +/* a smaller block size. */ + + nbmin = ilaenv_(&c__2, (char *)"DGEBRD", (char *)" ", m, n, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); + if (*lwork >= (*m + *n) * nbmin) { + nb = *lwork / (*m + *n); + } else { + nb = 1; + nx = minmn; + } + } + } + } else { + nx = minmn; + } + + i__1 = minmn - nx; + i__2 = nb; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { + +/* Reduce rows and columns i:i+nb-1 to bidiagonal form and return */ +/* the matrices X and Y which are needed to update the unreduced */ +/* part of the matrix */ + + i__3 = *m - i__ + 1; + i__4 = *n - i__ + 1; + dlabrd_(&i__3, &i__4, &nb, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[ + i__], &tauq[i__], &taup[i__], &work[1], &ldwrkx, &work[ldwrkx + * nb + 1], &ldwrky); + +/* Update the trailing submatrix A(i+nb:m,i+nb:n), using an update */ +/* of the form A := A - V*Y**T - X*U**T */ + + i__3 = *m - i__ - nb + 1; + i__4 = *n - i__ - nb + 1; + dgemm_((char *)"No transpose", (char *)"Transpose", &i__3, &i__4, &nb, &c_b21, &a[i__ + + nb + i__ * a_dim1], lda, &work[ldwrkx * nb + nb + 1], & + ldwrky, &c_b22, &a[i__ + nb + (i__ + nb) * a_dim1], lda, ( + ftnlen)12, (ftnlen)9); + i__3 = *m - i__ - nb + 1; + i__4 = *n - i__ - nb + 1; + dgemm_((char *)"No transpose", (char *)"No transpose", &i__3, &i__4, &nb, &c_b21, & + work[nb + 1], &ldwrkx, &a[i__ + (i__ + nb) * a_dim1], lda, & + c_b22, &a[i__ + nb + (i__ + nb) * a_dim1], lda, (ftnlen)12, ( + ftnlen)12); + +/* Copy diagonal and off-diagonal elements of B back into A */ + + if (*m >= *n) { + i__3 = i__ + nb - 1; + for (j = i__; j <= i__3; ++j) { + a[j + j * a_dim1] = d__[j]; + a[j + (j + 1) * a_dim1] = e[j]; +/* L10: */ + } + } else { + i__3 = i__ + nb - 1; + for (j = i__; j <= i__3; ++j) { + a[j + j * a_dim1] = d__[j]; + a[j + 1 + j * a_dim1] = e[j]; +/* L20: */ + } + } +/* L30: */ + } + +/* Use unblocked code to reduce the remainder of the matrix */ + + i__2 = *m - i__ + 1; + i__1 = *n - i__ + 1; + dgebd2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[i__], & + tauq[i__], &taup[i__], &work[1], &iinfo); + work[1] = (doublereal) ws; + return 0; + +/* End of DGEBRD */ + +} /* dgebrd_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dgecon.cpp b/lib/linalg/dgecon.cpp new file mode 100644 index 0000000000..07a305d74f --- /dev/null +++ b/lib/linalg/dgecon.cpp @@ -0,0 +1,313 @@ +/* fortran/dgecon.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +/* > \brief \b DGECON */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DGECON + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DGECON( NORM, N, A, LDA, ANORM, RCOND, WORK, IWORK, */ +/* INFO ) */ + +/* .. Scalar Arguments .. */ +/* CHARACTER NORM */ +/* INTEGER INFO, LDA, N */ +/* DOUBLE PRECISION ANORM, RCOND */ +/* .. */ +/* .. Array Arguments .. */ +/* INTEGER IWORK( * ) */ +/* DOUBLE PRECISION A( LDA, * ), WORK( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGECON estimates the reciprocal of the condition number of a general */ +/* > real matrix A, in either the 1-norm or the infinity-norm, using */ +/* > the LU factorization computed by DGETRF. */ +/* > */ +/* > An estimate is obtained for norm(inv(A)), and the reciprocal of the */ +/* > condition number is computed as */ +/* > RCOND = 1 / ( norm(A) * norm(inv(A)) ). */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] NORM */ +/* > \verbatim */ +/* > NORM is CHARACTER*1 */ +/* > Specifies whether the 1-norm condition number or the */ +/* > infinity-norm condition number is required: */ +/* > = '1' or 'O': 1-norm; */ +/* > = 'I': Infinity-norm. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > The factors L and U from the factorization A = P*L*U */ +/* > as computed by DGETRF. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= max(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] ANORM */ +/* > \verbatim */ +/* > ANORM is DOUBLE PRECISION */ +/* > If NORM = '1' or 'O', the 1-norm of the original matrix A. */ +/* > If NORM = 'I', the infinity-norm of the original matrix A. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RCOND */ +/* > \verbatim */ +/* > RCOND is DOUBLE PRECISION */ +/* > The reciprocal of the condition number of the matrix A, */ +/* > computed as RCOND = 1/(norm(A) * norm(inv(A))). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (4*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup doubleGEcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int dgecon_(char *norm, integer *n, doublereal *a, integer * + lda, doublereal *anorm, doublereal *rcond, doublereal *work, integer * + iwork, integer *info, ftnlen norm_len) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1; + doublereal d__1; + + /* Local variables */ + doublereal sl; + integer ix; + doublereal su; + integer kase, kase1; + doublereal scale; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + integer isave[3]; + extern /* Subroutine */ int drscl_(integer *, doublereal *, doublereal *, + integer *), dlacn2_(integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *); + extern doublereal dlamch_(char *, ftnlen); + extern integer idamax_(integer *, doublereal *, integer *); + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + doublereal ainvnm; + extern /* Subroutine */ int dlatrs_(char *, char *, char *, char *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + doublereal *, integer *, ftnlen, ftnlen, ftnlen, ftnlen); + logical onenrm; + char normin[1]; + doublereal smlnum; + + +/* -- LAPACK computational routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Local Arrays .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --work; + --iwork; + + /* Function Body */ + *info = 0; + onenrm = *(unsigned char *)norm == '1' || lsame_(norm, (char *)"O", (ftnlen)1, ( + ftnlen)1); + if (! onenrm && ! lsame_(norm, (char *)"I", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } else if (*anorm < 0.) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DGECON", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + *rcond = 0.; + if (*n == 0) { + *rcond = 1.; + return 0; + } else if (*anorm == 0.) { + return 0; + } + + smlnum = dlamch_((char *)"Safe minimum", (ftnlen)12); + +/* Estimate the norm of inv(A). */ + + ainvnm = 0.; + *(unsigned char *)normin = 'N'; + if (onenrm) { + kase1 = 1; + } else { + kase1 = 2; + } + kase = 0; +L10: + dlacn2_(n, &work[*n + 1], &work[1], &iwork[1], &ainvnm, &kase, isave); + if (kase != 0) { + if (kase == kase1) { + +/* Multiply by inv(L). */ + + dlatrs_((char *)"Lower", (char *)"No transpose", (char *)"Unit", normin, n, &a[a_offset], + lda, &work[1], &sl, &work[(*n << 1) + 1], info, (ftnlen)5, + (ftnlen)12, (ftnlen)4, (ftnlen)1); + +/* Multiply by inv(U). */ + + dlatrs_((char *)"Upper", (char *)"No transpose", (char *)"Non-unit", normin, n, &a[ + a_offset], lda, &work[1], &su, &work[*n * 3 + 1], info, ( + ftnlen)5, (ftnlen)12, (ftnlen)8, (ftnlen)1); + } else { + +/* Multiply by inv(U**T). */ + + dlatrs_((char *)"Upper", (char *)"Transpose", (char *)"Non-unit", normin, n, &a[a_offset], + lda, &work[1], &su, &work[*n * 3 + 1], info, (ftnlen)5, ( + ftnlen)9, (ftnlen)8, (ftnlen)1); + +/* Multiply by inv(L**T). */ + + dlatrs_((char *)"Lower", (char *)"Transpose", (char *)"Unit", normin, n, &a[a_offset], + lda, &work[1], &sl, &work[(*n << 1) + 1], info, (ftnlen)5, + (ftnlen)9, (ftnlen)4, (ftnlen)1); + } + +/* Divide X by 1/(SL*SU) if doing so will not cause overflow. */ + + scale = sl * su; + *(unsigned char *)normin = 'Y'; + if (scale != 1.) { + ix = idamax_(n, &work[1], &c__1); + if (scale < (d__1 = work[ix], abs(d__1)) * smlnum || scale == 0.) + { + goto L20; + } + drscl_(n, &scale, &work[1], &c__1); + } + goto L10; + } + +/* Compute the estimate of the reciprocal condition number. */ + + if (ainvnm != 0.) { + *rcond = 1. / ainvnm / *anorm; + } + +L20: + return 0; + +/* End of DGECON */ + +} /* dgecon_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dgelq2.cpp b/lib/linalg/dgelq2.cpp new file mode 100644 index 0000000000..6000253184 --- /dev/null +++ b/lib/linalg/dgelq2.cpp @@ -0,0 +1,245 @@ +/* fortran/dgelq2.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* > \brief \b DGELQ2 computes the LQ factorization of a general rectangular matrix using an unblocked algorit +hm. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DGELQ2 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DGELQ2( M, N, A, LDA, TAU, WORK, INFO ) */ + +/* .. Scalar Arguments .. */ +/* INTEGER INFO, LDA, M, N */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGELQ2 computes an LQ factorization of a real m-by-n matrix A: */ +/* > */ +/* > A = ( L 0 ) * Q */ +/* > */ +/* > where: */ +/* > */ +/* > Q is a n-by-n orthogonal matrix; */ +/* > L is a lower-triangular m-by-m matrix; */ +/* > 0 is a m-by-(n-m) zero matrix, if m < n. */ +/* > */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > On entry, the m by n matrix A. */ +/* > On exit, the elements on and below the diagonal of the array */ +/* > contain the m by min(m,n) lower trapezoidal matrix L (L is */ +/* > lower triangular if m <= n); the elements above the diagonal, */ +/* > with the array TAU, represent the orthogonal matrix Q as a */ +/* > product of elementary reflectors (see Further Details). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= max(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] TAU */ +/* > \verbatim */ +/* > TAU is DOUBLE PRECISION array, dimension (min(M,N)) */ +/* > The scalar factors of the elementary reflectors (see Further */ +/* > Details). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (M) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup doubleGEcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The matrix Q is represented as a product of elementary reflectors */ +/* > */ +/* > Q = H(k) . . . H(2) H(1), where k = min(m,n). */ +/* > */ +/* > Each H(i) has the form */ +/* > */ +/* > H(i) = I - tau * v * v**T */ +/* > */ +/* > where tau is a real scalar, and v is a real vector with */ +/* > v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n), */ +/* > and tau in TAU(i). */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int dgelq2_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *tau, doublereal *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + integer i__, k; + doublereal aii; + extern /* Subroutine */ int dlarf_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *, ftnlen), dlarfg_(integer *, doublereal *, + doublereal *, integer *, doublereal *), xerbla_(char *, integer *, + ftnlen); + + +/* -- LAPACK computational routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DGELQ2", &i__1, (ftnlen)6); + return 0; + } + + k = min(*m,*n); + + i__1 = k; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Generate elementary reflector H(i) to annihilate A(i,i+1:n) */ + + i__2 = *n - i__ + 1; +/* Computing MIN */ + i__3 = i__ + 1; + dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[i__ + min(i__3,*n) * a_dim1] + , lda, &tau[i__]); + if (i__ < *m) { + +/* Apply H(i) to A(i+1:m,i:n) from the right */ + + aii = a[i__ + i__ * a_dim1]; + a[i__ + i__ * a_dim1] = 1.; + i__2 = *m - i__; + i__3 = *n - i__ + 1; + dlarf_((char *)"Right", &i__2, &i__3, &a[i__ + i__ * a_dim1], lda, &tau[ + i__], &a[i__ + 1 + i__ * a_dim1], lda, &work[1], (ftnlen) + 5); + a[i__ + i__ * a_dim1] = aii; + } +/* L10: */ + } + return 0; + +/* End of DGELQ2 */ + +} /* dgelq2_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dgelqf.cpp b/lib/linalg/dgelqf.cpp new file mode 100644 index 0000000000..ede5b8198b --- /dev/null +++ b/lib/linalg/dgelqf.cpp @@ -0,0 +1,345 @@ +/* fortran/dgelqf.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c_n1 = -1; +static integer c__3 = 3; +static integer c__2 = 2; + +/* > \brief \b DGELQF */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DGELQF + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DGELQF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) */ + +/* .. Scalar Arguments .. */ +/* INTEGER INFO, LDA, LWORK, M, N */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGELQF computes an LQ factorization of a real M-by-N matrix A: */ +/* > */ +/* > A = ( L 0 ) * Q */ +/* > */ +/* > where: */ +/* > */ +/* > Q is a N-by-N orthogonal matrix; */ +/* > L is a lower-triangular M-by-M matrix; */ +/* > 0 is a M-by-(N-M) zero matrix, if M < N. */ +/* > */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > On entry, the M-by-N matrix A. */ +/* > On exit, the elements on and below the diagonal of the array */ +/* > contain the m-by-min(m,n) lower trapezoidal matrix L (L is */ +/* > lower triangular if m <= n); the elements above the diagonal, */ +/* > with the array TAU, represent the orthogonal matrix Q as a */ +/* > product of elementary reflectors (see Further Details). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= max(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] TAU */ +/* > \verbatim */ +/* > TAU is DOUBLE PRECISION array, dimension (min(M,N)) */ +/* > The scalar factors of the elementary reflectors (see Further */ +/* > Details). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. LWORK >= max(1,M). */ +/* > For optimum performance LWORK >= M*NB, where NB is the */ +/* > optimal blocksize. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup doubleGEcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The matrix Q is represented as a product of elementary reflectors */ +/* > */ +/* > Q = H(k) . . . H(2) H(1), where k = min(m,n). */ +/* > */ +/* > Each H(i) has the form */ +/* > */ +/* > H(i) = I - tau * v * v**T */ +/* > */ +/* > where tau is a real scalar, and v is a real vector with */ +/* > v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n), */ +/* > and tau in TAU(i). */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int dgelqf_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *tau, doublereal *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + integer i__, k, ib, nb, nx, iws, nbmin, iinfo; + extern /* Subroutine */ int dgelq2_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *), dlarfb_(char *, + char *, char *, char *, integer *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *, doublereal *, integer *, ftnlen, ftnlen, ftnlen, + ftnlen), dlarft_(char *, char *, integer *, integer *, doublereal + *, integer *, doublereal *, doublereal *, integer *, ftnlen, + ftnlen), xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + integer ldwork, lwkopt; + logical lquery; + + +/* -- LAPACK computational routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + nb = ilaenv_(&c__1, (char *)"DGELQF", (char *)" ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen) + 1); + lwkopt = *m * nb; + work[1] = (doublereal) lwkopt; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } else if (*lwork < max(1,*m) && ! lquery) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DGELQF", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + k = min(*m,*n); + if (k == 0) { + work[1] = 1.; + return 0; + } + + nbmin = 2; + nx = 0; + iws = *m; + if (nb > 1 && nb < k) { + +/* Determine when to cross over from blocked to unblocked code. */ + +/* Computing MAX */ + i__1 = 0, i__2 = ilaenv_(&c__3, (char *)"DGELQF", (char *)" ", m, n, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); + nx = max(i__1,i__2); + if (nx < k) { + +/* Determine if workspace is large enough for blocked code. */ + + ldwork = *m; + iws = ldwork * nb; + if (*lwork < iws) { + +/* Not enough workspace to use optimal NB: reduce NB and */ +/* determine the minimum value of NB. */ + + nb = *lwork / ldwork; +/* Computing MAX */ + i__1 = 2, i__2 = ilaenv_(&c__2, (char *)"DGELQF", (char *)" ", m, n, &c_n1, & + c_n1, (ftnlen)6, (ftnlen)1); + nbmin = max(i__1,i__2); + } + } + } + + if (nb >= nbmin && nb < k && nx < k) { + +/* Use blocked code initially */ + + i__1 = k - nx; + i__2 = nb; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__3 = k - i__ + 1; + ib = min(i__3,nb); + +/* Compute the LQ factorization of the current block */ +/* A(i:i+ib-1,i:n) */ + + i__3 = *n - i__ + 1; + dgelq2_(&ib, &i__3, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[ + 1], &iinfo); + if (i__ + ib <= *m) { + +/* Form the triangular factor of the block reflector */ +/* H = H(i) H(i+1) . . . H(i+ib-1) */ + + i__3 = *n - i__ + 1; + dlarft_((char *)"Forward", (char *)"Rowwise", &i__3, &ib, &a[i__ + i__ * + a_dim1], lda, &tau[i__], &work[1], &ldwork, (ftnlen)7, + (ftnlen)7); + +/* Apply H to A(i+ib:m,i:n) from the right */ + + i__3 = *m - i__ - ib + 1; + i__4 = *n - i__ + 1; + dlarfb_((char *)"Right", (char *)"No transpose", (char *)"Forward", (char *)"Rowwise", &i__3, + &i__4, &ib, &a[i__ + i__ * a_dim1], lda, &work[1], & + ldwork, &a[i__ + ib + i__ * a_dim1], lda, &work[ib + + 1], &ldwork, (ftnlen)5, (ftnlen)12, (ftnlen)7, ( + ftnlen)7); + } +/* L10: */ + } + } else { + i__ = 1; + } + +/* Use unblocked code to factor the last or only block. */ + + if (i__ <= k) { + i__2 = *m - i__ + 1; + i__1 = *n - i__ + 1; + dgelq2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1] + , &iinfo); + } + + work[1] = (doublereal) iws; + return 0; + +/* End of DGELQF */ + +} /* dgelqf_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dgelsd.cpp b/lib/linalg/dgelsd.cpp new file mode 100644 index 0000000000..7a74d30ef2 --- /dev/null +++ b/lib/linalg/dgelsd.cpp @@ -0,0 +1,816 @@ +/* fortran/dgelsd.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static integer c__6 = 6; +static integer c_n1 = -1; +static integer c__9 = 9; +static integer c__0 = 0; +static integer c__1 = 1; +static doublereal c_b82 = 0.; + +/* > \brief DGELSD computes the minimum-norm solution to a linear least squares problem for GE matrices */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DGELSD + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DGELSD( M, N, NRHS, A, LDA, B, LDB, S, RCOND, RANK, */ +/* WORK, LWORK, IWORK, INFO ) */ + +/* .. Scalar Arguments .. */ +/* INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS, RANK */ +/* DOUBLE PRECISION RCOND */ +/* .. */ +/* .. Array Arguments .. */ +/* INTEGER IWORK( * ) */ +/* DOUBLE PRECISION A( LDA, * ), B( LDB, * ), S( * ), WORK( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGELSD computes the minimum-norm solution to a real linear least */ +/* > squares problem: */ +/* > minimize 2-norm(| b - A*x |) */ +/* > using the singular value decomposition (SVD) of A. A is an M-by-N */ +/* > matrix which may be rank-deficient. */ +/* > */ +/* > Several right hand side vectors b and solution vectors x can be */ +/* > handled in a single call; they are stored as the columns of the */ +/* > M-by-NRHS right hand side matrix B and the N-by-NRHS solution */ +/* > matrix X. */ +/* > */ +/* > The problem is solved in three steps: */ +/* > (1) Reduce the coefficient matrix A to bidiagonal form with */ +/* > Householder transformations, reducing the original problem */ +/* > into a (char *)"bidiagonal least squares problem" (BLS) */ +/* > (2) Solve the BLS using a divide and conquer approach. */ +/* > (3) Apply back all the Householder transformations to solve */ +/* > the original least squares problem. */ +/* > */ +/* > The effective rank of A is determined by treating as zero those */ +/* > singular values which are less than RCOND times the largest singular */ +/* > value. */ +/* > */ +/* > The divide and conquer algorithm makes very mild assumptions about */ +/* > floating point arithmetic. It will work on machines with a guard */ +/* > digit in add/subtract, or on those binary machines without guard */ +/* > digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or */ +/* > Cray-2. It could conceivably fail on hexadecimal or decimal machines */ +/* > without guard digits, but we know of none. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of right hand sides, i.e., the number of columns */ +/* > of the matrices B and X. NRHS >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > On entry, the M-by-N matrix A. */ +/* > On exit, A has been destroyed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= max(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* > On entry, the M-by-NRHS right hand side matrix B. */ +/* > On exit, B is overwritten by the N-by-NRHS solution */ +/* > matrix X. If m >= n and RANK = n, the residual */ +/* > sum-of-squares for the solution in the i-th column is given */ +/* > by the sum of squares of elements n+1:m in that column. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= max(1,max(M,N)). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] S */ +/* > \verbatim */ +/* > S is DOUBLE PRECISION array, dimension (min(M,N)) */ +/* > The singular values of A in decreasing order. */ +/* > The condition number of A in the 2-norm = S(1)/S(min(m,n)). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] RCOND */ +/* > \verbatim */ +/* > RCOND is DOUBLE PRECISION */ +/* > RCOND is used to determine the effective rank of A. */ +/* > Singular values S(i) <= RCOND*S(1) are treated as zero. */ +/* > If RCOND < 0, machine precision is used instead. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RANK */ +/* > \verbatim */ +/* > RANK is INTEGER */ +/* > The effective rank of A, i.e., the number of singular values */ +/* > which are greater than RCOND*S(1). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. LWORK must be at least 1. */ +/* > The exact minimum amount of workspace needed depends on M, */ +/* > N and NRHS. As long as LWORK is at least */ +/* > 12*N + 2*N*SMLSIZ + 8*N*NLVL + N*NRHS + (SMLSIZ+1)**2, */ +/* > if M is greater than or equal to N or */ +/* > 12*M + 2*M*SMLSIZ + 8*M*NLVL + M*NRHS + (SMLSIZ+1)**2, */ +/* > if M is less than N, the code will execute correctly. */ +/* > SMLSIZ is returned by ILAENV and is equal to the maximum */ +/* > size of the subproblems at the bottom of the computation */ +/* > tree (usually about 25), and */ +/* > NLVL = MAX( 0, INT( LOG_2( MIN( M,N )/(SMLSIZ+1) ) ) + 1 ) */ +/* > For good performance, LWORK should generally be larger. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (MAX(1,LIWORK)) */ +/* > LIWORK >= max(1, 3 * MINMN * NLVL + 11 * MINMN), */ +/* > where MINMN = MIN( M,N ). */ +/* > On exit, if INFO = 0, IWORK(1) returns the minimum LIWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > > 0: the algorithm for computing the SVD failed to converge; */ +/* > if INFO = i, i off-diagonal elements of an intermediate */ +/* > bidiagonal form did not converge to zero. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup doubleGEsolve */ + +/* > \par Contributors: */ +/* ================== */ +/* > */ +/* > Ming Gu and Ren-Cang Li, Computer Science Division, University of */ +/* > California at Berkeley, USA \n */ +/* > Osni Marques, LBNL/NERSC, USA \n */ + +/* ===================================================================== */ +/* Subroutine */ int dgelsd_(integer *m, integer *n, integer *nrhs, + doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal * + s, doublereal *rcond, integer *rank, doublereal *work, integer *lwork, + integer *iwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4; + + /* Builtin functions */ + double log(doublereal); + + /* Local variables */ + integer ie, il, mm; + doublereal eps, anrm, bnrm; + integer itau, nlvl, iascl, ibscl; + doublereal sfmin; + integer minmn, maxmn, itaup, itauq, mnthr, nwork; + extern /* Subroutine */ int dlabad_(doublereal *, doublereal *), dgebrd_( + integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, integer *, + integer *); + extern doublereal dlamch_(char *, ftnlen), dlange_(char *, integer *, + integer *, doublereal *, integer *, doublereal *, ftnlen); + extern /* Subroutine */ int dgelqf_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, integer *), + dlalsd_(char *, integer *, integer *, integer *, doublereal *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, integer *, integer *, ftnlen), dlascl_(char *, + integer *, integer *, doublereal *, doublereal *, integer *, + integer *, doublereal *, integer *, integer *, ftnlen), dgeqrf_( + integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, integer *), dlacpy_(char *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *, + ftnlen), dlaset_(char *, integer *, integer *, doublereal *, + doublereal *, doublereal *, integer *, ftnlen), xerbla_(char *, + integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + doublereal bignum; + extern /* Subroutine */ int dormbr_(char *, char *, char *, integer *, + integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, integer *, + ftnlen, ftnlen, ftnlen); + integer wlalsd; + extern /* Subroutine */ int dormlq_(char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *, ftnlen, ftnlen); + integer ldwork; + extern /* Subroutine */ int dormqr_(char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *, ftnlen, ftnlen); + integer liwork, minwrk, maxwrk; + doublereal smlnum; + logical lquery; + integer smlsiz; + + +/* -- LAPACK driver routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input arguments. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + --s; + --work; + --iwork; + + /* Function Body */ + *info = 0; + minmn = min(*m,*n); + maxmn = max(*m,*n); + mnthr = ilaenv_(&c__6, (char *)"DGELSD", (char *)" ", m, n, nrhs, &c_n1, (ftnlen)6, ( + ftnlen)1); + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*lda < max(1,*m)) { + *info = -5; + } else if (*ldb < max(1,maxmn)) { + *info = -7; + } + + smlsiz = ilaenv_(&c__9, (char *)"DGELSD", (char *)" ", &c__0, &c__0, &c__0, &c__0, ( + ftnlen)6, (ftnlen)1); + +/* Compute workspace. */ +/* (Note: Comments in the code beginning (char *)"Workspace:" describe the */ +/* minimal amount of workspace needed at that point in the code, */ +/* as well as the preferred amount for good performance. */ +/* NB refers to the optimal block size for the immediately */ +/* following subroutine, as returned by ILAENV.) */ + + minwrk = 1; + liwork = 1; + minmn = max(1,minmn); +/* Computing MAX */ + i__1 = (integer) (log((doublereal) minmn / (doublereal) (smlsiz + 1)) / + log(2.)) + 1; + nlvl = max(i__1,0); + + if (*info == 0) { + maxwrk = 0; + liwork = minmn * 3 * nlvl + minmn * 11; + mm = *m; + if (*m >= *n && *m >= mnthr) { + +/* Path 1a - overdetermined, with many more rows than columns. */ + + mm = *n; +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n + *n * ilaenv_(&c__1, (char *)"DGEQRF", (char *)" ", m, + n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n + *nrhs * ilaenv_(&c__1, (char *)"DORMQR", (char *)"LT", + m, nrhs, n, &c_n1, (ftnlen)6, (ftnlen)2); + maxwrk = max(i__1,i__2); + } + if (*m >= *n) { + +/* Path 1 - overdetermined or exactly determined. */ + +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n * 3 + (mm + *n) * ilaenv_(&c__1, (char *)"DGEBRD" + , (char *)" ", &mm, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n * 3 + *nrhs * ilaenv_(&c__1, (char *)"DORMBR", + (char *)"QLT", &mm, nrhs, n, &c_n1, (ftnlen)6, (ftnlen)3); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n * 3 + (*n - 1) * ilaenv_(&c__1, (char *)"DORMBR", + (char *)"PLN", n, nrhs, n, &c_n1, (ftnlen)6, (ftnlen)3); + maxwrk = max(i__1,i__2); +/* Computing 2nd power */ + i__1 = smlsiz + 1; + wlalsd = *n * 9 + (*n << 1) * smlsiz + (*n << 3) * nlvl + *n * * + nrhs + i__1 * i__1; +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n * 3 + wlalsd; + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = *n * 3 + mm, i__2 = *n * 3 + *nrhs, i__1 = max(i__1,i__2), + i__2 = *n * 3 + wlalsd; + minwrk = max(i__1,i__2); + } + if (*n > *m) { +/* Computing 2nd power */ + i__1 = smlsiz + 1; + wlalsd = *m * 9 + (*m << 1) * smlsiz + (*m << 3) * nlvl + *m * * + nrhs + i__1 * i__1; + if (*n >= mnthr) { + +/* Path 2a - underdetermined, with many more columns */ +/* than rows. */ + + maxwrk = *m + *m * ilaenv_(&c__1, (char *)"DGELQF", (char *)" ", m, n, &c_n1, + &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + (*m << 1) * + ilaenv_(&c__1, (char *)"DGEBRD", (char *)" ", m, m, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + *nrhs * ilaenv_(& + c__1, (char *)"DORMBR", (char *)"QLT", m, nrhs, m, &c_n1, (ftnlen)6, ( + ftnlen)3); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + (*m - 1) * + ilaenv_(&c__1, (char *)"DORMBR", (char *)"PLN", m, nrhs, m, &c_n1, ( + ftnlen)6, (ftnlen)3); + maxwrk = max(i__1,i__2); + if (*nrhs > 1) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * *m + *m + *m * *nrhs; + maxwrk = max(i__1,i__2); + } else { +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * *m + (*m << 1); + maxwrk = max(i__1,i__2); + } +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m + *nrhs * ilaenv_(&c__1, (char *)"DORMLQ", + (char *)"LT", n, nrhs, m, &c_n1, (ftnlen)6, (ftnlen)2); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + wlalsd; + maxwrk = max(i__1,i__2); +/* XXX: Ensure the Path 2a case below is triggered. The workspace */ +/* calculation should use queries for all routines eventually. */ +/* Computing MAX */ +/* Computing MAX */ + i__3 = *m, i__4 = (*m << 1) - 4, i__3 = max(i__3,i__4), i__3 = + max(i__3,*nrhs), i__4 = *n - *m * 3; + i__1 = maxwrk, i__2 = (*m << 2) + *m * *m + max(i__3,i__4); + maxwrk = max(i__1,i__2); + } else { + +/* Path 2 - remaining underdetermined cases. */ + + maxwrk = *m * 3 + (*n + *m) * ilaenv_(&c__1, (char *)"DGEBRD", (char *)" ", m, + n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * 3 + *nrhs * ilaenv_(&c__1, (char *)"DORMBR" + , (char *)"QLT", m, nrhs, n, &c_n1, (ftnlen)6, (ftnlen)3); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * 3 + *m * ilaenv_(&c__1, (char *)"DORMBR", + (char *)"PLN", n, nrhs, m, &c_n1, (ftnlen)6, (ftnlen)3); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * 3 + wlalsd; + maxwrk = max(i__1,i__2); + } +/* Computing MAX */ + i__1 = *m * 3 + *nrhs, i__2 = *m * 3 + *m, i__1 = max(i__1,i__2), + i__2 = *m * 3 + wlalsd; + minwrk = max(i__1,i__2); + } + minwrk = min(minwrk,maxwrk); + work[1] = (doublereal) maxwrk; + iwork[1] = liwork; + if (*lwork < minwrk && ! lquery) { + *info = -12; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DGELSD", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + goto L10; + } + +/* Quick return if possible. */ + + if (*m == 0 || *n == 0) { + *rank = 0; + return 0; + } + +/* Get machine parameters. */ + + eps = dlamch_((char *)"P", (ftnlen)1); + sfmin = dlamch_((char *)"S", (ftnlen)1); + smlnum = sfmin / eps; + bignum = 1. / smlnum; + dlabad_(&smlnum, &bignum); + +/* Scale A if max entry outside range [SMLNUM,BIGNUM]. */ + + anrm = dlange_((char *)"M", m, n, &a[a_offset], lda, &work[1], (ftnlen)1); + iascl = 0; + if (anrm > 0. && anrm < smlnum) { + +/* Scale matrix norm up to SMLNUM. */ + + dlascl_((char *)"G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, + info, (ftnlen)1); + iascl = 1; + } else if (anrm > bignum) { + +/* Scale matrix norm down to BIGNUM. */ + + dlascl_((char *)"G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, + info, (ftnlen)1); + iascl = 2; + } else if (anrm == 0.) { + +/* Matrix all zero. Return zero solution. */ + + i__1 = max(*m,*n); + dlaset_((char *)"F", &i__1, nrhs, &c_b82, &c_b82, &b[b_offset], ldb, (ftnlen) + 1); + dlaset_((char *)"F", &minmn, &c__1, &c_b82, &c_b82, &s[1], &c__1, (ftnlen)1); + *rank = 0; + goto L10; + } + +/* Scale B if max entry outside range [SMLNUM,BIGNUM]. */ + + bnrm = dlange_((char *)"M", m, nrhs, &b[b_offset], ldb, &work[1], (ftnlen)1); + ibscl = 0; + if (bnrm > 0. && bnrm < smlnum) { + +/* Scale matrix norm up to SMLNUM. */ + + dlascl_((char *)"G", &c__0, &c__0, &bnrm, &smlnum, m, nrhs, &b[b_offset], ldb, + info, (ftnlen)1); + ibscl = 1; + } else if (bnrm > bignum) { + +/* Scale matrix norm down to BIGNUM. */ + + dlascl_((char *)"G", &c__0, &c__0, &bnrm, &bignum, m, nrhs, &b[b_offset], ldb, + info, (ftnlen)1); + ibscl = 2; + } + +/* If M < N make sure certain entries of B are zero. */ + + if (*m < *n) { + i__1 = *n - *m; + dlaset_((char *)"F", &i__1, nrhs, &c_b82, &c_b82, &b[*m + 1 + b_dim1], ldb, ( + ftnlen)1); + } + +/* Overdetermined case. */ + + if (*m >= *n) { + +/* Path 1 - overdetermined or exactly determined. */ + + mm = *m; + if (*m >= mnthr) { + +/* Path 1a - overdetermined, with many more rows than columns. */ + + mm = *n; + itau = 1; + nwork = itau + *n; + +/* Compute A=Q*R. */ +/* (Workspace: need 2*N, prefer N+N*NB) */ + + i__1 = *lwork - nwork + 1; + dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__1, + info); + +/* Multiply B by transpose(Q). */ +/* (Workspace: need N+NRHS, prefer N+NRHS*NB) */ + + i__1 = *lwork - nwork + 1; + dormqr_((char *)"L", (char *)"T", m, nrhs, n, &a[a_offset], lda, &work[itau], &b[ + b_offset], ldb, &work[nwork], &i__1, info, (ftnlen)1, ( + ftnlen)1); + +/* Zero out below R. */ + + if (*n > 1) { + i__1 = *n - 1; + i__2 = *n - 1; + dlaset_((char *)"L", &i__1, &i__2, &c_b82, &c_b82, &a[a_dim1 + 2], + lda, (ftnlen)1); + } + } + + ie = 1; + itauq = ie + *n; + itaup = itauq + *n; + nwork = itaup + *n; + +/* Bidiagonalize R in A. */ +/* (Workspace: need 3*N+MM, prefer 3*N+(MM+N)*NB) */ + + i__1 = *lwork - nwork + 1; + dgebrd_(&mm, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], & + work[itaup], &work[nwork], &i__1, info); + +/* Multiply B by transpose of left bidiagonalizing vectors of R. */ +/* (Workspace: need 3*N+NRHS, prefer 3*N+NRHS*NB) */ + + i__1 = *lwork - nwork + 1; + dormbr_((char *)"Q", (char *)"L", (char *)"T", &mm, nrhs, n, &a[a_offset], lda, &work[itauq], + &b[b_offset], ldb, &work[nwork], &i__1, info, (ftnlen)1, ( + ftnlen)1, (ftnlen)1); + +/* Solve the bidiagonal least squares problem. */ + + dlalsd_((char *)"U", &smlsiz, n, nrhs, &s[1], &work[ie], &b[b_offset], ldb, + rcond, rank, &work[nwork], &iwork[1], info, (ftnlen)1); + if (*info != 0) { + goto L10; + } + +/* Multiply B by right bidiagonalizing vectors of R. */ + + i__1 = *lwork - nwork + 1; + dormbr_((char *)"P", (char *)"L", (char *)"N", n, nrhs, n, &a[a_offset], lda, &work[itaup], & + b[b_offset], ldb, &work[nwork], &i__1, info, (ftnlen)1, ( + ftnlen)1, (ftnlen)1); + + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = *m, i__2 = (*m << 1) - 4, i__1 = max(i__1,i__2), i__1 = max( + i__1,*nrhs), i__2 = *n - *m * 3, i__1 = max(i__1,i__2); + if (*n >= mnthr && *lwork >= (*m << 2) + *m * *m + max(i__1,wlalsd)) { + +/* Path 2a - underdetermined, with many more columns than rows */ +/* and sufficient workspace for an efficient algorithm. */ + + ldwork = *m; +/* Computing MAX */ +/* Computing MAX */ + i__3 = *m, i__4 = (*m << 1) - 4, i__3 = max(i__3,i__4), i__3 = + max(i__3,*nrhs), i__4 = *n - *m * 3; + i__1 = (*m << 2) + *m * *lda + max(i__3,i__4), i__2 = *m * *lda + + *m + *m * *nrhs, i__1 = max(i__1,i__2), i__2 = (*m << 2) + + *m * *lda + wlalsd; + if (*lwork >= max(i__1,i__2)) { + ldwork = *lda; + } + itau = 1; + nwork = *m + 1; + +/* Compute A=L*Q. */ +/* (Workspace: need 2*M, prefer M+M*NB) */ + + i__1 = *lwork - nwork + 1; + dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__1, + info); + il = nwork; + +/* Copy L to WORK(IL), zeroing out above its diagonal. */ + + dlacpy_((char *)"L", m, m, &a[a_offset], lda, &work[il], &ldwork, (ftnlen) + 1); + i__1 = *m - 1; + i__2 = *m - 1; + dlaset_((char *)"U", &i__1, &i__2, &c_b82, &c_b82, &work[il + ldwork], & + ldwork, (ftnlen)1); + ie = il + ldwork * *m; + itauq = ie + *m; + itaup = itauq + *m; + nwork = itaup + *m; + +/* Bidiagonalize L in WORK(IL). */ +/* (Workspace: need M*M+5*M, prefer M*M+4*M+2*M*NB) */ + + i__1 = *lwork - nwork + 1; + dgebrd_(m, m, &work[il], &ldwork, &s[1], &work[ie], &work[itauq], + &work[itaup], &work[nwork], &i__1, info); + +/* Multiply B by transpose of left bidiagonalizing vectors of L. */ +/* (Workspace: need M*M+4*M+NRHS, prefer M*M+4*M+NRHS*NB) */ + + i__1 = *lwork - nwork + 1; + dormbr_((char *)"Q", (char *)"L", (char *)"T", m, nrhs, m, &work[il], &ldwork, &work[ + itauq], &b[b_offset], ldb, &work[nwork], &i__1, info, ( + ftnlen)1, (ftnlen)1, (ftnlen)1); + +/* Solve the bidiagonal least squares problem. */ + + dlalsd_((char *)"U", &smlsiz, m, nrhs, &s[1], &work[ie], &b[b_offset], + ldb, rcond, rank, &work[nwork], &iwork[1], info, (ftnlen) + 1); + if (*info != 0) { + goto L10; + } + +/* Multiply B by right bidiagonalizing vectors of L. */ + + i__1 = *lwork - nwork + 1; + dormbr_((char *)"P", (char *)"L", (char *)"N", m, nrhs, m, &work[il], &ldwork, &work[ + itaup], &b[b_offset], ldb, &work[nwork], &i__1, info, ( + ftnlen)1, (ftnlen)1, (ftnlen)1); + +/* Zero out below first M rows of B. */ + + i__1 = *n - *m; + dlaset_((char *)"F", &i__1, nrhs, &c_b82, &c_b82, &b[*m + 1 + b_dim1], + ldb, (ftnlen)1); + nwork = itau + *m; + +/* Multiply transpose(Q) by B. */ +/* (Workspace: need M+NRHS, prefer M+NRHS*NB) */ + + i__1 = *lwork - nwork + 1; + dormlq_((char *)"L", (char *)"T", n, nrhs, m, &a[a_offset], lda, &work[itau], &b[ + b_offset], ldb, &work[nwork], &i__1, info, (ftnlen)1, ( + ftnlen)1); + + } else { + +/* Path 2 - remaining underdetermined cases. */ + + ie = 1; + itauq = ie + *m; + itaup = itauq + *m; + nwork = itaup + *m; + +/* Bidiagonalize A. */ +/* (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB) */ + + i__1 = *lwork - nwork + 1; + dgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], & + work[itaup], &work[nwork], &i__1, info); + +/* Multiply B by transpose of left bidiagonalizing vectors. */ +/* (Workspace: need 3*M+NRHS, prefer 3*M+NRHS*NB) */ + + i__1 = *lwork - nwork + 1; + dormbr_((char *)"Q", (char *)"L", (char *)"T", m, nrhs, n, &a[a_offset], lda, &work[itauq] + , &b[b_offset], ldb, &work[nwork], &i__1, info, (ftnlen)1, + (ftnlen)1, (ftnlen)1); + +/* Solve the bidiagonal least squares problem. */ + + dlalsd_((char *)"L", &smlsiz, m, nrhs, &s[1], &work[ie], &b[b_offset], + ldb, rcond, rank, &work[nwork], &iwork[1], info, (ftnlen) + 1); + if (*info != 0) { + goto L10; + } + +/* Multiply B by right bidiagonalizing vectors of A. */ + + i__1 = *lwork - nwork + 1; + dormbr_((char *)"P", (char *)"L", (char *)"N", n, nrhs, m, &a[a_offset], lda, &work[itaup] + , &b[b_offset], ldb, &work[nwork], &i__1, info, (ftnlen)1, + (ftnlen)1, (ftnlen)1); + + } + } + +/* Undo scaling. */ + + if (iascl == 1) { + dlascl_((char *)"G", &c__0, &c__0, &anrm, &smlnum, n, nrhs, &b[b_offset], ldb, + info, (ftnlen)1); + dlascl_((char *)"G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], & + minmn, info, (ftnlen)1); + } else if (iascl == 2) { + dlascl_((char *)"G", &c__0, &c__0, &anrm, &bignum, n, nrhs, &b[b_offset], ldb, + info, (ftnlen)1); + dlascl_((char *)"G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], & + minmn, info, (ftnlen)1); + } + if (ibscl == 1) { + dlascl_((char *)"G", &c__0, &c__0, &smlnum, &bnrm, n, nrhs, &b[b_offset], ldb, + info, (ftnlen)1); + } else if (ibscl == 2) { + dlascl_((char *)"G", &c__0, &c__0, &bignum, &bnrm, n, nrhs, &b[b_offset], ldb, + info, (ftnlen)1); + } + +L10: + work[1] = (doublereal) maxwrk; + iwork[1] = liwork; + return 0; + +/* End of DGELSD */ + +} /* dgelsd_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dgelss.cpp b/lib/linalg/dgelss.cpp new file mode 100644 index 0000000000..6377208923 --- /dev/null +++ b/lib/linalg/dgelss.cpp @@ -0,0 +1,988 @@ +/* fortran/dgelss.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static integer c__6 = 6; +static integer c_n1 = -1; +static integer c__0 = 0; +static doublereal c_b46 = 0.; +static integer c__1 = 1; +static doublereal c_b79 = 1.; + +/* > \brief DGELSS solves overdetermined or underdetermined systems for GE matrices */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DGELSS + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DGELSS( M, N, NRHS, A, LDA, B, LDB, S, RCOND, RANK, */ +/* WORK, LWORK, INFO ) */ + +/* .. Scalar Arguments .. */ +/* INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS, RANK */ +/* DOUBLE PRECISION RCOND */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION A( LDA, * ), B( LDB, * ), S( * ), WORK( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGELSS computes the minimum norm solution to a real linear least */ +/* > squares problem: */ +/* > */ +/* > Minimize 2-norm(| b - A*x |). */ +/* > */ +/* > using the singular value decomposition (SVD) of A. A is an M-by-N */ +/* > matrix which may be rank-deficient. */ +/* > */ +/* > Several right hand side vectors b and solution vectors x can be */ +/* > handled in a single call; they are stored as the columns of the */ +/* > M-by-NRHS right hand side matrix B and the N-by-NRHS solution matrix */ +/* > X. */ +/* > */ +/* > The effective rank of A is determined by treating as zero those */ +/* > singular values which are less than RCOND times the largest singular */ +/* > value. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of right hand sides, i.e., the number of columns */ +/* > of the matrices B and X. NRHS >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > On entry, the M-by-N matrix A. */ +/* > On exit, the first min(m,n) rows of A are overwritten with */ +/* > its right singular vectors, stored rowwise. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= max(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* > On entry, the M-by-NRHS right hand side matrix B. */ +/* > On exit, B is overwritten by the N-by-NRHS solution */ +/* > matrix X. If m >= n and RANK = n, the residual */ +/* > sum-of-squares for the solution in the i-th column is given */ +/* > by the sum of squares of elements n+1:m in that column. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= max(1,max(M,N)). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] S */ +/* > \verbatim */ +/* > S is DOUBLE PRECISION array, dimension (min(M,N)) */ +/* > The singular values of A in decreasing order. */ +/* > The condition number of A in the 2-norm = S(1)/S(min(m,n)). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] RCOND */ +/* > \verbatim */ +/* > RCOND is DOUBLE PRECISION */ +/* > RCOND is used to determine the effective rank of A. */ +/* > Singular values S(i) <= RCOND*S(1) are treated as zero. */ +/* > If RCOND < 0, machine precision is used instead. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RANK */ +/* > \verbatim */ +/* > RANK is INTEGER */ +/* > The effective rank of A, i.e., the number of singular values */ +/* > which are greater than RCOND*S(1). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. LWORK >= 1, and also: */ +/* > LWORK >= 3*min(M,N) + max( 2*min(M,N), max(M,N), NRHS ) */ +/* > For good performance, LWORK should generally be larger. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > > 0: the algorithm for computing the SVD failed to converge; */ +/* > if INFO = i, i off-diagonal elements of an intermediate */ +/* > bidiagonal form did not converge to zero. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup doubleGEsolve */ + +/* ===================================================================== */ +/* Subroutine */ int dgelss_(integer *m, integer *n, integer *nrhs, + doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal * + s, doublereal *rcond, integer *rank, doublereal *work, integer *lwork, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4; + doublereal d__1; + + /* Local variables */ + integer i__, bl, ie, il, mm; + doublereal dum[1], eps, thr, anrm, bnrm; + integer itau, lwork_dgebrd__, lwork_dgelqf__, lwork_dgeqrf__, + lwork_dorgbr__, lwork_dormbr__, lwork_dormlq__, lwork_dormqr__; + extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, ftnlen, ftnlen); + integer iascl, ibscl; + extern /* Subroutine */ int dgemv_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *, ftnlen), drscl_(integer *, + doublereal *, doublereal *, integer *); + integer chunk; + doublereal sfmin; + integer minmn; + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *); + integer maxmn, itaup, itauq, mnthr, iwork; + extern /* Subroutine */ int dlabad_(doublereal *, doublereal *), dgebrd_( + integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, integer *, + integer *); + extern doublereal dlamch_(char *, ftnlen), dlange_(char *, integer *, + integer *, doublereal *, integer *, doublereal *, ftnlen); + integer bdspac; + extern /* Subroutine */ int dgelqf_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, integer *), + dlascl_(char *, integer *, integer *, doublereal *, doublereal *, + integer *, integer *, doublereal *, integer *, integer *, ftnlen), + dgeqrf_(integer *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *, integer *), dlacpy_(char *, + integer *, integer *, doublereal *, integer *, doublereal *, + integer *, ftnlen), dlaset_(char *, integer *, integer *, + doublereal *, doublereal *, doublereal *, integer *, ftnlen), + xerbla_(char *, integer *, ftnlen), dbdsqr_(char *, integer *, + integer *, integer *, integer *, doublereal *, doublereal *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *, doublereal *, integer *, ftnlen), dorgbr_(char *, + integer *, integer *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *, integer *, ftnlen); + doublereal bignum; + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int dormbr_(char *, char *, char *, integer *, + integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, integer *, + ftnlen, ftnlen, ftnlen), dormlq_(char *, char *, integer *, + integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, integer *, + ftnlen, ftnlen); + integer ldwork; + extern /* Subroutine */ int dormqr_(char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *, ftnlen, ftnlen); + integer minwrk, maxwrk; + doublereal smlnum; + logical lquery; + + +/* -- LAPACK driver routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Local Arrays .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + --s; + --work; + + /* Function Body */ + *info = 0; + minmn = min(*m,*n); + maxmn = max(*m,*n); + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*lda < max(1,*m)) { + *info = -5; + } else if (*ldb < max(1,maxmn)) { + *info = -7; + } + +/* Compute workspace */ +/* (Note: Comments in the code beginning (char *)"Workspace:" describe the */ +/* minimal amount of workspace needed at that point in the code, */ +/* as well as the preferred amount for good performance. */ +/* NB refers to the optimal block size for the immediately */ +/* following subroutine, as returned by ILAENV.) */ + + if (*info == 0) { + minwrk = 1; + maxwrk = 1; + if (minmn > 0) { + mm = *m; + mnthr = ilaenv_(&c__6, (char *)"DGELSS", (char *)" ", m, n, nrhs, &c_n1, (ftnlen) + 6, (ftnlen)1); + if (*m >= *n && *m >= mnthr) { + +/* Path 1a - overdetermined, with many more rows than */ +/* columns */ + +/* Compute space needed for DGEQRF */ + dgeqrf_(m, n, &a[a_offset], lda, dum, dum, &c_n1, info); + lwork_dgeqrf__ = (integer) dum[0]; +/* Compute space needed for DORMQR */ + dormqr_((char *)"L", (char *)"T", m, nrhs, n, &a[a_offset], lda, dum, &b[ + b_offset], ldb, dum, &c_n1, info, (ftnlen)1, (ftnlen) + 1); + lwork_dormqr__ = (integer) dum[0]; + mm = *n; +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n + lwork_dgeqrf__; + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n + lwork_dormqr__; + maxwrk = max(i__1,i__2); + } + if (*m >= *n) { + +/* Path 1 - overdetermined or exactly determined */ + +/* Compute workspace needed for DBDSQR */ + +/* Computing MAX */ + i__1 = 1, i__2 = *n * 5; + bdspac = max(i__1,i__2); +/* Compute space needed for DGEBRD */ + dgebrd_(&mm, n, &a[a_offset], lda, &s[1], dum, dum, dum, dum, + &c_n1, info); + lwork_dgebrd__ = (integer) dum[0]; +/* Compute space needed for DORMBR */ + dormbr_((char *)"Q", (char *)"L", (char *)"T", &mm, nrhs, n, &a[a_offset], lda, dum, & + b[b_offset], ldb, dum, &c_n1, info, (ftnlen)1, ( + ftnlen)1, (ftnlen)1); + lwork_dormbr__ = (integer) dum[0]; +/* Compute space needed for DORGBR */ + dorgbr_((char *)"P", n, n, n, &a[a_offset], lda, dum, dum, &c_n1, + info, (ftnlen)1); + lwork_dorgbr__ = (integer) dum[0]; +/* Compute total workspace needed */ +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n * 3 + lwork_dgebrd__; + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n * 3 + lwork_dormbr__; + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n * 3 + lwork_dorgbr__; + maxwrk = max(i__1,i__2); + maxwrk = max(maxwrk,bdspac); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n * *nrhs; + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = *n * 3 + mm, i__2 = *n * 3 + *nrhs, i__1 = max(i__1, + i__2); + minwrk = max(i__1,bdspac); + maxwrk = max(minwrk,maxwrk); + } + if (*n > *m) { + +/* Compute workspace needed for DBDSQR */ + +/* Computing MAX */ + i__1 = 1, i__2 = *m * 5; + bdspac = max(i__1,i__2); +/* Computing MAX */ + i__1 = *m * 3 + *nrhs, i__2 = *m * 3 + *n, i__1 = max(i__1, + i__2); + minwrk = max(i__1,bdspac); + if (*n >= mnthr) { + +/* Path 2a - underdetermined, with many more columns */ +/* than rows */ + +/* Compute space needed for DGELQF */ + dgelqf_(m, n, &a[a_offset], lda, dum, dum, &c_n1, info); + lwork_dgelqf__ = (integer) dum[0]; +/* Compute space needed for DGEBRD */ + dgebrd_(m, m, &a[a_offset], lda, &s[1], dum, dum, dum, + dum, &c_n1, info); + lwork_dgebrd__ = (integer) dum[0]; +/* Compute space needed for DORMBR */ + dormbr_((char *)"Q", (char *)"L", (char *)"T", m, nrhs, n, &a[a_offset], lda, dum, + &b[b_offset], ldb, dum, &c_n1, info, (ftnlen)1, ( + ftnlen)1, (ftnlen)1); + lwork_dormbr__ = (integer) dum[0]; +/* Compute space needed for DORGBR */ + dorgbr_((char *)"P", m, m, m, &a[a_offset], lda, dum, dum, &c_n1, + info, (ftnlen)1); + lwork_dorgbr__ = (integer) dum[0]; +/* Compute space needed for DORMLQ */ + dormlq_((char *)"L", (char *)"T", n, nrhs, m, &a[a_offset], lda, dum, &b[ + b_offset], ldb, dum, &c_n1, info, (ftnlen)1, ( + ftnlen)1); + lwork_dormlq__ = (integer) dum[0]; +/* Compute total workspace needed */ + maxwrk = *m + lwork_dgelqf__; +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + + lwork_dgebrd__; + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + + lwork_dormbr__; + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + + lwork_dorgbr__; + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * *m + *m + bdspac; + maxwrk = max(i__1,i__2); + if (*nrhs > 1) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * *m + *m + *m * *nrhs; + maxwrk = max(i__1,i__2); + } else { +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * *m + (*m << 1); + maxwrk = max(i__1,i__2); + } +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m + lwork_dormlq__; + maxwrk = max(i__1,i__2); + } else { + +/* Path 2 - underdetermined */ + +/* Compute space needed for DGEBRD */ + dgebrd_(m, n, &a[a_offset], lda, &s[1], dum, dum, dum, + dum, &c_n1, info); + lwork_dgebrd__ = (integer) dum[0]; +/* Compute space needed for DORMBR */ + dormbr_((char *)"Q", (char *)"L", (char *)"T", m, nrhs, m, &a[a_offset], lda, dum, + &b[b_offset], ldb, dum, &c_n1, info, (ftnlen)1, ( + ftnlen)1, (ftnlen)1); + lwork_dormbr__ = (integer) dum[0]; +/* Compute space needed for DORGBR */ + dorgbr_((char *)"P", m, n, m, &a[a_offset], lda, dum, dum, &c_n1, + info, (ftnlen)1); + lwork_dorgbr__ = (integer) dum[0]; + maxwrk = *m * 3 + lwork_dgebrd__; +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * 3 + lwork_dormbr__; + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * 3 + lwork_dorgbr__; + maxwrk = max(i__1,i__2); + maxwrk = max(maxwrk,bdspac); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n * *nrhs; + maxwrk = max(i__1,i__2); + } + } + maxwrk = max(minwrk,maxwrk); + } + work[1] = (doublereal) maxwrk; + + if (*lwork < minwrk && ! lquery) { + *info = -12; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DGELSS", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + *rank = 0; + return 0; + } + +/* Get machine parameters */ + + eps = dlamch_((char *)"P", (ftnlen)1); + sfmin = dlamch_((char *)"S", (ftnlen)1); + smlnum = sfmin / eps; + bignum = 1. / smlnum; + dlabad_(&smlnum, &bignum); + +/* Scale A if max element outside range [SMLNUM,BIGNUM] */ + + anrm = dlange_((char *)"M", m, n, &a[a_offset], lda, &work[1], (ftnlen)1); + iascl = 0; + if (anrm > 0. && anrm < smlnum) { + +/* Scale matrix norm up to SMLNUM */ + + dlascl_((char *)"G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, + info, (ftnlen)1); + iascl = 1; + } else if (anrm > bignum) { + +/* Scale matrix norm down to BIGNUM */ + + dlascl_((char *)"G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, + info, (ftnlen)1); + iascl = 2; + } else if (anrm == 0.) { + +/* Matrix all zero. Return zero solution. */ + + i__1 = max(*m,*n); + dlaset_((char *)"F", &i__1, nrhs, &c_b46, &c_b46, &b[b_offset], ldb, (ftnlen) + 1); + dlaset_((char *)"F", &minmn, &c__1, &c_b46, &c_b46, &s[1], &minmn, (ftnlen)1); + *rank = 0; + goto L70; + } + +/* Scale B if max element outside range [SMLNUM,BIGNUM] */ + + bnrm = dlange_((char *)"M", m, nrhs, &b[b_offset], ldb, &work[1], (ftnlen)1); + ibscl = 0; + if (bnrm > 0. && bnrm < smlnum) { + +/* Scale matrix norm up to SMLNUM */ + + dlascl_((char *)"G", &c__0, &c__0, &bnrm, &smlnum, m, nrhs, &b[b_offset], ldb, + info, (ftnlen)1); + ibscl = 1; + } else if (bnrm > bignum) { + +/* Scale matrix norm down to BIGNUM */ + + dlascl_((char *)"G", &c__0, &c__0, &bnrm, &bignum, m, nrhs, &b[b_offset], ldb, + info, (ftnlen)1); + ibscl = 2; + } + +/* Overdetermined case */ + + if (*m >= *n) { + +/* Path 1 - overdetermined or exactly determined */ + + mm = *m; + if (*m >= mnthr) { + +/* Path 1a - overdetermined, with many more rows than columns */ + + mm = *n; + itau = 1; + iwork = itau + *n; + +/* Compute A=Q*R */ +/* (Workspace: need 2*N, prefer N+N*NB) */ + + i__1 = *lwork - iwork + 1; + dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork], &i__1, + info); + +/* Multiply B by transpose(Q) */ +/* (Workspace: need N+NRHS, prefer N+NRHS*NB) */ + + i__1 = *lwork - iwork + 1; + dormqr_((char *)"L", (char *)"T", m, nrhs, n, &a[a_offset], lda, &work[itau], &b[ + b_offset], ldb, &work[iwork], &i__1, info, (ftnlen)1, ( + ftnlen)1); + +/* Zero out below R */ + + if (*n > 1) { + i__1 = *n - 1; + i__2 = *n - 1; + dlaset_((char *)"L", &i__1, &i__2, &c_b46, &c_b46, &a[a_dim1 + 2], + lda, (ftnlen)1); + } + } + + ie = 1; + itauq = ie + *n; + itaup = itauq + *n; + iwork = itaup + *n; + +/* Bidiagonalize R in A */ +/* (Workspace: need 3*N+MM, prefer 3*N+(MM+N)*NB) */ + + i__1 = *lwork - iwork + 1; + dgebrd_(&mm, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], & + work[itaup], &work[iwork], &i__1, info); + +/* Multiply B by transpose of left bidiagonalizing vectors of R */ +/* (Workspace: need 3*N+NRHS, prefer 3*N+NRHS*NB) */ + + i__1 = *lwork - iwork + 1; + dormbr_((char *)"Q", (char *)"L", (char *)"T", &mm, nrhs, n, &a[a_offset], lda, &work[itauq], + &b[b_offset], ldb, &work[iwork], &i__1, info, (ftnlen)1, ( + ftnlen)1, (ftnlen)1); + +/* Generate right bidiagonalizing vectors of R in A */ +/* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) */ + + i__1 = *lwork - iwork + 1; + dorgbr_((char *)"P", n, n, n, &a[a_offset], lda, &work[itaup], &work[iwork], & + i__1, info, (ftnlen)1); + iwork = ie + *n; + +/* Perform bidiagonal QR iteration */ +/* multiply B by transpose of left singular vectors */ +/* compute right singular vectors in A */ +/* (Workspace: need BDSPAC) */ + + dbdsqr_((char *)"U", n, n, &c__0, nrhs, &s[1], &work[ie], &a[a_offset], lda, + dum, &c__1, &b[b_offset], ldb, &work[iwork], info, (ftnlen)1); + if (*info != 0) { + goto L70; + } + +/* Multiply B by reciprocals of singular values */ + +/* Computing MAX */ + d__1 = *rcond * s[1]; + thr = max(d__1,sfmin); + if (*rcond < 0.) { +/* Computing MAX */ + d__1 = eps * s[1]; + thr = max(d__1,sfmin); + } + *rank = 0; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if (s[i__] > thr) { + drscl_(nrhs, &s[i__], &b[i__ + b_dim1], ldb); + ++(*rank); + } else { + dlaset_((char *)"F", &c__1, nrhs, &c_b46, &c_b46, &b[i__ + b_dim1], + ldb, (ftnlen)1); + } +/* L10: */ + } + +/* Multiply B by right singular vectors */ +/* (Workspace: need N, prefer N*NRHS) */ + + if (*lwork >= *ldb * *nrhs && *nrhs > 1) { + dgemm_((char *)"T", (char *)"N", n, nrhs, n, &c_b79, &a[a_offset], lda, &b[ + b_offset], ldb, &c_b46, &work[1], ldb, (ftnlen)1, (ftnlen) + 1); + dlacpy_((char *)"G", n, nrhs, &work[1], ldb, &b[b_offset], ldb, (ftnlen)1) + ; + } else if (*nrhs > 1) { + chunk = *lwork / *n; + i__1 = *nrhs; + i__2 = chunk; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__3 = *nrhs - i__ + 1; + bl = min(i__3,chunk); + dgemm_((char *)"T", (char *)"N", n, &bl, n, &c_b79, &a[a_offset], lda, &b[i__ + * b_dim1 + 1], ldb, &c_b46, &work[1], n, (ftnlen)1, ( + ftnlen)1); + dlacpy_((char *)"G", n, &bl, &work[1], n, &b[i__ * b_dim1 + 1], ldb, ( + ftnlen)1); +/* L20: */ + } + } else { + dgemv_((char *)"T", n, n, &c_b79, &a[a_offset], lda, &b[b_offset], &c__1, + &c_b46, &work[1], &c__1, (ftnlen)1); + dcopy_(n, &work[1], &c__1, &b[b_offset], &c__1); + } + + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__2 = *m, i__1 = (*m << 1) - 4, i__2 = max(i__2,i__1), i__2 = max( + i__2,*nrhs), i__1 = *n - *m * 3; + if (*n >= mnthr && *lwork >= (*m << 2) + *m * *m + max(i__2,i__1)) { + +/* Path 2a - underdetermined, with many more columns than rows */ +/* and sufficient workspace for an efficient algorithm */ + + ldwork = *m; +/* Computing MAX */ +/* Computing MAX */ + i__3 = *m, i__4 = (*m << 1) - 4, i__3 = max(i__3,i__4), i__3 = + max(i__3,*nrhs), i__4 = *n - *m * 3; + i__2 = (*m << 2) + *m * *lda + max(i__3,i__4), i__1 = *m * *lda + + *m + *m * *nrhs; + if (*lwork >= max(i__2,i__1)) { + ldwork = *lda; + } + itau = 1; + iwork = *m + 1; + +/* Compute A=L*Q */ +/* (Workspace: need 2*M, prefer M+M*NB) */ + + i__2 = *lwork - iwork + 1; + dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork], &i__2, + info); + il = iwork; + +/* Copy L to WORK(IL), zeroing out above it */ + + dlacpy_((char *)"L", m, m, &a[a_offset], lda, &work[il], &ldwork, (ftnlen) + 1); + i__2 = *m - 1; + i__1 = *m - 1; + dlaset_((char *)"U", &i__2, &i__1, &c_b46, &c_b46, &work[il + ldwork], & + ldwork, (ftnlen)1); + ie = il + ldwork * *m; + itauq = ie + *m; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Bidiagonalize L in WORK(IL) */ +/* (Workspace: need M*M+5*M, prefer M*M+4*M+2*M*NB) */ + + i__2 = *lwork - iwork + 1; + dgebrd_(m, m, &work[il], &ldwork, &s[1], &work[ie], &work[itauq], + &work[itaup], &work[iwork], &i__2, info); + +/* Multiply B by transpose of left bidiagonalizing vectors of L */ +/* (Workspace: need M*M+4*M+NRHS, prefer M*M+4*M+NRHS*NB) */ + + i__2 = *lwork - iwork + 1; + dormbr_((char *)"Q", (char *)"L", (char *)"T", m, nrhs, m, &work[il], &ldwork, &work[ + itauq], &b[b_offset], ldb, &work[iwork], &i__2, info, ( + ftnlen)1, (ftnlen)1, (ftnlen)1); + +/* Generate right bidiagonalizing vectors of R in WORK(IL) */ +/* (Workspace: need M*M+5*M-1, prefer M*M+4*M+(M-1)*NB) */ + + i__2 = *lwork - iwork + 1; + dorgbr_((char *)"P", m, m, m, &work[il], &ldwork, &work[itaup], &work[ + iwork], &i__2, info, (ftnlen)1); + iwork = ie + *m; + +/* Perform bidiagonal QR iteration, */ +/* computing right singular vectors of L in WORK(IL) and */ +/* multiplying B by transpose of left singular vectors */ +/* (Workspace: need M*M+M+BDSPAC) */ + + dbdsqr_((char *)"U", m, m, &c__0, nrhs, &s[1], &work[ie], &work[il], & + ldwork, &a[a_offset], lda, &b[b_offset], ldb, &work[iwork] + , info, (ftnlen)1); + if (*info != 0) { + goto L70; + } + +/* Multiply B by reciprocals of singular values */ + +/* Computing MAX */ + d__1 = *rcond * s[1]; + thr = max(d__1,sfmin); + if (*rcond < 0.) { +/* Computing MAX */ + d__1 = eps * s[1]; + thr = max(d__1,sfmin); + } + *rank = 0; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + if (s[i__] > thr) { + drscl_(nrhs, &s[i__], &b[i__ + b_dim1], ldb); + ++(*rank); + } else { + dlaset_((char *)"F", &c__1, nrhs, &c_b46, &c_b46, &b[i__ + b_dim1] + , ldb, (ftnlen)1); + } +/* L30: */ + } + iwork = ie; + +/* Multiply B by right singular vectors of L in WORK(IL) */ +/* (Workspace: need M*M+2*M, prefer M*M+M+M*NRHS) */ + + if (*lwork >= *ldb * *nrhs + iwork - 1 && *nrhs > 1) { + dgemm_((char *)"T", (char *)"N", m, nrhs, m, &c_b79, &work[il], &ldwork, &b[ + b_offset], ldb, &c_b46, &work[iwork], ldb, (ftnlen)1, + (ftnlen)1); + dlacpy_((char *)"G", m, nrhs, &work[iwork], ldb, &b[b_offset], ldb, ( + ftnlen)1); + } else if (*nrhs > 1) { + chunk = (*lwork - iwork + 1) / *m; + i__2 = *nrhs; + i__1 = chunk; + for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += + i__1) { +/* Computing MIN */ + i__3 = *nrhs - i__ + 1; + bl = min(i__3,chunk); + dgemm_((char *)"T", (char *)"N", m, &bl, m, &c_b79, &work[il], &ldwork, & + b[i__ * b_dim1 + 1], ldb, &c_b46, &work[iwork], m, + (ftnlen)1, (ftnlen)1); + dlacpy_((char *)"G", m, &bl, &work[iwork], m, &b[i__ * b_dim1 + 1] + , ldb, (ftnlen)1); +/* L40: */ + } + } else { + dgemv_((char *)"T", m, m, &c_b79, &work[il], &ldwork, &b[b_dim1 + 1], + &c__1, &c_b46, &work[iwork], &c__1, (ftnlen)1); + dcopy_(m, &work[iwork], &c__1, &b[b_dim1 + 1], &c__1); + } + +/* Zero out below first M rows of B */ + + i__1 = *n - *m; + dlaset_((char *)"F", &i__1, nrhs, &c_b46, &c_b46, &b[*m + 1 + b_dim1], + ldb, (ftnlen)1); + iwork = itau + *m; + +/* Multiply transpose(Q) by B */ +/* (Workspace: need M+NRHS, prefer M+NRHS*NB) */ + + i__1 = *lwork - iwork + 1; + dormlq_((char *)"L", (char *)"T", n, nrhs, m, &a[a_offset], lda, &work[itau], &b[ + b_offset], ldb, &work[iwork], &i__1, info, (ftnlen)1, ( + ftnlen)1); + + } else { + +/* Path 2 - remaining underdetermined cases */ + + ie = 1; + itauq = ie + *m; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Bidiagonalize A */ +/* (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB) */ + + i__1 = *lwork - iwork + 1; + dgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], & + work[itaup], &work[iwork], &i__1, info); + +/* Multiply B by transpose of left bidiagonalizing vectors */ +/* (Workspace: need 3*M+NRHS, prefer 3*M+NRHS*NB) */ + + i__1 = *lwork - iwork + 1; + dormbr_((char *)"Q", (char *)"L", (char *)"T", m, nrhs, n, &a[a_offset], lda, &work[itauq] + , &b[b_offset], ldb, &work[iwork], &i__1, info, (ftnlen)1, + (ftnlen)1, (ftnlen)1); + +/* Generate right bidiagonalizing vectors in A */ +/* (Workspace: need 4*M, prefer 3*M+M*NB) */ + + i__1 = *lwork - iwork + 1; + dorgbr_((char *)"P", m, n, m, &a[a_offset], lda, &work[itaup], &work[ + iwork], &i__1, info, (ftnlen)1); + iwork = ie + *m; + +/* Perform bidiagonal QR iteration, */ +/* computing right singular vectors of A in A and */ +/* multiplying B by transpose of left singular vectors */ +/* (Workspace: need BDSPAC) */ + + dbdsqr_((char *)"L", m, n, &c__0, nrhs, &s[1], &work[ie], &a[a_offset], + lda, dum, &c__1, &b[b_offset], ldb, &work[iwork], info, ( + ftnlen)1); + if (*info != 0) { + goto L70; + } + +/* Multiply B by reciprocals of singular values */ + +/* Computing MAX */ + d__1 = *rcond * s[1]; + thr = max(d__1,sfmin); + if (*rcond < 0.) { +/* Computing MAX */ + d__1 = eps * s[1]; + thr = max(d__1,sfmin); + } + *rank = 0; + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + if (s[i__] > thr) { + drscl_(nrhs, &s[i__], &b[i__ + b_dim1], ldb); + ++(*rank); + } else { + dlaset_((char *)"F", &c__1, nrhs, &c_b46, &c_b46, &b[i__ + b_dim1] + , ldb, (ftnlen)1); + } +/* L50: */ + } + +/* Multiply B by right singular vectors of A */ +/* (Workspace: need N, prefer N*NRHS) */ + + if (*lwork >= *ldb * *nrhs && *nrhs > 1) { + dgemm_((char *)"T", (char *)"N", n, nrhs, m, &c_b79, &a[a_offset], lda, &b[ + b_offset], ldb, &c_b46, &work[1], ldb, (ftnlen)1, ( + ftnlen)1); + dlacpy_((char *)"F", n, nrhs, &work[1], ldb, &b[b_offset], ldb, ( + ftnlen)1); + } else if (*nrhs > 1) { + chunk = *lwork / *n; + i__1 = *nrhs; + i__2 = chunk; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += + i__2) { +/* Computing MIN */ + i__3 = *nrhs - i__ + 1; + bl = min(i__3,chunk); + dgemm_((char *)"T", (char *)"N", n, &bl, m, &c_b79, &a[a_offset], lda, &b[ + i__ * b_dim1 + 1], ldb, &c_b46, &work[1], n, ( + ftnlen)1, (ftnlen)1); + dlacpy_((char *)"F", n, &bl, &work[1], n, &b[i__ * b_dim1 + 1], + ldb, (ftnlen)1); +/* L60: */ + } + } else { + dgemv_((char *)"T", m, n, &c_b79, &a[a_offset], lda, &b[b_offset], & + c__1, &c_b46, &work[1], &c__1, (ftnlen)1); + dcopy_(n, &work[1], &c__1, &b[b_offset], &c__1); + } + } + } + +/* Undo scaling */ + + if (iascl == 1) { + dlascl_((char *)"G", &c__0, &c__0, &anrm, &smlnum, n, nrhs, &b[b_offset], ldb, + info, (ftnlen)1); + dlascl_((char *)"G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], & + minmn, info, (ftnlen)1); + } else if (iascl == 2) { + dlascl_((char *)"G", &c__0, &c__0, &anrm, &bignum, n, nrhs, &b[b_offset], ldb, + info, (ftnlen)1); + dlascl_((char *)"G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], & + minmn, info, (ftnlen)1); + } + if (ibscl == 1) { + dlascl_((char *)"G", &c__0, &c__0, &smlnum, &bnrm, n, nrhs, &b[b_offset], ldb, + info, (ftnlen)1); + } else if (ibscl == 2) { + dlascl_((char *)"G", &c__0, &c__0, &bignum, &bnrm, n, nrhs, &b[b_offset], ldb, + info, (ftnlen)1); + } + +L70: + work[1] = (doublereal) maxwrk; + return 0; + +/* End of DGELSS */ + +} /* dgelss_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dgemm.cpp b/lib/linalg/dgemm.cpp new file mode 100644 index 0000000000..2f59801245 --- /dev/null +++ b/lib/linalg/dgemm.cpp @@ -0,0 +1,458 @@ +/* fortran/dgemm.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* > \brief \b DGEMM */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC) */ + +/* .. Scalar Arguments .. */ +/* DOUBLE PRECISION ALPHA,BETA */ +/* INTEGER K,LDA,LDB,LDC,M,N */ +/* CHARACTER TRANSA,TRANSB */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION A(LDA,*),B(LDB,*),C(LDC,*) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGEMM performs one of the matrix-matrix operations */ +/* > */ +/* > C := alpha*op( A )*op( B ) + beta*C, */ +/* > */ +/* > where op( X ) is one of */ +/* > */ +/* > op( X ) = X or op( X ) = X**T, */ +/* > */ +/* > alpha and beta are scalars, and A, B and C are matrices, with op( A ) */ +/* > an m by k matrix, op( B ) a k by n matrix and C an m by n matrix. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] TRANSA */ +/* > \verbatim */ +/* > TRANSA is CHARACTER*1 */ +/* > On entry, TRANSA specifies the form of op( A ) to be used in */ +/* > the matrix multiplication as follows: */ +/* > */ +/* > TRANSA = 'N' or 'n', op( A ) = A. */ +/* > */ +/* > TRANSA = 'T' or 't', op( A ) = A**T. */ +/* > */ +/* > TRANSA = 'C' or 'c', op( A ) = A**T. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TRANSB */ +/* > \verbatim */ +/* > TRANSB is CHARACTER*1 */ +/* > On entry, TRANSB specifies the form of op( B ) to be used in */ +/* > the matrix multiplication as follows: */ +/* > */ +/* > TRANSB = 'N' or 'n', op( B ) = B. */ +/* > */ +/* > TRANSB = 'T' or 't', op( B ) = B**T. */ +/* > */ +/* > TRANSB = 'C' or 'c', op( B ) = B**T. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > On entry, M specifies the number of rows of the matrix */ +/* > op( A ) and of the matrix C. M must be at least zero. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > On entry, N specifies the number of columns of the matrix */ +/* > op( B ) and the number of columns of the matrix C. N must be */ +/* > at least zero. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] K */ +/* > \verbatim */ +/* > K is INTEGER */ +/* > On entry, K specifies the number of columns of the matrix */ +/* > op( A ) and the number of rows of the matrix op( B ). K must */ +/* > be at least zero. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] ALPHA */ +/* > \verbatim */ +/* > ALPHA is DOUBLE PRECISION. */ +/* > On entry, ALPHA specifies the scalar alpha. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension ( LDA, ka ), where ka is */ +/* > k when TRANSA = 'N' or 'n', and is m otherwise. */ +/* > Before entry with TRANSA = 'N' or 'n', the leading m by k */ +/* > part of the array A must contain the matrix A, otherwise */ +/* > the leading k by m part of the array A must contain the */ +/* > matrix A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > On entry, LDA specifies the first dimension of A as declared */ +/* > in the calling (sub) program. When TRANSA = 'N' or 'n' then */ +/* > LDA must be at least max( 1, m ), otherwise LDA must be at */ +/* > least max( 1, k ). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] B */ +/* > \verbatim */ +/* > B is DOUBLE PRECISION array, dimension ( LDB, kb ), where kb is */ +/* > n when TRANSB = 'N' or 'n', and is k otherwise. */ +/* > Before entry with TRANSB = 'N' or 'n', the leading k by n */ +/* > part of the array B must contain the matrix B, otherwise */ +/* > the leading n by k part of the array B must contain the */ +/* > matrix B. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > On entry, LDB specifies the first dimension of B as declared */ +/* > in the calling (sub) program. When TRANSB = 'N' or 'n' then */ +/* > LDB must be at least max( 1, k ), otherwise LDB must be at */ +/* > least max( 1, n ). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] BETA */ +/* > \verbatim */ +/* > BETA is DOUBLE PRECISION. */ +/* > On entry, BETA specifies the scalar beta. When BETA is */ +/* > supplied as zero then C need not be set on input. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] C */ +/* > \verbatim */ +/* > C is DOUBLE PRECISION array, dimension ( LDC, N ) */ +/* > Before entry, the leading m by n part of the array C must */ +/* > contain the matrix C, except when beta is zero, in which */ +/* > case C need not be set on entry. */ +/* > On exit, the array C is overwritten by the m by n matrix */ +/* > ( alpha*op( A )*op( B ) + beta*C ). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDC */ +/* > \verbatim */ +/* > LDC is INTEGER */ +/* > On entry, LDC specifies the first dimension of C as declared */ +/* > in the calling (sub) program. LDC must be at least */ +/* > max( 1, m ). */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup double_blas_level3 */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > Level 3 Blas routine. */ +/* > */ +/* > -- Written on 8-February-1989. */ +/* > Jack Dongarra, Argonne National Laboratory. */ +/* > Iain Duff, AERE Harwell. */ +/* > Jeremy Du Croz, Numerical Algorithms Group Ltd. */ +/* > Sven Hammarling, Numerical Algorithms Group Ltd. */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int dgemm_(char *transa, char *transb, integer *m, integer * + n, integer *k, doublereal *alpha, doublereal *a, integer *lda, + doublereal *b, integer *ldb, doublereal *beta, doublereal *c__, + integer *ldc, ftnlen transa_len, ftnlen transb_len) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, + i__3; + + /* Local variables */ + integer i__, j, l, info; + logical nota, notb; + doublereal temp; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + integer nrowa, nrowb; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + + +/* -- Reference BLAS level3 routine -- */ +/* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Parameters .. */ +/* .. */ + +/* Set NOTA and NOTB as true if A and B respectively are not */ +/* transposed and set NROWA and NROWB as the number of rows of A */ +/* and B respectively. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + + /* Function Body */ + nota = lsame_(transa, (char *)"N", (ftnlen)1, (ftnlen)1); + notb = lsame_(transb, (char *)"N", (ftnlen)1, (ftnlen)1); + if (nota) { + nrowa = *m; + } else { + nrowa = *k; + } + if (notb) { + nrowb = *k; + } else { + nrowb = *n; + } + +/* Test the input parameters. */ + + info = 0; + if (! nota && ! lsame_(transa, (char *)"C", (ftnlen)1, (ftnlen)1) && ! lsame_( + transa, (char *)"T", (ftnlen)1, (ftnlen)1)) { + info = 1; + } else if (! notb && ! lsame_(transb, (char *)"C", (ftnlen)1, (ftnlen)1) && ! + lsame_(transb, (char *)"T", (ftnlen)1, (ftnlen)1)) { + info = 2; + } else if (*m < 0) { + info = 3; + } else if (*n < 0) { + info = 4; + } else if (*k < 0) { + info = 5; + } else if (*lda < max(1,nrowa)) { + info = 8; + } else if (*ldb < max(1,nrowb)) { + info = 10; + } else if (*ldc < max(1,*m)) { + info = 13; + } + if (info != 0) { + xerbla_((char *)"DGEMM ", &info, (ftnlen)6); + return 0; + } + +/* Quick return if possible. */ + + if (*m == 0 || *n == 0 || (*alpha == 0. || *k == 0) && *beta == 1.) { + return 0; + } + +/* And if alpha.eq.zero. */ + + if (*alpha == 0.) { + if (*beta == 0.) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.; +/* L10: */ + } +/* L20: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L30: */ + } +/* L40: */ + } + } + return 0; + } + +/* Start the operations. */ + + if (notb) { + if (nota) { + +/* Form C := alpha*A*B + beta*C. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*beta == 0.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.; +/* L50: */ + } + } else if (*beta != 1.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L60: */ + } + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + temp = *alpha * b[l + j * b_dim1]; + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + c__[i__ + j * c_dim1] += temp * a[i__ + l * a_dim1]; +/* L70: */ + } +/* L80: */ + } +/* L90: */ + } + } else { + +/* Form C := alpha*A**T*B + beta*C */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = 0.; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + temp += a[l + i__ * a_dim1] * b[l + j * b_dim1]; +/* L100: */ + } + if (*beta == 0.) { + c__[i__ + j * c_dim1] = *alpha * temp; + } else { + c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[ + i__ + j * c_dim1]; + } +/* L110: */ + } +/* L120: */ + } + } + } else { + if (nota) { + +/* Form C := alpha*A*B**T + beta*C */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*beta == 0.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.; +/* L130: */ + } + } else if (*beta != 1.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L140: */ + } + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + temp = *alpha * b[j + l * b_dim1]; + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + c__[i__ + j * c_dim1] += temp * a[i__ + l * a_dim1]; +/* L150: */ + } +/* L160: */ + } +/* L170: */ + } + } else { + +/* Form C := alpha*A**T*B**T + beta*C */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = 0.; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + temp += a[l + i__ * a_dim1] * b[j + l * b_dim1]; +/* L180: */ + } + if (*beta == 0.) { + c__[i__ + j * c_dim1] = *alpha * temp; + } else { + c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[ + i__ + j * c_dim1]; + } +/* L190: */ + } +/* L200: */ + } + } + } + + return 0; + +/* End of DGEMM */ + +} /* dgemm_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dgemv.cpp b/lib/linalg/dgemv.cpp new file mode 100644 index 0000000000..1d121c33c0 --- /dev/null +++ b/lib/linalg/dgemv.cpp @@ -0,0 +1,385 @@ +/* fortran/dgemv.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* > \brief \b DGEMV */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DGEMV(TRANS,M,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY) */ + +/* .. Scalar Arguments .. */ +/* DOUBLE PRECISION ALPHA,BETA */ +/* INTEGER INCX,INCY,LDA,M,N */ +/* CHARACTER TRANS */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION A(LDA,*),X(*),Y(*) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGEMV performs one of the matrix-vector operations */ +/* > */ +/* > y := alpha*A*x + beta*y, or y := alpha*A**T*x + beta*y, */ +/* > */ +/* > where alpha and beta are scalars, x and y are vectors and A is an */ +/* > m by n matrix. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] TRANS */ +/* > \verbatim */ +/* > TRANS is CHARACTER*1 */ +/* > On entry, TRANS specifies the operation to be performed as */ +/* > follows: */ +/* > */ +/* > TRANS = 'N' or 'n' y := alpha*A*x + beta*y. */ +/* > */ +/* > TRANS = 'T' or 't' y := alpha*A**T*x + beta*y. */ +/* > */ +/* > TRANS = 'C' or 'c' y := alpha*A**T*x + beta*y. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > On entry, M specifies the number of rows of the matrix A. */ +/* > M must be at least zero. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > On entry, N specifies the number of columns of the matrix A. */ +/* > N must be at least zero. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] ALPHA */ +/* > \verbatim */ +/* > ALPHA is DOUBLE PRECISION. */ +/* > On entry, ALPHA specifies the scalar alpha. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension ( LDA, N ) */ +/* > Before entry, the leading m by n part of the array A must */ +/* > contain the matrix of coefficients. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > On entry, LDA specifies the first dimension of A as declared */ +/* > in the calling (sub) program. LDA must be at least */ +/* > max( 1, m ). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] X */ +/* > \verbatim */ +/* > X is DOUBLE PRECISION array, dimension at least */ +/* > ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' */ +/* > and at least */ +/* > ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. */ +/* > Before entry, the incremented array X must contain the */ +/* > vector x. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] INCX */ +/* > \verbatim */ +/* > INCX is INTEGER */ +/* > On entry, INCX specifies the increment for the elements of */ +/* > X. INCX must not be zero. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] BETA */ +/* > \verbatim */ +/* > BETA is DOUBLE PRECISION. */ +/* > On entry, BETA specifies the scalar beta. When BETA is */ +/* > supplied as zero then Y need not be set on input. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] Y */ +/* > \verbatim */ +/* > Y is DOUBLE PRECISION array, dimension at least */ +/* > ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' */ +/* > and at least */ +/* > ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. */ +/* > Before entry with BETA non-zero, the incremented array Y */ +/* > must contain the vector y. On exit, Y is overwritten by the */ +/* > updated vector y. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] INCY */ +/* > \verbatim */ +/* > INCY is INTEGER */ +/* > On entry, INCY specifies the increment for the elements of */ +/* > Y. INCY must not be zero. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup double_blas_level2 */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > Level 2 Blas routine. */ +/* > The vector and matrix arguments are not referenced when N = 0, or M = 0 */ +/* > */ +/* > -- Written on 22-October-1986. */ +/* > Jack Dongarra, Argonne National Lab. */ +/* > Jeremy Du Croz, Nag Central Office. */ +/* > Sven Hammarling, Nag Central Office. */ +/* > Richard Hanson, Sandia National Labs. */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int dgemv_(char *trans, integer *m, integer *n, doublereal * + alpha, doublereal *a, integer *lda, doublereal *x, integer *incx, + doublereal *beta, doublereal *y, integer *incy, ftnlen trans_len) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + integer i__, j, ix, iy, jx, jy, kx, ky, info; + doublereal temp; + integer lenx, leny; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + + +/* -- Reference BLAS level2 routine -- */ +/* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --x; + --y; + + /* Function Body */ + info = 0; + if (! lsame_(trans, (char *)"N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, (char *)"T", ( + ftnlen)1, (ftnlen)1) && ! lsame_(trans, (char *)"C", (ftnlen)1, (ftnlen)1) + ) { + info = 1; + } else if (*m < 0) { + info = 2; + } else if (*n < 0) { + info = 3; + } else if (*lda < max(1,*m)) { + info = 6; + } else if (*incx == 0) { + info = 8; + } else if (*incy == 0) { + info = 11; + } + if (info != 0) { + xerbla_((char *)"DGEMV ", &info, (ftnlen)6); + return 0; + } + +/* Quick return if possible. */ + + if (*m == 0 || *n == 0 || *alpha == 0. && *beta == 1.) { + return 0; + } + +/* Set LENX and LENY, the lengths of the vectors x and y, and set */ +/* up the start points in X and Y. */ + + if (lsame_(trans, (char *)"N", (ftnlen)1, (ftnlen)1)) { + lenx = *n; + leny = *m; + } else { + lenx = *m; + leny = *n; + } + if (*incx > 0) { + kx = 1; + } else { + kx = 1 - (lenx - 1) * *incx; + } + if (*incy > 0) { + ky = 1; + } else { + ky = 1 - (leny - 1) * *incy; + } + +/* Start the operations. In this version the elements of A are */ +/* accessed sequentially with one pass through A. */ + +/* First form y := beta*y. */ + + if (*beta != 1.) { + if (*incy == 1) { + if (*beta == 0.) { + i__1 = leny; + for (i__ = 1; i__ <= i__1; ++i__) { + y[i__] = 0.; +/* L10: */ + } + } else { + i__1 = leny; + for (i__ = 1; i__ <= i__1; ++i__) { + y[i__] = *beta * y[i__]; +/* L20: */ + } + } + } else { + iy = ky; + if (*beta == 0.) { + i__1 = leny; + for (i__ = 1; i__ <= i__1; ++i__) { + y[iy] = 0.; + iy += *incy; +/* L30: */ + } + } else { + i__1 = leny; + for (i__ = 1; i__ <= i__1; ++i__) { + y[iy] = *beta * y[iy]; + iy += *incy; +/* L40: */ + } + } + } + } + if (*alpha == 0.) { + return 0; + } + if (lsame_(trans, (char *)"N", (ftnlen)1, (ftnlen)1)) { + +/* Form y := alpha*A*x + y. */ + + jx = kx; + if (*incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp = *alpha * x[jx]; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + y[i__] += temp * a[i__ + j * a_dim1]; +/* L50: */ + } + jx += *incx; +/* L60: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp = *alpha * x[jx]; + iy = ky; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + y[iy] += temp * a[i__ + j * a_dim1]; + iy += *incy; +/* L70: */ + } + jx += *incx; +/* L80: */ + } + } + } else { + +/* Form y := alpha*A**T*x + y. */ + + jy = ky; + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp = 0.; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp += a[i__ + j * a_dim1] * x[i__]; +/* L90: */ + } + y[jy] += *alpha * temp; + jy += *incy; +/* L100: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp = 0.; + ix = kx; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp += a[i__ + j * a_dim1] * x[ix]; + ix += *incx; +/* L110: */ + } + y[jy] += *alpha * temp; + jy += *incy; +/* L120: */ + } + } + } + + return 0; + +/* End of DGEMV */ + +} /* dgemv_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dgeqr2.cpp b/lib/linalg/dgeqr2.cpp new file mode 100644 index 0000000000..635e360bc7 --- /dev/null +++ b/lib/linalg/dgeqr2.cpp @@ -0,0 +1,250 @@ +/* fortran/dgeqr2.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +/* > \brief \b DGEQR2 computes the QR factorization of a general rectangular matrix using an unblocked algorit +hm. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DGEQR2 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DGEQR2( M, N, A, LDA, TAU, WORK, INFO ) */ + +/* .. Scalar Arguments .. */ +/* INTEGER INFO, LDA, M, N */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGEQR2 computes a QR factorization of a real m-by-n matrix A: */ +/* > */ +/* > A = Q * ( R ), */ +/* > ( 0 ) */ +/* > */ +/* > where: */ +/* > */ +/* > Q is a m-by-m orthogonal matrix; */ +/* > R is an upper-triangular n-by-n matrix; */ +/* > 0 is a (m-n)-by-n zero matrix, if m > n. */ +/* > */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > On entry, the m by n matrix A. */ +/* > On exit, the elements on and above the diagonal of the array */ +/* > contain the min(m,n) by n upper trapezoidal matrix R (R is */ +/* > upper triangular if m >= n); the elements below the diagonal, */ +/* > with the array TAU, represent the orthogonal matrix Q as a */ +/* > product of elementary reflectors (see Further Details). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= max(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] TAU */ +/* > \verbatim */ +/* > TAU is DOUBLE PRECISION array, dimension (min(M,N)) */ +/* > The scalar factors of the elementary reflectors (see Further */ +/* > Details). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup doubleGEcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The matrix Q is represented as a product of elementary reflectors */ +/* > */ +/* > Q = H(1) H(2) . . . H(k), where k = min(m,n). */ +/* > */ +/* > Each H(i) has the form */ +/* > */ +/* > H(i) = I - tau * v * v**T */ +/* > */ +/* > where tau is a real scalar, and v is a real vector with */ +/* > v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), */ +/* > and tau in TAU(i). */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int dgeqr2_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *tau, doublereal *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + integer i__, k; + doublereal aii; + extern /* Subroutine */ int dlarf_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *, ftnlen), dlarfg_(integer *, doublereal *, + doublereal *, integer *, doublereal *), xerbla_(char *, integer *, + ftnlen); + + +/* -- LAPACK computational routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DGEQR2", &i__1, (ftnlen)6); + return 0; + } + + k = min(*m,*n); + + i__1 = k; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Generate elementary reflector H(i) to annihilate A(i+1:m,i) */ + + i__2 = *m - i__ + 1; +/* Computing MIN */ + i__3 = i__ + 1; + dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[min(i__3,*m) + i__ * a_dim1] + , &c__1, &tau[i__]); + if (i__ < *n) { + +/* Apply H(i) to A(i:m,i+1:n) from the left */ + + aii = a[i__ + i__ * a_dim1]; + a[i__ + i__ * a_dim1] = 1.; + i__2 = *m - i__ + 1; + i__3 = *n - i__; + dlarf_((char *)"Left", &i__2, &i__3, &a[i__ + i__ * a_dim1], &c__1, &tau[ + i__], &a[i__ + (i__ + 1) * a_dim1], lda, &work[1], ( + ftnlen)4); + a[i__ + i__ * a_dim1] = aii; + } +/* L10: */ + } + return 0; + +/* End of DGEQR2 */ + +} /* dgeqr2_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dgeqrf.cpp b/lib/linalg/dgeqrf.cpp new file mode 100644 index 0000000000..fa0aca3f47 --- /dev/null +++ b/lib/linalg/dgeqrf.cpp @@ -0,0 +1,354 @@ +/* fortran/dgeqrf.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c_n1 = -1; +static integer c__3 = 3; +static integer c__2 = 2; + +/* > \brief \b DGEQRF */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DGEQRF + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DGEQRF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) */ + +/* .. Scalar Arguments .. */ +/* INTEGER INFO, LDA, LWORK, M, N */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGEQRF computes a QR factorization of a real M-by-N matrix A: */ +/* > */ +/* > A = Q * ( R ), */ +/* > ( 0 ) */ +/* > */ +/* > where: */ +/* > */ +/* > Q is a M-by-M orthogonal matrix; */ +/* > R is an upper-triangular N-by-N matrix; */ +/* > 0 is a (M-N)-by-N zero matrix, if M > N. */ +/* > */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > On entry, the M-by-N matrix A. */ +/* > On exit, the elements on and above the diagonal of the array */ +/* > contain the min(M,N)-by-N upper trapezoidal matrix R (R is */ +/* > upper triangular if m >= n); the elements below the diagonal, */ +/* > with the array TAU, represent the orthogonal matrix Q as a */ +/* > product of min(m,n) elementary reflectors (see Further */ +/* > Details). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= max(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] TAU */ +/* > \verbatim */ +/* > TAU is DOUBLE PRECISION array, dimension (min(M,N)) */ +/* > The scalar factors of the elementary reflectors (see Further */ +/* > Details). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. */ +/* > LWORK >= 1, if MIN(M,N) = 0, and LWORK >= N, otherwise. */ +/* > For optimum performance LWORK >= N*NB, where NB is */ +/* > the optimal blocksize. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup doubleGEcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The matrix Q is represented as a product of elementary reflectors */ +/* > */ +/* > Q = H(1) H(2) . . . H(k), where k = min(m,n). */ +/* > */ +/* > Each H(i) has the form */ +/* > */ +/* > H(i) = I - tau * v * v**T */ +/* > */ +/* > where tau is a real scalar, and v is a real vector with */ +/* > v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), */ +/* > and tau in TAU(i). */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int dgeqrf_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *tau, doublereal *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + integer i__, k, ib, nb, nx, iws, nbmin, iinfo; + extern /* Subroutine */ int dgeqr2_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *), dlarfb_(char *, + char *, char *, char *, integer *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *, doublereal *, integer *, ftnlen, ftnlen, ftnlen, + ftnlen), dlarft_(char *, char *, integer *, integer *, doublereal + *, integer *, doublereal *, doublereal *, integer *, ftnlen, + ftnlen), xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + integer ldwork, lwkopt; + logical lquery; + + +/* -- LAPACK computational routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + k = min(*m,*n); + *info = 0; + nb = ilaenv_(&c__1, (char *)"DGEQRF", (char *)" ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen) + 1); + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } else if (! lquery) { + if (*lwork <= 0 || *m > 0 && *lwork < max(1,*n)) { + *info = -7; + } + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DGEQRF", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + if (k == 0) { + lwkopt = 1; + } else { + lwkopt = *n * nb; + } + work[1] = (doublereal) lwkopt; + return 0; + } + +/* Quick return if possible */ + + if (k == 0) { + work[1] = 1.; + return 0; + } + + nbmin = 2; + nx = 0; + iws = *n; + if (nb > 1 && nb < k) { + +/* Determine when to cross over from blocked to unblocked code. */ + +/* Computing MAX */ + i__1 = 0, i__2 = ilaenv_(&c__3, (char *)"DGEQRF", (char *)" ", m, n, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); + nx = max(i__1,i__2); + if (nx < k) { + +/* Determine if workspace is large enough for blocked code. */ + + ldwork = *n; + iws = ldwork * nb; + if (*lwork < iws) { + +/* Not enough workspace to use optimal NB: reduce NB and */ +/* determine the minimum value of NB. */ + + nb = *lwork / ldwork; +/* Computing MAX */ + i__1 = 2, i__2 = ilaenv_(&c__2, (char *)"DGEQRF", (char *)" ", m, n, &c_n1, & + c_n1, (ftnlen)6, (ftnlen)1); + nbmin = max(i__1,i__2); + } + } + } + + if (nb >= nbmin && nb < k && nx < k) { + +/* Use blocked code initially */ + + i__1 = k - nx; + i__2 = nb; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__3 = k - i__ + 1; + ib = min(i__3,nb); + +/* Compute the QR factorization of the current block */ +/* A(i:m,i:i+ib-1) */ + + i__3 = *m - i__ + 1; + dgeqr2_(&i__3, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[ + 1], &iinfo); + if (i__ + ib <= *n) { + +/* Form the triangular factor of the block reflector */ +/* H = H(i) H(i+1) . . . H(i+ib-1) */ + + i__3 = *m - i__ + 1; + dlarft_((char *)"Forward", (char *)"Columnwise", &i__3, &ib, &a[i__ + i__ * + a_dim1], lda, &tau[i__], &work[1], &ldwork, (ftnlen)7, + (ftnlen)10); + +/* Apply H**T to A(i:m,i+ib:n) from the left */ + + i__3 = *m - i__ + 1; + i__4 = *n - i__ - ib + 1; + dlarfb_((char *)"Left", (char *)"Transpose", (char *)"Forward", (char *)"Columnwise", &i__3, & + i__4, &ib, &a[i__ + i__ * a_dim1], lda, &work[1], & + ldwork, &a[i__ + (i__ + ib) * a_dim1], lda, &work[ib + + 1], &ldwork, (ftnlen)4, (ftnlen)9, (ftnlen)7, ( + ftnlen)10); + } +/* L10: */ + } + } else { + i__ = 1; + } + +/* Use unblocked code to factor the last or only block. */ + + if (i__ <= k) { + i__2 = *m - i__ + 1; + i__1 = *n - i__ + 1; + dgeqr2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1] + , &iinfo); + } + + work[1] = (doublereal) iws; + return 0; + +/* End of DGEQRF */ + +} /* dgeqrf_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dger.cpp b/lib/linalg/dger.cpp new file mode 100644 index 0000000000..fd5c4940bf --- /dev/null +++ b/lib/linalg/dger.cpp @@ -0,0 +1,265 @@ +/* fortran/dger.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* > \brief \b DGER */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DGER(M,N,ALPHA,X,INCX,Y,INCY,A,LDA) */ + +/* .. Scalar Arguments .. */ +/* DOUBLE PRECISION ALPHA */ +/* INTEGER INCX,INCY,LDA,M,N */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION A(LDA,*),X(*),Y(*) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGER performs the rank 1 operation */ +/* > */ +/* > A := alpha*x*y**T + A, */ +/* > */ +/* > where alpha is a scalar, x is an m element vector, y is an n element */ +/* > vector and A is an m by n matrix. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > On entry, M specifies the number of rows of the matrix A. */ +/* > M must be at least zero. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > On entry, N specifies the number of columns of the matrix A. */ +/* > N must be at least zero. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] ALPHA */ +/* > \verbatim */ +/* > ALPHA is DOUBLE PRECISION. */ +/* > On entry, ALPHA specifies the scalar alpha. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] X */ +/* > \verbatim */ +/* > X is DOUBLE PRECISION array, dimension at least */ +/* > ( 1 + ( m - 1 )*abs( INCX ) ). */ +/* > Before entry, the incremented array X must contain the m */ +/* > element vector x. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] INCX */ +/* > \verbatim */ +/* > INCX is INTEGER */ +/* > On entry, INCX specifies the increment for the elements of */ +/* > X. INCX must not be zero. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] Y */ +/* > \verbatim */ +/* > Y is DOUBLE PRECISION array, dimension at least */ +/* > ( 1 + ( n - 1 )*abs( INCY ) ). */ +/* > Before entry, the incremented array Y must contain the n */ +/* > element vector y. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] INCY */ +/* > \verbatim */ +/* > INCY is INTEGER */ +/* > On entry, INCY specifies the increment for the elements of */ +/* > Y. INCY must not be zero. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension ( LDA, N ) */ +/* > Before entry, the leading m by n part of the array A must */ +/* > contain the matrix of coefficients. On exit, A is */ +/* > overwritten by the updated matrix. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > On entry, LDA specifies the first dimension of A as declared */ +/* > in the calling (sub) program. LDA must be at least */ +/* > max( 1, m ). */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup double_blas_level2 */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > Level 2 Blas routine. */ +/* > */ +/* > -- Written on 22-October-1986. */ +/* > Jack Dongarra, Argonne National Lab. */ +/* > Jeremy Du Croz, Nag Central Office. */ +/* > Sven Hammarling, Nag Central Office. */ +/* > Richard Hanson, Sandia National Labs. */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int dger_(integer *m, integer *n, doublereal *alpha, + doublereal *x, integer *incx, doublereal *y, integer *incy, + doublereal *a, integer *lda) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + integer i__, j, ix, jy, kx, info; + doublereal temp; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + + +/* -- Reference BLAS level2 routine -- */ +/* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + --x; + --y; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + info = 0; + if (*m < 0) { + info = 1; + } else if (*n < 0) { + info = 2; + } else if (*incx == 0) { + info = 5; + } else if (*incy == 0) { + info = 7; + } else if (*lda < max(1,*m)) { + info = 9; + } + if (info != 0) { + xerbla_((char *)"DGER ", &info, (ftnlen)6); + return 0; + } + +/* Quick return if possible. */ + + if (*m == 0 || *n == 0 || *alpha == 0.) { + return 0; + } + +/* Start the operations. In this version the elements of A are */ +/* accessed sequentially with one pass through A. */ + + if (*incy > 0) { + jy = 1; + } else { + jy = 1 - (*n - 1) * *incy; + } + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (y[jy] != 0.) { + temp = *alpha * y[jy]; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] += x[i__] * temp; +/* L10: */ + } + } + jy += *incy; +/* L20: */ + } + } else { + if (*incx > 0) { + kx = 1; + } else { + kx = 1 - (*m - 1) * *incx; + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (y[jy] != 0.) { + temp = *alpha * y[jy]; + ix = kx; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] += x[ix] * temp; + ix += *incx; +/* L30: */ + } + } + jy += *incy; +/* L40: */ + } + } + + return 0; + +/* End of DGER */ + +} /* dger_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dgesv.cpp b/lib/linalg/dgesv.cpp new file mode 100644 index 0000000000..a71b2caa68 --- /dev/null +++ b/lib/linalg/dgesv.cpp @@ -0,0 +1,217 @@ +/* fortran/dgesv.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* > \brief DGESV computes the solution to system of linear equations A * X = B for GE matrices */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DGESV + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DGESV( N, NRHS, A, LDA, IPIV, B, LDB, INFO ) */ + +/* .. Scalar Arguments .. */ +/* INTEGER INFO, LDA, LDB, N, NRHS */ +/* .. */ +/* .. Array Arguments .. */ +/* INTEGER IPIV( * ) */ +/* DOUBLE PRECISION A( LDA, * ), B( LDB, * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGESV computes the solution to a real system of linear equations */ +/* > A * X = B, */ +/* > where A is an N-by-N matrix and X and B are N-by-NRHS matrices. */ +/* > */ +/* > The LU decomposition with partial pivoting and row interchanges is */ +/* > used to factor A as */ +/* > A = P * L * U, */ +/* > where P is a permutation matrix, L is unit lower triangular, and U is */ +/* > upper triangular. The factored form of A is then used to solve the */ +/* > system of equations A * X = B. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of linear equations, i.e., the order of the */ +/* > matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of right hand sides, i.e., the number of columns */ +/* > of the matrix B. NRHS >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > On entry, the N-by-N coefficient matrix A. */ +/* > On exit, the factors L and U from the factorization */ +/* > A = P*L*U; the unit diagonal elements of L are not stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= max(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (N) */ +/* > The pivot indices that define the permutation matrix P; */ +/* > row i of the matrix was interchanged with row IPIV(i). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* > On entry, the N-by-NRHS matrix of right hand side matrix B. */ +/* > On exit, if INFO = 0, the N-by-NRHS solution matrix X. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= max(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i, U(i,i) is exactly zero. The factorization */ +/* > has been completed, but the factor U is exactly */ +/* > singular, so the solution could not be computed. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup doubleGEsolve */ + +/* ===================================================================== */ +/* Subroutine */ int dgesv_(integer *n, integer *nrhs, doublereal *a, integer + *lda, integer *ipiv, doublereal *b, integer *ldb, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1; + + /* Local variables */ + extern /* Subroutine */ int dgetrf_(integer *, integer *, doublereal *, + integer *, integer *, integer *), xerbla_(char *, integer *, + ftnlen), dgetrs_(char *, integer *, integer *, doublereal *, + integer *, integer *, doublereal *, integer *, integer *, ftnlen); + + +/* -- LAPACK driver routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + *info = 0; + if (*n < 0) { + *info = -1; + } else if (*nrhs < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } else if (*ldb < max(1,*n)) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DGESV ", &i__1, (ftnlen)6); + return 0; + } + +/* Compute the LU factorization of A. */ + + dgetrf_(n, n, &a[a_offset], lda, &ipiv[1], info); + if (*info == 0) { + +/* Solve the system A*X = B, overwriting B with X. */ + + dgetrs_((char *)"No transpose", n, nrhs, &a[a_offset], lda, &ipiv[1], &b[ + b_offset], ldb, info, (ftnlen)12); + } + return 0; + +/* End of DGESV */ + +} /* dgesv_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dgesvd.cpp b/lib/linalg/dgesvd.cpp new file mode 100644 index 0000000000..96fd8735b0 --- /dev/null +++ b/lib/linalg/dgesvd.cpp @@ -0,0 +1,4158 @@ +/* fortran/dgesvd.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static integer c__6 = 6; +static integer c__0 = 0; +static integer c__2 = 2; +static integer c_n1 = -1; +static doublereal c_b57 = 0.; +static integer c__1 = 1; +static doublereal c_b79 = 1.; + +/* > \brief DGESVD computes the singular value decomposition (SVD) for GE matrices */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DGESVD + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT, */ +/* WORK, LWORK, INFO ) */ + +/* .. Scalar Arguments .. */ +/* CHARACTER JOBU, JOBVT */ +/* INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION A( LDA, * ), S( * ), U( LDU, * ), */ +/* $ VT( LDVT, * ), WORK( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGESVD computes the singular value decomposition (SVD) of a real */ +/* > M-by-N matrix A, optionally computing the left and/or right singular */ +/* > vectors. The SVD is written */ +/* > */ +/* > A = U * SIGMA * transpose(V) */ +/* > */ +/* > where SIGMA is an M-by-N matrix which is zero except for its */ +/* > min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and */ +/* > V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA */ +/* > are the singular values of A; they are real and non-negative, and */ +/* > are returned in descending order. The first min(m,n) columns of */ +/* > U and V are the left and right singular vectors of A. */ +/* > */ +/* > Note that the routine returns V**T, not V. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOBU */ +/* > \verbatim */ +/* > JOBU is CHARACTER*1 */ +/* > Specifies options for computing all or part of the matrix U: */ +/* > = 'A': all M columns of U are returned in array U: */ +/* > = 'S': the first min(m,n) columns of U (the left singular */ +/* > vectors) are returned in the array U; */ +/* > = 'O': the first min(m,n) columns of U (the left singular */ +/* > vectors) are overwritten on the array A; */ +/* > = 'N': no columns of U (no left singular vectors) are */ +/* > computed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] JOBVT */ +/* > \verbatim */ +/* > JOBVT is CHARACTER*1 */ +/* > Specifies options for computing all or part of the matrix */ +/* > V**T: */ +/* > = 'A': all N rows of V**T are returned in the array VT; */ +/* > = 'S': the first min(m,n) rows of V**T (the right singular */ +/* > vectors) are returned in the array VT; */ +/* > = 'O': the first min(m,n) rows of V**T (the right singular */ +/* > vectors) are overwritten on the array A; */ +/* > = 'N': no rows of V**T (no right singular vectors) are */ +/* > computed. */ +/* > */ +/* > JOBVT and JOBU cannot both be 'O'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the input matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the input matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > On entry, the M-by-N matrix A. */ +/* > On exit, */ +/* > if JOBU = 'O', A is overwritten with the first min(m,n) */ +/* > columns of U (the left singular vectors, */ +/* > stored columnwise); */ +/* > if JOBVT = 'O', A is overwritten with the first min(m,n) */ +/* > rows of V**T (the right singular vectors, */ +/* > stored rowwise); */ +/* > if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A */ +/* > are destroyed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= max(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] S */ +/* > \verbatim */ +/* > S is DOUBLE PRECISION array, dimension (min(M,N)) */ +/* > The singular values of A, sorted so that S(i) >= S(i+1). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] U */ +/* > \verbatim */ +/* > U is DOUBLE PRECISION array, dimension (LDU,UCOL) */ +/* > (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'. */ +/* > If JOBU = 'A', U contains the M-by-M orthogonal matrix U; */ +/* > if JOBU = 'S', U contains the first min(m,n) columns of U */ +/* > (the left singular vectors, stored columnwise); */ +/* > if JOBU = 'N' or 'O', U is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDU */ +/* > \verbatim */ +/* > LDU is INTEGER */ +/* > The leading dimension of the array U. LDU >= 1; if */ +/* > JOBU = 'S' or 'A', LDU >= M. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] VT */ +/* > \verbatim */ +/* > VT is DOUBLE PRECISION array, dimension (LDVT,N) */ +/* > If JOBVT = 'A', VT contains the N-by-N orthogonal matrix */ +/* > V**T; */ +/* > if JOBVT = 'S', VT contains the first min(m,n) rows of */ +/* > V**T (the right singular vectors, stored rowwise); */ +/* > if JOBVT = 'N' or 'O', VT is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDVT */ +/* > \verbatim */ +/* > LDVT is INTEGER */ +/* > The leading dimension of the array VT. LDVT >= 1; if */ +/* > JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK; */ +/* > if INFO > 0, WORK(2:MIN(M,N)) contains the unconverged */ +/* > superdiagonal elements of an upper bidiagonal matrix B */ +/* > whose diagonal is in S (not necessarily sorted). B */ +/* > satisfies A = U * B * VT, so it has the same singular values */ +/* > as A, and singular vectors related by U and VT. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. */ +/* > LWORK >= MAX(1,5*MIN(M,N)) for the paths (see comments inside code): */ +/* > - PATH 1 (M much larger than N, JOBU='N') */ +/* > - PATH 1t (N much larger than M, JOBVT='N') */ +/* > LWORK >= MAX(1,3*MIN(M,N) + MAX(M,N),5*MIN(M,N)) for the other paths */ +/* > For good performance, LWORK should generally be larger. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit. */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > > 0: if DBDSQR did not converge, INFO specifies how many */ +/* > superdiagonals of an intermediate bidiagonal form B */ +/* > did not converge to zero. See the description of WORK */ +/* > above for details. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup doubleGEsing */ + +/* ===================================================================== */ +/* Subroutine */ int dgesvd_(char *jobu, char *jobvt, integer *m, integer *n, + doublereal *a, integer *lda, doublereal *s, doublereal *u, integer * + ldu, doublereal *vt, integer *ldvt, doublereal *work, integer *lwork, + integer *info, ftnlen jobu_len, ftnlen jobvt_len) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1[2], + i__2, i__3, i__4; + char ch__1[2]; + + /* Builtin functions */ + /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); + double sqrt(doublereal); + + /* Local variables */ + integer i__, ie, ir, iu, blk, ncu; + doublereal dum[1], eps; + integer nru, iscl; + doublereal anrm; + integer ierr, itau, ncvt, nrvt, lwork_dgebrd__, lwork_dgelqf__, + lwork_dgeqrf__; + extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, ftnlen, ftnlen); + extern logical lsame_(char *, char *, ftnlen, ftnlen); + integer chunk, minmn, wrkbl, itaup, itauq, mnthr, iwork; + logical wntua, wntva, wntun, wntuo, wntvn, wntvo, wntus, wntvs; + extern /* Subroutine */ int dgebrd_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, integer *, integer *); + extern doublereal dlamch_(char *, ftnlen), dlange_(char *, integer *, + integer *, doublereal *, integer *, doublereal *, ftnlen); + integer bdspac; + extern /* Subroutine */ int dgelqf_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, integer *), + dlascl_(char *, integer *, integer *, doublereal *, doublereal *, + integer *, integer *, doublereal *, integer *, integer *, ftnlen), + dgeqrf_(integer *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *, integer *), dlacpy_(char *, + integer *, integer *, doublereal *, integer *, doublereal *, + integer *, ftnlen), dlaset_(char *, integer *, integer *, + doublereal *, doublereal *, doublereal *, integer *, ftnlen), + dbdsqr_(char *, integer *, integer *, integer *, integer *, + doublereal *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, integer *, doublereal *, integer *, + ftnlen), dorgbr_(char *, integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + integer *, ftnlen); + doublereal bignum; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int dormbr_(char *, char *, char *, integer *, + integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, integer *, + ftnlen, ftnlen, ftnlen), dorglq_(integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + integer *), dorgqr_(integer *, integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, integer *); + integer ldwrkr, minwrk, ldwrku, maxwrk; + doublereal smlnum; + logical lquery, wntuas, wntvas; + integer lwork_dorgbr_p__, lwork_dorgbr_q__, lwork_dorglq_m__, + lwork_dorglq_n__, lwork_dorgqr_m__, lwork_dorgqr_n__; + + +/* -- LAPACK driver routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Local Arrays .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --s; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + vt_dim1 = *ldvt; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + --work; + + /* Function Body */ + *info = 0; + minmn = min(*m,*n); + wntua = lsame_(jobu, (char *)"A", (ftnlen)1, (ftnlen)1); + wntus = lsame_(jobu, (char *)"S", (ftnlen)1, (ftnlen)1); + wntuas = wntua || wntus; + wntuo = lsame_(jobu, (char *)"O", (ftnlen)1, (ftnlen)1); + wntun = lsame_(jobu, (char *)"N", (ftnlen)1, (ftnlen)1); + wntva = lsame_(jobvt, (char *)"A", (ftnlen)1, (ftnlen)1); + wntvs = lsame_(jobvt, (char *)"S", (ftnlen)1, (ftnlen)1); + wntvas = wntva || wntvs; + wntvo = lsame_(jobvt, (char *)"O", (ftnlen)1, (ftnlen)1); + wntvn = lsame_(jobvt, (char *)"N", (ftnlen)1, (ftnlen)1); + lquery = *lwork == -1; + + if (! (wntua || wntus || wntuo || wntun)) { + *info = -1; + } else if (! (wntva || wntvs || wntvo || wntvn) || wntvo && wntuo) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*lda < max(1,*m)) { + *info = -6; + } else if (*ldu < 1 || wntuas && *ldu < *m) { + *info = -9; + } else if (*ldvt < 1 || wntva && *ldvt < *n || wntvs && *ldvt < minmn) { + *info = -11; + } + +/* Compute workspace */ +/* (Note: Comments in the code beginning (char *)"Workspace:" describe the */ +/* minimal amount of workspace needed at that point in the code, */ +/* as well as the preferred amount for good performance. */ +/* NB refers to the optimal block size for the immediately */ +/* following subroutine, as returned by ILAENV.) */ + + if (*info == 0) { + minwrk = 1; + maxwrk = 1; + if (*m >= *n && minmn > 0) { + +/* Compute space needed for DBDSQR */ + +/* Writing concatenation */ + i__1[0] = 1, a__1[0] = jobu; + i__1[1] = 1, a__1[1] = jobvt; + s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + mnthr = ilaenv_(&c__6, (char *)"DGESVD", ch__1, m, n, &c__0, &c__0, ( + ftnlen)6, (ftnlen)2); + bdspac = *n * 5; +/* Compute space needed for DGEQRF */ + dgeqrf_(m, n, &a[a_offset], lda, dum, dum, &c_n1, &ierr); + lwork_dgeqrf__ = (integer) dum[0]; +/* Compute space needed for DORGQR */ + dorgqr_(m, n, n, &a[a_offset], lda, dum, dum, &c_n1, &ierr); + lwork_dorgqr_n__ = (integer) dum[0]; + dorgqr_(m, m, n, &a[a_offset], lda, dum, dum, &c_n1, &ierr); + lwork_dorgqr_m__ = (integer) dum[0]; +/* Compute space needed for DGEBRD */ + dgebrd_(n, n, &a[a_offset], lda, &s[1], dum, dum, dum, dum, &c_n1, + &ierr); + lwork_dgebrd__ = (integer) dum[0]; +/* Compute space needed for DORGBR P */ + dorgbr_((char *)"P", n, n, n, &a[a_offset], lda, dum, dum, &c_n1, &ierr, ( + ftnlen)1); + lwork_dorgbr_p__ = (integer) dum[0]; +/* Compute space needed for DORGBR Q */ + dorgbr_((char *)"Q", n, n, n, &a[a_offset], lda, dum, dum, &c_n1, &ierr, ( + ftnlen)1); + lwork_dorgbr_q__ = (integer) dum[0]; + + if (*m >= mnthr) { + if (wntun) { + +/* Path 1 (M much larger than N, JOBU='N') */ + + maxwrk = *n + lwork_dgeqrf__; +/* Computing MAX */ + i__2 = maxwrk, i__3 = *n * 3 + lwork_dgebrd__; + maxwrk = max(i__2,i__3); + if (wntvo || wntvas) { +/* Computing MAX */ + i__2 = maxwrk, i__3 = *n * 3 + lwork_dorgbr_p__; + maxwrk = max(i__2,i__3); + } + maxwrk = max(maxwrk,bdspac); +/* Computing MAX */ + i__2 = *n << 2; + minwrk = max(i__2,bdspac); + } else if (wntuo && wntvn) { + +/* Path 2 (M much larger than N, JOBU='O', JOBVT='N') */ + + wrkbl = *n + lwork_dgeqrf__; +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n + lwork_dorgqr_n__; + wrkbl = max(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n * 3 + lwork_dgebrd__; + wrkbl = max(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n * 3 + lwork_dorgbr_q__; + wrkbl = max(i__2,i__3); + wrkbl = max(wrkbl,bdspac); +/* Computing MAX */ + i__2 = *n * *n + wrkbl, i__3 = *n * *n + *m * *n + *n; + maxwrk = max(i__2,i__3); +/* Computing MAX */ + i__2 = *n * 3 + *m; + minwrk = max(i__2,bdspac); + } else if (wntuo && wntvas) { + +/* Path 3 (M much larger than N, JOBU='O', JOBVT='S' or */ +/* 'A') */ + + wrkbl = *n + lwork_dgeqrf__; +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n + lwork_dorgqr_n__; + wrkbl = max(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n * 3 + lwork_dgebrd__; + wrkbl = max(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n * 3 + lwork_dorgbr_q__; + wrkbl = max(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n * 3 + lwork_dorgbr_p__; + wrkbl = max(i__2,i__3); + wrkbl = max(wrkbl,bdspac); +/* Computing MAX */ + i__2 = *n * *n + wrkbl, i__3 = *n * *n + *m * *n + *n; + maxwrk = max(i__2,i__3); +/* Computing MAX */ + i__2 = *n * 3 + *m; + minwrk = max(i__2,bdspac); + } else if (wntus && wntvn) { + +/* Path 4 (M much larger than N, JOBU='S', JOBVT='N') */ + + wrkbl = *n + lwork_dgeqrf__; +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n + lwork_dorgqr_n__; + wrkbl = max(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n * 3 + lwork_dgebrd__; + wrkbl = max(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n * 3 + lwork_dorgbr_q__; + wrkbl = max(i__2,i__3); + wrkbl = max(wrkbl,bdspac); + maxwrk = *n * *n + wrkbl; +/* Computing MAX */ + i__2 = *n * 3 + *m; + minwrk = max(i__2,bdspac); + } else if (wntus && wntvo) { + +/* Path 5 (M much larger than N, JOBU='S', JOBVT='O') */ + + wrkbl = *n + lwork_dgeqrf__; +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n + lwork_dorgqr_n__; + wrkbl = max(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n * 3 + lwork_dgebrd__; + wrkbl = max(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n * 3 + lwork_dorgbr_q__; + wrkbl = max(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n * 3 + lwork_dorgbr_p__; + wrkbl = max(i__2,i__3); + wrkbl = max(wrkbl,bdspac); + maxwrk = (*n << 1) * *n + wrkbl; +/* Computing MAX */ + i__2 = *n * 3 + *m; + minwrk = max(i__2,bdspac); + } else if (wntus && wntvas) { + +/* Path 6 (M much larger than N, JOBU='S', JOBVT='S' or */ +/* 'A') */ + + wrkbl = *n + lwork_dgeqrf__; +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n + lwork_dorgqr_n__; + wrkbl = max(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n * 3 + lwork_dgebrd__; + wrkbl = max(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n * 3 + lwork_dorgbr_q__; + wrkbl = max(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n * 3 + lwork_dorgbr_p__; + wrkbl = max(i__2,i__3); + wrkbl = max(wrkbl,bdspac); + maxwrk = *n * *n + wrkbl; +/* Computing MAX */ + i__2 = *n * 3 + *m; + minwrk = max(i__2,bdspac); + } else if (wntua && wntvn) { + +/* Path 7 (M much larger than N, JOBU='A', JOBVT='N') */ + + wrkbl = *n + lwork_dgeqrf__; +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n + lwork_dorgqr_m__; + wrkbl = max(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n * 3 + lwork_dgebrd__; + wrkbl = max(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n * 3 + lwork_dorgbr_q__; + wrkbl = max(i__2,i__3); + wrkbl = max(wrkbl,bdspac); + maxwrk = *n * *n + wrkbl; +/* Computing MAX */ + i__2 = *n * 3 + *m; + minwrk = max(i__2,bdspac); + } else if (wntua && wntvo) { + +/* Path 8 (M much larger than N, JOBU='A', JOBVT='O') */ + + wrkbl = *n + lwork_dgeqrf__; +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n + lwork_dorgqr_m__; + wrkbl = max(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n * 3 + lwork_dgebrd__; + wrkbl = max(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n * 3 + lwork_dorgbr_q__; + wrkbl = max(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n * 3 + lwork_dorgbr_p__; + wrkbl = max(i__2,i__3); + wrkbl = max(wrkbl,bdspac); + maxwrk = (*n << 1) * *n + wrkbl; +/* Computing MAX */ + i__2 = *n * 3 + *m; + minwrk = max(i__2,bdspac); + } else if (wntua && wntvas) { + +/* Path 9 (M much larger than N, JOBU='A', JOBVT='S' or */ +/* 'A') */ + + wrkbl = *n + lwork_dgeqrf__; +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n + lwork_dorgqr_m__; + wrkbl = max(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n * 3 + lwork_dgebrd__; + wrkbl = max(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n * 3 + lwork_dorgbr_q__; + wrkbl = max(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *n * 3 + lwork_dorgbr_p__; + wrkbl = max(i__2,i__3); + wrkbl = max(wrkbl,bdspac); + maxwrk = *n * *n + wrkbl; +/* Computing MAX */ + i__2 = *n * 3 + *m; + minwrk = max(i__2,bdspac); + } + } else { + +/* Path 10 (M at least N, but not much larger) */ + + dgebrd_(m, n, &a[a_offset], lda, &s[1], dum, dum, dum, dum, & + c_n1, &ierr); + lwork_dgebrd__ = (integer) dum[0]; + maxwrk = *n * 3 + lwork_dgebrd__; + if (wntus || wntuo) { + dorgbr_((char *)"Q", m, n, n, &a[a_offset], lda, dum, dum, &c_n1, + &ierr, (ftnlen)1); + lwork_dorgbr_q__ = (integer) dum[0]; +/* Computing MAX */ + i__2 = maxwrk, i__3 = *n * 3 + lwork_dorgbr_q__; + maxwrk = max(i__2,i__3); + } + if (wntua) { + dorgbr_((char *)"Q", m, m, n, &a[a_offset], lda, dum, dum, &c_n1, + &ierr, (ftnlen)1); + lwork_dorgbr_q__ = (integer) dum[0]; +/* Computing MAX */ + i__2 = maxwrk, i__3 = *n * 3 + lwork_dorgbr_q__; + maxwrk = max(i__2,i__3); + } + if (! wntvn) { +/* Computing MAX */ + i__2 = maxwrk, i__3 = *n * 3 + lwork_dorgbr_p__; + maxwrk = max(i__2,i__3); + } + maxwrk = max(maxwrk,bdspac); +/* Computing MAX */ + i__2 = *n * 3 + *m; + minwrk = max(i__2,bdspac); + } + } else if (minmn > 0) { + +/* Compute space needed for DBDSQR */ + +/* Writing concatenation */ + i__1[0] = 1, a__1[0] = jobu; + i__1[1] = 1, a__1[1] = jobvt; + s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + mnthr = ilaenv_(&c__6, (char *)"DGESVD", ch__1, m, n, &c__0, &c__0, ( + ftnlen)6, (ftnlen)2); + bdspac = *m * 5; +/* Compute space needed for DGELQF */ + dgelqf_(m, n, &a[a_offset], lda, dum, dum, &c_n1, &ierr); + lwork_dgelqf__ = (integer) dum[0]; +/* Compute space needed for DORGLQ */ + dorglq_(n, n, m, dum, n, dum, dum, &c_n1, &ierr); + lwork_dorglq_n__ = (integer) dum[0]; + dorglq_(m, n, m, &a[a_offset], lda, dum, dum, &c_n1, &ierr); + lwork_dorglq_m__ = (integer) dum[0]; +/* Compute space needed for DGEBRD */ + dgebrd_(m, m, &a[a_offset], lda, &s[1], dum, dum, dum, dum, &c_n1, + &ierr); + lwork_dgebrd__ = (integer) dum[0]; +/* Compute space needed for DORGBR P */ + dorgbr_((char *)"P", m, m, m, &a[a_offset], n, dum, dum, &c_n1, &ierr, ( + ftnlen)1); + lwork_dorgbr_p__ = (integer) dum[0]; +/* Compute space needed for DORGBR Q */ + dorgbr_((char *)"Q", m, m, m, &a[a_offset], n, dum, dum, &c_n1, &ierr, ( + ftnlen)1); + lwork_dorgbr_q__ = (integer) dum[0]; + if (*n >= mnthr) { + if (wntvn) { + +/* Path 1t(N much larger than M, JOBVT='N') */ + + maxwrk = *m + lwork_dgelqf__; +/* Computing MAX */ + i__2 = maxwrk, i__3 = *m * 3 + lwork_dgebrd__; + maxwrk = max(i__2,i__3); + if (wntuo || wntuas) { +/* Computing MAX */ + i__2 = maxwrk, i__3 = *m * 3 + lwork_dorgbr_q__; + maxwrk = max(i__2,i__3); + } + maxwrk = max(maxwrk,bdspac); +/* Computing MAX */ + i__2 = *m << 2; + minwrk = max(i__2,bdspac); + } else if (wntvo && wntun) { + +/* Path 2t(N much larger than M, JOBU='N', JOBVT='O') */ + + wrkbl = *m + lwork_dgelqf__; +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m + lwork_dorglq_m__; + wrkbl = max(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m * 3 + lwork_dgebrd__; + wrkbl = max(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m * 3 + lwork_dorgbr_p__; + wrkbl = max(i__2,i__3); + wrkbl = max(wrkbl,bdspac); +/* Computing MAX */ + i__2 = *m * *m + wrkbl, i__3 = *m * *m + *m * *n + *m; + maxwrk = max(i__2,i__3); +/* Computing MAX */ + i__2 = *m * 3 + *n; + minwrk = max(i__2,bdspac); + } else if (wntvo && wntuas) { + +/* Path 3t(N much larger than M, JOBU='S' or 'A', */ +/* JOBVT='O') */ + + wrkbl = *m + lwork_dgelqf__; +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m + lwork_dorglq_m__; + wrkbl = max(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m * 3 + lwork_dgebrd__; + wrkbl = max(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m * 3 + lwork_dorgbr_p__; + wrkbl = max(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m * 3 + lwork_dorgbr_q__; + wrkbl = max(i__2,i__3); + wrkbl = max(wrkbl,bdspac); +/* Computing MAX */ + i__2 = *m * *m + wrkbl, i__3 = *m * *m + *m * *n + *m; + maxwrk = max(i__2,i__3); +/* Computing MAX */ + i__2 = *m * 3 + *n; + minwrk = max(i__2,bdspac); + } else if (wntvs && wntun) { + +/* Path 4t(N much larger than M, JOBU='N', JOBVT='S') */ + + wrkbl = *m + lwork_dgelqf__; +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m + lwork_dorglq_m__; + wrkbl = max(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m * 3 + lwork_dgebrd__; + wrkbl = max(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m * 3 + lwork_dorgbr_p__; + wrkbl = max(i__2,i__3); + wrkbl = max(wrkbl,bdspac); + maxwrk = *m * *m + wrkbl; +/* Computing MAX */ + i__2 = *m * 3 + *n; + minwrk = max(i__2,bdspac); + } else if (wntvs && wntuo) { + +/* Path 5t(N much larger than M, JOBU='O', JOBVT='S') */ + + wrkbl = *m + lwork_dgelqf__; +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m + lwork_dorglq_m__; + wrkbl = max(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m * 3 + lwork_dgebrd__; + wrkbl = max(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m * 3 + lwork_dorgbr_p__; + wrkbl = max(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m * 3 + lwork_dorgbr_q__; + wrkbl = max(i__2,i__3); + wrkbl = max(wrkbl,bdspac); + maxwrk = (*m << 1) * *m + wrkbl; +/* Computing MAX */ + i__2 = *m * 3 + *n; + minwrk = max(i__2,bdspac); + } else if (wntvs && wntuas) { + +/* Path 6t(N much larger than M, JOBU='S' or 'A', */ +/* JOBVT='S') */ + + wrkbl = *m + lwork_dgelqf__; +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m + lwork_dorglq_m__; + wrkbl = max(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m * 3 + lwork_dgebrd__; + wrkbl = max(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m * 3 + lwork_dorgbr_p__; + wrkbl = max(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m * 3 + lwork_dorgbr_q__; + wrkbl = max(i__2,i__3); + wrkbl = max(wrkbl,bdspac); + maxwrk = *m * *m + wrkbl; +/* Computing MAX */ + i__2 = *m * 3 + *n; + minwrk = max(i__2,bdspac); + } else if (wntva && wntun) { + +/* Path 7t(N much larger than M, JOBU='N', JOBVT='A') */ + + wrkbl = *m + lwork_dgelqf__; +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m + lwork_dorglq_n__; + wrkbl = max(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m * 3 + lwork_dgebrd__; + wrkbl = max(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m * 3 + lwork_dorgbr_p__; + wrkbl = max(i__2,i__3); + wrkbl = max(wrkbl,bdspac); + maxwrk = *m * *m + wrkbl; +/* Computing MAX */ + i__2 = *m * 3 + *n; + minwrk = max(i__2,bdspac); + } else if (wntva && wntuo) { + +/* Path 8t(N much larger than M, JOBU='O', JOBVT='A') */ + + wrkbl = *m + lwork_dgelqf__; +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m + lwork_dorglq_n__; + wrkbl = max(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m * 3 + lwork_dgebrd__; + wrkbl = max(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m * 3 + lwork_dorgbr_p__; + wrkbl = max(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m * 3 + lwork_dorgbr_q__; + wrkbl = max(i__2,i__3); + wrkbl = max(wrkbl,bdspac); + maxwrk = (*m << 1) * *m + wrkbl; +/* Computing MAX */ + i__2 = *m * 3 + *n; + minwrk = max(i__2,bdspac); + } else if (wntva && wntuas) { + +/* Path 9t(N much larger than M, JOBU='S' or 'A', */ +/* JOBVT='A') */ + + wrkbl = *m + lwork_dgelqf__; +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m + lwork_dorglq_n__; + wrkbl = max(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m * 3 + lwork_dgebrd__; + wrkbl = max(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m * 3 + lwork_dorgbr_p__; + wrkbl = max(i__2,i__3); +/* Computing MAX */ + i__2 = wrkbl, i__3 = *m * 3 + lwork_dorgbr_q__; + wrkbl = max(i__2,i__3); + wrkbl = max(wrkbl,bdspac); + maxwrk = *m * *m + wrkbl; +/* Computing MAX */ + i__2 = *m * 3 + *n; + minwrk = max(i__2,bdspac); + } + } else { + +/* Path 10t(N greater than M, but not much larger) */ + + dgebrd_(m, n, &a[a_offset], lda, &s[1], dum, dum, dum, dum, & + c_n1, &ierr); + lwork_dgebrd__ = (integer) dum[0]; + maxwrk = *m * 3 + lwork_dgebrd__; + if (wntvs || wntvo) { +/* Compute space needed for DORGBR P */ + dorgbr_((char *)"P", m, n, m, &a[a_offset], n, dum, dum, &c_n1, & + ierr, (ftnlen)1); + lwork_dorgbr_p__ = (integer) dum[0]; +/* Computing MAX */ + i__2 = maxwrk, i__3 = *m * 3 + lwork_dorgbr_p__; + maxwrk = max(i__2,i__3); + } + if (wntva) { + dorgbr_((char *)"P", n, n, m, &a[a_offset], n, dum, dum, &c_n1, & + ierr, (ftnlen)1); + lwork_dorgbr_p__ = (integer) dum[0]; +/* Computing MAX */ + i__2 = maxwrk, i__3 = *m * 3 + lwork_dorgbr_p__; + maxwrk = max(i__2,i__3); + } + if (! wntun) { +/* Computing MAX */ + i__2 = maxwrk, i__3 = *m * 3 + lwork_dorgbr_q__; + maxwrk = max(i__2,i__3); + } + maxwrk = max(maxwrk,bdspac); +/* Computing MAX */ + i__2 = *m * 3 + *n; + minwrk = max(i__2,bdspac); + } + } + maxwrk = max(maxwrk,minwrk); + work[1] = (doublereal) maxwrk; + + if (*lwork < minwrk && ! lquery) { + *info = -13; + } + } + + if (*info != 0) { + i__2 = -(*info); + xerbla_((char *)"DGESVD", &i__2, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + return 0; + } + +/* Get machine constants */ + + eps = dlamch_((char *)"P", (ftnlen)1); + smlnum = sqrt(dlamch_((char *)"S", (ftnlen)1)) / eps; + bignum = 1. / smlnum; + +/* Scale A if max element outside range [SMLNUM,BIGNUM] */ + + anrm = dlange_((char *)"M", m, n, &a[a_offset], lda, dum, (ftnlen)1); + iscl = 0; + if (anrm > 0. && anrm < smlnum) { + iscl = 1; + dlascl_((char *)"G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, & + ierr, (ftnlen)1); + } else if (anrm > bignum) { + iscl = 1; + dlascl_((char *)"G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, & + ierr, (ftnlen)1); + } + + if (*m >= *n) { + +/* A has at least as many rows as columns. If A has sufficiently */ +/* more rows than columns, first reduce using the QR */ +/* decomposition (if sufficient workspace available) */ + + if (*m >= mnthr) { + + if (wntun) { + +/* Path 1 (M much larger than N, JOBU='N') */ +/* No left singular vectors to be computed */ + + itau = 1; + iwork = itau + *n; + +/* Compute A=Q*R */ +/* (Workspace: need 2*N, prefer N + N*NB) */ + + i__2 = *lwork - iwork + 1; + dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork], & + i__2, &ierr); + +/* Zero out below R */ + + if (*n > 1) { + i__2 = *n - 1; + i__3 = *n - 1; + dlaset_((char *)"L", &i__2, &i__3, &c_b57, &c_b57, &a[a_dim1 + 2], + lda, (ftnlen)1); + } + ie = 1; + itauq = ie + *n; + itaup = itauq + *n; + iwork = itaup + *n; + +/* Bidiagonalize R in A */ +/* (Workspace: need 4*N, prefer 3*N + 2*N*NB) */ + + i__2 = *lwork - iwork + 1; + dgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[iwork], &i__2, &ierr); + ncvt = 0; + if (wntvo || wntvas) { + +/* If right singular vectors desired, generate P'. */ +/* (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB) */ + + i__2 = *lwork - iwork + 1; + dorgbr_((char *)"P", n, n, n, &a[a_offset], lda, &work[itaup], & + work[iwork], &i__2, &ierr, (ftnlen)1); + ncvt = *n; + } + iwork = ie + *n; + +/* Perform bidiagonal QR iteration, computing right */ +/* singular vectors of A in A if desired */ +/* (Workspace: need BDSPAC) */ + + dbdsqr_((char *)"U", n, &ncvt, &c__0, &c__0, &s[1], &work[ie], &a[ + a_offset], lda, dum, &c__1, dum, &c__1, &work[iwork], + info, (ftnlen)1); + +/* If right singular vectors desired in VT, copy them there */ + + if (wntvas) { + dlacpy_((char *)"F", n, n, &a[a_offset], lda, &vt[vt_offset], + ldvt, (ftnlen)1); + } + + } else if (wntuo && wntvn) { + +/* Path 2 (M much larger than N, JOBU='O', JOBVT='N') */ +/* N left singular vectors to be overwritten on A and */ +/* no right singular vectors to be computed */ + +/* Computing MAX */ + i__2 = *n << 2; + if (*lwork >= *n * *n + max(i__2,bdspac)) { + +/* Sufficient workspace for a fast algorithm */ + + ir = 1; +/* Computing MAX */ + i__2 = wrkbl, i__3 = *lda * *n + *n; + if (*lwork >= max(i__2,i__3) + *lda * *n) { + +/* WORK(IU) is LDA by N, WORK(IR) is LDA by N */ + + ldwrku = *lda; + ldwrkr = *lda; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__2 = wrkbl, i__3 = *lda * *n + *n; + if (*lwork >= max(i__2,i__3) + *n * *n) { + +/* WORK(IU) is LDA by N, WORK(IR) is N by N */ + + ldwrku = *lda; + ldwrkr = *n; + } else { + +/* WORK(IU) is LDWRKU by N, WORK(IR) is N by N */ + + ldwrku = (*lwork - *n * *n - *n) / *n; + ldwrkr = *n; + } + } + itau = ir + ldwrkr * *n; + iwork = itau + *n; + +/* Compute A=Q*R */ +/* (Workspace: need N*N + 2*N, prefer N*N + N + N*NB) */ + + i__2 = *lwork - iwork + 1; + dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork] + , &i__2, &ierr); + +/* Copy R to WORK(IR) and zero out below it */ + + dlacpy_((char *)"U", n, n, &a[a_offset], lda, &work[ir], &ldwrkr, + (ftnlen)1); + i__2 = *n - 1; + i__3 = *n - 1; + dlaset_((char *)"L", &i__2, &i__3, &c_b57, &c_b57, &work[ir + 1], + &ldwrkr, (ftnlen)1); + +/* Generate Q in A */ +/* (Workspace: need N*N + 2*N, prefer N*N + N + N*NB) */ + + i__2 = *lwork - iwork + 1; + dorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + iwork = itaup + *n; + +/* Bidiagonalize R in WORK(IR) */ +/* (Workspace: need N*N + 4*N, prefer N*N + 3*N + 2*N*NB) */ + + i__2 = *lwork - iwork + 1; + dgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[iwork], &i__2, &ierr); + +/* Generate left vectors bidiagonalizing R */ +/* (Workspace: need N*N + 4*N, prefer N*N + 3*N + N*NB) */ + + i__2 = *lwork - iwork + 1; + dorgbr_((char *)"Q", n, n, n, &work[ir], &ldwrkr, &work[itauq], & + work[iwork], &i__2, &ierr, (ftnlen)1); + iwork = ie + *n; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of R in WORK(IR) */ +/* (Workspace: need N*N + BDSPAC) */ + + dbdsqr_((char *)"U", n, &c__0, n, &c__0, &s[1], &work[ie], dum, & + c__1, &work[ir], &ldwrkr, dum, &c__1, &work[iwork] + , info, (ftnlen)1); + iu = ie + *n; + +/* Multiply Q in A by left singular vectors of R in */ +/* WORK(IR), storing result in WORK(IU) and copying to A */ +/* (Workspace: need N*N + 2*N, prefer N*N + M*N + N) */ + + i__2 = *m; + i__3 = ldwrku; + for (i__ = 1; i__3 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += + i__3) { +/* Computing MIN */ + i__4 = *m - i__ + 1; + chunk = min(i__4,ldwrku); + dgemm_((char *)"N", (char *)"N", &chunk, n, n, &c_b79, &a[i__ + + a_dim1], lda, &work[ir], &ldwrkr, &c_b57, & + work[iu], &ldwrku, (ftnlen)1, (ftnlen)1); + dlacpy_((char *)"F", &chunk, n, &work[iu], &ldwrku, &a[i__ + + a_dim1], lda, (ftnlen)1); +/* L10: */ + } + + } else { + +/* Insufficient workspace for a fast algorithm */ + + ie = 1; + itauq = ie + *n; + itaup = itauq + *n; + iwork = itaup + *n; + +/* Bidiagonalize A */ +/* (Workspace: need 3*N + M, prefer 3*N + (M + N)*NB) */ + + i__3 = *lwork - iwork + 1; + dgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[iwork], &i__3, &ierr); + +/* Generate left vectors bidiagonalizing A */ +/* (Workspace: need 4*N, prefer 3*N + N*NB) */ + + i__3 = *lwork - iwork + 1; + dorgbr_((char *)"Q", m, n, n, &a[a_offset], lda, &work[itauq], & + work[iwork], &i__3, &ierr, (ftnlen)1); + iwork = ie + *n; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of A in A */ +/* (Workspace: need BDSPAC) */ + + dbdsqr_((char *)"U", n, &c__0, m, &c__0, &s[1], &work[ie], dum, & + c__1, &a[a_offset], lda, dum, &c__1, &work[iwork], + info, (ftnlen)1); + + } + + } else if (wntuo && wntvas) { + +/* Path 3 (M much larger than N, JOBU='O', JOBVT='S' or 'A') */ +/* N left singular vectors to be overwritten on A and */ +/* N right singular vectors to be computed in VT */ + +/* Computing MAX */ + i__3 = *n << 2; + if (*lwork >= *n * *n + max(i__3,bdspac)) { + +/* Sufficient workspace for a fast algorithm */ + + ir = 1; +/* Computing MAX */ + i__3 = wrkbl, i__2 = *lda * *n + *n; + if (*lwork >= max(i__3,i__2) + *lda * *n) { + +/* WORK(IU) is LDA by N and WORK(IR) is LDA by N */ + + ldwrku = *lda; + ldwrkr = *lda; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__3 = wrkbl, i__2 = *lda * *n + *n; + if (*lwork >= max(i__3,i__2) + *n * *n) { + +/* WORK(IU) is LDA by N and WORK(IR) is N by N */ + + ldwrku = *lda; + ldwrkr = *n; + } else { + +/* WORK(IU) is LDWRKU by N and WORK(IR) is N by N */ + + ldwrku = (*lwork - *n * *n - *n) / *n; + ldwrkr = *n; + } + } + itau = ir + ldwrkr * *n; + iwork = itau + *n; + +/* Compute A=Q*R */ +/* (Workspace: need N*N + 2*N, prefer N*N + N + N*NB) */ + + i__3 = *lwork - iwork + 1; + dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork] + , &i__3, &ierr); + +/* Copy R to VT, zeroing out below it */ + + dlacpy_((char *)"U", n, n, &a[a_offset], lda, &vt[vt_offset], + ldvt, (ftnlen)1); + if (*n > 1) { + i__3 = *n - 1; + i__2 = *n - 1; + dlaset_((char *)"L", &i__3, &i__2, &c_b57, &c_b57, &vt[ + vt_dim1 + 2], ldvt, (ftnlen)1); + } + +/* Generate Q in A */ +/* (Workspace: need N*N + 2*N, prefer N*N + N + N*NB) */ + + i__3 = *lwork - iwork + 1; + dorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__3, &ierr); + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + iwork = itaup + *n; + +/* Bidiagonalize R in VT, copying result to WORK(IR) */ +/* (Workspace: need N*N + 4*N, prefer N*N + 3*N + 2*N*NB) */ + + i__3 = *lwork - iwork + 1; + dgebrd_(n, n, &vt[vt_offset], ldvt, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], &i__3, & + ierr); + dlacpy_((char *)"L", n, n, &vt[vt_offset], ldvt, &work[ir], & + ldwrkr, (ftnlen)1); + +/* Generate left vectors bidiagonalizing R in WORK(IR) */ +/* (Workspace: need N*N + 4*N, prefer N*N + 3*N + N*NB) */ + + i__3 = *lwork - iwork + 1; + dorgbr_((char *)"Q", n, n, n, &work[ir], &ldwrkr, &work[itauq], & + work[iwork], &i__3, &ierr, (ftnlen)1); + +/* Generate right vectors bidiagonalizing R in VT */ +/* (Workspace: need N*N + 4*N-1, prefer N*N + 3*N + (N-1)*NB) */ + + i__3 = *lwork - iwork + 1; + dorgbr_((char *)"P", n, n, n, &vt[vt_offset], ldvt, &work[itaup], + &work[iwork], &i__3, &ierr, (ftnlen)1); + iwork = ie + *n; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of R in WORK(IR) and computing right */ +/* singular vectors of R in VT */ +/* (Workspace: need N*N + BDSPAC) */ + + dbdsqr_((char *)"U", n, n, n, &c__0, &s[1], &work[ie], &vt[ + vt_offset], ldvt, &work[ir], &ldwrkr, dum, &c__1, + &work[iwork], info, (ftnlen)1); + iu = ie + *n; + +/* Multiply Q in A by left singular vectors of R in */ +/* WORK(IR), storing result in WORK(IU) and copying to A */ +/* (Workspace: need N*N + 2*N, prefer N*N + M*N + N) */ + + i__3 = *m; + i__2 = ldwrku; + for (i__ = 1; i__2 < 0 ? i__ >= i__3 : i__ <= i__3; i__ += + i__2) { +/* Computing MIN */ + i__4 = *m - i__ + 1; + chunk = min(i__4,ldwrku); + dgemm_((char *)"N", (char *)"N", &chunk, n, n, &c_b79, &a[i__ + + a_dim1], lda, &work[ir], &ldwrkr, &c_b57, & + work[iu], &ldwrku, (ftnlen)1, (ftnlen)1); + dlacpy_((char *)"F", &chunk, n, &work[iu], &ldwrku, &a[i__ + + a_dim1], lda, (ftnlen)1); +/* L20: */ + } + + } else { + +/* Insufficient workspace for a fast algorithm */ + + itau = 1; + iwork = itau + *n; + +/* Compute A=Q*R */ +/* (Workspace: need 2*N, prefer N + N*NB) */ + + i__2 = *lwork - iwork + 1; + dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork] + , &i__2, &ierr); + +/* Copy R to VT, zeroing out below it */ + + dlacpy_((char *)"U", n, n, &a[a_offset], lda, &vt[vt_offset], + ldvt, (ftnlen)1); + if (*n > 1) { + i__2 = *n - 1; + i__3 = *n - 1; + dlaset_((char *)"L", &i__2, &i__3, &c_b57, &c_b57, &vt[ + vt_dim1 + 2], ldvt, (ftnlen)1); + } + +/* Generate Q in A */ +/* (Workspace: need 2*N, prefer N + N*NB) */ + + i__2 = *lwork - iwork + 1; + dorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + iwork = itaup + *n; + +/* Bidiagonalize R in VT */ +/* (Workspace: need 4*N, prefer 3*N + 2*N*NB) */ + + i__2 = *lwork - iwork + 1; + dgebrd_(n, n, &vt[vt_offset], ldvt, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], &i__2, & + ierr); + +/* Multiply Q in A by left vectors bidiagonalizing R */ +/* (Workspace: need 3*N + M, prefer 3*N + M*NB) */ + + i__2 = *lwork - iwork + 1; + dormbr_((char *)"Q", (char *)"R", (char *)"N", m, n, n, &vt[vt_offset], ldvt, & + work[itauq], &a[a_offset], lda, &work[iwork], & + i__2, &ierr, (ftnlen)1, (ftnlen)1, (ftnlen)1); + +/* Generate right vectors bidiagonalizing R in VT */ +/* (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB) */ + + i__2 = *lwork - iwork + 1; + dorgbr_((char *)"P", n, n, n, &vt[vt_offset], ldvt, &work[itaup], + &work[iwork], &i__2, &ierr, (ftnlen)1); + iwork = ie + *n; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of A in A and computing right */ +/* singular vectors of A in VT */ +/* (Workspace: need BDSPAC) */ + + dbdsqr_((char *)"U", n, n, m, &c__0, &s[1], &work[ie], &vt[ + vt_offset], ldvt, &a[a_offset], lda, dum, &c__1, & + work[iwork], info, (ftnlen)1); + + } + + } else if (wntus) { + + if (wntvn) { + +/* Path 4 (M much larger than N, JOBU='S', JOBVT='N') */ +/* N left singular vectors to be computed in U and */ +/* no right singular vectors to be computed */ + +/* Computing MAX */ + i__2 = *n << 2; + if (*lwork >= *n * *n + max(i__2,bdspac)) { + +/* Sufficient workspace for a fast algorithm */ + + ir = 1; + if (*lwork >= wrkbl + *lda * *n) { + +/* WORK(IR) is LDA by N */ + + ldwrkr = *lda; + } else { + +/* WORK(IR) is N by N */ + + ldwrkr = *n; + } + itau = ir + ldwrkr * *n; + iwork = itau + *n; + +/* Compute A=Q*R */ +/* (Workspace: need N*N + 2*N, prefer N*N + N + N*NB) */ + + i__2 = *lwork - iwork + 1; + dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + +/* Copy R to WORK(IR), zeroing out below it */ + + dlacpy_((char *)"U", n, n, &a[a_offset], lda, &work[ir], & + ldwrkr, (ftnlen)1); + i__2 = *n - 1; + i__3 = *n - 1; + dlaset_((char *)"L", &i__2, &i__3, &c_b57, &c_b57, &work[ir + + 1], &ldwrkr, (ftnlen)1); + +/* Generate Q in A */ +/* (Workspace: need N*N + 2*N, prefer N*N + N + N*NB) */ + + i__2 = *lwork - iwork + 1; + dorgqr_(m, n, n, &a[a_offset], lda, &work[itau], & + work[iwork], &i__2, &ierr); + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + iwork = itaup + *n; + +/* Bidiagonalize R in WORK(IR) */ +/* (Workspace: need N*N + 4*N, prefer N*N + 3*N + 2*N*NB) */ + + i__2 = *lwork - iwork + 1; + dgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + +/* Generate left vectors bidiagonalizing R in WORK(IR) */ +/* (Workspace: need N*N + 4*N, prefer N*N + 3*N + N*NB) */ + + i__2 = *lwork - iwork + 1; + dorgbr_((char *)"Q", n, n, n, &work[ir], &ldwrkr, &work[itauq] + , &work[iwork], &i__2, &ierr, (ftnlen)1); + iwork = ie + *n; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of R in WORK(IR) */ +/* (Workspace: need N*N + BDSPAC) */ + + dbdsqr_((char *)"U", n, &c__0, n, &c__0, &s[1], &work[ie], + dum, &c__1, &work[ir], &ldwrkr, dum, &c__1, & + work[iwork], info, (ftnlen)1); + +/* Multiply Q in A by left singular vectors of R in */ +/* WORK(IR), storing result in U */ +/* (Workspace: need N*N) */ + + dgemm_((char *)"N", (char *)"N", m, n, n, &c_b79, &a[a_offset], lda, & + work[ir], &ldwrkr, &c_b57, &u[u_offset], ldu, + (ftnlen)1, (ftnlen)1); + + } else { + +/* Insufficient workspace for a fast algorithm */ + + itau = 1; + iwork = itau + *n; + +/* Compute A=Q*R, copying result to U */ +/* (Workspace: need 2*N, prefer N + N*NB) */ + + i__2 = *lwork - iwork + 1; + dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + dlacpy_((char *)"L", m, n, &a[a_offset], lda, &u[u_offset], + ldu, (ftnlen)1); + +/* Generate Q in U */ +/* (Workspace: need 2*N, prefer N + N*NB) */ + + i__2 = *lwork - iwork + 1; + dorgqr_(m, n, n, &u[u_offset], ldu, &work[itau], & + work[iwork], &i__2, &ierr); + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + iwork = itaup + *n; + +/* Zero out below R in A */ + + if (*n > 1) { + i__2 = *n - 1; + i__3 = *n - 1; + dlaset_((char *)"L", &i__2, &i__3, &c_b57, &c_b57, &a[ + a_dim1 + 2], lda, (ftnlen)1); + } + +/* Bidiagonalize R in A */ +/* (Workspace: need 4*N, prefer 3*N + 2*N*NB) */ + + i__2 = *lwork - iwork + 1; + dgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + +/* Multiply Q in U by left vectors bidiagonalizing R */ +/* (Workspace: need 3*N + M, prefer 3*N + M*NB) */ + + i__2 = *lwork - iwork + 1; + dormbr_((char *)"Q", (char *)"R", (char *)"N", m, n, n, &a[a_offset], lda, & + work[itauq], &u[u_offset], ldu, &work[iwork], + &i__2, &ierr, (ftnlen)1, (ftnlen)1, (ftnlen)1) + ; + iwork = ie + *n; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of A in U */ +/* (Workspace: need BDSPAC) */ + + dbdsqr_((char *)"U", n, &c__0, m, &c__0, &s[1], &work[ie], + dum, &c__1, &u[u_offset], ldu, dum, &c__1, & + work[iwork], info, (ftnlen)1); + + } + + } else if (wntvo) { + +/* Path 5 (M much larger than N, JOBU='S', JOBVT='O') */ +/* N left singular vectors to be computed in U and */ +/* N right singular vectors to be overwritten on A */ + +/* Computing MAX */ + i__2 = *n << 2; + if (*lwork >= (*n << 1) * *n + max(i__2,bdspac)) { + +/* Sufficient workspace for a fast algorithm */ + + iu = 1; + if (*lwork >= wrkbl + (*lda << 1) * *n) { + +/* WORK(IU) is LDA by N and WORK(IR) is LDA by N */ + + ldwrku = *lda; + ir = iu + ldwrku * *n; + ldwrkr = *lda; + } else if (*lwork >= wrkbl + (*lda + *n) * *n) { + +/* WORK(IU) is LDA by N and WORK(IR) is N by N */ + + ldwrku = *lda; + ir = iu + ldwrku * *n; + ldwrkr = *n; + } else { + +/* WORK(IU) is N by N and WORK(IR) is N by N */ + + ldwrku = *n; + ir = iu + ldwrku * *n; + ldwrkr = *n; + } + itau = ir + ldwrkr * *n; + iwork = itau + *n; + +/* Compute A=Q*R */ +/* (Workspace: need 2*N*N + 2*N, prefer 2*N*N + N + N*NB) */ + + i__2 = *lwork - iwork + 1; + dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + +/* Copy R to WORK(IU), zeroing out below it */ + + dlacpy_((char *)"U", n, n, &a[a_offset], lda, &work[iu], & + ldwrku, (ftnlen)1); + i__2 = *n - 1; + i__3 = *n - 1; + dlaset_((char *)"L", &i__2, &i__3, &c_b57, &c_b57, &work[iu + + 1], &ldwrku, (ftnlen)1); + +/* Generate Q in A */ +/* (Workspace: need 2*N*N + 2*N, prefer 2*N*N + N + N*NB) */ + + i__2 = *lwork - iwork + 1; + dorgqr_(m, n, n, &a[a_offset], lda, &work[itau], & + work[iwork], &i__2, &ierr); + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + iwork = itaup + *n; + +/* Bidiagonalize R in WORK(IU), copying result to */ +/* WORK(IR) */ +/* (Workspace: need 2*N*N + 4*N, */ +/* prefer 2*N*N+3*N+2*N*NB) */ + + i__2 = *lwork - iwork + 1; + dgebrd_(n, n, &work[iu], &ldwrku, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + dlacpy_((char *)"U", n, n, &work[iu], &ldwrku, &work[ir], & + ldwrkr, (ftnlen)1); + +/* Generate left bidiagonalizing vectors in WORK(IU) */ +/* (Workspace: need 2*N*N + 4*N, prefer 2*N*N + 3*N + N*NB) */ + + i__2 = *lwork - iwork + 1; + dorgbr_((char *)"Q", n, n, n, &work[iu], &ldwrku, &work[itauq] + , &work[iwork], &i__2, &ierr, (ftnlen)1); + +/* Generate right bidiagonalizing vectors in WORK(IR) */ +/* (Workspace: need 2*N*N + 4*N-1, */ +/* prefer 2*N*N+3*N+(N-1)*NB) */ + + i__2 = *lwork - iwork + 1; + dorgbr_((char *)"P", n, n, n, &work[ir], &ldwrkr, &work[itaup] + , &work[iwork], &i__2, &ierr, (ftnlen)1); + iwork = ie + *n; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of R in WORK(IU) and computing */ +/* right singular vectors of R in WORK(IR) */ +/* (Workspace: need 2*N*N + BDSPAC) */ + + dbdsqr_((char *)"U", n, n, n, &c__0, &s[1], &work[ie], &work[ + ir], &ldwrkr, &work[iu], &ldwrku, dum, &c__1, + &work[iwork], info, (ftnlen)1); + +/* Multiply Q in A by left singular vectors of R in */ +/* WORK(IU), storing result in U */ +/* (Workspace: need N*N) */ + + dgemm_((char *)"N", (char *)"N", m, n, n, &c_b79, &a[a_offset], lda, & + work[iu], &ldwrku, &c_b57, &u[u_offset], ldu, + (ftnlen)1, (ftnlen)1); + +/* Copy right singular vectors of R to A */ +/* (Workspace: need N*N) */ + + dlacpy_((char *)"F", n, n, &work[ir], &ldwrkr, &a[a_offset], + lda, (ftnlen)1); + + } else { + +/* Insufficient workspace for a fast algorithm */ + + itau = 1; + iwork = itau + *n; + +/* Compute A=Q*R, copying result to U */ +/* (Workspace: need 2*N, prefer N + N*NB) */ + + i__2 = *lwork - iwork + 1; + dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + dlacpy_((char *)"L", m, n, &a[a_offset], lda, &u[u_offset], + ldu, (ftnlen)1); + +/* Generate Q in U */ +/* (Workspace: need 2*N, prefer N + N*NB) */ + + i__2 = *lwork - iwork + 1; + dorgqr_(m, n, n, &u[u_offset], ldu, &work[itau], & + work[iwork], &i__2, &ierr); + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + iwork = itaup + *n; + +/* Zero out below R in A */ + + if (*n > 1) { + i__2 = *n - 1; + i__3 = *n - 1; + dlaset_((char *)"L", &i__2, &i__3, &c_b57, &c_b57, &a[ + a_dim1 + 2], lda, (ftnlen)1); + } + +/* Bidiagonalize R in A */ +/* (Workspace: need 4*N, prefer 3*N + 2*N*NB) */ + + i__2 = *lwork - iwork + 1; + dgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + +/* Multiply Q in U by left vectors bidiagonalizing R */ +/* (Workspace: need 3*N + M, prefer 3*N + M*NB) */ + + i__2 = *lwork - iwork + 1; + dormbr_((char *)"Q", (char *)"R", (char *)"N", m, n, n, &a[a_offset], lda, & + work[itauq], &u[u_offset], ldu, &work[iwork], + &i__2, &ierr, (ftnlen)1, (ftnlen)1, (ftnlen)1) + ; + +/* Generate right vectors bidiagonalizing R in A */ +/* (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB) */ + + i__2 = *lwork - iwork + 1; + dorgbr_((char *)"P", n, n, n, &a[a_offset], lda, &work[itaup], + &work[iwork], &i__2, &ierr, (ftnlen)1); + iwork = ie + *n; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of A in U and computing right */ +/* singular vectors of A in A */ +/* (Workspace: need BDSPAC) */ + + dbdsqr_((char *)"U", n, n, m, &c__0, &s[1], &work[ie], &a[ + a_offset], lda, &u[u_offset], ldu, dum, &c__1, + &work[iwork], info, (ftnlen)1); + + } + + } else if (wntvas) { + +/* Path 6 (M much larger than N, JOBU='S', JOBVT='S' */ +/* or 'A') */ +/* N left singular vectors to be computed in U and */ +/* N right singular vectors to be computed in VT */ + +/* Computing MAX */ + i__2 = *n << 2; + if (*lwork >= *n * *n + max(i__2,bdspac)) { + +/* Sufficient workspace for a fast algorithm */ + + iu = 1; + if (*lwork >= wrkbl + *lda * *n) { + +/* WORK(IU) is LDA by N */ + + ldwrku = *lda; + } else { + +/* WORK(IU) is N by N */ + + ldwrku = *n; + } + itau = iu + ldwrku * *n; + iwork = itau + *n; + +/* Compute A=Q*R */ +/* (Workspace: need N*N + 2*N, prefer N*N + N + N*NB) */ + + i__2 = *lwork - iwork + 1; + dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + +/* Copy R to WORK(IU), zeroing out below it */ + + dlacpy_((char *)"U", n, n, &a[a_offset], lda, &work[iu], & + ldwrku, (ftnlen)1); + i__2 = *n - 1; + i__3 = *n - 1; + dlaset_((char *)"L", &i__2, &i__3, &c_b57, &c_b57, &work[iu + + 1], &ldwrku, (ftnlen)1); + +/* Generate Q in A */ +/* (Workspace: need N*N + 2*N, prefer N*N + N + N*NB) */ + + i__2 = *lwork - iwork + 1; + dorgqr_(m, n, n, &a[a_offset], lda, &work[itau], & + work[iwork], &i__2, &ierr); + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + iwork = itaup + *n; + +/* Bidiagonalize R in WORK(IU), copying result to VT */ +/* (Workspace: need N*N + 4*N, prefer N*N + 3*N + 2*N*NB) */ + + i__2 = *lwork - iwork + 1; + dgebrd_(n, n, &work[iu], &ldwrku, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + dlacpy_((char *)"U", n, n, &work[iu], &ldwrku, &vt[vt_offset], + ldvt, (ftnlen)1); + +/* Generate left bidiagonalizing vectors in WORK(IU) */ +/* (Workspace: need N*N + 4*N, prefer N*N + 3*N + N*NB) */ + + i__2 = *lwork - iwork + 1; + dorgbr_((char *)"Q", n, n, n, &work[iu], &ldwrku, &work[itauq] + , &work[iwork], &i__2, &ierr, (ftnlen)1); + +/* Generate right bidiagonalizing vectors in VT */ +/* (Workspace: need N*N + 4*N-1, */ +/* prefer N*N+3*N+(N-1)*NB) */ + + i__2 = *lwork - iwork + 1; + dorgbr_((char *)"P", n, n, n, &vt[vt_offset], ldvt, &work[ + itaup], &work[iwork], &i__2, &ierr, (ftnlen)1) + ; + iwork = ie + *n; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of R in WORK(IU) and computing */ +/* right singular vectors of R in VT */ +/* (Workspace: need N*N + BDSPAC) */ + + dbdsqr_((char *)"U", n, n, n, &c__0, &s[1], &work[ie], &vt[ + vt_offset], ldvt, &work[iu], &ldwrku, dum, & + c__1, &work[iwork], info, (ftnlen)1); + +/* Multiply Q in A by left singular vectors of R in */ +/* WORK(IU), storing result in U */ +/* (Workspace: need N*N) */ + + dgemm_((char *)"N", (char *)"N", m, n, n, &c_b79, &a[a_offset], lda, & + work[iu], &ldwrku, &c_b57, &u[u_offset], ldu, + (ftnlen)1, (ftnlen)1); + + } else { + +/* Insufficient workspace for a fast algorithm */ + + itau = 1; + iwork = itau + *n; + +/* Compute A=Q*R, copying result to U */ +/* (Workspace: need 2*N, prefer N + N*NB) */ + + i__2 = *lwork - iwork + 1; + dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + dlacpy_((char *)"L", m, n, &a[a_offset], lda, &u[u_offset], + ldu, (ftnlen)1); + +/* Generate Q in U */ +/* (Workspace: need 2*N, prefer N + N*NB) */ + + i__2 = *lwork - iwork + 1; + dorgqr_(m, n, n, &u[u_offset], ldu, &work[itau], & + work[iwork], &i__2, &ierr); + +/* Copy R to VT, zeroing out below it */ + + dlacpy_((char *)"U", n, n, &a[a_offset], lda, &vt[vt_offset], + ldvt, (ftnlen)1); + if (*n > 1) { + i__2 = *n - 1; + i__3 = *n - 1; + dlaset_((char *)"L", &i__2, &i__3, &c_b57, &c_b57, &vt[ + vt_dim1 + 2], ldvt, (ftnlen)1); + } + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + iwork = itaup + *n; + +/* Bidiagonalize R in VT */ +/* (Workspace: need 4*N, prefer 3*N + 2*N*NB) */ + + i__2 = *lwork - iwork + 1; + dgebrd_(n, n, &vt[vt_offset], ldvt, &s[1], &work[ie], + &work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + +/* Multiply Q in U by left bidiagonalizing vectors */ +/* in VT */ +/* (Workspace: need 3*N + M, prefer 3*N + M*NB) */ + + i__2 = *lwork - iwork + 1; + dormbr_((char *)"Q", (char *)"R", (char *)"N", m, n, n, &vt[vt_offset], ldvt, + &work[itauq], &u[u_offset], ldu, &work[iwork], + &i__2, &ierr, (ftnlen)1, (ftnlen)1, (ftnlen) + 1); + +/* Generate right bidiagonalizing vectors in VT */ +/* (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB) */ + + i__2 = *lwork - iwork + 1; + dorgbr_((char *)"P", n, n, n, &vt[vt_offset], ldvt, &work[ + itaup], &work[iwork], &i__2, &ierr, (ftnlen)1) + ; + iwork = ie + *n; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of A in U and computing right */ +/* singular vectors of A in VT */ +/* (Workspace: need BDSPAC) */ + + dbdsqr_((char *)"U", n, n, m, &c__0, &s[1], &work[ie], &vt[ + vt_offset], ldvt, &u[u_offset], ldu, dum, & + c__1, &work[iwork], info, (ftnlen)1); + + } + + } + + } else if (wntua) { + + if (wntvn) { + +/* Path 7 (M much larger than N, JOBU='A', JOBVT='N') */ +/* M left singular vectors to be computed in U and */ +/* no right singular vectors to be computed */ + +/* Computing MAX */ + i__2 = *n + *m, i__3 = *n << 2, i__2 = max(i__2,i__3); + if (*lwork >= *n * *n + max(i__2,bdspac)) { + +/* Sufficient workspace for a fast algorithm */ + + ir = 1; + if (*lwork >= wrkbl + *lda * *n) { + +/* WORK(IR) is LDA by N */ + + ldwrkr = *lda; + } else { + +/* WORK(IR) is N by N */ + + ldwrkr = *n; + } + itau = ir + ldwrkr * *n; + iwork = itau + *n; + +/* Compute A=Q*R, copying result to U */ +/* (Workspace: need N*N + 2*N, prefer N*N + N + N*NB) */ + + i__2 = *lwork - iwork + 1; + dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + dlacpy_((char *)"L", m, n, &a[a_offset], lda, &u[u_offset], + ldu, (ftnlen)1); + +/* Copy R to WORK(IR), zeroing out below it */ + + dlacpy_((char *)"U", n, n, &a[a_offset], lda, &work[ir], & + ldwrkr, (ftnlen)1); + i__2 = *n - 1; + i__3 = *n - 1; + dlaset_((char *)"L", &i__2, &i__3, &c_b57, &c_b57, &work[ir + + 1], &ldwrkr, (ftnlen)1); + +/* Generate Q in U */ +/* (Workspace: need N*N + N + M, prefer N*N + N + M*NB) */ + + i__2 = *lwork - iwork + 1; + dorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], & + work[iwork], &i__2, &ierr); + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + iwork = itaup + *n; + +/* Bidiagonalize R in WORK(IR) */ +/* (Workspace: need N*N + 4*N, prefer N*N + 3*N + 2*N*NB) */ + + i__2 = *lwork - iwork + 1; + dgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + +/* Generate left bidiagonalizing vectors in WORK(IR) */ +/* (Workspace: need N*N + 4*N, prefer N*N + 3*N + N*NB) */ + + i__2 = *lwork - iwork + 1; + dorgbr_((char *)"Q", n, n, n, &work[ir], &ldwrkr, &work[itauq] + , &work[iwork], &i__2, &ierr, (ftnlen)1); + iwork = ie + *n; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of R in WORK(IR) */ +/* (Workspace: need N*N + BDSPAC) */ + + dbdsqr_((char *)"U", n, &c__0, n, &c__0, &s[1], &work[ie], + dum, &c__1, &work[ir], &ldwrkr, dum, &c__1, & + work[iwork], info, (ftnlen)1); + +/* Multiply Q in U by left singular vectors of R in */ +/* WORK(IR), storing result in A */ +/* (Workspace: need N*N) */ + + dgemm_((char *)"N", (char *)"N", m, n, n, &c_b79, &u[u_offset], ldu, & + work[ir], &ldwrkr, &c_b57, &a[a_offset], lda, + (ftnlen)1, (ftnlen)1); + +/* Copy left singular vectors of A from A to U */ + + dlacpy_((char *)"F", m, n, &a[a_offset], lda, &u[u_offset], + ldu, (ftnlen)1); + + } else { + +/* Insufficient workspace for a fast algorithm */ + + itau = 1; + iwork = itau + *n; + +/* Compute A=Q*R, copying result to U */ +/* (Workspace: need 2*N, prefer N + N*NB) */ + + i__2 = *lwork - iwork + 1; + dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + dlacpy_((char *)"L", m, n, &a[a_offset], lda, &u[u_offset], + ldu, (ftnlen)1); + +/* Generate Q in U */ +/* (Workspace: need N + M, prefer N + M*NB) */ + + i__2 = *lwork - iwork + 1; + dorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], & + work[iwork], &i__2, &ierr); + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + iwork = itaup + *n; + +/* Zero out below R in A */ + + if (*n > 1) { + i__2 = *n - 1; + i__3 = *n - 1; + dlaset_((char *)"L", &i__2, &i__3, &c_b57, &c_b57, &a[ + a_dim1 + 2], lda, (ftnlen)1); + } + +/* Bidiagonalize R in A */ +/* (Workspace: need 4*N, prefer 3*N + 2*N*NB) */ + + i__2 = *lwork - iwork + 1; + dgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + +/* Multiply Q in U by left bidiagonalizing vectors */ +/* in A */ +/* (Workspace: need 3*N + M, prefer 3*N + M*NB) */ + + i__2 = *lwork - iwork + 1; + dormbr_((char *)"Q", (char *)"R", (char *)"N", m, n, n, &a[a_offset], lda, & + work[itauq], &u[u_offset], ldu, &work[iwork], + &i__2, &ierr, (ftnlen)1, (ftnlen)1, (ftnlen)1) + ; + iwork = ie + *n; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of A in U */ +/* (Workspace: need BDSPAC) */ + + dbdsqr_((char *)"U", n, &c__0, m, &c__0, &s[1], &work[ie], + dum, &c__1, &u[u_offset], ldu, dum, &c__1, & + work[iwork], info, (ftnlen)1); + + } + + } else if (wntvo) { + +/* Path 8 (M much larger than N, JOBU='A', JOBVT='O') */ +/* M left singular vectors to be computed in U and */ +/* N right singular vectors to be overwritten on A */ + +/* Computing MAX */ + i__2 = *n + *m, i__3 = *n << 2, i__2 = max(i__2,i__3); + if (*lwork >= (*n << 1) * *n + max(i__2,bdspac)) { + +/* Sufficient workspace for a fast algorithm */ + + iu = 1; + if (*lwork >= wrkbl + (*lda << 1) * *n) { + +/* WORK(IU) is LDA by N and WORK(IR) is LDA by N */ + + ldwrku = *lda; + ir = iu + ldwrku * *n; + ldwrkr = *lda; + } else if (*lwork >= wrkbl + (*lda + *n) * *n) { + +/* WORK(IU) is LDA by N and WORK(IR) is N by N */ + + ldwrku = *lda; + ir = iu + ldwrku * *n; + ldwrkr = *n; + } else { + +/* WORK(IU) is N by N and WORK(IR) is N by N */ + + ldwrku = *n; + ir = iu + ldwrku * *n; + ldwrkr = *n; + } + itau = ir + ldwrkr * *n; + iwork = itau + *n; + +/* Compute A=Q*R, copying result to U */ +/* (Workspace: need 2*N*N + 2*N, prefer 2*N*N + N + N*NB) */ + + i__2 = *lwork - iwork + 1; + dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + dlacpy_((char *)"L", m, n, &a[a_offset], lda, &u[u_offset], + ldu, (ftnlen)1); + +/* Generate Q in U */ +/* (Workspace: need 2*N*N + N + M, prefer 2*N*N + N + M*NB) */ + + i__2 = *lwork - iwork + 1; + dorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], & + work[iwork], &i__2, &ierr); + +/* Copy R to WORK(IU), zeroing out below it */ + + dlacpy_((char *)"U", n, n, &a[a_offset], lda, &work[iu], & + ldwrku, (ftnlen)1); + i__2 = *n - 1; + i__3 = *n - 1; + dlaset_((char *)"L", &i__2, &i__3, &c_b57, &c_b57, &work[iu + + 1], &ldwrku, (ftnlen)1); + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + iwork = itaup + *n; + +/* Bidiagonalize R in WORK(IU), copying result to */ +/* WORK(IR) */ +/* (Workspace: need 2*N*N + 4*N, */ +/* prefer 2*N*N+3*N+2*N*NB) */ + + i__2 = *lwork - iwork + 1; + dgebrd_(n, n, &work[iu], &ldwrku, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + dlacpy_((char *)"U", n, n, &work[iu], &ldwrku, &work[ir], & + ldwrkr, (ftnlen)1); + +/* Generate left bidiagonalizing vectors in WORK(IU) */ +/* (Workspace: need 2*N*N + 4*N, prefer 2*N*N + 3*N + N*NB) */ + + i__2 = *lwork - iwork + 1; + dorgbr_((char *)"Q", n, n, n, &work[iu], &ldwrku, &work[itauq] + , &work[iwork], &i__2, &ierr, (ftnlen)1); + +/* Generate right bidiagonalizing vectors in WORK(IR) */ +/* (Workspace: need 2*N*N + 4*N-1, */ +/* prefer 2*N*N+3*N+(N-1)*NB) */ + + i__2 = *lwork - iwork + 1; + dorgbr_((char *)"P", n, n, n, &work[ir], &ldwrkr, &work[itaup] + , &work[iwork], &i__2, &ierr, (ftnlen)1); + iwork = ie + *n; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of R in WORK(IU) and computing */ +/* right singular vectors of R in WORK(IR) */ +/* (Workspace: need 2*N*N + BDSPAC) */ + + dbdsqr_((char *)"U", n, n, n, &c__0, &s[1], &work[ie], &work[ + ir], &ldwrkr, &work[iu], &ldwrku, dum, &c__1, + &work[iwork], info, (ftnlen)1); + +/* Multiply Q in U by left singular vectors of R in */ +/* WORK(IU), storing result in A */ +/* (Workspace: need N*N) */ + + dgemm_((char *)"N", (char *)"N", m, n, n, &c_b79, &u[u_offset], ldu, & + work[iu], &ldwrku, &c_b57, &a[a_offset], lda, + (ftnlen)1, (ftnlen)1); + +/* Copy left singular vectors of A from A to U */ + + dlacpy_((char *)"F", m, n, &a[a_offset], lda, &u[u_offset], + ldu, (ftnlen)1); + +/* Copy right singular vectors of R from WORK(IR) to A */ + + dlacpy_((char *)"F", n, n, &work[ir], &ldwrkr, &a[a_offset], + lda, (ftnlen)1); + + } else { + +/* Insufficient workspace for a fast algorithm */ + + itau = 1; + iwork = itau + *n; + +/* Compute A=Q*R, copying result to U */ +/* (Workspace: need 2*N, prefer N + N*NB) */ + + i__2 = *lwork - iwork + 1; + dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + dlacpy_((char *)"L", m, n, &a[a_offset], lda, &u[u_offset], + ldu, (ftnlen)1); + +/* Generate Q in U */ +/* (Workspace: need N + M, prefer N + M*NB) */ + + i__2 = *lwork - iwork + 1; + dorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], & + work[iwork], &i__2, &ierr); + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + iwork = itaup + *n; + +/* Zero out below R in A */ + + if (*n > 1) { + i__2 = *n - 1; + i__3 = *n - 1; + dlaset_((char *)"L", &i__2, &i__3, &c_b57, &c_b57, &a[ + a_dim1 + 2], lda, (ftnlen)1); + } + +/* Bidiagonalize R in A */ +/* (Workspace: need 4*N, prefer 3*N + 2*N*NB) */ + + i__2 = *lwork - iwork + 1; + dgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + +/* Multiply Q in U by left bidiagonalizing vectors */ +/* in A */ +/* (Workspace: need 3*N + M, prefer 3*N + M*NB) */ + + i__2 = *lwork - iwork + 1; + dormbr_((char *)"Q", (char *)"R", (char *)"N", m, n, n, &a[a_offset], lda, & + work[itauq], &u[u_offset], ldu, &work[iwork], + &i__2, &ierr, (ftnlen)1, (ftnlen)1, (ftnlen)1) + ; + +/* Generate right bidiagonalizing vectors in A */ +/* (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB) */ + + i__2 = *lwork - iwork + 1; + dorgbr_((char *)"P", n, n, n, &a[a_offset], lda, &work[itaup], + &work[iwork], &i__2, &ierr, (ftnlen)1); + iwork = ie + *n; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of A in U and computing right */ +/* singular vectors of A in A */ +/* (Workspace: need BDSPAC) */ + + dbdsqr_((char *)"U", n, n, m, &c__0, &s[1], &work[ie], &a[ + a_offset], lda, &u[u_offset], ldu, dum, &c__1, + &work[iwork], info, (ftnlen)1); + + } + + } else if (wntvas) { + +/* Path 9 (M much larger than N, JOBU='A', JOBVT='S' */ +/* or 'A') */ +/* M left singular vectors to be computed in U and */ +/* N right singular vectors to be computed in VT */ + +/* Computing MAX */ + i__2 = *n + *m, i__3 = *n << 2, i__2 = max(i__2,i__3); + if (*lwork >= *n * *n + max(i__2,bdspac)) { + +/* Sufficient workspace for a fast algorithm */ + + iu = 1; + if (*lwork >= wrkbl + *lda * *n) { + +/* WORK(IU) is LDA by N */ + + ldwrku = *lda; + } else { + +/* WORK(IU) is N by N */ + + ldwrku = *n; + } + itau = iu + ldwrku * *n; + iwork = itau + *n; + +/* Compute A=Q*R, copying result to U */ +/* (Workspace: need N*N + 2*N, prefer N*N + N + N*NB) */ + + i__2 = *lwork - iwork + 1; + dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + dlacpy_((char *)"L", m, n, &a[a_offset], lda, &u[u_offset], + ldu, (ftnlen)1); + +/* Generate Q in U */ +/* (Workspace: need N*N + N + M, prefer N*N + N + M*NB) */ + + i__2 = *lwork - iwork + 1; + dorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], & + work[iwork], &i__2, &ierr); + +/* Copy R to WORK(IU), zeroing out below it */ + + dlacpy_((char *)"U", n, n, &a[a_offset], lda, &work[iu], & + ldwrku, (ftnlen)1); + i__2 = *n - 1; + i__3 = *n - 1; + dlaset_((char *)"L", &i__2, &i__3, &c_b57, &c_b57, &work[iu + + 1], &ldwrku, (ftnlen)1); + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + iwork = itaup + *n; + +/* Bidiagonalize R in WORK(IU), copying result to VT */ +/* (Workspace: need N*N + 4*N, prefer N*N + 3*N + 2*N*NB) */ + + i__2 = *lwork - iwork + 1; + dgebrd_(n, n, &work[iu], &ldwrku, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + dlacpy_((char *)"U", n, n, &work[iu], &ldwrku, &vt[vt_offset], + ldvt, (ftnlen)1); + +/* Generate left bidiagonalizing vectors in WORK(IU) */ +/* (Workspace: need N*N + 4*N, prefer N*N + 3*N + N*NB) */ + + i__2 = *lwork - iwork + 1; + dorgbr_((char *)"Q", n, n, n, &work[iu], &ldwrku, &work[itauq] + , &work[iwork], &i__2, &ierr, (ftnlen)1); + +/* Generate right bidiagonalizing vectors in VT */ +/* (Workspace: need N*N + 4*N-1, */ +/* prefer N*N+3*N+(N-1)*NB) */ + + i__2 = *lwork - iwork + 1; + dorgbr_((char *)"P", n, n, n, &vt[vt_offset], ldvt, &work[ + itaup], &work[iwork], &i__2, &ierr, (ftnlen)1) + ; + iwork = ie + *n; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of R in WORK(IU) and computing */ +/* right singular vectors of R in VT */ +/* (Workspace: need N*N + BDSPAC) */ + + dbdsqr_((char *)"U", n, n, n, &c__0, &s[1], &work[ie], &vt[ + vt_offset], ldvt, &work[iu], &ldwrku, dum, & + c__1, &work[iwork], info, (ftnlen)1); + +/* Multiply Q in U by left singular vectors of R in */ +/* WORK(IU), storing result in A */ +/* (Workspace: need N*N) */ + + dgemm_((char *)"N", (char *)"N", m, n, n, &c_b79, &u[u_offset], ldu, & + work[iu], &ldwrku, &c_b57, &a[a_offset], lda, + (ftnlen)1, (ftnlen)1); + +/* Copy left singular vectors of A from A to U */ + + dlacpy_((char *)"F", m, n, &a[a_offset], lda, &u[u_offset], + ldu, (ftnlen)1); + + } else { + +/* Insufficient workspace for a fast algorithm */ + + itau = 1; + iwork = itau + *n; + +/* Compute A=Q*R, copying result to U */ +/* (Workspace: need 2*N, prefer N + N*NB) */ + + i__2 = *lwork - iwork + 1; + dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + dlacpy_((char *)"L", m, n, &a[a_offset], lda, &u[u_offset], + ldu, (ftnlen)1); + +/* Generate Q in U */ +/* (Workspace: need N + M, prefer N + M*NB) */ + + i__2 = *lwork - iwork + 1; + dorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], & + work[iwork], &i__2, &ierr); + +/* Copy R from A to VT, zeroing out below it */ + + dlacpy_((char *)"U", n, n, &a[a_offset], lda, &vt[vt_offset], + ldvt, (ftnlen)1); + if (*n > 1) { + i__2 = *n - 1; + i__3 = *n - 1; + dlaset_((char *)"L", &i__2, &i__3, &c_b57, &c_b57, &vt[ + vt_dim1 + 2], ldvt, (ftnlen)1); + } + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + iwork = itaup + *n; + +/* Bidiagonalize R in VT */ +/* (Workspace: need 4*N, prefer 3*N + 2*N*NB) */ + + i__2 = *lwork - iwork + 1; + dgebrd_(n, n, &vt[vt_offset], ldvt, &s[1], &work[ie], + &work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + +/* Multiply Q in U by left bidiagonalizing vectors */ +/* in VT */ +/* (Workspace: need 3*N + M, prefer 3*N + M*NB) */ + + i__2 = *lwork - iwork + 1; + dormbr_((char *)"Q", (char *)"R", (char *)"N", m, n, n, &vt[vt_offset], ldvt, + &work[itauq], &u[u_offset], ldu, &work[iwork], + &i__2, &ierr, (ftnlen)1, (ftnlen)1, (ftnlen) + 1); + +/* Generate right bidiagonalizing vectors in VT */ +/* (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB) */ + + i__2 = *lwork - iwork + 1; + dorgbr_((char *)"P", n, n, n, &vt[vt_offset], ldvt, &work[ + itaup], &work[iwork], &i__2, &ierr, (ftnlen)1) + ; + iwork = ie + *n; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of A in U and computing right */ +/* singular vectors of A in VT */ +/* (Workspace: need BDSPAC) */ + + dbdsqr_((char *)"U", n, n, m, &c__0, &s[1], &work[ie], &vt[ + vt_offset], ldvt, &u[u_offset], ldu, dum, & + c__1, &work[iwork], info, (ftnlen)1); + + } + + } + + } + + } else { + +/* M .LT. MNTHR */ + +/* Path 10 (M at least N, but not much larger) */ +/* Reduce to bidiagonal form without QR decomposition */ + + ie = 1; + itauq = ie + *n; + itaup = itauq + *n; + iwork = itaup + *n; + +/* Bidiagonalize A */ +/* (Workspace: need 3*N + M, prefer 3*N + (M + N)*NB) */ + + i__2 = *lwork - iwork + 1; + dgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], & + work[itaup], &work[iwork], &i__2, &ierr); + if (wntuas) { + +/* If left singular vectors desired in U, copy result to U */ +/* and generate left bidiagonalizing vectors in U */ +/* (Workspace: need 3*N + NCU, prefer 3*N + NCU*NB) */ + + dlacpy_((char *)"L", m, n, &a[a_offset], lda, &u[u_offset], ldu, ( + ftnlen)1); + if (wntus) { + ncu = *n; + } + if (wntua) { + ncu = *m; + } + i__2 = *lwork - iwork + 1; + dorgbr_((char *)"Q", m, &ncu, n, &u[u_offset], ldu, &work[itauq], & + work[iwork], &i__2, &ierr, (ftnlen)1); + } + if (wntvas) { + +/* If right singular vectors desired in VT, copy result to */ +/* VT and generate right bidiagonalizing vectors in VT */ +/* (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB) */ + + dlacpy_((char *)"U", n, n, &a[a_offset], lda, &vt[vt_offset], ldvt, ( + ftnlen)1); + i__2 = *lwork - iwork + 1; + dorgbr_((char *)"P", n, n, n, &vt[vt_offset], ldvt, &work[itaup], & + work[iwork], &i__2, &ierr, (ftnlen)1); + } + if (wntuo) { + +/* If left singular vectors desired in A, generate left */ +/* bidiagonalizing vectors in A */ +/* (Workspace: need 4*N, prefer 3*N + N*NB) */ + + i__2 = *lwork - iwork + 1; + dorgbr_((char *)"Q", m, n, n, &a[a_offset], lda, &work[itauq], &work[ + iwork], &i__2, &ierr, (ftnlen)1); + } + if (wntvo) { + +/* If right singular vectors desired in A, generate right */ +/* bidiagonalizing vectors in A */ +/* (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB) */ + + i__2 = *lwork - iwork + 1; + dorgbr_((char *)"P", n, n, n, &a[a_offset], lda, &work[itaup], &work[ + iwork], &i__2, &ierr, (ftnlen)1); + } + iwork = ie + *n; + if (wntuas || wntuo) { + nru = *m; + } + if (wntun) { + nru = 0; + } + if (wntvas || wntvo) { + ncvt = *n; + } + if (wntvn) { + ncvt = 0; + } + if (! wntuo && ! wntvo) { + +/* Perform bidiagonal QR iteration, if desired, computing */ +/* left singular vectors in U and computing right singular */ +/* vectors in VT */ +/* (Workspace: need BDSPAC) */ + + dbdsqr_((char *)"U", n, &ncvt, &nru, &c__0, &s[1], &work[ie], &vt[ + vt_offset], ldvt, &u[u_offset], ldu, dum, &c__1, & + work[iwork], info, (ftnlen)1); + } else if (! wntuo && wntvo) { + +/* Perform bidiagonal QR iteration, if desired, computing */ +/* left singular vectors in U and computing right singular */ +/* vectors in A */ +/* (Workspace: need BDSPAC) */ + + dbdsqr_((char *)"U", n, &ncvt, &nru, &c__0, &s[1], &work[ie], &a[ + a_offset], lda, &u[u_offset], ldu, dum, &c__1, &work[ + iwork], info, (ftnlen)1); + } else { + +/* Perform bidiagonal QR iteration, if desired, computing */ +/* left singular vectors in A and computing right singular */ +/* vectors in VT */ +/* (Workspace: need BDSPAC) */ + + dbdsqr_((char *)"U", n, &ncvt, &nru, &c__0, &s[1], &work[ie], &vt[ + vt_offset], ldvt, &a[a_offset], lda, dum, &c__1, & + work[iwork], info, (ftnlen)1); + } + + } + + } else { + +/* A has more columns than rows. If A has sufficiently more */ +/* columns than rows, first reduce using the LQ decomposition (if */ +/* sufficient workspace available) */ + + if (*n >= mnthr) { + + if (wntvn) { + +/* Path 1t(N much larger than M, JOBVT='N') */ +/* No right singular vectors to be computed */ + + itau = 1; + iwork = itau + *m; + +/* Compute A=L*Q */ +/* (Workspace: need 2*M, prefer M + M*NB) */ + + i__2 = *lwork - iwork + 1; + dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork], & + i__2, &ierr); + +/* Zero out above L */ + + i__2 = *m - 1; + i__3 = *m - 1; + dlaset_((char *)"U", &i__2, &i__3, &c_b57, &c_b57, &a[(a_dim1 << 1) + + 1], lda, (ftnlen)1); + ie = 1; + itauq = ie + *m; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Bidiagonalize L in A */ +/* (Workspace: need 4*M, prefer 3*M + 2*M*NB) */ + + i__2 = *lwork - iwork + 1; + dgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[iwork], &i__2, &ierr); + if (wntuo || wntuas) { + +/* If left singular vectors desired, generate Q */ +/* (Workspace: need 4*M, prefer 3*M + M*NB) */ + + i__2 = *lwork - iwork + 1; + dorgbr_((char *)"Q", m, m, m, &a[a_offset], lda, &work[itauq], & + work[iwork], &i__2, &ierr, (ftnlen)1); + } + iwork = ie + *m; + nru = 0; + if (wntuo || wntuas) { + nru = *m; + } + +/* Perform bidiagonal QR iteration, computing left singular */ +/* vectors of A in A if desired */ +/* (Workspace: need BDSPAC) */ + + dbdsqr_((char *)"U", m, &c__0, &nru, &c__0, &s[1], &work[ie], dum, & + c__1, &a[a_offset], lda, dum, &c__1, &work[iwork], + info, (ftnlen)1); + +/* If left singular vectors desired in U, copy them there */ + + if (wntuas) { + dlacpy_((char *)"F", m, m, &a[a_offset], lda, &u[u_offset], ldu, ( + ftnlen)1); + } + + } else if (wntvo && wntun) { + +/* Path 2t(N much larger than M, JOBU='N', JOBVT='O') */ +/* M right singular vectors to be overwritten on A and */ +/* no left singular vectors to be computed */ + +/* Computing MAX */ + i__2 = *m << 2; + if (*lwork >= *m * *m + max(i__2,bdspac)) { + +/* Sufficient workspace for a fast algorithm */ + + ir = 1; +/* Computing MAX */ + i__2 = wrkbl, i__3 = *lda * *n + *m; + if (*lwork >= max(i__2,i__3) + *lda * *m) { + +/* WORK(IU) is LDA by N and WORK(IR) is LDA by M */ + + ldwrku = *lda; + chunk = *n; + ldwrkr = *lda; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__2 = wrkbl, i__3 = *lda * *n + *m; + if (*lwork >= max(i__2,i__3) + *m * *m) { + +/* WORK(IU) is LDA by N and WORK(IR) is M by M */ + + ldwrku = *lda; + chunk = *n; + ldwrkr = *m; + } else { + +/* WORK(IU) is M by CHUNK and WORK(IR) is M by M */ + + ldwrku = *m; + chunk = (*lwork - *m * *m - *m) / *m; + ldwrkr = *m; + } + } + itau = ir + ldwrkr * *m; + iwork = itau + *m; + +/* Compute A=L*Q */ +/* (Workspace: need M*M + 2*M, prefer M*M + M + M*NB) */ + + i__2 = *lwork - iwork + 1; + dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork] + , &i__2, &ierr); + +/* Copy L to WORK(IR) and zero out above it */ + + dlacpy_((char *)"L", m, m, &a[a_offset], lda, &work[ir], &ldwrkr, + (ftnlen)1); + i__2 = *m - 1; + i__3 = *m - 1; + dlaset_((char *)"U", &i__2, &i__3, &c_b57, &c_b57, &work[ir + + ldwrkr], &ldwrkr, (ftnlen)1); + +/* Generate Q in A */ +/* (Workspace: need M*M + 2*M, prefer M*M + M + M*NB) */ + + i__2 = *lwork - iwork + 1; + dorglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Bidiagonalize L in WORK(IR) */ +/* (Workspace: need M*M + 4*M, prefer M*M + 3*M + 2*M*NB) */ + + i__2 = *lwork - iwork + 1; + dgebrd_(m, m, &work[ir], &ldwrkr, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[iwork], &i__2, &ierr); + +/* Generate right vectors bidiagonalizing L */ +/* (Workspace: need M*M + 4*M-1, prefer M*M + 3*M + (M-1)*NB) */ + + i__2 = *lwork - iwork + 1; + dorgbr_((char *)"P", m, m, m, &work[ir], &ldwrkr, &work[itaup], & + work[iwork], &i__2, &ierr, (ftnlen)1); + iwork = ie + *m; + +/* Perform bidiagonal QR iteration, computing right */ +/* singular vectors of L in WORK(IR) */ +/* (Workspace: need M*M + BDSPAC) */ + + dbdsqr_((char *)"U", m, m, &c__0, &c__0, &s[1], &work[ie], &work[ + ir], &ldwrkr, dum, &c__1, dum, &c__1, &work[iwork] + , info, (ftnlen)1); + iu = ie + *m; + +/* Multiply right singular vectors of L in WORK(IR) by Q */ +/* in A, storing result in WORK(IU) and copying to A */ +/* (Workspace: need M*M + 2*M, prefer M*M + M*N + M) */ + + i__2 = *n; + i__3 = chunk; + for (i__ = 1; i__3 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += + i__3) { +/* Computing MIN */ + i__4 = *n - i__ + 1; + blk = min(i__4,chunk); + dgemm_((char *)"N", (char *)"N", m, &blk, m, &c_b79, &work[ir], & + ldwrkr, &a[i__ * a_dim1 + 1], lda, &c_b57, & + work[iu], &ldwrku, (ftnlen)1, (ftnlen)1); + dlacpy_((char *)"F", m, &blk, &work[iu], &ldwrku, &a[i__ * + a_dim1 + 1], lda, (ftnlen)1); +/* L30: */ + } + + } else { + +/* Insufficient workspace for a fast algorithm */ + + ie = 1; + itauq = ie + *m; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Bidiagonalize A */ +/* (Workspace: need 3*M + N, prefer 3*M + (M + N)*NB) */ + + i__3 = *lwork - iwork + 1; + dgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[iwork], &i__3, &ierr); + +/* Generate right vectors bidiagonalizing A */ +/* (Workspace: need 4*M, prefer 3*M + M*NB) */ + + i__3 = *lwork - iwork + 1; + dorgbr_((char *)"P", m, n, m, &a[a_offset], lda, &work[itaup], & + work[iwork], &i__3, &ierr, (ftnlen)1); + iwork = ie + *m; + +/* Perform bidiagonal QR iteration, computing right */ +/* singular vectors of A in A */ +/* (Workspace: need BDSPAC) */ + + dbdsqr_((char *)"L", m, n, &c__0, &c__0, &s[1], &work[ie], &a[ + a_offset], lda, dum, &c__1, dum, &c__1, &work[ + iwork], info, (ftnlen)1); + + } + + } else if (wntvo && wntuas) { + +/* Path 3t(N much larger than M, JOBU='S' or 'A', JOBVT='O') */ +/* M right singular vectors to be overwritten on A and */ +/* M left singular vectors to be computed in U */ + +/* Computing MAX */ + i__3 = *m << 2; + if (*lwork >= *m * *m + max(i__3,bdspac)) { + +/* Sufficient workspace for a fast algorithm */ + + ir = 1; +/* Computing MAX */ + i__3 = wrkbl, i__2 = *lda * *n + *m; + if (*lwork >= max(i__3,i__2) + *lda * *m) { + +/* WORK(IU) is LDA by N and WORK(IR) is LDA by M */ + + ldwrku = *lda; + chunk = *n; + ldwrkr = *lda; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__3 = wrkbl, i__2 = *lda * *n + *m; + if (*lwork >= max(i__3,i__2) + *m * *m) { + +/* WORK(IU) is LDA by N and WORK(IR) is M by M */ + + ldwrku = *lda; + chunk = *n; + ldwrkr = *m; + } else { + +/* WORK(IU) is M by CHUNK and WORK(IR) is M by M */ + + ldwrku = *m; + chunk = (*lwork - *m * *m - *m) / *m; + ldwrkr = *m; + } + } + itau = ir + ldwrkr * *m; + iwork = itau + *m; + +/* Compute A=L*Q */ +/* (Workspace: need M*M + 2*M, prefer M*M + M + M*NB) */ + + i__3 = *lwork - iwork + 1; + dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork] + , &i__3, &ierr); + +/* Copy L to U, zeroing about above it */ + + dlacpy_((char *)"L", m, m, &a[a_offset], lda, &u[u_offset], ldu, ( + ftnlen)1); + i__3 = *m - 1; + i__2 = *m - 1; + dlaset_((char *)"U", &i__3, &i__2, &c_b57, &c_b57, &u[(u_dim1 << + 1) + 1], ldu, (ftnlen)1); + +/* Generate Q in A */ +/* (Workspace: need M*M + 2*M, prefer M*M + M + M*NB) */ + + i__3 = *lwork - iwork + 1; + dorglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__3, &ierr); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Bidiagonalize L in U, copying result to WORK(IR) */ +/* (Workspace: need M*M + 4*M, prefer M*M + 3*M + 2*M*NB) */ + + i__3 = *lwork - iwork + 1; + dgebrd_(m, m, &u[u_offset], ldu, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[iwork], &i__3, &ierr); + dlacpy_((char *)"U", m, m, &u[u_offset], ldu, &work[ir], &ldwrkr, + (ftnlen)1); + +/* Generate right vectors bidiagonalizing L in WORK(IR) */ +/* (Workspace: need M*M + 4*M-1, prefer M*M + 3*M + (M-1)*NB) */ + + i__3 = *lwork - iwork + 1; + dorgbr_((char *)"P", m, m, m, &work[ir], &ldwrkr, &work[itaup], & + work[iwork], &i__3, &ierr, (ftnlen)1); + +/* Generate left vectors bidiagonalizing L in U */ +/* (Workspace: need M*M + 4*M, prefer M*M + 3*M + M*NB) */ + + i__3 = *lwork - iwork + 1; + dorgbr_((char *)"Q", m, m, m, &u[u_offset], ldu, &work[itauq], & + work[iwork], &i__3, &ierr, (ftnlen)1); + iwork = ie + *m; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of L in U, and computing right */ +/* singular vectors of L in WORK(IR) */ +/* (Workspace: need M*M + BDSPAC) */ + + dbdsqr_((char *)"U", m, m, m, &c__0, &s[1], &work[ie], &work[ir], + &ldwrkr, &u[u_offset], ldu, dum, &c__1, &work[ + iwork], info, (ftnlen)1); + iu = ie + *m; + +/* Multiply right singular vectors of L in WORK(IR) by Q */ +/* in A, storing result in WORK(IU) and copying to A */ +/* (Workspace: need M*M + 2*M, prefer M*M + M*N + M)) */ + + i__3 = *n; + i__2 = chunk; + for (i__ = 1; i__2 < 0 ? i__ >= i__3 : i__ <= i__3; i__ += + i__2) { +/* Computing MIN */ + i__4 = *n - i__ + 1; + blk = min(i__4,chunk); + dgemm_((char *)"N", (char *)"N", m, &blk, m, &c_b79, &work[ir], & + ldwrkr, &a[i__ * a_dim1 + 1], lda, &c_b57, & + work[iu], &ldwrku, (ftnlen)1, (ftnlen)1); + dlacpy_((char *)"F", m, &blk, &work[iu], &ldwrku, &a[i__ * + a_dim1 + 1], lda, (ftnlen)1); +/* L40: */ + } + + } else { + +/* Insufficient workspace for a fast algorithm */ + + itau = 1; + iwork = itau + *m; + +/* Compute A=L*Q */ +/* (Workspace: need 2*M, prefer M + M*NB) */ + + i__2 = *lwork - iwork + 1; + dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork] + , &i__2, &ierr); + +/* Copy L to U, zeroing out above it */ + + dlacpy_((char *)"L", m, m, &a[a_offset], lda, &u[u_offset], ldu, ( + ftnlen)1); + i__2 = *m - 1; + i__3 = *m - 1; + dlaset_((char *)"U", &i__2, &i__3, &c_b57, &c_b57, &u[(u_dim1 << + 1) + 1], ldu, (ftnlen)1); + +/* Generate Q in A */ +/* (Workspace: need 2*M, prefer M + M*NB) */ + + i__2 = *lwork - iwork + 1; + dorglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Bidiagonalize L in U */ +/* (Workspace: need 4*M, prefer 3*M + 2*M*NB) */ + + i__2 = *lwork - iwork + 1; + dgebrd_(m, m, &u[u_offset], ldu, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[iwork], &i__2, &ierr); + +/* Multiply right vectors bidiagonalizing L by Q in A */ +/* (Workspace: need 3*M + N, prefer 3*M + N*NB) */ + + i__2 = *lwork - iwork + 1; + dormbr_((char *)"P", (char *)"L", (char *)"T", m, n, m, &u[u_offset], ldu, &work[ + itaup], &a[a_offset], lda, &work[iwork], &i__2, & + ierr, (ftnlen)1, (ftnlen)1, (ftnlen)1); + +/* Generate left vectors bidiagonalizing L in U */ +/* (Workspace: need 4*M, prefer 3*M + M*NB) */ + + i__2 = *lwork - iwork + 1; + dorgbr_((char *)"Q", m, m, m, &u[u_offset], ldu, &work[itauq], & + work[iwork], &i__2, &ierr, (ftnlen)1); + iwork = ie + *m; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of A in U and computing right */ +/* singular vectors of A in A */ +/* (Workspace: need BDSPAC) */ + + dbdsqr_((char *)"U", m, n, m, &c__0, &s[1], &work[ie], &a[ + a_offset], lda, &u[u_offset], ldu, dum, &c__1, & + work[iwork], info, (ftnlen)1); + + } + + } else if (wntvs) { + + if (wntun) { + +/* Path 4t(N much larger than M, JOBU='N', JOBVT='S') */ +/* M right singular vectors to be computed in VT and */ +/* no left singular vectors to be computed */ + +/* Computing MAX */ + i__2 = *m << 2; + if (*lwork >= *m * *m + max(i__2,bdspac)) { + +/* Sufficient workspace for a fast algorithm */ + + ir = 1; + if (*lwork >= wrkbl + *lda * *m) { + +/* WORK(IR) is LDA by M */ + + ldwrkr = *lda; + } else { + +/* WORK(IR) is M by M */ + + ldwrkr = *m; + } + itau = ir + ldwrkr * *m; + iwork = itau + *m; + +/* Compute A=L*Q */ +/* (Workspace: need M*M + 2*M, prefer M*M + M + M*NB) */ + + i__2 = *lwork - iwork + 1; + dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + +/* Copy L to WORK(IR), zeroing out above it */ + + dlacpy_((char *)"L", m, m, &a[a_offset], lda, &work[ir], & + ldwrkr, (ftnlen)1); + i__2 = *m - 1; + i__3 = *m - 1; + dlaset_((char *)"U", &i__2, &i__3, &c_b57, &c_b57, &work[ir + + ldwrkr], &ldwrkr, (ftnlen)1); + +/* Generate Q in A */ +/* (Workspace: need M*M + 2*M, prefer M*M + M + M*NB) */ + + i__2 = *lwork - iwork + 1; + dorglq_(m, n, m, &a[a_offset], lda, &work[itau], & + work[iwork], &i__2, &ierr); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Bidiagonalize L in WORK(IR) */ +/* (Workspace: need M*M + 4*M, prefer M*M + 3*M + 2*M*NB) */ + + i__2 = *lwork - iwork + 1; + dgebrd_(m, m, &work[ir], &ldwrkr, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + +/* Generate right vectors bidiagonalizing L in */ +/* WORK(IR) */ +/* (Workspace: need M*M + 4*M, prefer M*M + 3*M + (M-1)*NB) */ + + i__2 = *lwork - iwork + 1; + dorgbr_((char *)"P", m, m, m, &work[ir], &ldwrkr, &work[itaup] + , &work[iwork], &i__2, &ierr, (ftnlen)1); + iwork = ie + *m; + +/* Perform bidiagonal QR iteration, computing right */ +/* singular vectors of L in WORK(IR) */ +/* (Workspace: need M*M + BDSPAC) */ + + dbdsqr_((char *)"U", m, m, &c__0, &c__0, &s[1], &work[ie], & + work[ir], &ldwrkr, dum, &c__1, dum, &c__1, & + work[iwork], info, (ftnlen)1); + +/* Multiply right singular vectors of L in WORK(IR) by */ +/* Q in A, storing result in VT */ +/* (Workspace: need M*M) */ + + dgemm_((char *)"N", (char *)"N", m, n, m, &c_b79, &work[ir], &ldwrkr, + &a[a_offset], lda, &c_b57, &vt[vt_offset], + ldvt, (ftnlen)1, (ftnlen)1); + + } else { + +/* Insufficient workspace for a fast algorithm */ + + itau = 1; + iwork = itau + *m; + +/* Compute A=L*Q */ +/* (Workspace: need 2*M, prefer M + M*NB) */ + + i__2 = *lwork - iwork + 1; + dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + +/* Copy result to VT */ + + dlacpy_((char *)"U", m, n, &a[a_offset], lda, &vt[vt_offset], + ldvt, (ftnlen)1); + +/* Generate Q in VT */ +/* (Workspace: need 2*M, prefer M + M*NB) */ + + i__2 = *lwork - iwork + 1; + dorglq_(m, n, m, &vt[vt_offset], ldvt, &work[itau], & + work[iwork], &i__2, &ierr); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Zero out above L in A */ + + i__2 = *m - 1; + i__3 = *m - 1; + dlaset_((char *)"U", &i__2, &i__3, &c_b57, &c_b57, &a[(a_dim1 + << 1) + 1], lda, (ftnlen)1); + +/* Bidiagonalize L in A */ +/* (Workspace: need 4*M, prefer 3*M + 2*M*NB) */ + + i__2 = *lwork - iwork + 1; + dgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + +/* Multiply right vectors bidiagonalizing L by Q in VT */ +/* (Workspace: need 3*M + N, prefer 3*M + N*NB) */ + + i__2 = *lwork - iwork + 1; + dormbr_((char *)"P", (char *)"L", (char *)"T", m, n, m, &a[a_offset], lda, & + work[itaup], &vt[vt_offset], ldvt, &work[ + iwork], &i__2, &ierr, (ftnlen)1, (ftnlen)1, ( + ftnlen)1); + iwork = ie + *m; + +/* Perform bidiagonal QR iteration, computing right */ +/* singular vectors of A in VT */ +/* (Workspace: need BDSPAC) */ + + dbdsqr_((char *)"U", m, n, &c__0, &c__0, &s[1], &work[ie], & + vt[vt_offset], ldvt, dum, &c__1, dum, &c__1, & + work[iwork], info, (ftnlen)1); + + } + + } else if (wntuo) { + +/* Path 5t(N much larger than M, JOBU='O', JOBVT='S') */ +/* M right singular vectors to be computed in VT and */ +/* M left singular vectors to be overwritten on A */ + +/* Computing MAX */ + i__2 = *m << 2; + if (*lwork >= (*m << 1) * *m + max(i__2,bdspac)) { + +/* Sufficient workspace for a fast algorithm */ + + iu = 1; + if (*lwork >= wrkbl + (*lda << 1) * *m) { + +/* WORK(IU) is LDA by M and WORK(IR) is LDA by M */ + + ldwrku = *lda; + ir = iu + ldwrku * *m; + ldwrkr = *lda; + } else if (*lwork >= wrkbl + (*lda + *m) * *m) { + +/* WORK(IU) is LDA by M and WORK(IR) is M by M */ + + ldwrku = *lda; + ir = iu + ldwrku * *m; + ldwrkr = *m; + } else { + +/* WORK(IU) is M by M and WORK(IR) is M by M */ + + ldwrku = *m; + ir = iu + ldwrku * *m; + ldwrkr = *m; + } + itau = ir + ldwrkr * *m; + iwork = itau + *m; + +/* Compute A=L*Q */ +/* (Workspace: need 2*M*M + 2*M, prefer 2*M*M + M + M*NB) */ + + i__2 = *lwork - iwork + 1; + dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + +/* Copy L to WORK(IU), zeroing out below it */ + + dlacpy_((char *)"L", m, m, &a[a_offset], lda, &work[iu], & + ldwrku, (ftnlen)1); + i__2 = *m - 1; + i__3 = *m - 1; + dlaset_((char *)"U", &i__2, &i__3, &c_b57, &c_b57, &work[iu + + ldwrku], &ldwrku, (ftnlen)1); + +/* Generate Q in A */ +/* (Workspace: need 2*M*M + 2*M, prefer 2*M*M + M + M*NB) */ + + i__2 = *lwork - iwork + 1; + dorglq_(m, n, m, &a[a_offset], lda, &work[itau], & + work[iwork], &i__2, &ierr); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Bidiagonalize L in WORK(IU), copying result to */ +/* WORK(IR) */ +/* (Workspace: need 2*M*M + 4*M, */ +/* prefer 2*M*M+3*M+2*M*NB) */ + + i__2 = *lwork - iwork + 1; + dgebrd_(m, m, &work[iu], &ldwrku, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + dlacpy_((char *)"L", m, m, &work[iu], &ldwrku, &work[ir], & + ldwrkr, (ftnlen)1); + +/* Generate right bidiagonalizing vectors in WORK(IU) */ +/* (Workspace: need 2*M*M + 4*M-1, */ +/* prefer 2*M*M+3*M+(M-1)*NB) */ + + i__2 = *lwork - iwork + 1; + dorgbr_((char *)"P", m, m, m, &work[iu], &ldwrku, &work[itaup] + , &work[iwork], &i__2, &ierr, (ftnlen)1); + +/* Generate left bidiagonalizing vectors in WORK(IR) */ +/* (Workspace: need 2*M*M + 4*M, prefer 2*M*M + 3*M + M*NB) */ + + i__2 = *lwork - iwork + 1; + dorgbr_((char *)"Q", m, m, m, &work[ir], &ldwrkr, &work[itauq] + , &work[iwork], &i__2, &ierr, (ftnlen)1); + iwork = ie + *m; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of L in WORK(IR) and computing */ +/* right singular vectors of L in WORK(IU) */ +/* (Workspace: need 2*M*M + BDSPAC) */ + + dbdsqr_((char *)"U", m, m, m, &c__0, &s[1], &work[ie], &work[ + iu], &ldwrku, &work[ir], &ldwrkr, dum, &c__1, + &work[iwork], info, (ftnlen)1); + +/* Multiply right singular vectors of L in WORK(IU) by */ +/* Q in A, storing result in VT */ +/* (Workspace: need M*M) */ + + dgemm_((char *)"N", (char *)"N", m, n, m, &c_b79, &work[iu], &ldwrku, + &a[a_offset], lda, &c_b57, &vt[vt_offset], + ldvt, (ftnlen)1, (ftnlen)1); + +/* Copy left singular vectors of L to A */ +/* (Workspace: need M*M) */ + + dlacpy_((char *)"F", m, m, &work[ir], &ldwrkr, &a[a_offset], + lda, (ftnlen)1); + + } else { + +/* Insufficient workspace for a fast algorithm */ + + itau = 1; + iwork = itau + *m; + +/* Compute A=L*Q, copying result to VT */ +/* (Workspace: need 2*M, prefer M + M*NB) */ + + i__2 = *lwork - iwork + 1; + dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + dlacpy_((char *)"U", m, n, &a[a_offset], lda, &vt[vt_offset], + ldvt, (ftnlen)1); + +/* Generate Q in VT */ +/* (Workspace: need 2*M, prefer M + M*NB) */ + + i__2 = *lwork - iwork + 1; + dorglq_(m, n, m, &vt[vt_offset], ldvt, &work[itau], & + work[iwork], &i__2, &ierr); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Zero out above L in A */ + + i__2 = *m - 1; + i__3 = *m - 1; + dlaset_((char *)"U", &i__2, &i__3, &c_b57, &c_b57, &a[(a_dim1 + << 1) + 1], lda, (ftnlen)1); + +/* Bidiagonalize L in A */ +/* (Workspace: need 4*M, prefer 3*M + 2*M*NB) */ + + i__2 = *lwork - iwork + 1; + dgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + +/* Multiply right vectors bidiagonalizing L by Q in VT */ +/* (Workspace: need 3*M + N, prefer 3*M + N*NB) */ + + i__2 = *lwork - iwork + 1; + dormbr_((char *)"P", (char *)"L", (char *)"T", m, n, m, &a[a_offset], lda, & + work[itaup], &vt[vt_offset], ldvt, &work[ + iwork], &i__2, &ierr, (ftnlen)1, (ftnlen)1, ( + ftnlen)1); + +/* Generate left bidiagonalizing vectors of L in A */ +/* (Workspace: need 4*M, prefer 3*M + M*NB) */ + + i__2 = *lwork - iwork + 1; + dorgbr_((char *)"Q", m, m, m, &a[a_offset], lda, &work[itauq], + &work[iwork], &i__2, &ierr, (ftnlen)1); + iwork = ie + *m; + +/* Perform bidiagonal QR iteration, compute left */ +/* singular vectors of A in A and compute right */ +/* singular vectors of A in VT */ +/* (Workspace: need BDSPAC) */ + + dbdsqr_((char *)"U", m, n, m, &c__0, &s[1], &work[ie], &vt[ + vt_offset], ldvt, &a[a_offset], lda, dum, & + c__1, &work[iwork], info, (ftnlen)1); + + } + + } else if (wntuas) { + +/* Path 6t(N much larger than M, JOBU='S' or 'A', */ +/* JOBVT='S') */ +/* M right singular vectors to be computed in VT and */ +/* M left singular vectors to be computed in U */ + +/* Computing MAX */ + i__2 = *m << 2; + if (*lwork >= *m * *m + max(i__2,bdspac)) { + +/* Sufficient workspace for a fast algorithm */ + + iu = 1; + if (*lwork >= wrkbl + *lda * *m) { + +/* WORK(IU) is LDA by N */ + + ldwrku = *lda; + } else { + +/* WORK(IU) is LDA by M */ + + ldwrku = *m; + } + itau = iu + ldwrku * *m; + iwork = itau + *m; + +/* Compute A=L*Q */ +/* (Workspace: need M*M + 2*M, prefer M*M + M + M*NB) */ + + i__2 = *lwork - iwork + 1; + dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + +/* Copy L to WORK(IU), zeroing out above it */ + + dlacpy_((char *)"L", m, m, &a[a_offset], lda, &work[iu], & + ldwrku, (ftnlen)1); + i__2 = *m - 1; + i__3 = *m - 1; + dlaset_((char *)"U", &i__2, &i__3, &c_b57, &c_b57, &work[iu + + ldwrku], &ldwrku, (ftnlen)1); + +/* Generate Q in A */ +/* (Workspace: need M*M + 2*M, prefer M*M + M + M*NB) */ + + i__2 = *lwork - iwork + 1; + dorglq_(m, n, m, &a[a_offset], lda, &work[itau], & + work[iwork], &i__2, &ierr); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Bidiagonalize L in WORK(IU), copying result to U */ +/* (Workspace: need M*M + 4*M, prefer M*M + 3*M + 2*M*NB) */ + + i__2 = *lwork - iwork + 1; + dgebrd_(m, m, &work[iu], &ldwrku, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + dlacpy_((char *)"L", m, m, &work[iu], &ldwrku, &u[u_offset], + ldu, (ftnlen)1); + +/* Generate right bidiagonalizing vectors in WORK(IU) */ +/* (Workspace: need M*M + 4*M-1, */ +/* prefer M*M+3*M+(M-1)*NB) */ + + i__2 = *lwork - iwork + 1; + dorgbr_((char *)"P", m, m, m, &work[iu], &ldwrku, &work[itaup] + , &work[iwork], &i__2, &ierr, (ftnlen)1); + +/* Generate left bidiagonalizing vectors in U */ +/* (Workspace: need M*M + 4*M, prefer M*M + 3*M + M*NB) */ + + i__2 = *lwork - iwork + 1; + dorgbr_((char *)"Q", m, m, m, &u[u_offset], ldu, &work[itauq], + &work[iwork], &i__2, &ierr, (ftnlen)1); + iwork = ie + *m; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of L in U and computing right */ +/* singular vectors of L in WORK(IU) */ +/* (Workspace: need M*M + BDSPAC) */ + + dbdsqr_((char *)"U", m, m, m, &c__0, &s[1], &work[ie], &work[ + iu], &ldwrku, &u[u_offset], ldu, dum, &c__1, & + work[iwork], info, (ftnlen)1); + +/* Multiply right singular vectors of L in WORK(IU) by */ +/* Q in A, storing result in VT */ +/* (Workspace: need M*M) */ + + dgemm_((char *)"N", (char *)"N", m, n, m, &c_b79, &work[iu], &ldwrku, + &a[a_offset], lda, &c_b57, &vt[vt_offset], + ldvt, (ftnlen)1, (ftnlen)1); + + } else { + +/* Insufficient workspace for a fast algorithm */ + + itau = 1; + iwork = itau + *m; + +/* Compute A=L*Q, copying result to VT */ +/* (Workspace: need 2*M, prefer M + M*NB) */ + + i__2 = *lwork - iwork + 1; + dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + dlacpy_((char *)"U", m, n, &a[a_offset], lda, &vt[vt_offset], + ldvt, (ftnlen)1); + +/* Generate Q in VT */ +/* (Workspace: need 2*M, prefer M + M*NB) */ + + i__2 = *lwork - iwork + 1; + dorglq_(m, n, m, &vt[vt_offset], ldvt, &work[itau], & + work[iwork], &i__2, &ierr); + +/* Copy L to U, zeroing out above it */ + + dlacpy_((char *)"L", m, m, &a[a_offset], lda, &u[u_offset], + ldu, (ftnlen)1); + i__2 = *m - 1; + i__3 = *m - 1; + dlaset_((char *)"U", &i__2, &i__3, &c_b57, &c_b57, &u[(u_dim1 + << 1) + 1], ldu, (ftnlen)1); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Bidiagonalize L in U */ +/* (Workspace: need 4*M, prefer 3*M + 2*M*NB) */ + + i__2 = *lwork - iwork + 1; + dgebrd_(m, m, &u[u_offset], ldu, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + +/* Multiply right bidiagonalizing vectors in U by Q */ +/* in VT */ +/* (Workspace: need 3*M + N, prefer 3*M + N*NB) */ + + i__2 = *lwork - iwork + 1; + dormbr_((char *)"P", (char *)"L", (char *)"T", m, n, m, &u[u_offset], ldu, & + work[itaup], &vt[vt_offset], ldvt, &work[ + iwork], &i__2, &ierr, (ftnlen)1, (ftnlen)1, ( + ftnlen)1); + +/* Generate left bidiagonalizing vectors in U */ +/* (Workspace: need 4*M, prefer 3*M + M*NB) */ + + i__2 = *lwork - iwork + 1; + dorgbr_((char *)"Q", m, m, m, &u[u_offset], ldu, &work[itauq], + &work[iwork], &i__2, &ierr, (ftnlen)1); + iwork = ie + *m; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of A in U and computing right */ +/* singular vectors of A in VT */ +/* (Workspace: need BDSPAC) */ + + dbdsqr_((char *)"U", m, n, m, &c__0, &s[1], &work[ie], &vt[ + vt_offset], ldvt, &u[u_offset], ldu, dum, & + c__1, &work[iwork], info, (ftnlen)1); + + } + + } + + } else if (wntva) { + + if (wntun) { + +/* Path 7t(N much larger than M, JOBU='N', JOBVT='A') */ +/* N right singular vectors to be computed in VT and */ +/* no left singular vectors to be computed */ + +/* Computing MAX */ + i__2 = *n + *m, i__3 = *m << 2, i__2 = max(i__2,i__3); + if (*lwork >= *m * *m + max(i__2,bdspac)) { + +/* Sufficient workspace for a fast algorithm */ + + ir = 1; + if (*lwork >= wrkbl + *lda * *m) { + +/* WORK(IR) is LDA by M */ + + ldwrkr = *lda; + } else { + +/* WORK(IR) is M by M */ + + ldwrkr = *m; + } + itau = ir + ldwrkr * *m; + iwork = itau + *m; + +/* Compute A=L*Q, copying result to VT */ +/* (Workspace: need M*M + 2*M, prefer M*M + M + M*NB) */ + + i__2 = *lwork - iwork + 1; + dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + dlacpy_((char *)"U", m, n, &a[a_offset], lda, &vt[vt_offset], + ldvt, (ftnlen)1); + +/* Copy L to WORK(IR), zeroing out above it */ + + dlacpy_((char *)"L", m, m, &a[a_offset], lda, &work[ir], & + ldwrkr, (ftnlen)1); + i__2 = *m - 1; + i__3 = *m - 1; + dlaset_((char *)"U", &i__2, &i__3, &c_b57, &c_b57, &work[ir + + ldwrkr], &ldwrkr, (ftnlen)1); + +/* Generate Q in VT */ +/* (Workspace: need M*M + M + N, prefer M*M + M + N*NB) */ + + i__2 = *lwork - iwork + 1; + dorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], & + work[iwork], &i__2, &ierr); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Bidiagonalize L in WORK(IR) */ +/* (Workspace: need M*M + 4*M, prefer M*M + 3*M + 2*M*NB) */ + + i__2 = *lwork - iwork + 1; + dgebrd_(m, m, &work[ir], &ldwrkr, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + +/* Generate right bidiagonalizing vectors in WORK(IR) */ +/* (Workspace: need M*M + 4*M-1, */ +/* prefer M*M+3*M+(M-1)*NB) */ + + i__2 = *lwork - iwork + 1; + dorgbr_((char *)"P", m, m, m, &work[ir], &ldwrkr, &work[itaup] + , &work[iwork], &i__2, &ierr, (ftnlen)1); + iwork = ie + *m; + +/* Perform bidiagonal QR iteration, computing right */ +/* singular vectors of L in WORK(IR) */ +/* (Workspace: need M*M + BDSPAC) */ + + dbdsqr_((char *)"U", m, m, &c__0, &c__0, &s[1], &work[ie], & + work[ir], &ldwrkr, dum, &c__1, dum, &c__1, & + work[iwork], info, (ftnlen)1); + +/* Multiply right singular vectors of L in WORK(IR) by */ +/* Q in VT, storing result in A */ +/* (Workspace: need M*M) */ + + dgemm_((char *)"N", (char *)"N", m, n, m, &c_b79, &work[ir], &ldwrkr, + &vt[vt_offset], ldvt, &c_b57, &a[a_offset], + lda, (ftnlen)1, (ftnlen)1); + +/* Copy right singular vectors of A from A to VT */ + + dlacpy_((char *)"F", m, n, &a[a_offset], lda, &vt[vt_offset], + ldvt, (ftnlen)1); + + } else { + +/* Insufficient workspace for a fast algorithm */ + + itau = 1; + iwork = itau + *m; + +/* Compute A=L*Q, copying result to VT */ +/* (Workspace: need 2*M, prefer M + M*NB) */ + + i__2 = *lwork - iwork + 1; + dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + dlacpy_((char *)"U", m, n, &a[a_offset], lda, &vt[vt_offset], + ldvt, (ftnlen)1); + +/* Generate Q in VT */ +/* (Workspace: need M + N, prefer M + N*NB) */ + + i__2 = *lwork - iwork + 1; + dorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], & + work[iwork], &i__2, &ierr); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Zero out above L in A */ + + i__2 = *m - 1; + i__3 = *m - 1; + dlaset_((char *)"U", &i__2, &i__3, &c_b57, &c_b57, &a[(a_dim1 + << 1) + 1], lda, (ftnlen)1); + +/* Bidiagonalize L in A */ +/* (Workspace: need 4*M, prefer 3*M + 2*M*NB) */ + + i__2 = *lwork - iwork + 1; + dgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + +/* Multiply right bidiagonalizing vectors in A by Q */ +/* in VT */ +/* (Workspace: need 3*M + N, prefer 3*M + N*NB) */ + + i__2 = *lwork - iwork + 1; + dormbr_((char *)"P", (char *)"L", (char *)"T", m, n, m, &a[a_offset], lda, & + work[itaup], &vt[vt_offset], ldvt, &work[ + iwork], &i__2, &ierr, (ftnlen)1, (ftnlen)1, ( + ftnlen)1); + iwork = ie + *m; + +/* Perform bidiagonal QR iteration, computing right */ +/* singular vectors of A in VT */ +/* (Workspace: need BDSPAC) */ + + dbdsqr_((char *)"U", m, n, &c__0, &c__0, &s[1], &work[ie], & + vt[vt_offset], ldvt, dum, &c__1, dum, &c__1, & + work[iwork], info, (ftnlen)1); + + } + + } else if (wntuo) { + +/* Path 8t(N much larger than M, JOBU='O', JOBVT='A') */ +/* N right singular vectors to be computed in VT and */ +/* M left singular vectors to be overwritten on A */ + +/* Computing MAX */ + i__2 = *n + *m, i__3 = *m << 2, i__2 = max(i__2,i__3); + if (*lwork >= (*m << 1) * *m + max(i__2,bdspac)) { + +/* Sufficient workspace for a fast algorithm */ + + iu = 1; + if (*lwork >= wrkbl + (*lda << 1) * *m) { + +/* WORK(IU) is LDA by M and WORK(IR) is LDA by M */ + + ldwrku = *lda; + ir = iu + ldwrku * *m; + ldwrkr = *lda; + } else if (*lwork >= wrkbl + (*lda + *m) * *m) { + +/* WORK(IU) is LDA by M and WORK(IR) is M by M */ + + ldwrku = *lda; + ir = iu + ldwrku * *m; + ldwrkr = *m; + } else { + +/* WORK(IU) is M by M and WORK(IR) is M by M */ + + ldwrku = *m; + ir = iu + ldwrku * *m; + ldwrkr = *m; + } + itau = ir + ldwrkr * *m; + iwork = itau + *m; + +/* Compute A=L*Q, copying result to VT */ +/* (Workspace: need 2*M*M + 2*M, prefer 2*M*M + M + M*NB) */ + + i__2 = *lwork - iwork + 1; + dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + dlacpy_((char *)"U", m, n, &a[a_offset], lda, &vt[vt_offset], + ldvt, (ftnlen)1); + +/* Generate Q in VT */ +/* (Workspace: need 2*M*M + M + N, prefer 2*M*M + M + N*NB) */ + + i__2 = *lwork - iwork + 1; + dorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], & + work[iwork], &i__2, &ierr); + +/* Copy L to WORK(IU), zeroing out above it */ + + dlacpy_((char *)"L", m, m, &a[a_offset], lda, &work[iu], & + ldwrku, (ftnlen)1); + i__2 = *m - 1; + i__3 = *m - 1; + dlaset_((char *)"U", &i__2, &i__3, &c_b57, &c_b57, &work[iu + + ldwrku], &ldwrku, (ftnlen)1); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Bidiagonalize L in WORK(IU), copying result to */ +/* WORK(IR) */ +/* (Workspace: need 2*M*M + 4*M, */ +/* prefer 2*M*M+3*M+2*M*NB) */ + + i__2 = *lwork - iwork + 1; + dgebrd_(m, m, &work[iu], &ldwrku, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + dlacpy_((char *)"L", m, m, &work[iu], &ldwrku, &work[ir], & + ldwrkr, (ftnlen)1); + +/* Generate right bidiagonalizing vectors in WORK(IU) */ +/* (Workspace: need 2*M*M + 4*M-1, */ +/* prefer 2*M*M+3*M+(M-1)*NB) */ + + i__2 = *lwork - iwork + 1; + dorgbr_((char *)"P", m, m, m, &work[iu], &ldwrku, &work[itaup] + , &work[iwork], &i__2, &ierr, (ftnlen)1); + +/* Generate left bidiagonalizing vectors in WORK(IR) */ +/* (Workspace: need 2*M*M + 4*M, prefer 2*M*M + 3*M + M*NB) */ + + i__2 = *lwork - iwork + 1; + dorgbr_((char *)"Q", m, m, m, &work[ir], &ldwrkr, &work[itauq] + , &work[iwork], &i__2, &ierr, (ftnlen)1); + iwork = ie + *m; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of L in WORK(IR) and computing */ +/* right singular vectors of L in WORK(IU) */ +/* (Workspace: need 2*M*M + BDSPAC) */ + + dbdsqr_((char *)"U", m, m, m, &c__0, &s[1], &work[ie], &work[ + iu], &ldwrku, &work[ir], &ldwrkr, dum, &c__1, + &work[iwork], info, (ftnlen)1); + +/* Multiply right singular vectors of L in WORK(IU) by */ +/* Q in VT, storing result in A */ +/* (Workspace: need M*M) */ + + dgemm_((char *)"N", (char *)"N", m, n, m, &c_b79, &work[iu], &ldwrku, + &vt[vt_offset], ldvt, &c_b57, &a[a_offset], + lda, (ftnlen)1, (ftnlen)1); + +/* Copy right singular vectors of A from A to VT */ + + dlacpy_((char *)"F", m, n, &a[a_offset], lda, &vt[vt_offset], + ldvt, (ftnlen)1); + +/* Copy left singular vectors of A from WORK(IR) to A */ + + dlacpy_((char *)"F", m, m, &work[ir], &ldwrkr, &a[a_offset], + lda, (ftnlen)1); + + } else { + +/* Insufficient workspace for a fast algorithm */ + + itau = 1; + iwork = itau + *m; + +/* Compute A=L*Q, copying result to VT */ +/* (Workspace: need 2*M, prefer M + M*NB) */ + + i__2 = *lwork - iwork + 1; + dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + dlacpy_((char *)"U", m, n, &a[a_offset], lda, &vt[vt_offset], + ldvt, (ftnlen)1); + +/* Generate Q in VT */ +/* (Workspace: need M + N, prefer M + N*NB) */ + + i__2 = *lwork - iwork + 1; + dorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], & + work[iwork], &i__2, &ierr); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Zero out above L in A */ + + i__2 = *m - 1; + i__3 = *m - 1; + dlaset_((char *)"U", &i__2, &i__3, &c_b57, &c_b57, &a[(a_dim1 + << 1) + 1], lda, (ftnlen)1); + +/* Bidiagonalize L in A */ +/* (Workspace: need 4*M, prefer 3*M + 2*M*NB) */ + + i__2 = *lwork - iwork + 1; + dgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + +/* Multiply right bidiagonalizing vectors in A by Q */ +/* in VT */ +/* (Workspace: need 3*M + N, prefer 3*M + N*NB) */ + + i__2 = *lwork - iwork + 1; + dormbr_((char *)"P", (char *)"L", (char *)"T", m, n, m, &a[a_offset], lda, & + work[itaup], &vt[vt_offset], ldvt, &work[ + iwork], &i__2, &ierr, (ftnlen)1, (ftnlen)1, ( + ftnlen)1); + +/* Generate left bidiagonalizing vectors in A */ +/* (Workspace: need 4*M, prefer 3*M + M*NB) */ + + i__2 = *lwork - iwork + 1; + dorgbr_((char *)"Q", m, m, m, &a[a_offset], lda, &work[itauq], + &work[iwork], &i__2, &ierr, (ftnlen)1); + iwork = ie + *m; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of A in A and computing right */ +/* singular vectors of A in VT */ +/* (Workspace: need BDSPAC) */ + + dbdsqr_((char *)"U", m, n, m, &c__0, &s[1], &work[ie], &vt[ + vt_offset], ldvt, &a[a_offset], lda, dum, & + c__1, &work[iwork], info, (ftnlen)1); + + } + + } else if (wntuas) { + +/* Path 9t(N much larger than M, JOBU='S' or 'A', */ +/* JOBVT='A') */ +/* N right singular vectors to be computed in VT and */ +/* M left singular vectors to be computed in U */ + +/* Computing MAX */ + i__2 = *n + *m, i__3 = *m << 2, i__2 = max(i__2,i__3); + if (*lwork >= *m * *m + max(i__2,bdspac)) { + +/* Sufficient workspace for a fast algorithm */ + + iu = 1; + if (*lwork >= wrkbl + *lda * *m) { + +/* WORK(IU) is LDA by M */ + + ldwrku = *lda; + } else { + +/* WORK(IU) is M by M */ + + ldwrku = *m; + } + itau = iu + ldwrku * *m; + iwork = itau + *m; + +/* Compute A=L*Q, copying result to VT */ +/* (Workspace: need M*M + 2*M, prefer M*M + M + M*NB) */ + + i__2 = *lwork - iwork + 1; + dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + dlacpy_((char *)"U", m, n, &a[a_offset], lda, &vt[vt_offset], + ldvt, (ftnlen)1); + +/* Generate Q in VT */ +/* (Workspace: need M*M + M + N, prefer M*M + M + N*NB) */ + + i__2 = *lwork - iwork + 1; + dorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], & + work[iwork], &i__2, &ierr); + +/* Copy L to WORK(IU), zeroing out above it */ + + dlacpy_((char *)"L", m, m, &a[a_offset], lda, &work[iu], & + ldwrku, (ftnlen)1); + i__2 = *m - 1; + i__3 = *m - 1; + dlaset_((char *)"U", &i__2, &i__3, &c_b57, &c_b57, &work[iu + + ldwrku], &ldwrku, (ftnlen)1); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Bidiagonalize L in WORK(IU), copying result to U */ +/* (Workspace: need M*M + 4*M, prefer M*M + 3*M + 2*M*NB) */ + + i__2 = *lwork - iwork + 1; + dgebrd_(m, m, &work[iu], &ldwrku, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + dlacpy_((char *)"L", m, m, &work[iu], &ldwrku, &u[u_offset], + ldu, (ftnlen)1); + +/* Generate right bidiagonalizing vectors in WORK(IU) */ +/* (Workspace: need M*M + 4*M, prefer M*M + 3*M + (M-1)*NB) */ + + i__2 = *lwork - iwork + 1; + dorgbr_((char *)"P", m, m, m, &work[iu], &ldwrku, &work[itaup] + , &work[iwork], &i__2, &ierr, (ftnlen)1); + +/* Generate left bidiagonalizing vectors in U */ +/* (Workspace: need M*M + 4*M, prefer M*M + 3*M + M*NB) */ + + i__2 = *lwork - iwork + 1; + dorgbr_((char *)"Q", m, m, m, &u[u_offset], ldu, &work[itauq], + &work[iwork], &i__2, &ierr, (ftnlen)1); + iwork = ie + *m; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of L in U and computing right */ +/* singular vectors of L in WORK(IU) */ +/* (Workspace: need M*M + BDSPAC) */ + + dbdsqr_((char *)"U", m, m, m, &c__0, &s[1], &work[ie], &work[ + iu], &ldwrku, &u[u_offset], ldu, dum, &c__1, & + work[iwork], info, (ftnlen)1); + +/* Multiply right singular vectors of L in WORK(IU) by */ +/* Q in VT, storing result in A */ +/* (Workspace: need M*M) */ + + dgemm_((char *)"N", (char *)"N", m, n, m, &c_b79, &work[iu], &ldwrku, + &vt[vt_offset], ldvt, &c_b57, &a[a_offset], + lda, (ftnlen)1, (ftnlen)1); + +/* Copy right singular vectors of A from A to VT */ + + dlacpy_((char *)"F", m, n, &a[a_offset], lda, &vt[vt_offset], + ldvt, (ftnlen)1); + + } else { + +/* Insufficient workspace for a fast algorithm */ + + itau = 1; + iwork = itau + *m; + +/* Compute A=L*Q, copying result to VT */ +/* (Workspace: need 2*M, prefer M + M*NB) */ + + i__2 = *lwork - iwork + 1; + dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[ + iwork], &i__2, &ierr); + dlacpy_((char *)"U", m, n, &a[a_offset], lda, &vt[vt_offset], + ldvt, (ftnlen)1); + +/* Generate Q in VT */ +/* (Workspace: need M + N, prefer M + N*NB) */ + + i__2 = *lwork - iwork + 1; + dorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], & + work[iwork], &i__2, &ierr); + +/* Copy L to U, zeroing out above it */ + + dlacpy_((char *)"L", m, m, &a[a_offset], lda, &u[u_offset], + ldu, (ftnlen)1); + i__2 = *m - 1; + i__3 = *m - 1; + dlaset_((char *)"U", &i__2, &i__3, &c_b57, &c_b57, &u[(u_dim1 + << 1) + 1], ldu, (ftnlen)1); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Bidiagonalize L in U */ +/* (Workspace: need 4*M, prefer 3*M + 2*M*NB) */ + + i__2 = *lwork - iwork + 1; + dgebrd_(m, m, &u[u_offset], ldu, &s[1], &work[ie], & + work[itauq], &work[itaup], &work[iwork], & + i__2, &ierr); + +/* Multiply right bidiagonalizing vectors in U by Q */ +/* in VT */ +/* (Workspace: need 3*M + N, prefer 3*M + N*NB) */ + + i__2 = *lwork - iwork + 1; + dormbr_((char *)"P", (char *)"L", (char *)"T", m, n, m, &u[u_offset], ldu, & + work[itaup], &vt[vt_offset], ldvt, &work[ + iwork], &i__2, &ierr, (ftnlen)1, (ftnlen)1, ( + ftnlen)1); + +/* Generate left bidiagonalizing vectors in U */ +/* (Workspace: need 4*M, prefer 3*M + M*NB) */ + + i__2 = *lwork - iwork + 1; + dorgbr_((char *)"Q", m, m, m, &u[u_offset], ldu, &work[itauq], + &work[iwork], &i__2, &ierr, (ftnlen)1); + iwork = ie + *m; + +/* Perform bidiagonal QR iteration, computing left */ +/* singular vectors of A in U and computing right */ +/* singular vectors of A in VT */ +/* (Workspace: need BDSPAC) */ + + dbdsqr_((char *)"U", m, n, m, &c__0, &s[1], &work[ie], &vt[ + vt_offset], ldvt, &u[u_offset], ldu, dum, & + c__1, &work[iwork], info, (ftnlen)1); + + } + + } + + } + + } else { + +/* N .LT. MNTHR */ + +/* Path 10t(N greater than M, but not much larger) */ +/* Reduce to bidiagonal form without LQ decomposition */ + + ie = 1; + itauq = ie + *m; + itaup = itauq + *m; + iwork = itaup + *m; + +/* Bidiagonalize A */ +/* (Workspace: need 3*M + N, prefer 3*M + (M + N)*NB) */ + + i__2 = *lwork - iwork + 1; + dgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], & + work[itaup], &work[iwork], &i__2, &ierr); + if (wntuas) { + +/* If left singular vectors desired in U, copy result to U */ +/* and generate left bidiagonalizing vectors in U */ +/* (Workspace: need 4*M-1, prefer 3*M + (M-1)*NB) */ + + dlacpy_((char *)"L", m, m, &a[a_offset], lda, &u[u_offset], ldu, ( + ftnlen)1); + i__2 = *lwork - iwork + 1; + dorgbr_((char *)"Q", m, m, n, &u[u_offset], ldu, &work[itauq], &work[ + iwork], &i__2, &ierr, (ftnlen)1); + } + if (wntvas) { + +/* If right singular vectors desired in VT, copy result to */ +/* VT and generate right bidiagonalizing vectors in VT */ +/* (Workspace: need 3*M + NRVT, prefer 3*M + NRVT*NB) */ + + dlacpy_((char *)"U", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt, ( + ftnlen)1); + if (wntva) { + nrvt = *n; + } + if (wntvs) { + nrvt = *m; + } + i__2 = *lwork - iwork + 1; + dorgbr_((char *)"P", &nrvt, n, m, &vt[vt_offset], ldvt, &work[itaup], + &work[iwork], &i__2, &ierr, (ftnlen)1); + } + if (wntuo) { + +/* If left singular vectors desired in A, generate left */ +/* bidiagonalizing vectors in A */ +/* (Workspace: need 4*M-1, prefer 3*M + (M-1)*NB) */ + + i__2 = *lwork - iwork + 1; + dorgbr_((char *)"Q", m, m, n, &a[a_offset], lda, &work[itauq], &work[ + iwork], &i__2, &ierr, (ftnlen)1); + } + if (wntvo) { + +/* If right singular vectors desired in A, generate right */ +/* bidiagonalizing vectors in A */ +/* (Workspace: need 4*M, prefer 3*M + M*NB) */ + + i__2 = *lwork - iwork + 1; + dorgbr_((char *)"P", m, n, m, &a[a_offset], lda, &work[itaup], &work[ + iwork], &i__2, &ierr, (ftnlen)1); + } + iwork = ie + *m; + if (wntuas || wntuo) { + nru = *m; + } + if (wntun) { + nru = 0; + } + if (wntvas || wntvo) { + ncvt = *n; + } + if (wntvn) { + ncvt = 0; + } + if (! wntuo && ! wntvo) { + +/* Perform bidiagonal QR iteration, if desired, computing */ +/* left singular vectors in U and computing right singular */ +/* vectors in VT */ +/* (Workspace: need BDSPAC) */ + + dbdsqr_((char *)"L", m, &ncvt, &nru, &c__0, &s[1], &work[ie], &vt[ + vt_offset], ldvt, &u[u_offset], ldu, dum, &c__1, & + work[iwork], info, (ftnlen)1); + } else if (! wntuo && wntvo) { + +/* Perform bidiagonal QR iteration, if desired, computing */ +/* left singular vectors in U and computing right singular */ +/* vectors in A */ +/* (Workspace: need BDSPAC) */ + + dbdsqr_((char *)"L", m, &ncvt, &nru, &c__0, &s[1], &work[ie], &a[ + a_offset], lda, &u[u_offset], ldu, dum, &c__1, &work[ + iwork], info, (ftnlen)1); + } else { + +/* Perform bidiagonal QR iteration, if desired, computing */ +/* left singular vectors in A and computing right singular */ +/* vectors in VT */ +/* (Workspace: need BDSPAC) */ + + dbdsqr_((char *)"L", m, &ncvt, &nru, &c__0, &s[1], &work[ie], &vt[ + vt_offset], ldvt, &a[a_offset], lda, dum, &c__1, & + work[iwork], info, (ftnlen)1); + } + + } + + } + +/* If DBDSQR failed to converge, copy unconverged superdiagonals */ +/* to WORK( 2:MINMN ) */ + + if (*info != 0) { + if (ie > 2) { + i__2 = minmn - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + work[i__ + 1] = work[i__ + ie - 1]; +/* L50: */ + } + } + if (ie < 2) { + for (i__ = minmn - 1; i__ >= 1; --i__) { + work[i__ + 1] = work[i__ + ie - 1]; +/* L60: */ + } + } + } + +/* Undo scaling if necessary */ + + if (iscl == 1) { + if (anrm > bignum) { + dlascl_((char *)"G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], & + minmn, &ierr, (ftnlen)1); + } + if (*info != 0 && anrm > bignum) { + i__2 = minmn - 1; + dlascl_((char *)"G", &c__0, &c__0, &bignum, &anrm, &i__2, &c__1, &work[2], + &minmn, &ierr, (ftnlen)1); + } + if (anrm < smlnum) { + dlascl_((char *)"G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], & + minmn, &ierr, (ftnlen)1); + } + if (*info != 0 && anrm < smlnum) { + i__2 = minmn - 1; + dlascl_((char *)"G", &c__0, &c__0, &smlnum, &anrm, &i__2, &c__1, &work[2], + &minmn, &ierr, (ftnlen)1); + } + } + +/* Return optimal workspace in WORK(1) */ + + work[1] = (doublereal) maxwrk; + + return 0; + +/* End of DGESVD */ + +} /* dgesvd_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dgetf2.cpp b/lib/linalg/dgetf2.cpp new file mode 100644 index 0000000000..0cb6b57a2f --- /dev/null +++ b/lib/linalg/dgetf2.cpp @@ -0,0 +1,266 @@ +/* fortran/dgetf2.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static doublereal c_b8 = -1.; + +/* > \brief \b DGETF2 computes the LU factorization of a general m-by-n matrix using partial pivoting with row + interchanges (unblocked algorithm). */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DGETF2 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DGETF2( M, N, A, LDA, IPIV, INFO ) */ + +/* .. Scalar Arguments .. */ +/* INTEGER INFO, LDA, M, N */ +/* .. */ +/* .. Array Arguments .. */ +/* INTEGER IPIV( * ) */ +/* DOUBLE PRECISION A( LDA, * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGETF2 computes an LU factorization of a general m-by-n matrix A */ +/* > using partial pivoting with row interchanges. */ +/* > */ +/* > The factorization has the form */ +/* > A = P * L * U */ +/* > where P is a permutation matrix, L is lower triangular with unit */ +/* > diagonal elements (lower trapezoidal if m > n), and U is upper */ +/* > triangular (upper trapezoidal if m < n). */ +/* > */ +/* > This is the right-looking Level 2 BLAS version of the algorithm. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > On entry, the m by n matrix to be factored. */ +/* > On exit, the factors L and U from the factorization */ +/* > A = P*L*U; the unit diagonal elements of L are not stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= max(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (min(M,N)) */ +/* > The pivot indices; for 1 <= i <= min(M,N), row i of the */ +/* > matrix was interchanged with row IPIV(i). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -k, the k-th argument had an illegal value */ +/* > > 0: if INFO = k, U(k,k) is exactly zero. The factorization */ +/* > has been completed, but the factor U is exactly */ +/* > singular, and division by zero will occur if it is used */ +/* > to solve a system of equations. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup doubleGEcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int dgetf2_(integer *m, integer *n, doublereal *a, integer * + lda, integer *ipiv, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + doublereal d__1; + + /* Local variables */ + integer i__, j, jp; + extern /* Subroutine */ int dger_(integer *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *), dscal_(integer *, doublereal *, doublereal *, integer + *); + doublereal sfmin; + extern /* Subroutine */ int dswap_(integer *, doublereal *, integer *, + doublereal *, integer *); + extern doublereal dlamch_(char *, ftnlen); + extern integer idamax_(integer *, doublereal *, integer *); + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + + +/* -- LAPACK computational routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DGETF2", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + return 0; + } + +/* Compute machine safe minimum */ + + sfmin = dlamch_((char *)"S", (ftnlen)1); + + i__1 = min(*m,*n); + for (j = 1; j <= i__1; ++j) { + +/* Find pivot and test for singularity. */ + + i__2 = *m - j + 1; + jp = j - 1 + idamax_(&i__2, &a[j + j * a_dim1], &c__1); + ipiv[j] = jp; + if (a[jp + j * a_dim1] != 0.) { + +/* Apply the interchange to columns 1:N. */ + + if (jp != j) { + dswap_(n, &a[j + a_dim1], lda, &a[jp + a_dim1], lda); + } + +/* Compute elements J+1:M of J-th column. */ + + if (j < *m) { + if ((d__1 = a[j + j * a_dim1], abs(d__1)) >= sfmin) { + i__2 = *m - j; + d__1 = 1. / a[j + j * a_dim1]; + dscal_(&i__2, &d__1, &a[j + 1 + j * a_dim1], &c__1); + } else { + i__2 = *m - j; + for (i__ = 1; i__ <= i__2; ++i__) { + a[j + i__ + j * a_dim1] /= a[j + j * a_dim1]; +/* L20: */ + } + } + } + + } else if (*info == 0) { + + *info = j; + } + + if (j < min(*m,*n)) { + +/* Update trailing submatrix. */ + + i__2 = *m - j; + i__3 = *n - j; + dger_(&i__2, &i__3, &c_b8, &a[j + 1 + j * a_dim1], &c__1, &a[j + ( + j + 1) * a_dim1], lda, &a[j + 1 + (j + 1) * a_dim1], lda); + } +/* L10: */ + } + return 0; + +/* End of DGETF2 */ + +} /* dgetf2_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dgetrf.cpp b/lib/linalg/dgetrf.cpp new file mode 100644 index 0000000000..eca7500a03 --- /dev/null +++ b/lib/linalg/dgetrf.cpp @@ -0,0 +1,293 @@ +/* fortran/dgetrf.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c_n1 = -1; +static doublereal c_b16 = 1.; +static doublereal c_b19 = -1.; + +/* > \brief \b DGETRF */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DGETRF + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DGETRF( M, N, A, LDA, IPIV, INFO ) */ + +/* .. Scalar Arguments .. */ +/* INTEGER INFO, LDA, M, N */ +/* .. */ +/* .. Array Arguments .. */ +/* INTEGER IPIV( * ) */ +/* DOUBLE PRECISION A( LDA, * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGETRF computes an LU factorization of a general M-by-N matrix A */ +/* > using partial pivoting with row interchanges. */ +/* > */ +/* > The factorization has the form */ +/* > A = P * L * U */ +/* > where P is a permutation matrix, L is lower triangular with unit */ +/* > diagonal elements (lower trapezoidal if m > n), and U is upper */ +/* > triangular (upper trapezoidal if m < n). */ +/* > */ +/* > This is the right-looking Level 3 BLAS version of the algorithm. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > On entry, the M-by-N matrix to be factored. */ +/* > On exit, the factors L and U from the factorization */ +/* > A = P*L*U; the unit diagonal elements of L are not stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= max(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (min(M,N)) */ +/* > The pivot indices; for 1 <= i <= min(M,N), row i of the */ +/* > matrix was interchanged with row IPIV(i). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i, U(i,i) is exactly zero. The factorization */ +/* > has been completed, but the factor U is exactly */ +/* > singular, and division by zero will occur if it is used */ +/* > to solve a system of equations. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup doubleGEcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int dgetrf_(integer *m, integer *n, doublereal *a, integer * + lda, integer *ipiv, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + + /* Local variables */ + integer i__, j, jb, nb; + extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, ftnlen, ftnlen); + integer iinfo; + extern /* Subroutine */ int dtrsm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *, ftnlen, ftnlen, ftnlen, ftnlen), xerbla_( + char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int dlaswp_(integer *, doublereal *, integer *, + integer *, integer *, integer *, integer *), dgetrf2_(integer *, + integer *, doublereal *, integer *, integer *, integer *); + + +/* -- LAPACK computational routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DGETRF", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + return 0; + } + +/* Determine the block size for this environment. */ + + nb = ilaenv_(&c__1, (char *)"DGETRF", (char *)" ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen) + 1); + if (nb <= 1 || nb >= min(*m,*n)) { + +/* Use unblocked code. */ + + dgetrf2_(m, n, &a[a_offset], lda, &ipiv[1], info); + } else { + +/* Use blocked code. */ + + i__1 = min(*m,*n); + i__2 = nb; + for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { +/* Computing MIN */ + i__3 = min(*m,*n) - j + 1; + jb = min(i__3,nb); + +/* Factor diagonal and subdiagonal blocks and test for exact */ +/* singularity. */ + + i__3 = *m - j + 1; + dgetrf2_(&i__3, &jb, &a[j + j * a_dim1], lda, &ipiv[j], &iinfo); + +/* Adjust INFO and the pivot indices. */ + + if (*info == 0 && iinfo > 0) { + *info = iinfo + j - 1; + } +/* Computing MIN */ + i__4 = *m, i__5 = j + jb - 1; + i__3 = min(i__4,i__5); + for (i__ = j; i__ <= i__3; ++i__) { + ipiv[i__] = j - 1 + ipiv[i__]; +/* L10: */ + } + +/* Apply interchanges to columns 1:J-1. */ + + i__3 = j - 1; + i__4 = j + jb - 1; + dlaswp_(&i__3, &a[a_offset], lda, &j, &i__4, &ipiv[1], &c__1); + + if (j + jb <= *n) { + +/* Apply interchanges to columns J+JB:N. */ + + i__3 = *n - j - jb + 1; + i__4 = j + jb - 1; + dlaswp_(&i__3, &a[(j + jb) * a_dim1 + 1], lda, &j, &i__4, & + ipiv[1], &c__1); + +/* Compute block row of U. */ + + i__3 = *n - j - jb + 1; + dtrsm_((char *)"Left", (char *)"Lower", (char *)"No transpose", (char *)"Unit", &jb, &i__3, & + c_b16, &a[j + j * a_dim1], lda, &a[j + (j + jb) * + a_dim1], lda, (ftnlen)4, (ftnlen)5, (ftnlen)12, ( + ftnlen)4); + if (j + jb <= *m) { + +/* Update trailing submatrix. */ + + i__3 = *m - j - jb + 1; + i__4 = *n - j - jb + 1; + dgemm_((char *)"No transpose", (char *)"No transpose", &i__3, &i__4, &jb, + &c_b19, &a[j + jb + j * a_dim1], lda, &a[j + (j + + jb) * a_dim1], lda, &c_b16, &a[j + jb + (j + jb) * + a_dim1], lda, (ftnlen)12, (ftnlen)12); + } + } +/* L20: */ + } + } + return 0; + +/* End of DGETRF */ + +} /* dgetrf_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dgetrf2.cpp b/lib/linalg/dgetrf2.cpp new file mode 100644 index 0000000000..94162fdca8 --- /dev/null +++ b/lib/linalg/dgetrf2.cpp @@ -0,0 +1,331 @@ +/* static/dgetrf2.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static doublereal c_b13 = 1.; +static doublereal c_b16 = -1.; + +/* > \brief \b DGETRF2 */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* Definition: */ +/* =========== */ + +/* RECURSIVE SUBROUTINE DGETRF2( M, N, A, LDA, IPIV, INFO ) */ + +/* .. Scalar Arguments .. */ +/* INTEGER INFO, LDA, M, N */ +/* .. */ +/* .. Array Arguments .. */ +/* INTEGER IPIV( * ) */ +/* DOUBLE PRECISION A( LDA, * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGETRF2 computes an LU factorization of a general M-by-N matrix A */ +/* > using partial pivoting with row interchanges. */ +/* > */ +/* > The factorization has the form */ +/* > A = P * L * U */ +/* > where P is a permutation matrix, L is lower triangular with unit */ +/* > diagonal elements (lower trapezoidal if m > n), and U is upper */ +/* > triangular (upper trapezoidal if m < n). */ +/* > */ +/* > This is the recursive version of the algorithm. It divides */ +/* > the matrix into four submatrices: */ +/* > */ +/* > [ A11 | A12 ] where A11 is n1 by n1 and A22 is n2 by n2 */ +/* > A = [ -----|----- ] with n1 = min(m,n)/2 */ +/* > [ A21 | A22 ] n2 = n-n1 */ +/* > */ +/* > [ A11 ] */ +/* > The subroutine calls itself to factor [ --- ], */ +/* > [ A12 ] */ +/* > [ A12 ] */ +/* > do the swaps on [ --- ], solve A12, update A22, */ +/* > [ A22 ] */ +/* > */ +/* > then calls itself to factor A22 and do the swaps on A21. */ +/* > */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > On entry, the M-by-N matrix to be factored. */ +/* > On exit, the factors L and U from the factorization */ +/* > A = P*L*U; the unit diagonal elements of L are not stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= max(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (min(M,N)) */ +/* > The pivot indices; for 1 <= i <= min(M,N), row i of the */ +/* > matrix was interchanged with row IPIV(i). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i, U(i,i) is exactly zero. The factorization */ +/* > has been completed, but the factor U is exactly */ +/* > singular, and division by zero will occur if it is used */ +/* > to solve a system of equations. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup doubleGEcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int dgetrf2_(integer *m, integer *n, doublereal *a, integer * + lda, integer *ipiv, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + doublereal d__1; + + /* Local variables */ + integer i__, n1, n2; + doublereal temp; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *), dgemm_(char *, char *, integer *, integer *, integer * + , doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *, ftnlen, ftnlen); + integer iinfo; + doublereal sfmin; + extern /* Subroutine */ int dtrsm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *, ftnlen, ftnlen, ftnlen, ftnlen); + extern doublereal dlamch_(char *, ftnlen); + extern integer idamax_(integer *, doublereal *, integer *); + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen), dlaswp_( + integer *, doublereal *, integer *, integer *, integer *, integer + *, integer *); + + +/* -- LAPACK computational routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DGETRF2", &i__1, (ftnlen)7); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + return 0; + } + if (*m == 1) { + +/* Use unblocked code for one row case */ +/* Just need to handle IPIV and INFO */ + + ipiv[1] = 1; + if (a[a_dim1 + 1] == 0.) { + *info = 1; + } + + } else if (*n == 1) { + +/* Use unblocked code for one column case */ + + +/* Compute machine safe minimum */ + + sfmin = dlamch_((char *)"S", (ftnlen)1); + +/* Find pivot and test for singularity */ + + i__ = idamax_(m, &a[a_dim1 + 1], &c__1); + ipiv[1] = i__; + if (a[i__ + a_dim1] != 0.) { + +/* Apply the interchange */ + + if (i__ != 1) { + temp = a[a_dim1 + 1]; + a[a_dim1 + 1] = a[i__ + a_dim1]; + a[i__ + a_dim1] = temp; + } + +/* Compute elements 2:M of the column */ + + if ((d__1 = a[a_dim1 + 1], abs(d__1)) >= sfmin) { + i__1 = *m - 1; + d__1 = 1. / a[a_dim1 + 1]; + dscal_(&i__1, &d__1, &a[a_dim1 + 2], &c__1); + } else { + i__1 = *m - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + a[i__ + 1 + a_dim1] /= a[a_dim1 + 1]; +/* L10: */ + } + } + + } else { + *info = 1; + } + + } else { + +/* Use recursive code */ + + n1 = min(*m,*n) / 2; + n2 = *n - n1; + +/* [ A11 ] */ +/* Factor [ --- ] */ +/* [ A21 ] */ + + dgetrf2_(m, &n1, &a[a_offset], lda, &ipiv[1], &iinfo); + if (*info == 0 && iinfo > 0) { + *info = iinfo; + } + +/* [ A12 ] */ +/* Apply interchanges to [ --- ] */ +/* [ A22 ] */ + + dlaswp_(&n2, &a[(n1 + 1) * a_dim1 + 1], lda, &c__1, &n1, &ipiv[1], & + c__1); + +/* Solve A12 */ + + dtrsm_((char *)"L", (char *)"L", (char *)"N", (char *)"U", &n1, &n2, &c_b13, &a[a_offset], lda, &a[( + n1 + 1) * a_dim1 + 1], lda, (ftnlen)1, (ftnlen)1, (ftnlen)1, ( + ftnlen)1); + +/* Update A22 */ + + i__1 = *m - n1; + dgemm_((char *)"N", (char *)"N", &i__1, &n2, &n1, &c_b16, &a[n1 + 1 + a_dim1], lda, & + a[(n1 + 1) * a_dim1 + 1], lda, &c_b13, &a[n1 + 1 + (n1 + 1) * + a_dim1], lda, (ftnlen)1, (ftnlen)1); + +/* Factor A22 */ + + i__1 = *m - n1; + dgetrf2_(&i__1, &n2, &a[n1 + 1 + (n1 + 1) * a_dim1], lda, &ipiv[n1 + + 1], &iinfo); + +/* Adjust INFO and the pivot indices */ + + if (*info == 0 && iinfo > 0) { + *info = iinfo + n1; + } + i__1 = min(*m,*n); + for (i__ = n1 + 1; i__ <= i__1; ++i__) { + ipiv[i__] += n1; +/* L20: */ + } + +/* Apply interchanges to A21 */ + + i__1 = n1 + 1; + i__2 = min(*m,*n); + dlaswp_(&n1, &a[a_dim1 + 1], lda, &i__1, &i__2, &ipiv[1], &c__1); + + } + return 0; + +/* End of DGETRF2 */ + +} /* dgetrf2_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dgetri.cpp b/lib/linalg/dgetri.cpp new file mode 100644 index 0000000000..23178071b6 --- /dev/null +++ b/lib/linalg/dgetri.cpp @@ -0,0 +1,343 @@ +/* fortran/dgetri.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c_n1 = -1; +static integer c__2 = 2; +static doublereal c_b20 = -1.; +static doublereal c_b22 = 1.; + +/* > \brief \b DGETRI */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DGETRI + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DGETRI( N, A, LDA, IPIV, WORK, LWORK, INFO ) */ + +/* .. Scalar Arguments .. */ +/* INTEGER INFO, LDA, LWORK, N */ +/* .. */ +/* .. Array Arguments .. */ +/* INTEGER IPIV( * ) */ +/* DOUBLE PRECISION A( LDA, * ), WORK( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGETRI computes the inverse of a matrix using the LU factorization */ +/* > computed by DGETRF. */ +/* > */ +/* > This method inverts U and then computes inv(A) by solving the system */ +/* > inv(A)*L = inv(U) for inv(A). */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > On entry, the factors L and U from the factorization */ +/* > A = P*L*U as computed by DGETRF. */ +/* > On exit, if INFO = 0, the inverse of the original matrix A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= max(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (N) */ +/* > The pivot indices from DGETRF; for 1<=i<=N, row i of the */ +/* > matrix was interchanged with row IPIV(i). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO=0, then WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. LWORK >= max(1,N). */ +/* > For optimal performance LWORK >= N*NB, where NB is */ +/* > the optimal blocksize returned by ILAENV. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i, U(i,i) is exactly zero; the matrix is */ +/* > singular and its inverse could not be computed. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup doubleGEcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int dgetri_(integer *n, doublereal *a, integer *lda, integer + *ipiv, doublereal *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + integer i__, j, jb, nb, jj, jp, nn, iws; + extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, ftnlen, ftnlen), + dgemv_(char *, integer *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, ftnlen); + integer nbmin; + extern /* Subroutine */ int dswap_(integer *, doublereal *, integer *, + doublereal *, integer *), dtrsm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *, ftnlen, ftnlen, ftnlen, ftnlen), xerbla_( + char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + integer ldwork; + extern /* Subroutine */ int dtrtri_(char *, char *, integer *, doublereal + *, integer *, integer *, ftnlen, ftnlen); + integer lwkopt; + logical lquery; + + +/* -- LAPACK computational routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + --work; + + /* Function Body */ + *info = 0; + nb = ilaenv_(&c__1, (char *)"DGETRI", (char *)" ", n, &c_n1, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + lwkopt = *n * nb; + work[1] = (doublereal) lwkopt; + lquery = *lwork == -1; + if (*n < 0) { + *info = -1; + } else if (*lda < max(1,*n)) { + *info = -3; + } else if (*lwork < max(1,*n) && ! lquery) { + *info = -6; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DGETRI", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Form inv(U). If INFO > 0 from DTRTRI, then U is singular, */ +/* and the inverse is not computed. */ + + dtrtri_((char *)"Upper", (char *)"Non-unit", n, &a[a_offset], lda, info, (ftnlen)5, ( + ftnlen)8); + if (*info > 0) { + return 0; + } + + nbmin = 2; + ldwork = *n; + if (nb > 1 && nb < *n) { +/* Computing MAX */ + i__1 = ldwork * nb; + iws = max(i__1,1); + if (*lwork < iws) { + nb = *lwork / ldwork; +/* Computing MAX */ + i__1 = 2, i__2 = ilaenv_(&c__2, (char *)"DGETRI", (char *)" ", n, &c_n1, &c_n1, & + c_n1, (ftnlen)6, (ftnlen)1); + nbmin = max(i__1,i__2); + } + } else { + iws = *n; + } + +/* Solve the equation inv(A)*L = inv(U) for inv(A). */ + + if (nb < nbmin || nb >= *n) { + +/* Use unblocked code. */ + + for (j = *n; j >= 1; --j) { + +/* Copy current column of L to WORK and replace with zeros. */ + + i__1 = *n; + for (i__ = j + 1; i__ <= i__1; ++i__) { + work[i__] = a[i__ + j * a_dim1]; + a[i__ + j * a_dim1] = 0.; +/* L10: */ + } + +/* Compute current column of inv(A). */ + + if (j < *n) { + i__1 = *n - j; + dgemv_((char *)"No transpose", n, &i__1, &c_b20, &a[(j + 1) * a_dim1 + + 1], lda, &work[j + 1], &c__1, &c_b22, &a[j * a_dim1 + + 1], &c__1, (ftnlen)12); + } +/* L20: */ + } + } else { + +/* Use blocked code. */ + + nn = (*n - 1) / nb * nb + 1; + i__1 = -nb; + for (j = nn; i__1 < 0 ? j >= 1 : j <= 1; j += i__1) { +/* Computing MIN */ + i__2 = nb, i__3 = *n - j + 1; + jb = min(i__2,i__3); + +/* Copy current block column of L to WORK and replace with */ +/* zeros. */ + + i__2 = j + jb - 1; + for (jj = j; jj <= i__2; ++jj) { + i__3 = *n; + for (i__ = jj + 1; i__ <= i__3; ++i__) { + work[i__ + (jj - j) * ldwork] = a[i__ + jj * a_dim1]; + a[i__ + jj * a_dim1] = 0.; +/* L30: */ + } +/* L40: */ + } + +/* Compute current block column of inv(A). */ + + if (j + jb <= *n) { + i__2 = *n - j - jb + 1; + dgemm_((char *)"No transpose", (char *)"No transpose", n, &jb, &i__2, &c_b20, + &a[(j + jb) * a_dim1 + 1], lda, &work[j + jb], & + ldwork, &c_b22, &a[j * a_dim1 + 1], lda, (ftnlen)12, ( + ftnlen)12); + } + dtrsm_((char *)"Right", (char *)"Lower", (char *)"No transpose", (char *)"Unit", n, &jb, &c_b22, & + work[j], &ldwork, &a[j * a_dim1 + 1], lda, (ftnlen)5, ( + ftnlen)5, (ftnlen)12, (ftnlen)4); +/* L50: */ + } + } + +/* Apply column interchanges. */ + + for (j = *n - 1; j >= 1; --j) { + jp = ipiv[j]; + if (jp != j) { + dswap_(n, &a[j * a_dim1 + 1], &c__1, &a[jp * a_dim1 + 1], &c__1); + } +/* L60: */ + } + + work[1] = (doublereal) iws; + return 0; + +/* End of DGETRI */ + +} /* dgetri_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dgetrs.cpp b/lib/linalg/dgetrs.cpp new file mode 100644 index 0000000000..4df18e8650 --- /dev/null +++ b/lib/linalg/dgetrs.cpp @@ -0,0 +1,272 @@ +/* fortran/dgetrs.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static doublereal c_b12 = 1.; +static integer c_n1 = -1; + +/* > \brief \b DGETRS */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DGETRS + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DGETRS( TRANS, N, NRHS, A, LDA, IPIV, B, LDB, INFO ) */ + +/* .. Scalar Arguments .. */ +/* CHARACTER TRANS */ +/* INTEGER INFO, LDA, LDB, N, NRHS */ +/* .. */ +/* .. Array Arguments .. */ +/* INTEGER IPIV( * ) */ +/* DOUBLE PRECISION A( LDA, * ), B( LDB, * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DGETRS solves a system of linear equations */ +/* > A * X = B or A**T * X = B */ +/* > with a general N-by-N matrix A using the LU factorization computed */ +/* > by DGETRF. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] TRANS */ +/* > \verbatim */ +/* > TRANS is CHARACTER*1 */ +/* > Specifies the form of the system of equations: */ +/* > = 'N': A * X = B (No transpose) */ +/* > = 'T': A**T* X = B (Transpose) */ +/* > = 'C': A**T* X = B (Conjugate transpose = Transpose) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of right hand sides, i.e., the number of columns */ +/* > of the matrix B. NRHS >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > The factors L and U from the factorization A = P*L*U */ +/* > as computed by DGETRF. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= max(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (N) */ +/* > The pivot indices from DGETRF; for 1<=i<=N, row i of the */ +/* > matrix was interchanged with row IPIV(i). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* > On entry, the right hand side matrix B. */ +/* > On exit, the solution matrix X. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= max(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup doubleGEcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int dgetrs_(char *trans, integer *n, integer *nrhs, + doublereal *a, integer *lda, integer *ipiv, doublereal *b, integer * + ldb, integer *info, ftnlen trans_len) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1; + + /* Local variables */ + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern /* Subroutine */ int dtrsm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *, ftnlen, ftnlen, ftnlen, ftnlen), xerbla_( + char *, integer *, ftnlen), dlaswp_(integer *, doublereal *, + integer *, integer *, integer *, integer *, integer *); + logical notran; + + +/* -- LAPACK computational routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + *info = 0; + notran = lsame_(trans, (char *)"N", (ftnlen)1, (ftnlen)1); + if (! notran && ! lsame_(trans, (char *)"T", (ftnlen)1, (ftnlen)1) && ! lsame_( + trans, (char *)"C", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } else if (*ldb < max(1,*n)) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DGETRS", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0 || *nrhs == 0) { + return 0; + } + + if (notran) { + +/* Solve A * X = B. */ + +/* Apply row interchanges to the right hand sides. */ + + dlaswp_(nrhs, &b[b_offset], ldb, &c__1, n, &ipiv[1], &c__1); + +/* Solve L*X = B, overwriting B with X. */ + + dtrsm_((char *)"Left", (char *)"Lower", (char *)"No transpose", (char *)"Unit", n, nrhs, &c_b12, &a[ + a_offset], lda, &b[b_offset], ldb, (ftnlen)4, (ftnlen)5, ( + ftnlen)12, (ftnlen)4); + +/* Solve U*X = B, overwriting B with X. */ + + dtrsm_((char *)"Left", (char *)"Upper", (char *)"No transpose", (char *)"Non-unit", n, nrhs, &c_b12, & + a[a_offset], lda, &b[b_offset], ldb, (ftnlen)4, (ftnlen)5, ( + ftnlen)12, (ftnlen)8); + } else { + +/* Solve A**T * X = B. */ + +/* Solve U**T *X = B, overwriting B with X. */ + + dtrsm_((char *)"Left", (char *)"Upper", (char *)"Transpose", (char *)"Non-unit", n, nrhs, &c_b12, &a[ + a_offset], lda, &b[b_offset], ldb, (ftnlen)4, (ftnlen)5, ( + ftnlen)9, (ftnlen)8); + +/* Solve L**T *X = B, overwriting B with X. */ + + dtrsm_((char *)"Left", (char *)"Lower", (char *)"Transpose", (char *)"Unit", n, nrhs, &c_b12, &a[ + a_offset], lda, &b[b_offset], ldb, (ftnlen)4, (ftnlen)5, ( + ftnlen)9, (ftnlen)4); + +/* Apply row interchanges to the solution vectors. */ + + dlaswp_(nrhs, &b[b_offset], ldb, &c__1, n, &ipiv[1], &c_n1); + } + + return 0; + +/* End of DGETRS */ + +} /* dgetrs_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/disnan.cpp b/lib/linalg/disnan.cpp new file mode 100644 index 0000000000..9e5bc1094e --- /dev/null +++ b/lib/linalg/disnan.cpp @@ -0,0 +1,14 @@ + +#include + +extern "C" { + +#include "lmp_f2c.h" + +logical disnan_(const doublereal *din) +{ + if (!din) return TRUE_; + + return std::isnan(*din) ? TRUE_ : FALSE_; +} +} diff --git a/lib/linalg/dlabad.cpp b/lib/linalg/dlabad.cpp new file mode 100644 index 0000000000..96eb6efcca --- /dev/null +++ b/lib/linalg/dlabad.cpp @@ -0,0 +1,128 @@ +/* fortran/dlabad.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* > \brief \b DLABAD */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DLABAD + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DLABAD( SMALL, LARGE ) */ + +/* .. Scalar Arguments .. */ +/* DOUBLE PRECISION LARGE, SMALL */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DLABAD takes as input the values computed by DLAMCH for underflow and */ +/* > overflow, and returns the square root of each of these values if the */ +/* > log of LARGE is sufficiently large. This subroutine is intended to */ +/* > identify machines with a large exponent range, such as the Crays, and */ +/* > redefine the underflow and overflow limits to be the square roots of */ +/* > the values computed by DLAMCH. This subroutine is needed because */ +/* > DLAMCH does not compensate for poor arithmetic in the upper half of */ +/* > the exponent range, as is found on a Cray. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in,out] SMALL */ +/* > \verbatim */ +/* > SMALL is DOUBLE PRECISION */ +/* > On entry, the underflow threshold as computed by DLAMCH. */ +/* > On exit, if LOG10(LARGE) is sufficiently large, the square */ +/* > root of SMALL, otherwise unchanged. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] LARGE */ +/* > \verbatim */ +/* > LARGE is DOUBLE PRECISION */ +/* > On entry, the overflow threshold as computed by DLAMCH. */ +/* > On exit, if LOG10(LARGE) is sufficiently large, the square */ +/* > root of LARGE, otherwise unchanged. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup OTHERauxiliary */ + +/* ===================================================================== */ +/* Subroutine */ int dlabad_(doublereal *small, doublereal *large) +{ + /* Builtin functions */ + double d_lg10(doublereal *), sqrt(doublereal); + + +/* -- LAPACK auxiliary routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* If it looks like we're on a Cray, take the square root of */ +/* SMALL and LARGE to avoid overflow and underflow problems. */ + + if (d_lg10(large) > 2e3) { + *small = sqrt(*small); + *large = sqrt(*large); + } + + return 0; + +/* End of DLABAD */ + +} /* dlabad_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dlabrd.cpp b/lib/linalg/dlabrd.cpp new file mode 100644 index 0000000000..5775cc7bc1 --- /dev/null +++ b/lib/linalg/dlabrd.cpp @@ -0,0 +1,532 @@ +/* fortran/dlabrd.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static doublereal c_b4 = -1.; +static doublereal c_b5 = 1.; +static integer c__1 = 1; +static doublereal c_b16 = 0.; + +/* > \brief \b DLABRD reduces the first nb rows and columns of a general matrix to a bidiagonal form. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DLABRD + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DLABRD( M, N, NB, A, LDA, D, E, TAUQ, TAUP, X, LDX, Y, */ +/* LDY ) */ + +/* .. Scalar Arguments .. */ +/* INTEGER LDA, LDX, LDY, M, N, NB */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION A( LDA, * ), D( * ), E( * ), TAUP( * ), */ +/* $ TAUQ( * ), X( LDX, * ), Y( LDY, * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DLABRD reduces the first NB rows and columns of a real general */ +/* > m by n matrix A to upper or lower bidiagonal form by an orthogonal */ +/* > transformation Q**T * A * P, and returns the matrices X and Y which */ +/* > are needed to apply the transformation to the unreduced part of A. */ +/* > */ +/* > If m >= n, A is reduced to upper bidiagonal form; if m < n, to lower */ +/* > bidiagonal form. */ +/* > */ +/* > This is an auxiliary routine called by DGEBRD */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows in the matrix A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns in the matrix A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NB */ +/* > \verbatim */ +/* > NB is INTEGER */ +/* > The number of leading rows and columns of A to be reduced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > On entry, the m by n general matrix to be reduced. */ +/* > On exit, the first NB rows and columns of the matrix are */ +/* > overwritten; the rest of the array is unchanged. */ +/* > If m >= n, elements on and below the diagonal in the first NB */ +/* > columns, with the array TAUQ, represent the orthogonal */ +/* > matrix Q as a product of elementary reflectors; and */ +/* > elements above the diagonal in the first NB rows, with the */ +/* > array TAUP, represent the orthogonal matrix P as a product */ +/* > of elementary reflectors. */ +/* > If m < n, elements below the diagonal in the first NB */ +/* > columns, with the array TAUQ, represent the orthogonal */ +/* > matrix Q as a product of elementary reflectors, and */ +/* > elements on and above the diagonal in the first NB rows, */ +/* > with the array TAUP, represent the orthogonal matrix P as */ +/* > a product of elementary reflectors. */ +/* > See Further Details. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= max(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] D */ +/* > \verbatim */ +/* > D is DOUBLE PRECISION array, dimension (NB) */ +/* > The diagonal elements of the first NB rows and columns of */ +/* > the reduced matrix. D(i) = A(i,i). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] E */ +/* > \verbatim */ +/* > E is DOUBLE PRECISION array, dimension (NB) */ +/* > The off-diagonal elements of the first NB rows and columns of */ +/* > the reduced matrix. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] TAUQ */ +/* > \verbatim */ +/* > TAUQ is DOUBLE PRECISION array, dimension (NB) */ +/* > The scalar factors of the elementary reflectors which */ +/* > represent the orthogonal matrix Q. See Further Details. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] TAUP */ +/* > \verbatim */ +/* > TAUP is DOUBLE PRECISION array, dimension (NB) */ +/* > The scalar factors of the elementary reflectors which */ +/* > represent the orthogonal matrix P. See Further Details. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] X */ +/* > \verbatim */ +/* > X is DOUBLE PRECISION array, dimension (LDX,NB) */ +/* > The m-by-nb matrix X required to update the unreduced part */ +/* > of A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDX */ +/* > \verbatim */ +/* > LDX is INTEGER */ +/* > The leading dimension of the array X. LDX >= max(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] Y */ +/* > \verbatim */ +/* > Y is DOUBLE PRECISION array, dimension (LDY,NB) */ +/* > The n-by-nb matrix Y required to update the unreduced part */ +/* > of A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDY */ +/* > \verbatim */ +/* > LDY is INTEGER */ +/* > The leading dimension of the array Y. LDY >= max(1,N). */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup doubleOTHERauxiliary */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The matrices Q and P are represented as products of elementary */ +/* > reflectors: */ +/* > */ +/* > Q = H(1) H(2) . . . H(nb) and P = G(1) G(2) . . . G(nb) */ +/* > */ +/* > Each H(i) and G(i) has the form: */ +/* > */ +/* > H(i) = I - tauq * v * v**T and G(i) = I - taup * u * u**T */ +/* > */ +/* > where tauq and taup are real scalars, and v and u are real vectors. */ +/* > */ +/* > If m >= n, v(1:i-1) = 0, v(i) = 1, and v(i:m) is stored on exit in */ +/* > A(i:m,i); u(1:i) = 0, u(i+1) = 1, and u(i+1:n) is stored on exit in */ +/* > A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i). */ +/* > */ +/* > If m < n, v(1:i) = 0, v(i+1) = 1, and v(i+1:m) is stored on exit in */ +/* > A(i+2:m,i); u(1:i-1) = 0, u(i) = 1, and u(i:n) is stored on exit in */ +/* > A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i). */ +/* > */ +/* > The elements of the vectors v and u together form the m-by-nb matrix */ +/* > V and the nb-by-n matrix U**T which are needed, with X and Y, to apply */ +/* > the transformation to the unreduced part of the matrix, using a block */ +/* > update of the form: A := A - V*Y**T - X*U**T. */ +/* > */ +/* > The contents of A on exit are illustrated by the following examples */ +/* > with nb = 2: */ +/* > */ +/* > m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): */ +/* > */ +/* > ( 1 1 u1 u1 u1 ) ( 1 u1 u1 u1 u1 u1 ) */ +/* > ( v1 1 1 u2 u2 ) ( 1 1 u2 u2 u2 u2 ) */ +/* > ( v1 v2 a a a ) ( v1 1 a a a a ) */ +/* > ( v1 v2 a a a ) ( v1 v2 a a a a ) */ +/* > ( v1 v2 a a a ) ( v1 v2 a a a a ) */ +/* > ( v1 v2 a a a ) */ +/* > */ +/* > where a denotes an element of the original matrix which is unchanged, */ +/* > vi denotes an element of the vector defining H(i), and ui an element */ +/* > of the vector defining G(i). */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int dlabrd_(integer *m, integer *n, integer *nb, doublereal * + a, integer *lda, doublereal *d__, doublereal *e, doublereal *tauq, + doublereal *taup, doublereal *x, integer *ldx, doublereal *y, integer + *ldy) +{ + /* System generated locals */ + integer a_dim1, a_offset, x_dim1, x_offset, y_dim1, y_offset, i__1, i__2, + i__3; + + /* Local variables */ + integer i__; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *), dgemv_(char *, integer *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, ftnlen), dlarfg_(integer *, doublereal *, + doublereal *, integer *, doublereal *); + + +/* -- LAPACK auxiliary routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Quick return if possible */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --d__; + --e; + --tauq; + --taup; + x_dim1 = *ldx; + x_offset = 1 + x_dim1; + x -= x_offset; + y_dim1 = *ldy; + y_offset = 1 + y_dim1; + y -= y_offset; + + /* Function Body */ + if (*m <= 0 || *n <= 0) { + return 0; + } + + if (*m >= *n) { + +/* Reduce to upper bidiagonal form */ + + i__1 = *nb; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Update A(i:m,i) */ + + i__2 = *m - i__ + 1; + i__3 = i__ - 1; + dgemv_((char *)"No transpose", &i__2, &i__3, &c_b4, &a[i__ + a_dim1], lda, + &y[i__ + y_dim1], ldy, &c_b5, &a[i__ + i__ * a_dim1], & + c__1, (ftnlen)12); + i__2 = *m - i__ + 1; + i__3 = i__ - 1; + dgemv_((char *)"No transpose", &i__2, &i__3, &c_b4, &x[i__ + x_dim1], ldx, + &a[i__ * a_dim1 + 1], &c__1, &c_b5, &a[i__ + i__ * + a_dim1], &c__1, (ftnlen)12); + +/* Generate reflection Q(i) to annihilate A(i+1:m,i) */ + + i__2 = *m - i__ + 1; +/* Computing MIN */ + i__3 = i__ + 1; + dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[min(i__3,*m) + i__ * + a_dim1], &c__1, &tauq[i__]); + d__[i__] = a[i__ + i__ * a_dim1]; + if (i__ < *n) { + a[i__ + i__ * a_dim1] = 1.; + +/* Compute Y(i+1:n,i) */ + + i__2 = *m - i__ + 1; + i__3 = *n - i__; + dgemv_((char *)"Transpose", &i__2, &i__3, &c_b5, &a[i__ + (i__ + 1) * + a_dim1], lda, &a[i__ + i__ * a_dim1], &c__1, &c_b16, & + y[i__ + 1 + i__ * y_dim1], &c__1, (ftnlen)9); + i__2 = *m - i__ + 1; + i__3 = i__ - 1; + dgemv_((char *)"Transpose", &i__2, &i__3, &c_b5, &a[i__ + a_dim1], + lda, &a[i__ + i__ * a_dim1], &c__1, &c_b16, &y[i__ * + y_dim1 + 1], &c__1, (ftnlen)9); + i__2 = *n - i__; + i__3 = i__ - 1; + dgemv_((char *)"No transpose", &i__2, &i__3, &c_b4, &y[i__ + 1 + + y_dim1], ldy, &y[i__ * y_dim1 + 1], &c__1, &c_b5, &y[ + i__ + 1 + i__ * y_dim1], &c__1, (ftnlen)12); + i__2 = *m - i__ + 1; + i__3 = i__ - 1; + dgemv_((char *)"Transpose", &i__2, &i__3, &c_b5, &x[i__ + x_dim1], + ldx, &a[i__ + i__ * a_dim1], &c__1, &c_b16, &y[i__ * + y_dim1 + 1], &c__1, (ftnlen)9); + i__2 = i__ - 1; + i__3 = *n - i__; + dgemv_((char *)"Transpose", &i__2, &i__3, &c_b4, &a[(i__ + 1) * + a_dim1 + 1], lda, &y[i__ * y_dim1 + 1], &c__1, &c_b5, + &y[i__ + 1 + i__ * y_dim1], &c__1, (ftnlen)9); + i__2 = *n - i__; + dscal_(&i__2, &tauq[i__], &y[i__ + 1 + i__ * y_dim1], &c__1); + +/* Update A(i,i+1:n) */ + + i__2 = *n - i__; + dgemv_((char *)"No transpose", &i__2, &i__, &c_b4, &y[i__ + 1 + + y_dim1], ldy, &a[i__ + a_dim1], lda, &c_b5, &a[i__ + ( + i__ + 1) * a_dim1], lda, (ftnlen)12); + i__2 = i__ - 1; + i__3 = *n - i__; + dgemv_((char *)"Transpose", &i__2, &i__3, &c_b4, &a[(i__ + 1) * + a_dim1 + 1], lda, &x[i__ + x_dim1], ldx, &c_b5, &a[ + i__ + (i__ + 1) * a_dim1], lda, (ftnlen)9); + +/* Generate reflection P(i) to annihilate A(i,i+2:n) */ + + i__2 = *n - i__; +/* Computing MIN */ + i__3 = i__ + 2; + dlarfg_(&i__2, &a[i__ + (i__ + 1) * a_dim1], &a[i__ + min( + i__3,*n) * a_dim1], lda, &taup[i__]); + e[i__] = a[i__ + (i__ + 1) * a_dim1]; + a[i__ + (i__ + 1) * a_dim1] = 1.; + +/* Compute X(i+1:m,i) */ + + i__2 = *m - i__; + i__3 = *n - i__; + dgemv_((char *)"No transpose", &i__2, &i__3, &c_b5, &a[i__ + 1 + (i__ + + 1) * a_dim1], lda, &a[i__ + (i__ + 1) * a_dim1], + lda, &c_b16, &x[i__ + 1 + i__ * x_dim1], &c__1, ( + ftnlen)12); + i__2 = *n - i__; + dgemv_((char *)"Transpose", &i__2, &i__, &c_b5, &y[i__ + 1 + y_dim1], + ldy, &a[i__ + (i__ + 1) * a_dim1], lda, &c_b16, &x[ + i__ * x_dim1 + 1], &c__1, (ftnlen)9); + i__2 = *m - i__; + dgemv_((char *)"No transpose", &i__2, &i__, &c_b4, &a[i__ + 1 + + a_dim1], lda, &x[i__ * x_dim1 + 1], &c__1, &c_b5, &x[ + i__ + 1 + i__ * x_dim1], &c__1, (ftnlen)12); + i__2 = i__ - 1; + i__3 = *n - i__; + dgemv_((char *)"No transpose", &i__2, &i__3, &c_b5, &a[(i__ + 1) * + a_dim1 + 1], lda, &a[i__ + (i__ + 1) * a_dim1], lda, & + c_b16, &x[i__ * x_dim1 + 1], &c__1, (ftnlen)12); + i__2 = *m - i__; + i__3 = i__ - 1; + dgemv_((char *)"No transpose", &i__2, &i__3, &c_b4, &x[i__ + 1 + + x_dim1], ldx, &x[i__ * x_dim1 + 1], &c__1, &c_b5, &x[ + i__ + 1 + i__ * x_dim1], &c__1, (ftnlen)12); + i__2 = *m - i__; + dscal_(&i__2, &taup[i__], &x[i__ + 1 + i__ * x_dim1], &c__1); + } +/* L10: */ + } + } else { + +/* Reduce to lower bidiagonal form */ + + i__1 = *nb; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Update A(i,i:n) */ + + i__2 = *n - i__ + 1; + i__3 = i__ - 1; + dgemv_((char *)"No transpose", &i__2, &i__3, &c_b4, &y[i__ + y_dim1], ldy, + &a[i__ + a_dim1], lda, &c_b5, &a[i__ + i__ * a_dim1], + lda, (ftnlen)12); + i__2 = i__ - 1; + i__3 = *n - i__ + 1; + dgemv_((char *)"Transpose", &i__2, &i__3, &c_b4, &a[i__ * a_dim1 + 1], + lda, &x[i__ + x_dim1], ldx, &c_b5, &a[i__ + i__ * a_dim1], + lda, (ftnlen)9); + +/* Generate reflection P(i) to annihilate A(i,i+1:n) */ + + i__2 = *n - i__ + 1; +/* Computing MIN */ + i__3 = i__ + 1; + dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[i__ + min(i__3,*n) * + a_dim1], lda, &taup[i__]); + d__[i__] = a[i__ + i__ * a_dim1]; + if (i__ < *m) { + a[i__ + i__ * a_dim1] = 1.; + +/* Compute X(i+1:m,i) */ + + i__2 = *m - i__; + i__3 = *n - i__ + 1; + dgemv_((char *)"No transpose", &i__2, &i__3, &c_b5, &a[i__ + 1 + i__ * + a_dim1], lda, &a[i__ + i__ * a_dim1], lda, &c_b16, & + x[i__ + 1 + i__ * x_dim1], &c__1, (ftnlen)12); + i__2 = *n - i__ + 1; + i__3 = i__ - 1; + dgemv_((char *)"Transpose", &i__2, &i__3, &c_b5, &y[i__ + y_dim1], + ldy, &a[i__ + i__ * a_dim1], lda, &c_b16, &x[i__ * + x_dim1 + 1], &c__1, (ftnlen)9); + i__2 = *m - i__; + i__3 = i__ - 1; + dgemv_((char *)"No transpose", &i__2, &i__3, &c_b4, &a[i__ + 1 + + a_dim1], lda, &x[i__ * x_dim1 + 1], &c__1, &c_b5, &x[ + i__ + 1 + i__ * x_dim1], &c__1, (ftnlen)12); + i__2 = i__ - 1; + i__3 = *n - i__ + 1; + dgemv_((char *)"No transpose", &i__2, &i__3, &c_b5, &a[i__ * a_dim1 + + 1], lda, &a[i__ + i__ * a_dim1], lda, &c_b16, &x[i__ * + x_dim1 + 1], &c__1, (ftnlen)12); + i__2 = *m - i__; + i__3 = i__ - 1; + dgemv_((char *)"No transpose", &i__2, &i__3, &c_b4, &x[i__ + 1 + + x_dim1], ldx, &x[i__ * x_dim1 + 1], &c__1, &c_b5, &x[ + i__ + 1 + i__ * x_dim1], &c__1, (ftnlen)12); + i__2 = *m - i__; + dscal_(&i__2, &taup[i__], &x[i__ + 1 + i__ * x_dim1], &c__1); + +/* Update A(i+1:m,i) */ + + i__2 = *m - i__; + i__3 = i__ - 1; + dgemv_((char *)"No transpose", &i__2, &i__3, &c_b4, &a[i__ + 1 + + a_dim1], lda, &y[i__ + y_dim1], ldy, &c_b5, &a[i__ + + 1 + i__ * a_dim1], &c__1, (ftnlen)12); + i__2 = *m - i__; + dgemv_((char *)"No transpose", &i__2, &i__, &c_b4, &x[i__ + 1 + + x_dim1], ldx, &a[i__ * a_dim1 + 1], &c__1, &c_b5, &a[ + i__ + 1 + i__ * a_dim1], &c__1, (ftnlen)12); + +/* Generate reflection Q(i) to annihilate A(i+2:m,i) */ + + i__2 = *m - i__; +/* Computing MIN */ + i__3 = i__ + 2; + dlarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3,*m) + + i__ * a_dim1], &c__1, &tauq[i__]); + e[i__] = a[i__ + 1 + i__ * a_dim1]; + a[i__ + 1 + i__ * a_dim1] = 1.; + +/* Compute Y(i+1:n,i) */ + + i__2 = *m - i__; + i__3 = *n - i__; + dgemv_((char *)"Transpose", &i__2, &i__3, &c_b5, &a[i__ + 1 + (i__ + + 1) * a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, + &c_b16, &y[i__ + 1 + i__ * y_dim1], &c__1, (ftnlen)9); + i__2 = *m - i__; + i__3 = i__ - 1; + dgemv_((char *)"Transpose", &i__2, &i__3, &c_b5, &a[i__ + 1 + a_dim1], + lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b16, &y[ + i__ * y_dim1 + 1], &c__1, (ftnlen)9); + i__2 = *n - i__; + i__3 = i__ - 1; + dgemv_((char *)"No transpose", &i__2, &i__3, &c_b4, &y[i__ + 1 + + y_dim1], ldy, &y[i__ * y_dim1 + 1], &c__1, &c_b5, &y[ + i__ + 1 + i__ * y_dim1], &c__1, (ftnlen)12); + i__2 = *m - i__; + dgemv_((char *)"Transpose", &i__2, &i__, &c_b5, &x[i__ + 1 + x_dim1], + ldx, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b16, &y[ + i__ * y_dim1 + 1], &c__1, (ftnlen)9); + i__2 = *n - i__; + dgemv_((char *)"Transpose", &i__, &i__2, &c_b4, &a[(i__ + 1) * a_dim1 + + 1], lda, &y[i__ * y_dim1 + 1], &c__1, &c_b5, &y[i__ + + 1 + i__ * y_dim1], &c__1, (ftnlen)9); + i__2 = *n - i__; + dscal_(&i__2, &tauq[i__], &y[i__ + 1 + i__ * y_dim1], &c__1); + } +/* L20: */ + } + } + return 0; + +/* End of DLABRD */ + +} /* dlabrd_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dlacn2.cpp b/lib/linalg/dlacn2.cpp new file mode 100644 index 0000000000..befb0b4e52 --- /dev/null +++ b/lib/linalg/dlacn2.cpp @@ -0,0 +1,364 @@ +/* fortran/dlacn2.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +/* > \brief \b DLACN2 estimates the 1-norm of a square matrix, using reverse communication for evaluating matr +ix-vector products. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DLACN2 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DLACN2( N, V, X, ISGN, EST, KASE, ISAVE ) */ + +/* .. Scalar Arguments .. */ +/* INTEGER KASE, N */ +/* DOUBLE PRECISION EST */ +/* .. */ +/* .. Array Arguments .. */ +/* INTEGER ISGN( * ), ISAVE( 3 ) */ +/* DOUBLE PRECISION V( * ), X( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DLACN2 estimates the 1-norm of a square, real matrix A. */ +/* > Reverse communication is used for evaluating matrix-vector products. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix. N >= 1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] V */ +/* > \verbatim */ +/* > V is DOUBLE PRECISION array, dimension (N) */ +/* > On the final return, V = A*W, where EST = norm(V)/norm(W) */ +/* > (W is not returned). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] X */ +/* > \verbatim */ +/* > X is DOUBLE PRECISION array, dimension (N) */ +/* > On an intermediate return, X should be overwritten by */ +/* > A * X, if KASE=1, */ +/* > A**T * X, if KASE=2, */ +/* > and DLACN2 must be re-called with all the other parameters */ +/* > unchanged. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] ISGN */ +/* > \verbatim */ +/* > ISGN is INTEGER array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] EST */ +/* > \verbatim */ +/* > EST is DOUBLE PRECISION */ +/* > On entry with KASE = 1 or 2 and ISAVE(1) = 3, EST should be */ +/* > unchanged from the previous call to DLACN2. */ +/* > On exit, EST is an estimate (a lower bound) for norm(A). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] KASE */ +/* > \verbatim */ +/* > KASE is INTEGER */ +/* > On the initial call to DLACN2, KASE should be 0. */ +/* > On an intermediate return, KASE will be 1 or 2, indicating */ +/* > whether X should be overwritten by A * X or A**T * X. */ +/* > On the final return from DLACN2, KASE will again be 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] ISAVE */ +/* > \verbatim */ +/* > ISAVE is INTEGER array, dimension (3) */ +/* > ISAVE is used to save variables between calls to DLACN2 */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup doubleOTHERauxiliary */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > Originally named SONEST, dated March 16, 1988. */ +/* > */ +/* > This is a thread safe version of DLACON, which uses the array ISAVE */ +/* > in place of a SAVE statement, as follows: */ +/* > */ +/* > DLACON DLACN2 */ +/* > JUMP ISAVE(1) */ +/* > J ISAVE(2) */ +/* > ITER ISAVE(3) */ +/* > \endverbatim */ + +/* > \par Contributors: */ +/* ================== */ +/* > */ +/* > Nick Higham, University of Manchester */ + +/* > \par References: */ +/* ================ */ +/* > */ +/* > N.J. Higham, "FORTRAN codes for estimating the one-norm of */ +/* > a real or complex matrix, with applications to condition estimation", */ +/* > ACM Trans. Math. Soft., vol. 14, no. 4, pp. 381-396, December 1988. */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int dlacn2_(integer *n, doublereal *v, doublereal *x, + integer *isgn, doublereal *est, integer *kase, integer *isave) +{ + /* System generated locals */ + integer i__1; + doublereal d__1; + + /* Builtin functions */ + integer i_dnnt(doublereal *); + + /* Local variables */ + integer i__; + doublereal xs, temp; + extern doublereal dasum_(integer *, doublereal *, integer *); + integer jlast; + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *); + extern integer idamax_(integer *, doublereal *, integer *); + doublereal altsgn, estold; + + +/* -- LAPACK auxiliary routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments */ + --isave; + --isgn; + --x; + --v; + + /* Function Body */ + if (*kase == 0) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + x[i__] = 1. / (doublereal) (*n); +/* L10: */ + } + *kase = 1; + isave[1] = 1; + return 0; + } + + switch (isave[1]) { + case 1: goto L20; + case 2: goto L40; + case 3: goto L70; + case 4: goto L110; + case 5: goto L140; + } + +/* ................ ENTRY (ISAVE( 1 ) = 1) */ +/* FIRST ITERATION. X HAS BEEN OVERWRITTEN BY A*X. */ + +L20: + if (*n == 1) { + v[1] = x[1]; + *est = abs(v[1]); +/* ... QUIT */ + goto L150; + } + *est = dasum_(n, &x[1], &c__1); + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if (x[i__] >= 0.) { + x[i__] = 1.; + } else { + x[i__] = -1.; + } + isgn[i__] = i_dnnt(&x[i__]); +/* L30: */ + } + *kase = 2; + isave[1] = 2; + return 0; + +/* ................ ENTRY (ISAVE( 1 ) = 2) */ +/* FIRST ITERATION. X HAS BEEN OVERWRITTEN BY TRANSPOSE(A)*X. */ + +L40: + isave[2] = idamax_(n, &x[1], &c__1); + isave[3] = 2; + +/* MAIN LOOP - ITERATIONS 2,3,...,ITMAX. */ + +L50: + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + x[i__] = 0.; +/* L60: */ + } + x[isave[2]] = 1.; + *kase = 1; + isave[1] = 3; + return 0; + +/* ................ ENTRY (ISAVE( 1 ) = 3) */ +/* X HAS BEEN OVERWRITTEN BY A*X. */ + +L70: + dcopy_(n, &x[1], &c__1, &v[1], &c__1); + estold = *est; + *est = dasum_(n, &v[1], &c__1); + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if (x[i__] >= 0.) { + xs = 1.; + } else { + xs = -1.; + } + if (i_dnnt(&xs) != isgn[i__]) { + goto L90; + } +/* L80: */ + } +/* REPEATED SIGN VECTOR DETECTED, HENCE ALGORITHM HAS CONVERGED. */ + goto L120; + +L90: +/* TEST FOR CYCLING. */ + if (*est <= estold) { + goto L120; + } + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if (x[i__] >= 0.) { + x[i__] = 1.; + } else { + x[i__] = -1.; + } + isgn[i__] = i_dnnt(&x[i__]); +/* L100: */ + } + *kase = 2; + isave[1] = 4; + return 0; + +/* ................ ENTRY (ISAVE( 1 ) = 4) */ +/* X HAS BEEN OVERWRITTEN BY TRANSPOSE(A)*X. */ + +L110: + jlast = isave[2]; + isave[2] = idamax_(n, &x[1], &c__1); + if (x[jlast] != (d__1 = x[isave[2]], abs(d__1)) && isave[3] < 5) { + ++isave[3]; + goto L50; + } + +/* ITERATION COMPLETE. FINAL STAGE. */ + +L120: + altsgn = 1.; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + x[i__] = altsgn * ((doublereal) (i__ - 1) / (doublereal) (*n - 1) + + 1.); + altsgn = -altsgn; +/* L130: */ + } + *kase = 1; + isave[1] = 5; + return 0; + +/* ................ ENTRY (ISAVE( 1 ) = 5) */ +/* X HAS BEEN OVERWRITTEN BY A*X. */ + +L140: + temp = dasum_(n, &x[1], &c__1) / (doublereal) (*n * 3) * 2.; + if (temp > *est) { + dcopy_(n, &x[1], &c__1, &v[1], &c__1); + *est = temp; + } + +L150: + *kase = 0; + return 0; + +/* End of DLACN2 */ + +} /* dlacn2_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dlacpy.cpp b/lib/linalg/dlacpy.cpp new file mode 100644 index 0000000000..b1e62a5336 --- /dev/null +++ b/lib/linalg/dlacpy.cpp @@ -0,0 +1,200 @@ +/* fortran/dlacpy.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* > \brief \b DLACPY copies all or part of one two-dimensional array to another. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DLACPY + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DLACPY( UPLO, M, N, A, LDA, B, LDB ) */ + +/* .. Scalar Arguments .. */ +/* CHARACTER UPLO */ +/* INTEGER LDA, LDB, M, N */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION A( LDA, * ), B( LDB, * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DLACPY copies all or part of a two-dimensional matrix A to another */ +/* > matrix B. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > Specifies the part of the matrix A to be copied to B. */ +/* > = 'U': Upper triangular part */ +/* > = 'L': Lower triangular part */ +/* > Otherwise: All of the matrix A */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > The m by n matrix A. If UPLO = 'U', only the upper triangle */ +/* > or trapezoid is accessed; if UPLO = 'L', only the lower */ +/* > triangle or trapezoid is accessed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= max(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] B */ +/* > \verbatim */ +/* > B is DOUBLE PRECISION array, dimension (LDB,N) */ +/* > On exit, B = A in the locations specified by UPLO. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= max(1,M). */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup OTHERauxiliary */ + +/* ===================================================================== */ +/* Subroutine */ int dlacpy_(char *uplo, integer *m, integer *n, doublereal * + a, integer *lda, doublereal *b, integer *ldb, ftnlen uplo_len) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2; + + /* Local variables */ + integer i__, j; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + + +/* -- LAPACK auxiliary routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + if (lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1)) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = min(j,*m); + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = a[i__ + j * a_dim1]; +/* L10: */ + } +/* L20: */ + } + } else if (lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = j; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = a[i__ + j * a_dim1]; +/* L30: */ + } +/* L40: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = a[i__ + j * a_dim1]; +/* L50: */ + } +/* L60: */ + } + } + return 0; + +/* End of DLACPY */ + +} /* dlacpy_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dladiv.cpp b/lib/linalg/dladiv.cpp new file mode 100644 index 0000000000..7ffd5485cc --- /dev/null +++ b/lib/linalg/dladiv.cpp @@ -0,0 +1,277 @@ +/* fortran/dladiv.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* > \brief \b DLADIV performs complex division in real arithmetic, avoiding unnecessary overflow. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DLADIV + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DLADIV( A, B, C, D, P, Q ) */ + +/* .. Scalar Arguments .. */ +/* DOUBLE PRECISION A, B, C, D, P, Q */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DLADIV performs complex division in real arithmetic */ +/* > */ +/* > a + i*b */ +/* > p + i*q = --------- */ +/* > c + i*d */ +/* > */ +/* > The algorithm is due to Michael Baudin and Robert L. Smith */ +/* > and can be found in the paper */ +/* > (char *)"A Robust Complex Division in Scilab" */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION */ +/* > \endverbatim */ +/* > */ +/* > \param[in] B */ +/* > \verbatim */ +/* > B is DOUBLE PRECISION */ +/* > \endverbatim */ +/* > */ +/* > \param[in] C */ +/* > \verbatim */ +/* > C is DOUBLE PRECISION */ +/* > \endverbatim */ +/* > */ +/* > \param[in] D */ +/* > \verbatim */ +/* > D is DOUBLE PRECISION */ +/* > The scalars a, b, c, and d in the above expression. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] P */ +/* > \verbatim */ +/* > P is DOUBLE PRECISION */ +/* > \endverbatim */ +/* > */ +/* > \param[out] Q */ +/* > \verbatim */ +/* > Q is DOUBLE PRECISION */ +/* > The scalars p and q in the above expression. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup doubleOTHERauxiliary */ + +/* ===================================================================== */ +/* Subroutine */ int dladiv_(doublereal *a, doublereal *b, doublereal *c__, + doublereal *d__, doublereal *p, doublereal *q) +{ + /* System generated locals */ + doublereal d__1, d__2; + + /* Local variables */ + doublereal s, aa, ab, bb, cc, cd, dd, be, un, ov, eps; + extern doublereal dlamch_(char *, ftnlen); + extern /* Subroutine */ int dladiv1_(doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *); + + +/* -- LAPACK auxiliary routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + aa = *a; + bb = *b; + cc = *c__; + dd = *d__; +/* Computing MAX */ + d__1 = abs(*a), d__2 = abs(*b); + ab = max(d__1,d__2); +/* Computing MAX */ + d__1 = abs(*c__), d__2 = abs(*d__); + cd = max(d__1,d__2); + s = 1.; + ov = dlamch_((char *)"Overflow threshold", (ftnlen)18); + un = dlamch_((char *)"Safe minimum", (ftnlen)12); + eps = dlamch_((char *)"Epsilon", (ftnlen)7); + be = 2. / (eps * eps); + if (ab >= ov * .5) { + aa *= .5; + bb *= .5; + s *= 2.; + } + if (cd >= ov * .5) { + cc *= .5; + dd *= .5; + s *= .5; + } + if (ab <= un * 2. / eps) { + aa *= be; + bb *= be; + s /= be; + } + if (cd <= un * 2. / eps) { + cc *= be; + dd *= be; + s *= be; + } + if (abs(*d__) <= abs(*c__)) { + dladiv1_(&aa, &bb, &cc, &dd, p, q); + } else { + dladiv1_(&bb, &aa, &dd, &cc, p, q); + *q = -(*q); + } + *p *= s; + *q *= s; + + return 0; + +/* End of DLADIV */ + +} /* dladiv_ */ + +/* > \ingroup doubleOTHERauxiliary */ +/* Subroutine */ int dladiv1_(doublereal *a, doublereal *b, doublereal *c__, + doublereal *d__, doublereal *p, doublereal *q) +{ + doublereal r__, t; + extern doublereal dladiv2_(doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *); + + +/* -- LAPACK auxiliary routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + r__ = *d__ / *c__; + t = 1. / (*c__ + *d__ * r__); + *p = dladiv2_(a, b, c__, d__, &r__, &t); + *a = -(*a); + *q = dladiv2_(b, a, c__, d__, &r__, &t); + + return 0; + +/* End of DLADIV1 */ + +} /* dladiv1_ */ + +/* > \ingroup doubleOTHERauxiliary */ +doublereal dladiv2_(doublereal *a, doublereal *b, doublereal *c__, doublereal + *d__, doublereal *r__, doublereal *t) +{ + /* System generated locals */ + doublereal ret_val; + + /* Local variables */ + doublereal br; + + +/* -- LAPACK auxiliary routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. Executable Statements .. */ + + if (*r__ != 0.) { + br = *b * *r__; + if (br != 0.) { + ret_val = (*a + br) * *t; + } else { + ret_val = *a * *t + *b * *t * *r__; + } + } else { + ret_val = (*a + *d__ * (*b / *c__)) * *t; + } + + return ret_val; + +/* End of DLADIV2 */ + +} /* dladiv2_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dlae2.cpp b/lib/linalg/dlae2.cpp new file mode 100644 index 0000000000..2a1be2816d --- /dev/null +++ b/lib/linalg/dlae2.cpp @@ -0,0 +1,210 @@ +/* fortran/dlae2.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* > \brief \b DLAE2 computes the eigenvalues of a 2-by-2 symmetric matrix. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DLAE2 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DLAE2( A, B, C, RT1, RT2 ) */ + +/* .. Scalar Arguments .. */ +/* DOUBLE PRECISION A, B, C, RT1, RT2 */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DLAE2 computes the eigenvalues of a 2-by-2 symmetric matrix */ +/* > [ A B ] */ +/* > [ B C ]. */ +/* > On return, RT1 is the eigenvalue of larger absolute value, and RT2 */ +/* > is the eigenvalue of smaller absolute value. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION */ +/* > The (1,1) element of the 2-by-2 matrix. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] B */ +/* > \verbatim */ +/* > B is DOUBLE PRECISION */ +/* > The (1,2) and (2,1) elements of the 2-by-2 matrix. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] C */ +/* > \verbatim */ +/* > C is DOUBLE PRECISION */ +/* > The (2,2) element of the 2-by-2 matrix. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RT1 */ +/* > \verbatim */ +/* > RT1 is DOUBLE PRECISION */ +/* > The eigenvalue of larger absolute value. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RT2 */ +/* > \verbatim */ +/* > RT2 is DOUBLE PRECISION */ +/* > The eigenvalue of smaller absolute value. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup OTHERauxiliary */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > RT1 is accurate to a few ulps barring over/underflow. */ +/* > */ +/* > RT2 may be inaccurate if there is massive cancellation in the */ +/* > determinant A*C-B*B; higher precision or correctly rounded or */ +/* > correctly truncated arithmetic would be needed to compute RT2 */ +/* > accurately in all cases. */ +/* > */ +/* > Overflow is possible only if RT1 is within a factor of 5 of overflow. */ +/* > Underflow is harmless if the input data is 0 or exceeds */ +/* > underflow_threshold / macheps. */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int dlae2_(doublereal *a, doublereal *b, doublereal *c__, + doublereal *rt1, doublereal *rt2) +{ + /* System generated locals */ + doublereal d__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + doublereal ab, df, tb, sm, rt, adf, acmn, acmx; + + +/* -- LAPACK auxiliary routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Compute the eigenvalues */ + + sm = *a + *c__; + df = *a - *c__; + adf = abs(df); + tb = *b + *b; + ab = abs(tb); + if (abs(*a) > abs(*c__)) { + acmx = *a; + acmn = *c__; + } else { + acmx = *c__; + acmn = *a; + } + if (adf > ab) { +/* Computing 2nd power */ + d__1 = ab / adf; + rt = adf * sqrt(d__1 * d__1 + 1.); + } else if (adf < ab) { +/* Computing 2nd power */ + d__1 = adf / ab; + rt = ab * sqrt(d__1 * d__1 + 1.); + } else { + +/* Includes case AB=ADF=0 */ + + rt = ab * sqrt(2.); + } + if (sm < 0.) { + *rt1 = (sm - rt) * .5; + +/* Order of execution important. */ +/* To get fully accurate smaller eigenvalue, */ +/* next line needs to be executed in higher precision. */ + + *rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b; + } else if (sm > 0.) { + *rt1 = (sm + rt) * .5; + +/* Order of execution important. */ +/* To get fully accurate smaller eigenvalue, */ +/* next line needs to be executed in higher precision. */ + + *rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b; + } else { + +/* Includes case RT1 = RT2 = 0 */ + + *rt1 = rt * .5; + *rt2 = rt * -.5; + } + return 0; + +/* End of DLAE2 */ + +} /* dlae2_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dlaed0.cpp b/lib/linalg/dlaed0.cpp new file mode 100644 index 0000000000..41b13a3cac --- /dev/null +++ b/lib/linalg/dlaed0.cpp @@ -0,0 +1,534 @@ +/* fortran/dlaed0.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static integer c__9 = 9; +static integer c__0 = 0; +static integer c__2 = 2; +static doublereal c_b23 = 1.; +static doublereal c_b24 = 0.; +static integer c__1 = 1; + +/* > \brief \b DLAED0 used by DSTEDC. Computes all eigenvalues and corresponding eigenvectors of an unreduced +symmetric tridiagonal matrix using the divide and conquer method. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DLAED0 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DLAED0( ICOMPQ, QSIZ, N, D, E, Q, LDQ, QSTORE, LDQS, */ +/* WORK, IWORK, INFO ) */ + +/* .. Scalar Arguments .. */ +/* INTEGER ICOMPQ, INFO, LDQ, LDQS, N, QSIZ */ +/* .. */ +/* .. Array Arguments .. */ +/* INTEGER IWORK( * ) */ +/* DOUBLE PRECISION D( * ), E( * ), Q( LDQ, * ), QSTORE( LDQS, * ), */ +/* $ WORK( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DLAED0 computes all eigenvalues and corresponding eigenvectors of a */ +/* > symmetric tridiagonal matrix using the divide and conquer method. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] ICOMPQ */ +/* > \verbatim */ +/* > ICOMPQ is INTEGER */ +/* > = 0: Compute eigenvalues only. */ +/* > = 1: Compute eigenvectors of original dense symmetric matrix */ +/* > also. On entry, Q contains the orthogonal matrix used */ +/* > to reduce the original matrix to tridiagonal form. */ +/* > = 2: Compute eigenvalues and eigenvectors of tridiagonal */ +/* > matrix. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] QSIZ */ +/* > \verbatim */ +/* > QSIZ is INTEGER */ +/* > The dimension of the orthogonal matrix used to reduce */ +/* > the full matrix to tridiagonal form. QSIZ >= N if ICOMPQ = 1. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The dimension of the symmetric tridiagonal matrix. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] D */ +/* > \verbatim */ +/* > D is DOUBLE PRECISION array, dimension (N) */ +/* > On entry, the main diagonal of the tridiagonal matrix. */ +/* > On exit, its eigenvalues. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] E */ +/* > \verbatim */ +/* > E is DOUBLE PRECISION array, dimension (N-1) */ +/* > The off-diagonal elements of the tridiagonal matrix. */ +/* > On exit, E has been destroyed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] Q */ +/* > \verbatim */ +/* > Q is DOUBLE PRECISION array, dimension (LDQ, N) */ +/* > On entry, Q must contain an N-by-N orthogonal matrix. */ +/* > If ICOMPQ = 0 Q is not referenced. */ +/* > If ICOMPQ = 1 On entry, Q is a subset of the columns of the */ +/* > orthogonal matrix used to reduce the full */ +/* > matrix to tridiagonal form corresponding to */ +/* > the subset of the full matrix which is being */ +/* > decomposed at this time. */ +/* > If ICOMPQ = 2 On entry, Q will be the identity matrix. */ +/* > On exit, Q contains the eigenvectors of the */ +/* > tridiagonal matrix. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDQ */ +/* > \verbatim */ +/* > LDQ is INTEGER */ +/* > The leading dimension of the array Q. If eigenvectors are */ +/* > desired, then LDQ >= max(1,N). In any case, LDQ >= 1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] QSTORE */ +/* > \verbatim */ +/* > QSTORE is DOUBLE PRECISION array, dimension (LDQS, N) */ +/* > Referenced only when ICOMPQ = 1. Used to store parts of */ +/* > the eigenvector matrix when the updating matrix multiplies */ +/* > take place. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDQS */ +/* > \verbatim */ +/* > LDQS is INTEGER */ +/* > The leading dimension of the array QSTORE. If ICOMPQ = 1, */ +/* > then LDQS >= max(1,N). In any case, LDQS >= 1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, */ +/* > If ICOMPQ = 0 or 1, the dimension of WORK must be at least */ +/* > 1 + 3*N + 2*N*lg N + 3*N**2 */ +/* > ( lg( N ) = smallest integer k */ +/* > such that 2^k >= N ) */ +/* > If ICOMPQ = 2, the dimension of WORK must be at least */ +/* > 4*N + N**2. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, */ +/* > If ICOMPQ = 0 or 1, the dimension of IWORK must be at least */ +/* > 6 + 6*N + 5*N*lg N. */ +/* > ( lg( N ) = smallest integer k */ +/* > such that 2^k >= N ) */ +/* > If ICOMPQ = 2, the dimension of IWORK must be at least */ +/* > 3 + 5*N. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit. */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > > 0: The algorithm failed to compute an eigenvalue while */ +/* > working on the submatrix lying in rows and columns */ +/* > INFO/(N+1) through mod(INFO,N+1). */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup auxOTHERcomputational */ + +/* > \par Contributors: */ +/* ================== */ +/* > */ +/* > Jeff Rutter, Computer Science Division, University of California */ +/* > at Berkeley, USA */ + +/* ===================================================================== */ +/* Subroutine */ int dlaed0_(integer *icompq, integer *qsiz, integer *n, + doublereal *d__, doublereal *e, doublereal *q, integer *ldq, + doublereal *qstore, integer *ldqs, doublereal *work, integer *iwork, + integer *info) +{ + /* System generated locals */ + integer q_dim1, q_offset, qstore_dim1, qstore_offset, i__1, i__2; + doublereal d__1; + + /* Builtin functions */ + double log(doublereal); + integer pow_ii(integer *, integer *); + + /* Local variables */ + integer i__, j, k, iq, lgn, msd2, smm1, spm1, spm2; + doublereal temp; + integer curr; + extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, ftnlen, ftnlen); + integer iperm; + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *); + integer indxq, iwrem; + extern /* Subroutine */ int dlaed1_(integer *, doublereal *, doublereal *, + integer *, integer *, doublereal *, integer *, doublereal *, + integer *, integer *); + integer iqptr; + extern /* Subroutine */ int dlaed7_(integer *, integer *, integer *, + integer *, integer *, integer *, doublereal *, doublereal *, + integer *, integer *, doublereal *, integer *, doublereal *, + integer *, integer *, integer *, integer *, integer *, doublereal + *, doublereal *, integer *, integer *); + integer tlvls; + extern /* Subroutine */ int dlacpy_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *, ftnlen); + integer igivcl; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + integer igivnm, submat, curprb, subpbs, igivpt; + extern /* Subroutine */ int dsteqr_(char *, integer *, doublereal *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + ftnlen); + integer curlvl, matsiz, iprmpt, smlsiz; + + +/* -- LAPACK computational routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + --d__; + --e; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + qstore_dim1 = *ldqs; + qstore_offset = 1 + qstore_dim1; + qstore -= qstore_offset; + --work; + --iwork; + + /* Function Body */ + *info = 0; + + if (*icompq < 0 || *icompq > 2) { + *info = -1; + } else if (*icompq == 1 && *qsiz < max(0,*n)) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*ldq < max(1,*n)) { + *info = -7; + } else if (*ldqs < max(1,*n)) { + *info = -9; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DLAED0", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + smlsiz = ilaenv_(&c__9, (char *)"DLAED0", (char *)" ", &c__0, &c__0, &c__0, &c__0, ( + ftnlen)6, (ftnlen)1); + +/* Determine the size and placement of the submatrices, and save in */ +/* the leading elements of IWORK. */ + + iwork[1] = *n; + subpbs = 1; + tlvls = 0; +L10: + if (iwork[subpbs] > smlsiz) { + for (j = subpbs; j >= 1; --j) { + iwork[j * 2] = (iwork[j] + 1) / 2; + iwork[(j << 1) - 1] = iwork[j] / 2; +/* L20: */ + } + ++tlvls; + subpbs <<= 1; + goto L10; + } + i__1 = subpbs; + for (j = 2; j <= i__1; ++j) { + iwork[j] += iwork[j - 1]; +/* L30: */ + } + +/* Divide the matrix into SUBPBS submatrices of size at most SMLSIZ+1 */ +/* using rank-1 modifications (cuts). */ + + spm1 = subpbs - 1; + i__1 = spm1; + for (i__ = 1; i__ <= i__1; ++i__) { + submat = iwork[i__] + 1; + smm1 = submat - 1; + d__[smm1] -= (d__1 = e[smm1], abs(d__1)); + d__[submat] -= (d__1 = e[smm1], abs(d__1)); +/* L40: */ + } + + indxq = (*n << 2) + 3; + if (*icompq != 2) { + +/* Set up workspaces for eigenvalues only/accumulate new vectors */ +/* routine */ + + temp = log((doublereal) (*n)) / log(2.); + lgn = (integer) temp; + if (pow_ii(&c__2, &lgn) < *n) { + ++lgn; + } + if (pow_ii(&c__2, &lgn) < *n) { + ++lgn; + } + iprmpt = indxq + *n + 1; + iperm = iprmpt + *n * lgn; + iqptr = iperm + *n * lgn; + igivpt = iqptr + *n + 2; + igivcl = igivpt + *n * lgn; + + igivnm = 1; + iq = igivnm + (*n << 1) * lgn; +/* Computing 2nd power */ + i__1 = *n; + iwrem = iq + i__1 * i__1 + 1; + +/* Initialize pointers */ + + i__1 = subpbs; + for (i__ = 0; i__ <= i__1; ++i__) { + iwork[iprmpt + i__] = 1; + iwork[igivpt + i__] = 1; +/* L50: */ + } + iwork[iqptr] = 1; + } + +/* Solve each submatrix eigenproblem at the bottom of the divide and */ +/* conquer tree. */ + + curr = 0; + i__1 = spm1; + for (i__ = 0; i__ <= i__1; ++i__) { + if (i__ == 0) { + submat = 1; + matsiz = iwork[1]; + } else { + submat = iwork[i__] + 1; + matsiz = iwork[i__ + 1] - iwork[i__]; + } + if (*icompq == 2) { + dsteqr_((char *)"I", &matsiz, &d__[submat], &e[submat], &q[submat + + submat * q_dim1], ldq, &work[1], info, (ftnlen)1); + if (*info != 0) { + goto L130; + } + } else { + dsteqr_((char *)"I", &matsiz, &d__[submat], &e[submat], &work[iq - 1 + + iwork[iqptr + curr]], &matsiz, &work[1], info, (ftnlen)1); + if (*info != 0) { + goto L130; + } + if (*icompq == 1) { + dgemm_((char *)"N", (char *)"N", qsiz, &matsiz, &matsiz, &c_b23, &q[submat * + q_dim1 + 1], ldq, &work[iq - 1 + iwork[iqptr + curr]], + &matsiz, &c_b24, &qstore[submat * qstore_dim1 + 1], + ldqs, (ftnlen)1, (ftnlen)1); + } +/* Computing 2nd power */ + i__2 = matsiz; + iwork[iqptr + curr + 1] = iwork[iqptr + curr] + i__2 * i__2; + ++curr; + } + k = 1; + i__2 = iwork[i__ + 1]; + for (j = submat; j <= i__2; ++j) { + iwork[indxq + j] = k; + ++k; +/* L60: */ + } +/* L70: */ + } + +/* Successively merge eigensystems of adjacent submatrices */ +/* into eigensystem for the corresponding larger matrix. */ + +/* while ( SUBPBS > 1 ) */ + + curlvl = 1; +L80: + if (subpbs > 1) { + spm2 = subpbs - 2; + i__1 = spm2; + for (i__ = 0; i__ <= i__1; i__ += 2) { + if (i__ == 0) { + submat = 1; + matsiz = iwork[2]; + msd2 = iwork[1]; + curprb = 0; + } else { + submat = iwork[i__] + 1; + matsiz = iwork[i__ + 2] - iwork[i__]; + msd2 = matsiz / 2; + ++curprb; + } + +/* Merge lower order eigensystems (of size MSD2 and MATSIZ - MSD2) */ +/* into an eigensystem of size MATSIZ. */ +/* DLAED1 is used only for the full eigensystem of a tridiagonal */ +/* matrix. */ +/* DLAED7 handles the cases in which eigenvalues only or eigenvalues */ +/* and eigenvectors of a full symmetric matrix (which was reduced to */ +/* tridiagonal form) are desired. */ + + if (*icompq == 2) { + dlaed1_(&matsiz, &d__[submat], &q[submat + submat * q_dim1], + ldq, &iwork[indxq + submat], &e[submat + msd2 - 1], & + msd2, &work[1], &iwork[subpbs + 1], info); + } else { + dlaed7_(icompq, &matsiz, qsiz, &tlvls, &curlvl, &curprb, &d__[ + submat], &qstore[submat * qstore_dim1 + 1], ldqs, & + iwork[indxq + submat], &e[submat + msd2 - 1], &msd2, & + work[iq], &iwork[iqptr], &iwork[iprmpt], &iwork[iperm] + , &iwork[igivpt], &iwork[igivcl], &work[igivnm], & + work[iwrem], &iwork[subpbs + 1], info); + } + if (*info != 0) { + goto L130; + } + iwork[i__ / 2 + 1] = iwork[i__ + 2]; +/* L90: */ + } + subpbs /= 2; + ++curlvl; + goto L80; + } + +/* end while */ + +/* Re-merge the eigenvalues/vectors which were deflated at the final */ +/* merge step. */ + + if (*icompq == 1) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + j = iwork[indxq + i__]; + work[i__] = d__[j]; + dcopy_(qsiz, &qstore[j * qstore_dim1 + 1], &c__1, &q[i__ * q_dim1 + + 1], &c__1); +/* L100: */ + } + dcopy_(n, &work[1], &c__1, &d__[1], &c__1); + } else if (*icompq == 2) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + j = iwork[indxq + i__]; + work[i__] = d__[j]; + dcopy_(n, &q[j * q_dim1 + 1], &c__1, &work[*n * i__ + 1], &c__1); +/* L110: */ + } + dcopy_(n, &work[1], &c__1, &d__[1], &c__1); + dlacpy_((char *)"A", n, n, &work[*n + 1], n, &q[q_offset], ldq, (ftnlen)1); + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + j = iwork[indxq + i__]; + work[i__] = d__[j]; +/* L120: */ + } + dcopy_(n, &work[1], &c__1, &d__[1], &c__1); + } + goto L140; + +L130: + *info = submat * (*n + 1) + submat + matsiz - 1; + +L140: + return 0; + +/* End of DLAED0 */ + +} /* dlaed0_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dlaed1.cpp b/lib/linalg/dlaed1.cpp new file mode 100644 index 0000000000..a448e3363e --- /dev/null +++ b/lib/linalg/dlaed1.cpp @@ -0,0 +1,336 @@ +/* fortran/dlaed1.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c_n1 = -1; + +/* > \brief \b DLAED1 used by DSTEDC. Computes the updated eigensystem of a diagonal matrix after modification + by a rank-one symmetric matrix. Used when the original matrix is tridiagonal. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DLAED1 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DLAED1( N, D, Q, LDQ, INDXQ, RHO, CUTPNT, WORK, IWORK, */ +/* INFO ) */ + +/* .. Scalar Arguments .. */ +/* INTEGER CUTPNT, INFO, LDQ, N */ +/* DOUBLE PRECISION RHO */ +/* .. */ +/* .. Array Arguments .. */ +/* INTEGER INDXQ( * ), IWORK( * ) */ +/* DOUBLE PRECISION D( * ), Q( LDQ, * ), WORK( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DLAED1 computes the updated eigensystem of a diagonal */ +/* > matrix after modification by a rank-one symmetric matrix. This */ +/* > routine is used only for the eigenproblem which requires all */ +/* > eigenvalues and eigenvectors of a tridiagonal matrix. DLAED7 handles */ +/* > the case in which eigenvalues only or eigenvalues and eigenvectors */ +/* > of a full symmetric matrix (which was reduced to tridiagonal form) */ +/* > are desired. */ +/* > */ +/* > T = Q(in) ( D(in) + RHO * Z*Z**T ) Q**T(in) = Q(out) * D(out) * Q**T(out) */ +/* > */ +/* > where Z = Q**T*u, u is a vector of length N with ones in the */ +/* > CUTPNT and CUTPNT + 1 th elements and zeros elsewhere. */ +/* > */ +/* > The eigenvectors of the original matrix are stored in Q, and the */ +/* > eigenvalues are in D. The algorithm consists of three stages: */ +/* > */ +/* > The first stage consists of deflating the size of the problem */ +/* > when there are multiple eigenvalues or if there is a zero in */ +/* > the Z vector. For each such occurrence the dimension of the */ +/* > secular equation problem is reduced by one. This stage is */ +/* > performed by the routine DLAED2. */ +/* > */ +/* > The second stage consists of calculating the updated */ +/* > eigenvalues. This is done by finding the roots of the secular */ +/* > equation via the routine DLAED4 (as called by DLAED3). */ +/* > This routine also calculates the eigenvectors of the current */ +/* > problem. */ +/* > */ +/* > The final stage consists of computing the updated eigenvectors */ +/* > directly using the updated eigenvalues. The eigenvectors for */ +/* > the current problem are multiplied with the eigenvectors from */ +/* > the overall problem. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The dimension of the symmetric tridiagonal matrix. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] D */ +/* > \verbatim */ +/* > D is DOUBLE PRECISION array, dimension (N) */ +/* > On entry, the eigenvalues of the rank-1-perturbed matrix. */ +/* > On exit, the eigenvalues of the repaired matrix. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] Q */ +/* > \verbatim */ +/* > Q is DOUBLE PRECISION array, dimension (LDQ,N) */ +/* > On entry, the eigenvectors of the rank-1-perturbed matrix. */ +/* > On exit, the eigenvectors of the repaired tridiagonal matrix. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDQ */ +/* > \verbatim */ +/* > LDQ is INTEGER */ +/* > The leading dimension of the array Q. LDQ >= max(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] INDXQ */ +/* > \verbatim */ +/* > INDXQ is INTEGER array, dimension (N) */ +/* > On entry, the permutation which separately sorts the two */ +/* > subproblems in D into ascending order. */ +/* > On exit, the permutation which will reintegrate the */ +/* > subproblems back into sorted order, */ +/* > i.e. D( INDXQ( I = 1, N ) ) will be in ascending order. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] RHO */ +/* > \verbatim */ +/* > RHO is DOUBLE PRECISION */ +/* > The subdiagonal entry used to create the rank-1 modification. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] CUTPNT */ +/* > \verbatim */ +/* > CUTPNT is INTEGER */ +/* > The location of the last eigenvalue in the leading sub-matrix. */ +/* > min(1,N) <= CUTPNT <= N/2. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (4*N + N**2) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (4*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit. */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > > 0: if INFO = 1, an eigenvalue did not converge */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup auxOTHERcomputational */ + +/* > \par Contributors: */ +/* ================== */ +/* > */ +/* > Jeff Rutter, Computer Science Division, University of California */ +/* > at Berkeley, USA \n */ +/* > Modified by Francoise Tisseur, University of Tennessee */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int dlaed1_(integer *n, doublereal *d__, doublereal *q, + integer *ldq, integer *indxq, doublereal *rho, integer *cutpnt, + doublereal *work, integer *iwork, integer *info) +{ + /* System generated locals */ + integer q_dim1, q_offset, i__1, i__2; + + /* Local variables */ + integer i__, k, n1, n2, is, iw, iz, iq2, zpp1, indx, indxc; + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *); + integer indxp; + extern /* Subroutine */ int dlaed2_(integer *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, integer *, + integer *, integer *, integer *, integer *), dlaed3_(integer *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, doublereal *, doublereal *, integer *, integer *, + doublereal *, doublereal *, integer *); + integer idlmda; + extern /* Subroutine */ int dlamrg_(integer *, integer *, doublereal *, + integer *, integer *, integer *), xerbla_(char *, integer *, + ftnlen); + integer coltyp; + + +/* -- LAPACK computational routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + --d__; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --indxq; + --work; + --iwork; + + /* Function Body */ + *info = 0; + + if (*n < 0) { + *info = -1; + } else if (*ldq < max(1,*n)) { + *info = -4; + } else /* if(complicated condition) */ { +/* Computing MIN */ + i__1 = 1, i__2 = *n / 2; + if (min(i__1,i__2) > *cutpnt || *n / 2 < *cutpnt) { + *info = -7; + } + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DLAED1", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* The following values are integer pointers which indicate */ +/* the portion of the workspace */ +/* used by a particular array in DLAED2 and DLAED3. */ + + iz = 1; + idlmda = iz + *n; + iw = idlmda + *n; + iq2 = iw + *n; + + indx = 1; + indxc = indx + *n; + coltyp = indxc + *n; + indxp = coltyp + *n; + + +/* Form the z-vector which consists of the last row of Q_1 and the */ +/* first row of Q_2. */ + + dcopy_(cutpnt, &q[*cutpnt + q_dim1], ldq, &work[iz], &c__1); + zpp1 = *cutpnt + 1; + i__1 = *n - *cutpnt; + dcopy_(&i__1, &q[zpp1 + zpp1 * q_dim1], ldq, &work[iz + *cutpnt], &c__1); + +/* Deflate eigenvalues. */ + + dlaed2_(&k, n, cutpnt, &d__[1], &q[q_offset], ldq, &indxq[1], rho, &work[ + iz], &work[idlmda], &work[iw], &work[iq2], &iwork[indx], &iwork[ + indxc], &iwork[indxp], &iwork[coltyp], info); + + if (*info != 0) { + goto L20; + } + +/* Solve Secular Equation. */ + + if (k != 0) { + is = (iwork[coltyp] + iwork[coltyp + 1]) * *cutpnt + (iwork[coltyp + + 1] + iwork[coltyp + 2]) * (*n - *cutpnt) + iq2; + dlaed3_(&k, n, cutpnt, &d__[1], &q[q_offset], ldq, rho, &work[idlmda], + &work[iq2], &iwork[indxc], &iwork[coltyp], &work[iw], &work[ + is], info); + if (*info != 0) { + goto L20; + } + +/* Prepare the INDXQ sorting permutation. */ + + n1 = k; + n2 = *n - k; + dlamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &indxq[1]); + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + indxq[i__] = i__; +/* L10: */ + } + } + +L20: + return 0; + +/* End of DLAED1 */ + +} /* dlaed1_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dlaed2.cpp b/lib/linalg/dlaed2.cpp new file mode 100644 index 0000000000..746c07ba33 --- /dev/null +++ b/lib/linalg/dlaed2.cpp @@ -0,0 +1,646 @@ +/* fortran/dlaed2.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static doublereal c_b3 = -1.; +static integer c__1 = 1; + +/* > \brief \b DLAED2 used by DSTEDC. Merges eigenvalues and deflates secular equation. Used when the original + matrix is tridiagonal. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DLAED2 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DLAED2( K, N, N1, D, Q, LDQ, INDXQ, RHO, Z, DLAMDA, W, */ +/* Q2, INDX, INDXC, INDXP, COLTYP, INFO ) */ + +/* .. Scalar Arguments .. */ +/* INTEGER INFO, K, LDQ, N, N1 */ +/* DOUBLE PRECISION RHO */ +/* .. */ +/* .. Array Arguments .. */ +/* INTEGER COLTYP( * ), INDX( * ), INDXC( * ), INDXP( * ), */ +/* $ INDXQ( * ) */ +/* DOUBLE PRECISION D( * ), DLAMDA( * ), Q( LDQ, * ), Q2( * ), */ +/* $ W( * ), Z( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DLAED2 merges the two sets of eigenvalues together into a single */ +/* > sorted set. Then it tries to deflate the size of the problem. */ +/* > There are two ways in which deflation can occur: when two or more */ +/* > eigenvalues are close together or if there is a tiny entry in the */ +/* > Z vector. For each such occurrence the order of the related secular */ +/* > equation problem is reduced by one. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[out] K */ +/* > \verbatim */ +/* > K is INTEGER */ +/* > The number of non-deflated eigenvalues, and the order of the */ +/* > related secular equation. 0 <= K <=N. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The dimension of the symmetric tridiagonal matrix. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N1 */ +/* > \verbatim */ +/* > N1 is INTEGER */ +/* > The location of the last eigenvalue in the leading sub-matrix. */ +/* > min(1,N) <= N1 <= N/2. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] D */ +/* > \verbatim */ +/* > D is DOUBLE PRECISION array, dimension (N) */ +/* > On entry, D contains the eigenvalues of the two submatrices to */ +/* > be combined. */ +/* > On exit, D contains the trailing (N-K) updated eigenvalues */ +/* > (those which were deflated) sorted into increasing order. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] Q */ +/* > \verbatim */ +/* > Q is DOUBLE PRECISION array, dimension (LDQ, N) */ +/* > On entry, Q contains the eigenvectors of two submatrices in */ +/* > the two square blocks with corners at (1,1), (N1,N1) */ +/* > and (N1+1, N1+1), (N,N). */ +/* > On exit, Q contains the trailing (N-K) updated eigenvectors */ +/* > (those which were deflated) in its last N-K columns. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDQ */ +/* > \verbatim */ +/* > LDQ is INTEGER */ +/* > The leading dimension of the array Q. LDQ >= max(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] INDXQ */ +/* > \verbatim */ +/* > INDXQ is INTEGER array, dimension (N) */ +/* > The permutation which separately sorts the two sub-problems */ +/* > in D into ascending order. Note that elements in the second */ +/* > half of this permutation must first have N1 added to their */ +/* > values. Destroyed on exit. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] RHO */ +/* > \verbatim */ +/* > RHO is DOUBLE PRECISION */ +/* > On entry, the off-diagonal element associated with the rank-1 */ +/* > cut which originally split the two submatrices which are now */ +/* > being recombined. */ +/* > On exit, RHO has been modified to the value required by */ +/* > DLAED3. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] Z */ +/* > \verbatim */ +/* > Z is DOUBLE PRECISION array, dimension (N) */ +/* > On entry, Z contains the updating vector (the last */ +/* > row of the first sub-eigenvector matrix and the first row of */ +/* > the second sub-eigenvector matrix). */ +/* > On exit, the contents of Z have been destroyed by the updating */ +/* > process. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] DLAMDA */ +/* > \verbatim */ +/* > DLAMDA is DOUBLE PRECISION array, dimension (N) */ +/* > A copy of the first K eigenvalues which will be used by */ +/* > DLAED3 to form the secular equation. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] W */ +/* > \verbatim */ +/* > W is DOUBLE PRECISION array, dimension (N) */ +/* > The first k values of the final deflation-altered z-vector */ +/* > which will be passed to DLAED3. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] Q2 */ +/* > \verbatim */ +/* > Q2 is DOUBLE PRECISION array, dimension (N1**2+(N-N1)**2) */ +/* > A copy of the first K eigenvectors which will be used by */ +/* > DLAED3 in a matrix multiply (DGEMM) to solve for the new */ +/* > eigenvectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INDX */ +/* > \verbatim */ +/* > INDX is INTEGER array, dimension (N) */ +/* > The permutation used to sort the contents of DLAMDA into */ +/* > ascending order. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INDXC */ +/* > \verbatim */ +/* > INDXC is INTEGER array, dimension (N) */ +/* > The permutation used to arrange the columns of the deflated */ +/* > Q matrix into three groups: the first group contains non-zero */ +/* > elements only at and above N1, the second contains */ +/* > non-zero elements only below N1, and the third is dense. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INDXP */ +/* > \verbatim */ +/* > INDXP is INTEGER array, dimension (N) */ +/* > The permutation used to place deflated values of D at the end */ +/* > of the array. INDXP(1:K) points to the nondeflated D-values */ +/* > and INDXP(K+1:N) points to the deflated eigenvalues. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] COLTYP */ +/* > \verbatim */ +/* > COLTYP is INTEGER array, dimension (N) */ +/* > During execution, a label which will indicate which of the */ +/* > following types a column in the Q2 matrix is: */ +/* > 1 : non-zero in the upper half only; */ +/* > 2 : dense; */ +/* > 3 : non-zero in the lower half only; */ +/* > 4 : deflated. */ +/* > On exit, COLTYP(i) is the number of columns of type i, */ +/* > for i=1 to 4 only. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit. */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup auxOTHERcomputational */ + +/* > \par Contributors: */ +/* ================== */ +/* > */ +/* > Jeff Rutter, Computer Science Division, University of California */ +/* > at Berkeley, USA \n */ +/* > Modified by Francoise Tisseur, University of Tennessee */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int dlaed2_(integer *k, integer *n, integer *n1, doublereal * + d__, doublereal *q, integer *ldq, integer *indxq, doublereal *rho, + doublereal *z__, doublereal *dlamda, doublereal *w, doublereal *q2, + integer *indx, integer *indxc, integer *indxp, integer *coltyp, + integer *info) +{ + /* System generated locals */ + integer q_dim1, q_offset, i__1, i__2; + doublereal d__1, d__2, d__3, d__4; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + doublereal c__; + integer i__, j; + doublereal s, t; + integer k2, n2, ct, nj, pj, js, iq1, iq2, n1p1; + doublereal eps, tau, tol; + integer psm[4], imax, jmax; + extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *); + integer ctot[4]; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *), dcopy_(integer *, doublereal *, integer *, doublereal + *, integer *); + extern doublereal dlapy2_(doublereal *, doublereal *), dlamch_(char *, + ftnlen); + extern integer idamax_(integer *, doublereal *, integer *); + extern /* Subroutine */ int dlamrg_(integer *, integer *, doublereal *, + integer *, integer *, integer *), dlacpy_(char *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *, + ftnlen), xerbla_(char *, integer *, ftnlen); + + +/* -- LAPACK computational routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Arrays .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + --d__; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --indxq; + --z__; + --dlamda; + --w; + --q2; + --indx; + --indxc; + --indxp; + --coltyp; + + /* Function Body */ + *info = 0; + + if (*n < 0) { + *info = -2; + } else if (*ldq < max(1,*n)) { + *info = -6; + } else /* if(complicated condition) */ { +/* Computing MIN */ + i__1 = 1, i__2 = *n / 2; + if (min(i__1,i__2) > *n1 || *n / 2 < *n1) { + *info = -3; + } + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DLAED2", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + n2 = *n - *n1; + n1p1 = *n1 + 1; + + if (*rho < 0.) { + dscal_(&n2, &c_b3, &z__[n1p1], &c__1); + } + +/* Normalize z so that norm(z) = 1. Since z is the concatenation of */ +/* two normalized vectors, norm2(z) = sqrt(2). */ + + t = 1. / sqrt(2.); + dscal_(n, &t, &z__[1], &c__1); + +/* RHO = ABS( norm(z)**2 * RHO ) */ + + *rho = (d__1 = *rho * 2., abs(d__1)); + +/* Sort the eigenvalues into increasing order */ + + i__1 = *n; + for (i__ = n1p1; i__ <= i__1; ++i__) { + indxq[i__] += *n1; +/* L10: */ + } + +/* re-integrate the deflated parts from the last pass */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + dlamda[i__] = d__[indxq[i__]]; +/* L20: */ + } + dlamrg_(n1, &n2, &dlamda[1], &c__1, &c__1, &indxc[1]); + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + indx[i__] = indxq[indxc[i__]]; +/* L30: */ + } + +/* Calculate the allowable deflation tolerance */ + + imax = idamax_(n, &z__[1], &c__1); + jmax = idamax_(n, &d__[1], &c__1); + eps = dlamch_((char *)"Epsilon", (ftnlen)7); +/* Computing MAX */ + d__3 = (d__1 = d__[jmax], abs(d__1)), d__4 = (d__2 = z__[imax], abs(d__2)) + ; + tol = eps * 8. * max(d__3,d__4); + +/* If the rank-1 modifier is small enough, no more needs to be done */ +/* except to reorganize Q so that its columns correspond with the */ +/* elements in D. */ + + if (*rho * (d__1 = z__[imax], abs(d__1)) <= tol) { + *k = 0; + iq2 = 1; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__ = indx[j]; + dcopy_(n, &q[i__ * q_dim1 + 1], &c__1, &q2[iq2], &c__1); + dlamda[j] = d__[i__]; + iq2 += *n; +/* L40: */ + } + dlacpy_((char *)"A", n, n, &q2[1], n, &q[q_offset], ldq, (ftnlen)1); + dcopy_(n, &dlamda[1], &c__1, &d__[1], &c__1); + goto L190; + } + +/* If there are multiple eigenvalues then the problem deflates. Here */ +/* the number of equal eigenvalues are found. As each equal */ +/* eigenvalue is found, an elementary reflector is computed to rotate */ +/* the corresponding eigensubspace so that the corresponding */ +/* components of Z are zero in this new basis. */ + + i__1 = *n1; + for (i__ = 1; i__ <= i__1; ++i__) { + coltyp[i__] = 1; +/* L50: */ + } + i__1 = *n; + for (i__ = n1p1; i__ <= i__1; ++i__) { + coltyp[i__] = 3; +/* L60: */ + } + + + *k = 0; + k2 = *n + 1; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + nj = indx[j]; + if (*rho * (d__1 = z__[nj], abs(d__1)) <= tol) { + +/* Deflate due to small z component. */ + + --k2; + coltyp[nj] = 4; + indxp[k2] = nj; + if (j == *n) { + goto L100; + } + } else { + pj = nj; + goto L80; + } +/* L70: */ + } +L80: + ++j; + nj = indx[j]; + if (j > *n) { + goto L100; + } + if (*rho * (d__1 = z__[nj], abs(d__1)) <= tol) { + +/* Deflate due to small z component. */ + + --k2; + coltyp[nj] = 4; + indxp[k2] = nj; + } else { + +/* Check if eigenvalues are close enough to allow deflation. */ + + s = z__[pj]; + c__ = z__[nj]; + +/* Find sqrt(a**2+b**2) without overflow or */ +/* destructive underflow. */ + + tau = dlapy2_(&c__, &s); + t = d__[nj] - d__[pj]; + c__ /= tau; + s = -s / tau; + if ((d__1 = t * c__ * s, abs(d__1)) <= tol) { + +/* Deflation is possible. */ + + z__[nj] = tau; + z__[pj] = 0.; + if (coltyp[nj] != coltyp[pj]) { + coltyp[nj] = 2; + } + coltyp[pj] = 4; + drot_(n, &q[pj * q_dim1 + 1], &c__1, &q[nj * q_dim1 + 1], &c__1, & + c__, &s); +/* Computing 2nd power */ + d__1 = c__; +/* Computing 2nd power */ + d__2 = s; + t = d__[pj] * (d__1 * d__1) + d__[nj] * (d__2 * d__2); +/* Computing 2nd power */ + d__1 = s; +/* Computing 2nd power */ + d__2 = c__; + d__[nj] = d__[pj] * (d__1 * d__1) + d__[nj] * (d__2 * d__2); + d__[pj] = t; + --k2; + i__ = 1; +L90: + if (k2 + i__ <= *n) { + if (d__[pj] < d__[indxp[k2 + i__]]) { + indxp[k2 + i__ - 1] = indxp[k2 + i__]; + indxp[k2 + i__] = pj; + ++i__; + goto L90; + } else { + indxp[k2 + i__ - 1] = pj; + } + } else { + indxp[k2 + i__ - 1] = pj; + } + pj = nj; + } else { + ++(*k); + dlamda[*k] = d__[pj]; + w[*k] = z__[pj]; + indxp[*k] = pj; + pj = nj; + } + } + goto L80; +L100: + +/* Record the last eigenvalue. */ + + ++(*k); + dlamda[*k] = d__[pj]; + w[*k] = z__[pj]; + indxp[*k] = pj; + +/* Count up the total number of the various types of columns, then */ +/* form a permutation which positions the four column types into */ +/* four uniform groups (although one or more of these groups may be */ +/* empty). */ + + for (j = 1; j <= 4; ++j) { + ctot[j - 1] = 0; +/* L110: */ + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + ct = coltyp[j]; + ++ctot[ct - 1]; +/* L120: */ + } + +/* PSM(*) = Position in SubMatrix (of types 1 through 4) */ + + psm[0] = 1; + psm[1] = ctot[0] + 1; + psm[2] = psm[1] + ctot[1]; + psm[3] = psm[2] + ctot[2]; + *k = *n - ctot[3]; + +/* Fill out the INDXC array so that the permutation which it induces */ +/* will place all type-1 columns first, all type-2 columns next, */ +/* then all type-3's, and finally all type-4's. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + js = indxp[j]; + ct = coltyp[js]; + indx[psm[ct - 1]] = js; + indxc[psm[ct - 1]] = j; + ++psm[ct - 1]; +/* L130: */ + } + +/* Sort the eigenvalues and corresponding eigenvectors into DLAMDA */ +/* and Q2 respectively. The eigenvalues/vectors which were not */ +/* deflated go into the first K slots of DLAMDA and Q2 respectively, */ +/* while those which were deflated go into the last N - K slots. */ + + i__ = 1; + iq1 = 1; + iq2 = (ctot[0] + ctot[1]) * *n1 + 1; + i__1 = ctot[0]; + for (j = 1; j <= i__1; ++j) { + js = indx[i__]; + dcopy_(n1, &q[js * q_dim1 + 1], &c__1, &q2[iq1], &c__1); + z__[i__] = d__[js]; + ++i__; + iq1 += *n1; +/* L140: */ + } + + i__1 = ctot[1]; + for (j = 1; j <= i__1; ++j) { + js = indx[i__]; + dcopy_(n1, &q[js * q_dim1 + 1], &c__1, &q2[iq1], &c__1); + dcopy_(&n2, &q[*n1 + 1 + js * q_dim1], &c__1, &q2[iq2], &c__1); + z__[i__] = d__[js]; + ++i__; + iq1 += *n1; + iq2 += n2; +/* L150: */ + } + + i__1 = ctot[2]; + for (j = 1; j <= i__1; ++j) { + js = indx[i__]; + dcopy_(&n2, &q[*n1 + 1 + js * q_dim1], &c__1, &q2[iq2], &c__1); + z__[i__] = d__[js]; + ++i__; + iq2 += n2; +/* L160: */ + } + + iq1 = iq2; + i__1 = ctot[3]; + for (j = 1; j <= i__1; ++j) { + js = indx[i__]; + dcopy_(n, &q[js * q_dim1 + 1], &c__1, &q2[iq2], &c__1); + iq2 += *n; + z__[i__] = d__[js]; + ++i__; +/* L170: */ + } + +/* The deflated eigenvalues and their corresponding vectors go back */ +/* into the last N - K slots of D and Q respectively. */ + + if (*k < *n) { + dlacpy_((char *)"A", n, &ctot[3], &q2[iq1], n, &q[(*k + 1) * q_dim1 + 1], ldq, + (ftnlen)1); + i__1 = *n - *k; + dcopy_(&i__1, &z__[*k + 1], &c__1, &d__[*k + 1], &c__1); + } + +/* Copy CTOT into COLTYP for referencing in DLAED3. */ + + for (j = 1; j <= 4; ++j) { + coltyp[j] = ctot[j - 1]; +/* L180: */ + } + +L190: + return 0; + +/* End of DLAED2 */ + +} /* dlaed2_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dlaed3.cpp b/lib/linalg/dlaed3.cpp new file mode 100644 index 0000000000..e56c3fb41b --- /dev/null +++ b/lib/linalg/dlaed3.cpp @@ -0,0 +1,437 @@ +/* fortran/dlaed3.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static doublereal c_b22 = 1.; +static doublereal c_b23 = 0.; + +/* > \brief \b DLAED3 used by DSTEDC. Finds the roots of the secular equation and updates the eigenvectors. Us +ed when the original matrix is tridiagonal. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DLAED3 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DLAED3( K, N, N1, D, Q, LDQ, RHO, DLAMDA, Q2, INDX, */ +/* CTOT, W, S, INFO ) */ + +/* .. Scalar Arguments .. */ +/* INTEGER INFO, K, LDQ, N, N1 */ +/* DOUBLE PRECISION RHO */ +/* .. */ +/* .. Array Arguments .. */ +/* INTEGER CTOT( * ), INDX( * ) */ +/* DOUBLE PRECISION D( * ), DLAMDA( * ), Q( LDQ, * ), Q2( * ), */ +/* $ S( * ), W( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DLAED3 finds the roots of the secular equation, as defined by the */ +/* > values in D, W, and RHO, between 1 and K. It makes the */ +/* > appropriate calls to DLAED4 and then updates the eigenvectors by */ +/* > multiplying the matrix of eigenvectors of the pair of eigensystems */ +/* > being combined by the matrix of eigenvectors of the K-by-K system */ +/* > which is solved here. */ +/* > */ +/* > This code makes very mild assumptions about floating point */ +/* > arithmetic. It will work on machines with a guard digit in */ +/* > add/subtract, or on those binary machines without guard digits */ +/* > which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. */ +/* > It could conceivably fail on hexadecimal or decimal machines */ +/* > without guard digits, but we know of none. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] K */ +/* > \verbatim */ +/* > K is INTEGER */ +/* > The number of terms in the rational function to be solved by */ +/* > DLAED4. K >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of rows and columns in the Q matrix. */ +/* > N >= K (deflation may result in N>K). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N1 */ +/* > \verbatim */ +/* > N1 is INTEGER */ +/* > The location of the last eigenvalue in the leading submatrix. */ +/* > min(1,N) <= N1 <= N/2. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] D */ +/* > \verbatim */ +/* > D is DOUBLE PRECISION array, dimension (N) */ +/* > D(I) contains the updated eigenvalues for */ +/* > 1 <= I <= K. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] Q */ +/* > \verbatim */ +/* > Q is DOUBLE PRECISION array, dimension (LDQ,N) */ +/* > Initially the first K columns are used as workspace. */ +/* > On output the columns 1 to K contain */ +/* > the updated eigenvectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDQ */ +/* > \verbatim */ +/* > LDQ is INTEGER */ +/* > The leading dimension of the array Q. LDQ >= max(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] RHO */ +/* > \verbatim */ +/* > RHO is DOUBLE PRECISION */ +/* > The value of the parameter in the rank one update equation. */ +/* > RHO >= 0 required. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] DLAMDA */ +/* > \verbatim */ +/* > DLAMDA is DOUBLE PRECISION array, dimension (K) */ +/* > The first K elements of this array contain the old roots */ +/* > of the deflated updating problem. These are the poles */ +/* > of the secular equation. May be changed on output by */ +/* > having lowest order bit set to zero on Cray X-MP, Cray Y-MP, */ +/* > Cray-2, or Cray C-90, as described above. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] Q2 */ +/* > \verbatim */ +/* > Q2 is DOUBLE PRECISION array, dimension (LDQ2*N) */ +/* > The first K columns of this matrix contain the non-deflated */ +/* > eigenvectors for the split problem. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] INDX */ +/* > \verbatim */ +/* > INDX is INTEGER array, dimension (N) */ +/* > The permutation used to arrange the columns of the deflated */ +/* > Q matrix into three groups (see DLAED2). */ +/* > The rows of the eigenvectors found by DLAED4 must be likewise */ +/* > permuted before the matrix multiply can take place. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] CTOT */ +/* > \verbatim */ +/* > CTOT is INTEGER array, dimension (4) */ +/* > A count of the total number of the various types of columns */ +/* > in Q, as described in INDX. The fourth column type is any */ +/* > column which has been deflated. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] W */ +/* > \verbatim */ +/* > W is DOUBLE PRECISION array, dimension (K) */ +/* > The first K elements of this array contain the components */ +/* > of the deflation-adjusted updating vector. Destroyed on */ +/* > output. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] S */ +/* > \verbatim */ +/* > S is DOUBLE PRECISION array, dimension (N1 + 1)*K */ +/* > Will contain the eigenvectors of the repaired matrix which */ +/* > will be multiplied by the previously accumulated eigenvectors */ +/* > to update the system. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit. */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > > 0: if INFO = 1, an eigenvalue did not converge */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup auxOTHERcomputational */ + +/* > \par Contributors: */ +/* ================== */ +/* > */ +/* > Jeff Rutter, Computer Science Division, University of California */ +/* > at Berkeley, USA \n */ +/* > Modified by Francoise Tisseur, University of Tennessee */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int dlaed3_(integer *k, integer *n, integer *n1, doublereal * + d__, doublereal *q, integer *ldq, doublereal *rho, doublereal *dlamda, + doublereal *q2, integer *indx, integer *ctot, doublereal *w, + doublereal *s, integer *info) +{ + /* System generated locals */ + integer q_dim1, q_offset, i__1, i__2; + doublereal d__1; + + /* Builtin functions */ + double sqrt(doublereal), d_sign(doublereal *, doublereal *); + + /* Local variables */ + integer i__, j, n2, n12, ii, n23, iq2; + doublereal temp; + extern doublereal dnrm2_(integer *, doublereal *, integer *); + extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, ftnlen, ftnlen), + dcopy_(integer *, doublereal *, integer *, doublereal *, integer + *), dlaed4_(integer *, integer *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, integer *); + extern doublereal dlamc3_(doublereal *, doublereal *); + extern /* Subroutine */ int dlacpy_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *, ftnlen), + dlaset_(char *, integer *, integer *, doublereal *, doublereal *, + doublereal *, integer *, ftnlen), xerbla_(char *, integer *, + ftnlen); + + +/* -- LAPACK computational routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + --d__; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --dlamda; + --q2; + --indx; + --ctot; + --w; + --s; + + /* Function Body */ + *info = 0; + + if (*k < 0) { + *info = -1; + } else if (*n < *k) { + *info = -2; + } else if (*ldq < max(1,*n)) { + *info = -6; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DLAED3", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + if (*k == 0) { + return 0; + } + +/* Modify values DLAMDA(i) to make sure all DLAMDA(i)-DLAMDA(j) can */ +/* be computed with high relative accuracy (barring over/underflow). */ +/* This is a problem on machines without a guard digit in */ +/* add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). */ +/* The following code replaces DLAMDA(I) by 2*DLAMDA(I)-DLAMDA(I), */ +/* which on any of these machines zeros out the bottommost */ +/* bit of DLAMDA(I) if it is 1; this makes the subsequent */ +/* subtractions DLAMDA(I)-DLAMDA(J) unproblematic when cancellation */ +/* occurs. On binary machines with a guard digit (almost all */ +/* machines) it does not change DLAMDA(I) at all. On hexadecimal */ +/* and decimal machines with a guard digit, it slightly */ +/* changes the bottommost bits of DLAMDA(I). It does not account */ +/* for hexadecimal or decimal machines without guard digits */ +/* (we know of none). We use a subroutine call to compute */ +/* 2*DLAMBDA(I) to prevent optimizing compilers from eliminating */ +/* this code. */ + + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + dlamda[i__] = dlamc3_(&dlamda[i__], &dlamda[i__]) - dlamda[i__]; +/* L10: */ + } + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + dlaed4_(k, &j, &dlamda[1], &w[1], &q[j * q_dim1 + 1], rho, &d__[j], + info); + +/* If the zero finder fails, the computation is terminated. */ + + if (*info != 0) { + goto L120; + } +/* L20: */ + } + + if (*k == 1) { + goto L110; + } + if (*k == 2) { + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + w[1] = q[j * q_dim1 + 1]; + w[2] = q[j * q_dim1 + 2]; + ii = indx[1]; + q[j * q_dim1 + 1] = w[ii]; + ii = indx[2]; + q[j * q_dim1 + 2] = w[ii]; +/* L30: */ + } + goto L110; + } + +/* Compute updated W. */ + + dcopy_(k, &w[1], &c__1, &s[1], &c__1); + +/* Initialize W(I) = Q(I,I) */ + + i__1 = *ldq + 1; + dcopy_(k, &q[q_offset], &i__1, &w[1], &c__1); + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + w[i__] *= q[i__ + j * q_dim1] / (dlamda[i__] - dlamda[j]); +/* L40: */ + } + i__2 = *k; + for (i__ = j + 1; i__ <= i__2; ++i__) { + w[i__] *= q[i__ + j * q_dim1] / (dlamda[i__] - dlamda[j]); +/* L50: */ + } +/* L60: */ + } + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + d__1 = sqrt(-w[i__]); + w[i__] = d_sign(&d__1, &s[i__]); +/* L70: */ + } + +/* Compute eigenvectors of the modified rank-1 modification. */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = *k; + for (i__ = 1; i__ <= i__2; ++i__) { + s[i__] = w[i__] / q[i__ + j * q_dim1]; +/* L80: */ + } + temp = dnrm2_(k, &s[1], &c__1); + i__2 = *k; + for (i__ = 1; i__ <= i__2; ++i__) { + ii = indx[i__]; + q[i__ + j * q_dim1] = s[ii] / temp; +/* L90: */ + } +/* L100: */ + } + +/* Compute the updated eigenvectors. */ + +L110: + + n2 = *n - *n1; + n12 = ctot[1] + ctot[2]; + n23 = ctot[2] + ctot[3]; + + dlacpy_((char *)"A", &n23, k, &q[ctot[1] + 1 + q_dim1], ldq, &s[1], &n23, (ftnlen) + 1); + iq2 = *n1 * n12 + 1; + if (n23 != 0) { + dgemm_((char *)"N", (char *)"N", &n2, k, &n23, &c_b22, &q2[iq2], &n2, &s[1], &n23, & + c_b23, &q[*n1 + 1 + q_dim1], ldq, (ftnlen)1, (ftnlen)1); + } else { + dlaset_((char *)"A", &n2, k, &c_b23, &c_b23, &q[*n1 + 1 + q_dim1], ldq, ( + ftnlen)1); + } + + dlacpy_((char *)"A", &n12, k, &q[q_offset], ldq, &s[1], &n12, (ftnlen)1); + if (n12 != 0) { + dgemm_((char *)"N", (char *)"N", n1, k, &n12, &c_b22, &q2[1], n1, &s[1], &n12, &c_b23, + &q[q_offset], ldq, (ftnlen)1, (ftnlen)1); + } else { + dlaset_((char *)"A", n1, k, &c_b23, &c_b23, &q[q_dim1 + 1], ldq, (ftnlen)1); + } + + +L120: + return 0; + +/* End of DLAED3 */ + +} /* dlaed3_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dlaed4.cpp b/lib/linalg/dlaed4.cpp new file mode 100644 index 0000000000..5cc622887d --- /dev/null +++ b/lib/linalg/dlaed4.cpp @@ -0,0 +1,1036 @@ +/* fortran/dlaed4.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* > \brief \b DLAED4 used by DSTEDC. Finds a single root of the secular equation. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DLAED4 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DLAED4( N, I, D, Z, DELTA, RHO, DLAM, INFO ) */ + +/* .. Scalar Arguments .. */ +/* INTEGER I, INFO, N */ +/* DOUBLE PRECISION DLAM, RHO */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION D( * ), DELTA( * ), Z( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > This subroutine computes the I-th updated eigenvalue of a symmetric */ +/* > rank-one modification to a diagonal matrix whose elements are */ +/* > given in the array d, and that */ +/* > */ +/* > D(i) < D(j) for i < j */ +/* > */ +/* > and that RHO > 0. This is arranged by the calling routine, and is */ +/* > no loss in generality. The rank-one modified system is thus */ +/* > */ +/* > diag( D ) + RHO * Z * Z_transpose. */ +/* > */ +/* > where we assume the Euclidean norm of Z is 1. */ +/* > */ +/* > The method consists of approximating the rational functions in the */ +/* > secular equation by simpler interpolating rational functions. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The length of all arrays. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] I */ +/* > \verbatim */ +/* > I is INTEGER */ +/* > The index of the eigenvalue to be computed. 1 <= I <= N. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] D */ +/* > \verbatim */ +/* > D is DOUBLE PRECISION array, dimension (N) */ +/* > The original eigenvalues. It is assumed that they are in */ +/* > order, D(I) < D(J) for I < J. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] Z */ +/* > \verbatim */ +/* > Z is DOUBLE PRECISION array, dimension (N) */ +/* > The components of the updating vector. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] DELTA */ +/* > \verbatim */ +/* > DELTA is DOUBLE PRECISION array, dimension (N) */ +/* > If N > 2, DELTA contains (D(j) - lambda_I) in its j-th */ +/* > component. If N = 1, then DELTA(1) = 1. If N = 2, see DLAED5 */ +/* > for detail. The vector DELTA contains the information necessary */ +/* > to construct the eigenvectors by DLAED3 and DLAED9. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] RHO */ +/* > \verbatim */ +/* > RHO is DOUBLE PRECISION */ +/* > The scalar in the symmetric updating formula. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] DLAM */ +/* > \verbatim */ +/* > DLAM is DOUBLE PRECISION */ +/* > The computed lambda_I, the I-th updated eigenvalue. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > > 0: if INFO = 1, the updating process failed. */ +/* > \endverbatim */ + +/* > \par Internal Parameters: */ +/* ========================= */ +/* > */ +/* > \verbatim */ +/* > Logical variable ORGATI (origin-at-i?) is used for distinguishing */ +/* > whether D(i) or D(i+1) is treated as the origin. */ +/* > */ +/* > ORGATI = .true. origin at i */ +/* > ORGATI = .false. origin at i+1 */ +/* > */ +/* > Logical variable SWTCH3 (switch-for-3-poles?) is for noting */ +/* > if we are working with THREE poles! */ +/* > */ +/* > MAXIT is the maximum number of iterations allowed for each */ +/* > eigenvalue. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup auxOTHERcomputational */ + +/* > \par Contributors: */ +/* ================== */ +/* > */ +/* > Ren-Cang Li, Computer Science Division, University of California */ +/* > at Berkeley, USA */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int dlaed4_(integer *n, integer *i__, doublereal *d__, + doublereal *z__, doublereal *delta, doublereal *rho, doublereal *dlam, + integer *info) +{ + /* System generated locals */ + integer i__1; + doublereal d__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + doublereal a, b, c__; + integer j; + doublereal w; + integer ii; + doublereal dw, zz[3]; + integer ip1; + doublereal del, eta, phi, eps, tau, psi; + integer iim1, iip1; + doublereal dphi, dpsi; + integer iter; + doublereal temp, prew, temp1, dltlb, dltub, midpt; + integer niter; + logical swtch; + extern /* Subroutine */ int dlaed5_(integer *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *), dlaed6_(integer *, + logical *, doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, integer *); + logical swtch3; + extern doublereal dlamch_(char *, ftnlen); + logical orgati; + doublereal erretm, rhoinv; + + +/* -- LAPACK computational routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Local Arrays .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Since this routine is called in an inner loop, we do no argument */ +/* checking. */ + +/* Quick return for N=1 and 2. */ + + /* Parameter adjustments */ + --delta; + --z__; + --d__; + + /* Function Body */ + *info = 0; + if (*n == 1) { + +/* Presumably, I=1 upon entry */ + + *dlam = d__[1] + *rho * z__[1] * z__[1]; + delta[1] = 1.; + return 0; + } + if (*n == 2) { + dlaed5_(i__, &d__[1], &z__[1], &delta[1], rho, dlam); + return 0; + } + +/* Compute machine epsilon */ + + eps = dlamch_((char *)"Epsilon", (ftnlen)7); + rhoinv = 1. / *rho; + +/* The case I = N */ + + if (*i__ == *n) { + +/* Initialize some basic variables */ + + ii = *n - 1; + niter = 1; + +/* Calculate initial guess */ + + midpt = *rho / 2.; + +/* If ||Z||_2 is not one, then TEMP should be set to */ +/* RHO * ||Z||_2^2 / TWO */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] = d__[j] - d__[*i__] - midpt; +/* L10: */ + } + + psi = 0.; + i__1 = *n - 2; + for (j = 1; j <= i__1; ++j) { + psi += z__[j] * z__[j] / delta[j]; +/* L20: */ + } + + c__ = rhoinv + psi; + w = c__ + z__[ii] * z__[ii] / delta[ii] + z__[*n] * z__[*n] / delta[* + n]; + + if (w <= 0.) { + temp = z__[*n - 1] * z__[*n - 1] / (d__[*n] - d__[*n - 1] + *rho) + + z__[*n] * z__[*n] / *rho; + if (c__ <= temp) { + tau = *rho; + } else { + del = d__[*n] - d__[*n - 1]; + a = -c__ * del + z__[*n - 1] * z__[*n - 1] + z__[*n] * z__[*n] + ; + b = z__[*n] * z__[*n] * del; + if (a < 0.) { + tau = b * 2. / (sqrt(a * a + b * 4. * c__) - a); + } else { + tau = (a + sqrt(a * a + b * 4. * c__)) / (c__ * 2.); + } + } + +/* It can be proved that */ +/* D(N)+RHO/2 <= LAMBDA(N) < D(N)+TAU <= D(N)+RHO */ + + dltlb = midpt; + dltub = *rho; + } else { + del = d__[*n] - d__[*n - 1]; + a = -c__ * del + z__[*n - 1] * z__[*n - 1] + z__[*n] * z__[*n]; + b = z__[*n] * z__[*n] * del; + if (a < 0.) { + tau = b * 2. / (sqrt(a * a + b * 4. * c__) - a); + } else { + tau = (a + sqrt(a * a + b * 4. * c__)) / (c__ * 2.); + } + +/* It can be proved that */ +/* D(N) < D(N)+TAU < LAMBDA(N) < D(N)+RHO/2 */ + + dltlb = 0.; + dltub = midpt; + } + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] = d__[j] - d__[*i__] - tau; +/* L30: */ + } + +/* Evaluate PSI and the derivative DPSI */ + + dpsi = 0.; + psi = 0.; + erretm = 0.; + i__1 = ii; + for (j = 1; j <= i__1; ++j) { + temp = z__[j] / delta[j]; + psi += z__[j] * temp; + dpsi += temp * temp; + erretm += psi; +/* L40: */ + } + erretm = abs(erretm); + +/* Evaluate PHI and the derivative DPHI */ + + temp = z__[*n] / delta[*n]; + phi = z__[*n] * temp; + dphi = temp * temp; + erretm = (-phi - psi) * 8. + erretm - phi + rhoinv + abs(tau) * (dpsi + + dphi); + + w = rhoinv + phi + psi; + +/* Test for convergence */ + + if (abs(w) <= eps * erretm) { + *dlam = d__[*i__] + tau; + goto L250; + } + + if (w <= 0.) { + dltlb = max(dltlb,tau); + } else { + dltub = min(dltub,tau); + } + +/* Calculate the new step */ + + ++niter; + c__ = w - delta[*n - 1] * dpsi - delta[*n] * dphi; + a = (delta[*n - 1] + delta[*n]) * w - delta[*n - 1] * delta[*n] * ( + dpsi + dphi); + b = delta[*n - 1] * delta[*n] * w; + if (c__ < 0.) { + c__ = abs(c__); + } + if (c__ == 0.) { +/* ETA = B/A */ +/* ETA = RHO - TAU */ +/* ETA = DLTUB - TAU */ + +/* Update proposed by Li, Ren-Cang: */ + eta = -w / (dpsi + dphi); + } else if (a >= 0.) { + eta = (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / (c__ + * 2.); + } else { + eta = b * 2. / (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1))) + ); + } + +/* Note, eta should be positive if w is negative, and */ +/* eta should be negative otherwise. However, */ +/* if for some reason caused by roundoff, eta*w > 0, */ +/* we simply use one Newton step instead. This way */ +/* will guarantee eta*w < 0. */ + + if (w * eta > 0.) { + eta = -w / (dpsi + dphi); + } + temp = tau + eta; + if (temp > dltub || temp < dltlb) { + if (w < 0.) { + eta = (dltub - tau) / 2.; + } else { + eta = (dltlb - tau) / 2.; + } + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] -= eta; +/* L50: */ + } + + tau += eta; + +/* Evaluate PSI and the derivative DPSI */ + + dpsi = 0.; + psi = 0.; + erretm = 0.; + i__1 = ii; + for (j = 1; j <= i__1; ++j) { + temp = z__[j] / delta[j]; + psi += z__[j] * temp; + dpsi += temp * temp; + erretm += psi; +/* L60: */ + } + erretm = abs(erretm); + +/* Evaluate PHI and the derivative DPHI */ + + temp = z__[*n] / delta[*n]; + phi = z__[*n] * temp; + dphi = temp * temp; + erretm = (-phi - psi) * 8. + erretm - phi + rhoinv + abs(tau) * (dpsi + + dphi); + + w = rhoinv + phi + psi; + +/* Main loop to update the values of the array DELTA */ + + iter = niter + 1; + + for (niter = iter; niter <= 30; ++niter) { + +/* Test for convergence */ + + if (abs(w) <= eps * erretm) { + *dlam = d__[*i__] + tau; + goto L250; + } + + if (w <= 0.) { + dltlb = max(dltlb,tau); + } else { + dltub = min(dltub,tau); + } + +/* Calculate the new step */ + + c__ = w - delta[*n - 1] * dpsi - delta[*n] * dphi; + a = (delta[*n - 1] + delta[*n]) * w - delta[*n - 1] * delta[*n] * + (dpsi + dphi); + b = delta[*n - 1] * delta[*n] * w; + if (a >= 0.) { + eta = (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / ( + c__ * 2.); + } else { + eta = b * 2. / (a - sqrt((d__1 = a * a - b * 4. * c__, abs( + d__1)))); + } + +/* Note, eta should be positive if w is negative, and */ +/* eta should be negative otherwise. However, */ +/* if for some reason caused by roundoff, eta*w > 0, */ +/* we simply use one Newton step instead. This way */ +/* will guarantee eta*w < 0. */ + + if (w * eta > 0.) { + eta = -w / (dpsi + dphi); + } + temp = tau + eta; + if (temp > dltub || temp < dltlb) { + if (w < 0.) { + eta = (dltub - tau) / 2.; + } else { + eta = (dltlb - tau) / 2.; + } + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] -= eta; +/* L70: */ + } + + tau += eta; + +/* Evaluate PSI and the derivative DPSI */ + + dpsi = 0.; + psi = 0.; + erretm = 0.; + i__1 = ii; + for (j = 1; j <= i__1; ++j) { + temp = z__[j] / delta[j]; + psi += z__[j] * temp; + dpsi += temp * temp; + erretm += psi; +/* L80: */ + } + erretm = abs(erretm); + +/* Evaluate PHI and the derivative DPHI */ + + temp = z__[*n] / delta[*n]; + phi = z__[*n] * temp; + dphi = temp * temp; + erretm = (-phi - psi) * 8. + erretm - phi + rhoinv + abs(tau) * ( + dpsi + dphi); + + w = rhoinv + phi + psi; +/* L90: */ + } + +/* Return with INFO = 1, NITER = MAXIT and not converged */ + + *info = 1; + *dlam = d__[*i__] + tau; + goto L250; + +/* End for the case I = N */ + + } else { + +/* The case for I < N */ + + niter = 1; + ip1 = *i__ + 1; + +/* Calculate initial guess */ + + del = d__[ip1] - d__[*i__]; + midpt = del / 2.; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] = d__[j] - d__[*i__] - midpt; +/* L100: */ + } + + psi = 0.; + i__1 = *i__ - 1; + for (j = 1; j <= i__1; ++j) { + psi += z__[j] * z__[j] / delta[j]; +/* L110: */ + } + + phi = 0.; + i__1 = *i__ + 2; + for (j = *n; j >= i__1; --j) { + phi += z__[j] * z__[j] / delta[j]; +/* L120: */ + } + c__ = rhoinv + psi + phi; + w = c__ + z__[*i__] * z__[*i__] / delta[*i__] + z__[ip1] * z__[ip1] / + delta[ip1]; + + if (w > 0.) { + +/* d(i)< the ith eigenvalue < (d(i)+d(i+1))/2 */ + +/* We choose d(i) as origin. */ + + orgati = TRUE_; + a = c__ * del + z__[*i__] * z__[*i__] + z__[ip1] * z__[ip1]; + b = z__[*i__] * z__[*i__] * del; + if (a > 0.) { + tau = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, abs( + d__1)))); + } else { + tau = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / ( + c__ * 2.); + } + dltlb = 0.; + dltub = midpt; + } else { + +/* (d(i)+d(i+1))/2 <= the ith eigenvalue < d(i+1) */ + +/* We choose d(i+1) as origin. */ + + orgati = FALSE_; + a = c__ * del - z__[*i__] * z__[*i__] - z__[ip1] * z__[ip1]; + b = z__[ip1] * z__[ip1] * del; + if (a < 0.) { + tau = b * 2. / (a - sqrt((d__1 = a * a + b * 4. * c__, abs( + d__1)))); + } else { + tau = -(a + sqrt((d__1 = a * a + b * 4. * c__, abs(d__1)))) / + (c__ * 2.); + } + dltlb = -midpt; + dltub = 0.; + } + + if (orgati) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] = d__[j] - d__[*i__] - tau; +/* L130: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] = d__[j] - d__[ip1] - tau; +/* L140: */ + } + } + if (orgati) { + ii = *i__; + } else { + ii = *i__ + 1; + } + iim1 = ii - 1; + iip1 = ii + 1; + +/* Evaluate PSI and the derivative DPSI */ + + dpsi = 0.; + psi = 0.; + erretm = 0.; + i__1 = iim1; + for (j = 1; j <= i__1; ++j) { + temp = z__[j] / delta[j]; + psi += z__[j] * temp; + dpsi += temp * temp; + erretm += psi; +/* L150: */ + } + erretm = abs(erretm); + +/* Evaluate PHI and the derivative DPHI */ + + dphi = 0.; + phi = 0.; + i__1 = iip1; + for (j = *n; j >= i__1; --j) { + temp = z__[j] / delta[j]; + phi += z__[j] * temp; + dphi += temp * temp; + erretm += phi; +/* L160: */ + } + + w = rhoinv + phi + psi; + +/* W is the value of the secular function with */ +/* its ii-th element removed. */ + + swtch3 = FALSE_; + if (orgati) { + if (w < 0.) { + swtch3 = TRUE_; + } + } else { + if (w > 0.) { + swtch3 = TRUE_; + } + } + if (ii == 1 || ii == *n) { + swtch3 = FALSE_; + } + + temp = z__[ii] / delta[ii]; + dw = dpsi + dphi + temp * temp; + temp = z__[ii] * temp; + w += temp; + erretm = (phi - psi) * 8. + erretm + rhoinv * 2. + abs(temp) * 3. + + abs(tau) * dw; + +/* Test for convergence */ + + if (abs(w) <= eps * erretm) { + if (orgati) { + *dlam = d__[*i__] + tau; + } else { + *dlam = d__[ip1] + tau; + } + goto L250; + } + + if (w <= 0.) { + dltlb = max(dltlb,tau); + } else { + dltub = min(dltub,tau); + } + +/* Calculate the new step */ + + ++niter; + if (! swtch3) { + if (orgati) { +/* Computing 2nd power */ + d__1 = z__[*i__] / delta[*i__]; + c__ = w - delta[ip1] * dw - (d__[*i__] - d__[ip1]) * (d__1 * + d__1); + } else { +/* Computing 2nd power */ + d__1 = z__[ip1] / delta[ip1]; + c__ = w - delta[*i__] * dw - (d__[ip1] - d__[*i__]) * (d__1 * + d__1); + } + a = (delta[*i__] + delta[ip1]) * w - delta[*i__] * delta[ip1] * + dw; + b = delta[*i__] * delta[ip1] * w; + if (c__ == 0.) { + if (a == 0.) { + if (orgati) { + a = z__[*i__] * z__[*i__] + delta[ip1] * delta[ip1] * + (dpsi + dphi); + } else { + a = z__[ip1] * z__[ip1] + delta[*i__] * delta[*i__] * + (dpsi + dphi); + } + } + eta = b / a; + } else if (a <= 0.) { + eta = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / ( + c__ * 2.); + } else { + eta = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, abs( + d__1)))); + } + } else { + +/* Interpolation using THREE most relevant poles */ + + temp = rhoinv + psi + phi; + if (orgati) { + temp1 = z__[iim1] / delta[iim1]; + temp1 *= temp1; + c__ = temp - delta[iip1] * (dpsi + dphi) - (d__[iim1] - d__[ + iip1]) * temp1; + zz[0] = z__[iim1] * z__[iim1]; + zz[2] = delta[iip1] * delta[iip1] * (dpsi - temp1 + dphi); + } else { + temp1 = z__[iip1] / delta[iip1]; + temp1 *= temp1; + c__ = temp - delta[iim1] * (dpsi + dphi) - (d__[iip1] - d__[ + iim1]) * temp1; + zz[0] = delta[iim1] * delta[iim1] * (dpsi + (dphi - temp1)); + zz[2] = z__[iip1] * z__[iip1]; + } + zz[1] = z__[ii] * z__[ii]; + dlaed6_(&niter, &orgati, &c__, &delta[iim1], zz, &w, &eta, info); + if (*info != 0) { + goto L250; + } + } + +/* Note, eta should be positive if w is negative, and */ +/* eta should be negative otherwise. However, */ +/* if for some reason caused by roundoff, eta*w > 0, */ +/* we simply use one Newton step instead. This way */ +/* will guarantee eta*w < 0. */ + + if (w * eta >= 0.) { + eta = -w / dw; + } + temp = tau + eta; + if (temp > dltub || temp < dltlb) { + if (w < 0.) { + eta = (dltub - tau) / 2.; + } else { + eta = (dltlb - tau) / 2.; + } + } + + prew = w; + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] -= eta; +/* L180: */ + } + +/* Evaluate PSI and the derivative DPSI */ + + dpsi = 0.; + psi = 0.; + erretm = 0.; + i__1 = iim1; + for (j = 1; j <= i__1; ++j) { + temp = z__[j] / delta[j]; + psi += z__[j] * temp; + dpsi += temp * temp; + erretm += psi; +/* L190: */ + } + erretm = abs(erretm); + +/* Evaluate PHI and the derivative DPHI */ + + dphi = 0.; + phi = 0.; + i__1 = iip1; + for (j = *n; j >= i__1; --j) { + temp = z__[j] / delta[j]; + phi += z__[j] * temp; + dphi += temp * temp; + erretm += phi; +/* L200: */ + } + + temp = z__[ii] / delta[ii]; + dw = dpsi + dphi + temp * temp; + temp = z__[ii] * temp; + w = rhoinv + phi + psi + temp; + erretm = (phi - psi) * 8. + erretm + rhoinv * 2. + abs(temp) * 3. + ( + d__1 = tau + eta, abs(d__1)) * dw; + + swtch = FALSE_; + if (orgati) { + if (-w > abs(prew) / 10.) { + swtch = TRUE_; + } + } else { + if (w > abs(prew) / 10.) { + swtch = TRUE_; + } + } + + tau += eta; + +/* Main loop to update the values of the array DELTA */ + + iter = niter + 1; + + for (niter = iter; niter <= 30; ++niter) { + +/* Test for convergence */ + + if (abs(w) <= eps * erretm) { + if (orgati) { + *dlam = d__[*i__] + tau; + } else { + *dlam = d__[ip1] + tau; + } + goto L250; + } + + if (w <= 0.) { + dltlb = max(dltlb,tau); + } else { + dltub = min(dltub,tau); + } + +/* Calculate the new step */ + + if (! swtch3) { + if (! swtch) { + if (orgati) { +/* Computing 2nd power */ + d__1 = z__[*i__] / delta[*i__]; + c__ = w - delta[ip1] * dw - (d__[*i__] - d__[ip1]) * ( + d__1 * d__1); + } else { +/* Computing 2nd power */ + d__1 = z__[ip1] / delta[ip1]; + c__ = w - delta[*i__] * dw - (d__[ip1] - d__[*i__]) * + (d__1 * d__1); + } + } else { + temp = z__[ii] / delta[ii]; + if (orgati) { + dpsi += temp * temp; + } else { + dphi += temp * temp; + } + c__ = w - delta[*i__] * dpsi - delta[ip1] * dphi; + } + a = (delta[*i__] + delta[ip1]) * w - delta[*i__] * delta[ip1] + * dw; + b = delta[*i__] * delta[ip1] * w; + if (c__ == 0.) { + if (a == 0.) { + if (! swtch) { + if (orgati) { + a = z__[*i__] * z__[*i__] + delta[ip1] * + delta[ip1] * (dpsi + dphi); + } else { + a = z__[ip1] * z__[ip1] + delta[*i__] * delta[ + *i__] * (dpsi + dphi); + } + } else { + a = delta[*i__] * delta[*i__] * dpsi + delta[ip1] + * delta[ip1] * dphi; + } + } + eta = b / a; + } else if (a <= 0.) { + eta = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) + / (c__ * 2.); + } else { + eta = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, + abs(d__1)))); + } + } else { + +/* Interpolation using THREE most relevant poles */ + + temp = rhoinv + psi + phi; + if (swtch) { + c__ = temp - delta[iim1] * dpsi - delta[iip1] * dphi; + zz[0] = delta[iim1] * delta[iim1] * dpsi; + zz[2] = delta[iip1] * delta[iip1] * dphi; + } else { + if (orgati) { + temp1 = z__[iim1] / delta[iim1]; + temp1 *= temp1; + c__ = temp - delta[iip1] * (dpsi + dphi) - (d__[iim1] + - d__[iip1]) * temp1; + zz[0] = z__[iim1] * z__[iim1]; + zz[2] = delta[iip1] * delta[iip1] * (dpsi - temp1 + + dphi); + } else { + temp1 = z__[iip1] / delta[iip1]; + temp1 *= temp1; + c__ = temp - delta[iim1] * (dpsi + dphi) - (d__[iip1] + - d__[iim1]) * temp1; + zz[0] = delta[iim1] * delta[iim1] * (dpsi + (dphi - + temp1)); + zz[2] = z__[iip1] * z__[iip1]; + } + } + dlaed6_(&niter, &orgati, &c__, &delta[iim1], zz, &w, &eta, + info); + if (*info != 0) { + goto L250; + } + } + +/* Note, eta should be positive if w is negative, and */ +/* eta should be negative otherwise. However, */ +/* if for some reason caused by roundoff, eta*w > 0, */ +/* we simply use one Newton step instead. This way */ +/* will guarantee eta*w < 0. */ + + if (w * eta >= 0.) { + eta = -w / dw; + } + temp = tau + eta; + if (temp > dltub || temp < dltlb) { + if (w < 0.) { + eta = (dltub - tau) / 2.; + } else { + eta = (dltlb - tau) / 2.; + } + } + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] -= eta; +/* L210: */ + } + + tau += eta; + prew = w; + +/* Evaluate PSI and the derivative DPSI */ + + dpsi = 0.; + psi = 0.; + erretm = 0.; + i__1 = iim1; + for (j = 1; j <= i__1; ++j) { + temp = z__[j] / delta[j]; + psi += z__[j] * temp; + dpsi += temp * temp; + erretm += psi; +/* L220: */ + } + erretm = abs(erretm); + +/* Evaluate PHI and the derivative DPHI */ + + dphi = 0.; + phi = 0.; + i__1 = iip1; + for (j = *n; j >= i__1; --j) { + temp = z__[j] / delta[j]; + phi += z__[j] * temp; + dphi += temp * temp; + erretm += phi; +/* L230: */ + } + + temp = z__[ii] / delta[ii]; + dw = dpsi + dphi + temp * temp; + temp = z__[ii] * temp; + w = rhoinv + phi + psi + temp; + erretm = (phi - psi) * 8. + erretm + rhoinv * 2. + abs(temp) * 3. + + abs(tau) * dw; + if (w * prew > 0. && abs(w) > abs(prew) / 10.) { + swtch = ! swtch; + } + +/* L240: */ + } + +/* Return with INFO = 1, NITER = MAXIT and not converged */ + + *info = 1; + if (orgati) { + *dlam = d__[*i__] + tau; + } else { + *dlam = d__[ip1] + tau; + } + + } + +L250: + + return 0; + +/* End of DLAED4 */ + +} /* dlaed4_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dlaed5.cpp b/lib/linalg/dlaed5.cpp new file mode 100644 index 0000000000..14e8a429ee --- /dev/null +++ b/lib/linalg/dlaed5.cpp @@ -0,0 +1,219 @@ +/* fortran/dlaed5.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* > \brief \b DLAED5 used by DSTEDC. Solves the 2-by-2 secular equation. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DLAED5 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DLAED5( I, D, Z, DELTA, RHO, DLAM ) */ + +/* .. Scalar Arguments .. */ +/* INTEGER I */ +/* DOUBLE PRECISION DLAM, RHO */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION D( 2 ), DELTA( 2 ), Z( 2 ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > This subroutine computes the I-th eigenvalue of a symmetric rank-one */ +/* > modification of a 2-by-2 diagonal matrix */ +/* > */ +/* > diag( D ) + RHO * Z * transpose(Z) . */ +/* > */ +/* > The diagonal elements in the array D are assumed to satisfy */ +/* > */ +/* > D(i) < D(j) for i < j . */ +/* > */ +/* > We also assume RHO > 0 and that the Euclidean norm of the vector */ +/* > Z is one. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] I */ +/* > \verbatim */ +/* > I is INTEGER */ +/* > The index of the eigenvalue to be computed. I = 1 or I = 2. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] D */ +/* > \verbatim */ +/* > D is DOUBLE PRECISION array, dimension (2) */ +/* > The original eigenvalues. We assume D(1) < D(2). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] Z */ +/* > \verbatim */ +/* > Z is DOUBLE PRECISION array, dimension (2) */ +/* > The components of the updating vector. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] DELTA */ +/* > \verbatim */ +/* > DELTA is DOUBLE PRECISION array, dimension (2) */ +/* > The vector DELTA contains the information necessary */ +/* > to construct the eigenvectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] RHO */ +/* > \verbatim */ +/* > RHO is DOUBLE PRECISION */ +/* > The scalar in the symmetric updating formula. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] DLAM */ +/* > \verbatim */ +/* > DLAM is DOUBLE PRECISION */ +/* > The computed lambda_I, the I-th updated eigenvalue. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup auxOTHERcomputational */ + +/* > \par Contributors: */ +/* ================== */ +/* > */ +/* > Ren-Cang Li, Computer Science Division, University of California */ +/* > at Berkeley, USA */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int dlaed5_(integer *i__, doublereal *d__, doublereal *z__, + doublereal *delta, doublereal *rho, doublereal *dlam) +{ + /* System generated locals */ + doublereal d__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + doublereal b, c__, w, del, tau, temp; + + +/* -- LAPACK computational routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments */ + --delta; + --z__; + --d__; + + /* Function Body */ + del = d__[2] - d__[1]; + if (*i__ == 1) { + w = *rho * 2. * (z__[2] * z__[2] - z__[1] * z__[1]) / del + 1.; + if (w > 0.) { + b = del + *rho * (z__[1] * z__[1] + z__[2] * z__[2]); + c__ = *rho * z__[1] * z__[1] * del; + +/* B > ZERO, always */ + + tau = c__ * 2. / (b + sqrt((d__1 = b * b - c__ * 4., abs(d__1)))); + *dlam = d__[1] + tau; + delta[1] = -z__[1] / tau; + delta[2] = z__[2] / (del - tau); + } else { + b = -del + *rho * (z__[1] * z__[1] + z__[2] * z__[2]); + c__ = *rho * z__[2] * z__[2] * del; + if (b > 0.) { + tau = c__ * -2. / (b + sqrt(b * b + c__ * 4.)); + } else { + tau = (b - sqrt(b * b + c__ * 4.)) / 2.; + } + *dlam = d__[2] + tau; + delta[1] = -z__[1] / (del + tau); + delta[2] = -z__[2] / tau; + } + temp = sqrt(delta[1] * delta[1] + delta[2] * delta[2]); + delta[1] /= temp; + delta[2] /= temp; + } else { + +/* Now I=2 */ + + b = -del + *rho * (z__[1] * z__[1] + z__[2] * z__[2]); + c__ = *rho * z__[2] * z__[2] * del; + if (b > 0.) { + tau = (b + sqrt(b * b + c__ * 4.)) / 2.; + } else { + tau = c__ * 2. / (-b + sqrt(b * b + c__ * 4.)); + } + *dlam = d__[2] + tau; + delta[1] = -z__[1] / (del + tau); + delta[2] = -z__[2] / tau; + temp = sqrt(delta[1] * delta[1] + delta[2] * delta[2]); + delta[1] /= temp; + delta[2] /= temp; + } + return 0; + +/* End of DLAED5 */ + +} /* dlaed5_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dlaed6.cpp b/lib/linalg/dlaed6.cpp new file mode 100644 index 0000000000..7a855780f0 --- /dev/null +++ b/lib/linalg/dlaed6.cpp @@ -0,0 +1,463 @@ +/* fortran/dlaed6.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* > \brief \b DLAED6 used by DSTEDC. Computes one Newton step in solution of the secular equation. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DLAED6 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DLAED6( KNITER, ORGATI, RHO, D, Z, FINIT, TAU, INFO ) */ + +/* .. Scalar Arguments .. */ +/* LOGICAL ORGATI */ +/* INTEGER INFO, KNITER */ +/* DOUBLE PRECISION FINIT, RHO, TAU */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION D( 3 ), Z( 3 ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DLAED6 computes the positive or negative root (closest to the origin) */ +/* > of */ +/* > z(1) z(2) z(3) */ +/* > f(x) = rho + --------- + ---------- + --------- */ +/* > d(1)-x d(2)-x d(3)-x */ +/* > */ +/* > It is assumed that */ +/* > */ +/* > if ORGATI = .true. the root is between d(2) and d(3); */ +/* > otherwise it is between d(1) and d(2) */ +/* > */ +/* > This routine will be called by DLAED4 when necessary. In most cases, */ +/* > the root sought is the smallest in magnitude, though it might not be */ +/* > in some extremely rare situations. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] KNITER */ +/* > \verbatim */ +/* > KNITER is INTEGER */ +/* > Refer to DLAED4 for its significance. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] ORGATI */ +/* > \verbatim */ +/* > ORGATI is LOGICAL */ +/* > If ORGATI is true, the needed root is between d(2) and */ +/* > d(3); otherwise it is between d(1) and d(2). See */ +/* > DLAED4 for further details. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] RHO */ +/* > \verbatim */ +/* > RHO is DOUBLE PRECISION */ +/* > Refer to the equation f(x) above. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] D */ +/* > \verbatim */ +/* > D is DOUBLE PRECISION array, dimension (3) */ +/* > D satisfies d(1) < d(2) < d(3). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] Z */ +/* > \verbatim */ +/* > Z is DOUBLE PRECISION array, dimension (3) */ +/* > Each of the elements in z must be positive. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] FINIT */ +/* > \verbatim */ +/* > FINIT is DOUBLE PRECISION */ +/* > The value of f at 0. It is more accurate than the one */ +/* > evaluated inside this routine (if someone wants to do */ +/* > so). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] TAU */ +/* > \verbatim */ +/* > TAU is DOUBLE PRECISION */ +/* > The root of the equation f(x). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > > 0: if INFO = 1, failure to converge */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup auxOTHERcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > 10/02/03: This version has a few statements commented out for thread */ +/* > safety (machine parameters are computed on each entry). SJH. */ +/* > */ +/* > 05/10/06: Modified from a new version of Ren-Cang Li, use */ +/* > Gragg-Thornton-Warner cubic convergent scheme for better stability. */ +/* > \endverbatim */ + +/* > \par Contributors: */ +/* ================== */ +/* > */ +/* > Ren-Cang Li, Computer Science Division, University of California */ +/* > at Berkeley, USA */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int dlaed6_(integer *kniter, logical *orgati, doublereal * + rho, doublereal *d__, doublereal *z__, doublereal *finit, doublereal * + tau, integer *info) +{ + /* System generated locals */ + integer i__1; + doublereal d__1, d__2, d__3, d__4; + + /* Builtin functions */ + double sqrt(doublereal), log(doublereal), pow_di(doublereal *, integer *); + + /* Local variables */ + doublereal a, b, c__, f; + integer i__; + doublereal fc, df, ddf, lbd, eta, ubd, eps, base; + integer iter; + doublereal temp, temp1, temp2, temp3, temp4; + logical scale; + integer niter; + doublereal small1, small2, sminv1, sminv2; + extern doublereal dlamch_(char *, ftnlen); + doublereal dscale[3], sclfac, zscale[3], erretm, sclinv; + + +/* -- LAPACK computational routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Local Arrays .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments */ + --z__; + --d__; + + /* Function Body */ + *info = 0; + + if (*orgati) { + lbd = d__[2]; + ubd = d__[3]; + } else { + lbd = d__[1]; + ubd = d__[2]; + } + if (*finit < 0.) { + lbd = 0.; + } else { + ubd = 0.; + } + + niter = 1; + *tau = 0.; + if (*kniter == 2) { + if (*orgati) { + temp = (d__[3] - d__[2]) / 2.; + c__ = *rho + z__[1] / (d__[1] - d__[2] - temp); + a = c__ * (d__[2] + d__[3]) + z__[2] + z__[3]; + b = c__ * d__[2] * d__[3] + z__[2] * d__[3] + z__[3] * d__[2]; + } else { + temp = (d__[1] - d__[2]) / 2.; + c__ = *rho + z__[3] / (d__[3] - d__[2] - temp); + a = c__ * (d__[1] + d__[2]) + z__[1] + z__[2]; + b = c__ * d__[1] * d__[2] + z__[1] * d__[2] + z__[2] * d__[1]; + } +/* Computing MAX */ + d__1 = abs(a), d__2 = abs(b), d__1 = max(d__1,d__2), d__2 = abs(c__); + temp = max(d__1,d__2); + a /= temp; + b /= temp; + c__ /= temp; + if (c__ == 0.) { + *tau = b / a; + } else if (a <= 0.) { + *tau = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / ( + c__ * 2.); + } else { + *tau = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)) + )); + } + if (*tau < lbd || *tau > ubd) { + *tau = (lbd + ubd) / 2.; + } + if (d__[1] == *tau || d__[2] == *tau || d__[3] == *tau) { + *tau = 0.; + } else { + temp = *finit + *tau * z__[1] / (d__[1] * (d__[1] - *tau)) + *tau + * z__[2] / (d__[2] * (d__[2] - *tau)) + *tau * z__[3] / ( + d__[3] * (d__[3] - *tau)); + if (temp <= 0.) { + lbd = *tau; + } else { + ubd = *tau; + } + if (abs(*finit) <= abs(temp)) { + *tau = 0.; + } + } + } + +/* get machine parameters for possible scaling to avoid overflow */ + +/* modified by Sven: parameters SMALL1, SMINV1, SMALL2, */ +/* SMINV2, EPS are not SAVEd anymore between one call to the */ +/* others but recomputed at each call */ + + eps = dlamch_((char *)"Epsilon", (ftnlen)7); + base = dlamch_((char *)"Base", (ftnlen)4); + i__1 = (integer) (log(dlamch_((char *)"SafMin", (ftnlen)6)) / log(base) / 3.); + small1 = pow_di(&base, &i__1); + sminv1 = 1. / small1; + small2 = small1 * small1; + sminv2 = sminv1 * sminv1; + +/* Determine if scaling of inputs necessary to avoid overflow */ +/* when computing 1/TEMP**3 */ + + if (*orgati) { +/* Computing MIN */ + d__3 = (d__1 = d__[2] - *tau, abs(d__1)), d__4 = (d__2 = d__[3] - * + tau, abs(d__2)); + temp = min(d__3,d__4); + } else { +/* Computing MIN */ + d__3 = (d__1 = d__[1] - *tau, abs(d__1)), d__4 = (d__2 = d__[2] - * + tau, abs(d__2)); + temp = min(d__3,d__4); + } + scale = FALSE_; + if (temp <= small1) { + scale = TRUE_; + if (temp <= small2) { + +/* Scale up by power of radix nearest 1/SAFMIN**(2/3) */ + + sclfac = sminv2; + sclinv = small2; + } else { + +/* Scale up by power of radix nearest 1/SAFMIN**(1/3) */ + + sclfac = sminv1; + sclinv = small1; + } + +/* Scaling up safe because D, Z, TAU scaled elsewhere to be O(1) */ + + for (i__ = 1; i__ <= 3; ++i__) { + dscale[i__ - 1] = d__[i__] * sclfac; + zscale[i__ - 1] = z__[i__] * sclfac; +/* L10: */ + } + *tau *= sclfac; + lbd *= sclfac; + ubd *= sclfac; + } else { + +/* Copy D and Z to DSCALE and ZSCALE */ + + for (i__ = 1; i__ <= 3; ++i__) { + dscale[i__ - 1] = d__[i__]; + zscale[i__ - 1] = z__[i__]; +/* L20: */ + } + } + + fc = 0.; + df = 0.; + ddf = 0.; + for (i__ = 1; i__ <= 3; ++i__) { + temp = 1. / (dscale[i__ - 1] - *tau); + temp1 = zscale[i__ - 1] * temp; + temp2 = temp1 * temp; + temp3 = temp2 * temp; + fc += temp1 / dscale[i__ - 1]; + df += temp2; + ddf += temp3; +/* L30: */ + } + f = *finit + *tau * fc; + + if (abs(f) <= 0.) { + goto L60; + } + if (f <= 0.) { + lbd = *tau; + } else { + ubd = *tau; + } + +/* Iteration begins -- Use Gragg-Thornton-Warner cubic convergent */ +/* scheme */ + +/* It is not hard to see that */ + +/* 1) Iterations will go up monotonically */ +/* if FINIT < 0; */ + +/* 2) Iterations will go down monotonically */ +/* if FINIT > 0. */ + + iter = niter + 1; + + for (niter = iter; niter <= 40; ++niter) { + + if (*orgati) { + temp1 = dscale[1] - *tau; + temp2 = dscale[2] - *tau; + } else { + temp1 = dscale[0] - *tau; + temp2 = dscale[1] - *tau; + } + a = (temp1 + temp2) * f - temp1 * temp2 * df; + b = temp1 * temp2 * f; + c__ = f - (temp1 + temp2) * df + temp1 * temp2 * ddf; +/* Computing MAX */ + d__1 = abs(a), d__2 = abs(b), d__1 = max(d__1,d__2), d__2 = abs(c__); + temp = max(d__1,d__2); + a /= temp; + b /= temp; + c__ /= temp; + if (c__ == 0.) { + eta = b / a; + } else if (a <= 0.) { + eta = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / (c__ + * 2.); + } else { + eta = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1))) + ); + } + if (f * eta >= 0.) { + eta = -f / df; + } + + *tau += eta; + if (*tau < lbd || *tau > ubd) { + *tau = (lbd + ubd) / 2.; + } + + fc = 0.; + erretm = 0.; + df = 0.; + ddf = 0.; + for (i__ = 1; i__ <= 3; ++i__) { + if (dscale[i__ - 1] - *tau != 0.) { + temp = 1. / (dscale[i__ - 1] - *tau); + temp1 = zscale[i__ - 1] * temp; + temp2 = temp1 * temp; + temp3 = temp2 * temp; + temp4 = temp1 / dscale[i__ - 1]; + fc += temp4; + erretm += abs(temp4); + df += temp2; + ddf += temp3; + } else { + goto L60; + } +/* L40: */ + } + f = *finit + *tau * fc; + erretm = (abs(*finit) + abs(*tau) * erretm) * 8. + abs(*tau) * df; + if (abs(f) <= eps * 4. * erretm || ubd - lbd <= eps * 4. * abs(*tau)) + { + goto L60; + } + if (f <= 0.) { + lbd = *tau; + } else { + ubd = *tau; + } +/* L50: */ + } + *info = 1; +L60: + +/* Undo scaling */ + + if (scale) { + *tau *= sclinv; + } + return 0; + +/* End of DLAED6 */ + +} /* dlaed6_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dlaed7.cpp b/lib/linalg/dlaed7.cpp new file mode 100644 index 0000000000..367bf37a46 --- /dev/null +++ b/lib/linalg/dlaed7.cpp @@ -0,0 +1,483 @@ +/* fortran/dlaed7.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static integer c__2 = 2; +static integer c__1 = 1; +static doublereal c_b10 = 1.; +static doublereal c_b11 = 0.; +static integer c_n1 = -1; + +/* > \brief \b DLAED7 used by DSTEDC. Computes the updated eigensystem of a diagonal matrix after modification + by a rank-one symmetric matrix. Used when the original matrix is dense. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DLAED7 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DLAED7( ICOMPQ, N, QSIZ, TLVLS, CURLVL, CURPBM, D, Q, */ +/* LDQ, INDXQ, RHO, CUTPNT, QSTORE, QPTR, PRMPTR, */ +/* PERM, GIVPTR, GIVCOL, GIVNUM, WORK, IWORK, */ +/* INFO ) */ + +/* .. Scalar Arguments .. */ +/* INTEGER CURLVL, CURPBM, CUTPNT, ICOMPQ, INFO, LDQ, N, */ +/* $ QSIZ, TLVLS */ +/* DOUBLE PRECISION RHO */ +/* .. */ +/* .. Array Arguments .. */ +/* INTEGER GIVCOL( 2, * ), GIVPTR( * ), INDXQ( * ), */ +/* $ IWORK( * ), PERM( * ), PRMPTR( * ), QPTR( * ) */ +/* DOUBLE PRECISION D( * ), GIVNUM( 2, * ), Q( LDQ, * ), */ +/* $ QSTORE( * ), WORK( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DLAED7 computes the updated eigensystem of a diagonal */ +/* > matrix after modification by a rank-one symmetric matrix. This */ +/* > routine is used only for the eigenproblem which requires all */ +/* > eigenvalues and optionally eigenvectors of a dense symmetric matrix */ +/* > that has been reduced to tridiagonal form. DLAED1 handles */ +/* > the case in which all eigenvalues and eigenvectors of a symmetric */ +/* > tridiagonal matrix are desired. */ +/* > */ +/* > T = Q(in) ( D(in) + RHO * Z*Z**T ) Q**T(in) = Q(out) * D(out) * Q**T(out) */ +/* > */ +/* > where Z = Q**Tu, u is a vector of length N with ones in the */ +/* > CUTPNT and CUTPNT + 1 th elements and zeros elsewhere. */ +/* > */ +/* > The eigenvectors of the original matrix are stored in Q, and the */ +/* > eigenvalues are in D. The algorithm consists of three stages: */ +/* > */ +/* > The first stage consists of deflating the size of the problem */ +/* > when there are multiple eigenvalues or if there is a zero in */ +/* > the Z vector. For each such occurrence the dimension of the */ +/* > secular equation problem is reduced by one. This stage is */ +/* > performed by the routine DLAED8. */ +/* > */ +/* > The second stage consists of calculating the updated */ +/* > eigenvalues. This is done by finding the roots of the secular */ +/* > equation via the routine DLAED4 (as called by DLAED9). */ +/* > This routine also calculates the eigenvectors of the current */ +/* > problem. */ +/* > */ +/* > The final stage consists of computing the updated eigenvectors */ +/* > directly using the updated eigenvalues. The eigenvectors for */ +/* > the current problem are multiplied with the eigenvectors from */ +/* > the overall problem. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] ICOMPQ */ +/* > \verbatim */ +/* > ICOMPQ is INTEGER */ +/* > = 0: Compute eigenvalues only. */ +/* > = 1: Compute eigenvectors of original dense symmetric matrix */ +/* > also. On entry, Q contains the orthogonal matrix used */ +/* > to reduce the original matrix to tridiagonal form. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The dimension of the symmetric tridiagonal matrix. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] QSIZ */ +/* > \verbatim */ +/* > QSIZ is INTEGER */ +/* > The dimension of the orthogonal matrix used to reduce */ +/* > the full matrix to tridiagonal form. QSIZ >= N if ICOMPQ = 1. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TLVLS */ +/* > \verbatim */ +/* > TLVLS is INTEGER */ +/* > The total number of merging levels in the overall divide and */ +/* > conquer tree. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] CURLVL */ +/* > \verbatim */ +/* > CURLVL is INTEGER */ +/* > The current level in the overall merge routine, */ +/* > 0 <= CURLVL <= TLVLS. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] CURPBM */ +/* > \verbatim */ +/* > CURPBM is INTEGER */ +/* > The current problem in the current level in the overall */ +/* > merge routine (counting from upper left to lower right). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] D */ +/* > \verbatim */ +/* > D is DOUBLE PRECISION array, dimension (N) */ +/* > On entry, the eigenvalues of the rank-1-perturbed matrix. */ +/* > On exit, the eigenvalues of the repaired matrix. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] Q */ +/* > \verbatim */ +/* > Q is DOUBLE PRECISION array, dimension (LDQ, N) */ +/* > On entry, the eigenvectors of the rank-1-perturbed matrix. */ +/* > On exit, the eigenvectors of the repaired tridiagonal matrix. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDQ */ +/* > \verbatim */ +/* > LDQ is INTEGER */ +/* > The leading dimension of the array Q. LDQ >= max(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INDXQ */ +/* > \verbatim */ +/* > INDXQ is INTEGER array, dimension (N) */ +/* > The permutation which will reintegrate the subproblem just */ +/* > solved back into sorted order, i.e., D( INDXQ( I = 1, N ) ) */ +/* > will be in ascending order. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] RHO */ +/* > \verbatim */ +/* > RHO is DOUBLE PRECISION */ +/* > The subdiagonal element used to create the rank-1 */ +/* > modification. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] CUTPNT */ +/* > \verbatim */ +/* > CUTPNT is INTEGER */ +/* > Contains the location of the last eigenvalue in the leading */ +/* > sub-matrix. min(1,N) <= CUTPNT <= N. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] QSTORE */ +/* > \verbatim */ +/* > QSTORE is DOUBLE PRECISION array, dimension (N**2+1) */ +/* > Stores eigenvectors of submatrices encountered during */ +/* > divide and conquer, packed together. QPTR points to */ +/* > beginning of the submatrices. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] QPTR */ +/* > \verbatim */ +/* > QPTR is INTEGER array, dimension (N+2) */ +/* > List of indices pointing to beginning of submatrices stored */ +/* > in QSTORE. The submatrices are numbered starting at the */ +/* > bottom left of the divide and conquer tree, from left to */ +/* > right and bottom to top. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] PRMPTR */ +/* > \verbatim */ +/* > PRMPTR is INTEGER array, dimension (N lg N) */ +/* > Contains a list of pointers which indicate where in PERM a */ +/* > level's permutation is stored. PRMPTR(i+1) - PRMPTR(i) */ +/* > indicates the size of the permutation and also the size of */ +/* > the full, non-deflated problem. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] PERM */ +/* > \verbatim */ +/* > PERM is INTEGER array, dimension (N lg N) */ +/* > Contains the permutations (from deflation and sorting) to be */ +/* > applied to each eigenblock. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] GIVPTR */ +/* > \verbatim */ +/* > GIVPTR is INTEGER array, dimension (N lg N) */ +/* > Contains a list of pointers which indicate where in GIVCOL a */ +/* > level's Givens rotations are stored. GIVPTR(i+1) - GIVPTR(i) */ +/* > indicates the number of Givens rotations. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] GIVCOL */ +/* > \verbatim */ +/* > GIVCOL is INTEGER array, dimension (2, N lg N) */ +/* > Each pair of numbers indicates a pair of columns to take place */ +/* > in a Givens rotation. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] GIVNUM */ +/* > \verbatim */ +/* > GIVNUM is DOUBLE PRECISION array, dimension (2, N lg N) */ +/* > Each number indicates the S value to be used in the */ +/* > corresponding Givens rotation. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (3*N+2*QSIZ*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (4*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit. */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > > 0: if INFO = 1, an eigenvalue did not converge */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup auxOTHERcomputational */ + +/* > \par Contributors: */ +/* ================== */ +/* > */ +/* > Jeff Rutter, Computer Science Division, University of California */ +/* > at Berkeley, USA */ + +/* ===================================================================== */ +/* Subroutine */ int dlaed7_(integer *icompq, integer *n, integer *qsiz, + integer *tlvls, integer *curlvl, integer *curpbm, doublereal *d__, + doublereal *q, integer *ldq, integer *indxq, doublereal *rho, integer + *cutpnt, doublereal *qstore, integer *qptr, integer *prmptr, integer * + perm, integer *givptr, integer *givcol, doublereal *givnum, + doublereal *work, integer *iwork, integer *info) +{ + /* System generated locals */ + integer q_dim1, q_offset, i__1, i__2; + + /* Builtin functions */ + integer pow_ii(integer *, integer *); + + /* Local variables */ + integer i__, k, n1, n2, is, iw, iz, iq2, ptr, ldq2, indx, curr; + extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, ftnlen, ftnlen); + integer indxc, indxp; + extern /* Subroutine */ int dlaed8_(integer *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *, integer *, + doublereal *, integer *, integer *, integer *), dlaed9_(integer *, + integer *, integer *, integer *, doublereal *, doublereal *, + integer *, doublereal *, doublereal *, doublereal *, doublereal *, + integer *, integer *), dlaeda_(integer *, integer *, integer *, + integer *, integer *, integer *, integer *, integer *, doublereal + *, doublereal *, integer *, doublereal *, doublereal *, integer *) + ; + integer idlmda; + extern /* Subroutine */ int dlamrg_(integer *, integer *, doublereal *, + integer *, integer *, integer *), xerbla_(char *, integer *, + ftnlen); + integer coltyp; + + +/* -- LAPACK computational routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + --d__; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --indxq; + --qstore; + --qptr; + --prmptr; + --perm; + --givptr; + givcol -= 3; + givnum -= 3; + --work; + --iwork; + + /* Function Body */ + *info = 0; + + if (*icompq < 0 || *icompq > 1) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*icompq == 1 && *qsiz < *n) { + *info = -3; + } else if (*ldq < max(1,*n)) { + *info = -9; + } else if (min(1,*n) > *cutpnt || *n < *cutpnt) { + *info = -12; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DLAED7", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* The following values are for bookkeeping purposes only. They are */ +/* integer pointers which indicate the portion of the workspace */ +/* used by a particular array in DLAED8 and DLAED9. */ + + if (*icompq == 1) { + ldq2 = *qsiz; + } else { + ldq2 = *n; + } + + iz = 1; + idlmda = iz + *n; + iw = idlmda + *n; + iq2 = iw + *n; + is = iq2 + *n * ldq2; + + indx = 1; + indxc = indx + *n; + coltyp = indxc + *n; + indxp = coltyp + *n; + +/* Form the z-vector which consists of the last row of Q_1 and the */ +/* first row of Q_2. */ + + ptr = pow_ii(&c__2, tlvls) + 1; + i__1 = *curlvl - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = *tlvls - i__; + ptr += pow_ii(&c__2, &i__2); +/* L10: */ + } + curr = ptr + *curpbm; + dlaeda_(n, tlvls, curlvl, curpbm, &prmptr[1], &perm[1], &givptr[1], & + givcol[3], &givnum[3], &qstore[1], &qptr[1], &work[iz], &work[iz + + *n], info); + +/* When solving the final problem, we no longer need the stored data, */ +/* so we will overwrite the data from this level onto the previously */ +/* used storage space. */ + + if (*curlvl == *tlvls) { + qptr[curr] = 1; + prmptr[curr] = 1; + givptr[curr] = 1; + } + +/* Sort and Deflate eigenvalues. */ + + dlaed8_(icompq, &k, n, qsiz, &d__[1], &q[q_offset], ldq, &indxq[1], rho, + cutpnt, &work[iz], &work[idlmda], &work[iq2], &ldq2, &work[iw], & + perm[prmptr[curr]], &givptr[curr + 1], &givcol[(givptr[curr] << 1) + + 1], &givnum[(givptr[curr] << 1) + 1], &iwork[indxp], &iwork[ + indx], info); + prmptr[curr + 1] = prmptr[curr] + *n; + givptr[curr + 1] += givptr[curr]; + +/* Solve Secular Equation. */ + + if (k != 0) { + dlaed9_(&k, &c__1, &k, n, &d__[1], &work[is], &k, rho, &work[idlmda], + &work[iw], &qstore[qptr[curr]], &k, info); + if (*info != 0) { + goto L30; + } + if (*icompq == 1) { + dgemm_((char *)"N", (char *)"N", qsiz, &k, &k, &c_b10, &work[iq2], &ldq2, &qstore[ + qptr[curr]], &k, &c_b11, &q[q_offset], ldq, (ftnlen)1, ( + ftnlen)1); + } +/* Computing 2nd power */ + i__1 = k; + qptr[curr + 1] = qptr[curr] + i__1 * i__1; + +/* Prepare the INDXQ sorting permutation. */ + + n1 = k; + n2 = *n - k; + dlamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &indxq[1]); + } else { + qptr[curr + 1] = qptr[curr]; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + indxq[i__] = i__; +/* L20: */ + } + } + +L30: + return 0; + +/* End of DLAED7 */ + +} /* dlaed7_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dlaed8.cpp b/lib/linalg/dlaed8.cpp new file mode 100644 index 0000000000..263bea75d5 --- /dev/null +++ b/lib/linalg/dlaed8.cpp @@ -0,0 +1,610 @@ +/* fortran/dlaed8.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static doublereal c_b3 = -1.; +static integer c__1 = 1; + +/* > \brief \b DLAED8 used by DSTEDC. Merges eigenvalues and deflates secular equation. Used when the original + matrix is dense. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DLAED8 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DLAED8( ICOMPQ, K, N, QSIZ, D, Q, LDQ, INDXQ, RHO, */ +/* CUTPNT, Z, DLAMDA, Q2, LDQ2, W, PERM, GIVPTR, */ +/* GIVCOL, GIVNUM, INDXP, INDX, INFO ) */ + +/* .. Scalar Arguments .. */ +/* INTEGER CUTPNT, GIVPTR, ICOMPQ, INFO, K, LDQ, LDQ2, N, */ +/* $ QSIZ */ +/* DOUBLE PRECISION RHO */ +/* .. */ +/* .. Array Arguments .. */ +/* INTEGER GIVCOL( 2, * ), INDX( * ), INDXP( * ), */ +/* $ INDXQ( * ), PERM( * ) */ +/* DOUBLE PRECISION D( * ), DLAMDA( * ), GIVNUM( 2, * ), */ +/* $ Q( LDQ, * ), Q2( LDQ2, * ), W( * ), Z( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DLAED8 merges the two sets of eigenvalues together into a single */ +/* > sorted set. Then it tries to deflate the size of the problem. */ +/* > There are two ways in which deflation can occur: when two or more */ +/* > eigenvalues are close together or if there is a tiny element in the */ +/* > Z vector. For each such occurrence the order of the related secular */ +/* > equation problem is reduced by one. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] ICOMPQ */ +/* > \verbatim */ +/* > ICOMPQ is INTEGER */ +/* > = 0: Compute eigenvalues only. */ +/* > = 1: Compute eigenvectors of original dense symmetric matrix */ +/* > also. On entry, Q contains the orthogonal matrix used */ +/* > to reduce the original matrix to tridiagonal form. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] K */ +/* > \verbatim */ +/* > K is INTEGER */ +/* > The number of non-deflated eigenvalues, and the order of the */ +/* > related secular equation. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The dimension of the symmetric tridiagonal matrix. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] QSIZ */ +/* > \verbatim */ +/* > QSIZ is INTEGER */ +/* > The dimension of the orthogonal matrix used to reduce */ +/* > the full matrix to tridiagonal form. QSIZ >= N if ICOMPQ = 1. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] D */ +/* > \verbatim */ +/* > D is DOUBLE PRECISION array, dimension (N) */ +/* > On entry, the eigenvalues of the two submatrices to be */ +/* > combined. On exit, the trailing (N-K) updated eigenvalues */ +/* > (those which were deflated) sorted into increasing order. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] Q */ +/* > \verbatim */ +/* > Q is DOUBLE PRECISION array, dimension (LDQ,N) */ +/* > If ICOMPQ = 0, Q is not referenced. Otherwise, */ +/* > on entry, Q contains the eigenvectors of the partially solved */ +/* > system which has been previously updated in matrix */ +/* > multiplies with other partially solved eigensystems. */ +/* > On exit, Q contains the trailing (N-K) updated eigenvectors */ +/* > (those which were deflated) in its last N-K columns. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDQ */ +/* > \verbatim */ +/* > LDQ is INTEGER */ +/* > The leading dimension of the array Q. LDQ >= max(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] INDXQ */ +/* > \verbatim */ +/* > INDXQ is INTEGER array, dimension (N) */ +/* > The permutation which separately sorts the two sub-problems */ +/* > in D into ascending order. Note that elements in the second */ +/* > half of this permutation must first have CUTPNT added to */ +/* > their values in order to be accurate. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] RHO */ +/* > \verbatim */ +/* > RHO is DOUBLE PRECISION */ +/* > On entry, the off-diagonal element associated with the rank-1 */ +/* > cut which originally split the two submatrices which are now */ +/* > being recombined. */ +/* > On exit, RHO has been modified to the value required by */ +/* > DLAED3. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] CUTPNT */ +/* > \verbatim */ +/* > CUTPNT is INTEGER */ +/* > The location of the last eigenvalue in the leading */ +/* > sub-matrix. min(1,N) <= CUTPNT <= N. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] Z */ +/* > \verbatim */ +/* > Z is DOUBLE PRECISION array, dimension (N) */ +/* > On entry, Z contains the updating vector (the last row of */ +/* > the first sub-eigenvector matrix and the first row of the */ +/* > second sub-eigenvector matrix). */ +/* > On exit, the contents of Z are destroyed by the updating */ +/* > process. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] DLAMDA */ +/* > \verbatim */ +/* > DLAMDA is DOUBLE PRECISION array, dimension (N) */ +/* > A copy of the first K eigenvalues which will be used by */ +/* > DLAED3 to form the secular equation. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] Q2 */ +/* > \verbatim */ +/* > Q2 is DOUBLE PRECISION array, dimension (LDQ2,N) */ +/* > If ICOMPQ = 0, Q2 is not referenced. Otherwise, */ +/* > a copy of the first K eigenvectors which will be used by */ +/* > DLAED7 in a matrix multiply (DGEMM) to update the new */ +/* > eigenvectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDQ2 */ +/* > \verbatim */ +/* > LDQ2 is INTEGER */ +/* > The leading dimension of the array Q2. LDQ2 >= max(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] W */ +/* > \verbatim */ +/* > W is DOUBLE PRECISION array, dimension (N) */ +/* > The first k values of the final deflation-altered z-vector and */ +/* > will be passed to DLAED3. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] PERM */ +/* > \verbatim */ +/* > PERM is INTEGER array, dimension (N) */ +/* > The permutations (from deflation and sorting) to be applied */ +/* > to each eigenblock. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] GIVPTR */ +/* > \verbatim */ +/* > GIVPTR is INTEGER */ +/* > The number of Givens rotations which took place in this */ +/* > subproblem. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] GIVCOL */ +/* > \verbatim */ +/* > GIVCOL is INTEGER array, dimension (2, N) */ +/* > Each pair of numbers indicates a pair of columns to take place */ +/* > in a Givens rotation. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] GIVNUM */ +/* > \verbatim */ +/* > GIVNUM is DOUBLE PRECISION array, dimension (2, N) */ +/* > Each number indicates the S value to be used in the */ +/* > corresponding Givens rotation. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INDXP */ +/* > \verbatim */ +/* > INDXP is INTEGER array, dimension (N) */ +/* > The permutation used to place deflated values of D at the end */ +/* > of the array. INDXP(1:K) points to the nondeflated D-values */ +/* > and INDXP(K+1:N) points to the deflated eigenvalues. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INDX */ +/* > \verbatim */ +/* > INDX is INTEGER array, dimension (N) */ +/* > The permutation used to sort the contents of D into ascending */ +/* > order. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit. */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup auxOTHERcomputational */ + +/* > \par Contributors: */ +/* ================== */ +/* > */ +/* > Jeff Rutter, Computer Science Division, University of California */ +/* > at Berkeley, USA */ + +/* ===================================================================== */ +/* Subroutine */ int dlaed8_(integer *icompq, integer *k, integer *n, integer + *qsiz, doublereal *d__, doublereal *q, integer *ldq, integer *indxq, + doublereal *rho, integer *cutpnt, doublereal *z__, doublereal *dlamda, + doublereal *q2, integer *ldq2, doublereal *w, integer *perm, integer + *givptr, integer *givcol, doublereal *givnum, integer *indxp, integer + *indx, integer *info) +{ + /* System generated locals */ + integer q_dim1, q_offset, q2_dim1, q2_offset, i__1; + doublereal d__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + doublereal c__; + integer i__, j; + doublereal s, t; + integer k2, n1, n2, jp, n1p1; + doublereal eps, tau, tol; + integer jlam, imax, jmax; + extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *), dscal_( + integer *, doublereal *, doublereal *, integer *), dcopy_(integer + *, doublereal *, integer *, doublereal *, integer *); + extern doublereal dlapy2_(doublereal *, doublereal *), dlamch_(char *, + ftnlen); + extern integer idamax_(integer *, doublereal *, integer *); + extern /* Subroutine */ int dlamrg_(integer *, integer *, doublereal *, + integer *, integer *, integer *), dlacpy_(char *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *, + ftnlen), xerbla_(char *, integer *, ftnlen); + + +/* -- LAPACK computational routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ + +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + --d__; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --indxq; + --z__; + --dlamda; + q2_dim1 = *ldq2; + q2_offset = 1 + q2_dim1; + q2 -= q2_offset; + --w; + --perm; + givcol -= 3; + givnum -= 3; + --indxp; + --indx; + + /* Function Body */ + *info = 0; + + if (*icompq < 0 || *icompq > 1) { + *info = -1; + } else if (*n < 0) { + *info = -3; + } else if (*icompq == 1 && *qsiz < *n) { + *info = -4; + } else if (*ldq < max(1,*n)) { + *info = -7; + } else if (*cutpnt < min(1,*n) || *cutpnt > *n) { + *info = -10; + } else if (*ldq2 < max(1,*n)) { + *info = -14; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DLAED8", &i__1, (ftnlen)6); + return 0; + } + +/* Need to initialize GIVPTR to O here in case of quick exit */ +/* to prevent an unspecified code behavior (usually sigfault) */ +/* when IWORK array on entry to *stedc is not zeroed */ +/* (or at least some IWORK entries which used in *laed7 for GIVPTR). */ + + *givptr = 0; + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + n1 = *cutpnt; + n2 = *n - n1; + n1p1 = n1 + 1; + + if (*rho < 0.) { + dscal_(&n2, &c_b3, &z__[n1p1], &c__1); + } + +/* Normalize z so that norm(z) = 1 */ + + t = 1. / sqrt(2.); + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + indx[j] = j; +/* L10: */ + } + dscal_(n, &t, &z__[1], &c__1); + *rho = (d__1 = *rho * 2., abs(d__1)); + +/* Sort the eigenvalues into increasing order */ + + i__1 = *n; + for (i__ = *cutpnt + 1; i__ <= i__1; ++i__) { + indxq[i__] += *cutpnt; +/* L20: */ + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + dlamda[i__] = d__[indxq[i__]]; + w[i__] = z__[indxq[i__]]; +/* L30: */ + } + i__ = 1; + j = *cutpnt + 1; + dlamrg_(&n1, &n2, &dlamda[1], &c__1, &c__1, &indx[1]); + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + d__[i__] = dlamda[indx[i__]]; + z__[i__] = w[indx[i__]]; +/* L40: */ + } + +/* Calculate the allowable deflation tolerance */ + + imax = idamax_(n, &z__[1], &c__1); + jmax = idamax_(n, &d__[1], &c__1); + eps = dlamch_((char *)"Epsilon", (ftnlen)7); + tol = eps * 8. * (d__1 = d__[jmax], abs(d__1)); + +/* If the rank-1 modifier is small enough, no more needs to be done */ +/* except to reorganize Q so that its columns correspond with the */ +/* elements in D. */ + + if (*rho * (d__1 = z__[imax], abs(d__1)) <= tol) { + *k = 0; + if (*icompq == 0) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + perm[j] = indxq[indx[j]]; +/* L50: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + perm[j] = indxq[indx[j]]; + dcopy_(qsiz, &q[perm[j] * q_dim1 + 1], &c__1, &q2[j * q2_dim1 + + 1], &c__1); +/* L60: */ + } + dlacpy_((char *)"A", qsiz, n, &q2[q2_dim1 + 1], ldq2, &q[q_dim1 + 1], ldq, + (ftnlen)1); + } + return 0; + } + +/* If there are multiple eigenvalues then the problem deflates. Here */ +/* the number of equal eigenvalues are found. As each equal */ +/* eigenvalue is found, an elementary reflector is computed to rotate */ +/* the corresponding eigensubspace so that the corresponding */ +/* components of Z are zero in this new basis. */ + + *k = 0; + k2 = *n + 1; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*rho * (d__1 = z__[j], abs(d__1)) <= tol) { + +/* Deflate due to small z component. */ + + --k2; + indxp[k2] = j; + if (j == *n) { + goto L110; + } + } else { + jlam = j; + goto L80; + } +/* L70: */ + } +L80: + ++j; + if (j > *n) { + goto L100; + } + if (*rho * (d__1 = z__[j], abs(d__1)) <= tol) { + +/* Deflate due to small z component. */ + + --k2; + indxp[k2] = j; + } else { + +/* Check if eigenvalues are close enough to allow deflation. */ + + s = z__[jlam]; + c__ = z__[j]; + +/* Find sqrt(a**2+b**2) without overflow or */ +/* destructive underflow. */ + + tau = dlapy2_(&c__, &s); + t = d__[j] - d__[jlam]; + c__ /= tau; + s = -s / tau; + if ((d__1 = t * c__ * s, abs(d__1)) <= tol) { + +/* Deflation is possible. */ + + z__[j] = tau; + z__[jlam] = 0.; + +/* Record the appropriate Givens rotation */ + + ++(*givptr); + givcol[(*givptr << 1) + 1] = indxq[indx[jlam]]; + givcol[(*givptr << 1) + 2] = indxq[indx[j]]; + givnum[(*givptr << 1) + 1] = c__; + givnum[(*givptr << 1) + 2] = s; + if (*icompq == 1) { + drot_(qsiz, &q[indxq[indx[jlam]] * q_dim1 + 1], &c__1, &q[ + indxq[indx[j]] * q_dim1 + 1], &c__1, &c__, &s); + } + t = d__[jlam] * c__ * c__ + d__[j] * s * s; + d__[j] = d__[jlam] * s * s + d__[j] * c__ * c__; + d__[jlam] = t; + --k2; + i__ = 1; +L90: + if (k2 + i__ <= *n) { + if (d__[jlam] < d__[indxp[k2 + i__]]) { + indxp[k2 + i__ - 1] = indxp[k2 + i__]; + indxp[k2 + i__] = jlam; + ++i__; + goto L90; + } else { + indxp[k2 + i__ - 1] = jlam; + } + } else { + indxp[k2 + i__ - 1] = jlam; + } + jlam = j; + } else { + ++(*k); + w[*k] = z__[jlam]; + dlamda[*k] = d__[jlam]; + indxp[*k] = jlam; + jlam = j; + } + } + goto L80; +L100: + +/* Record the last eigenvalue. */ + + ++(*k); + w[*k] = z__[jlam]; + dlamda[*k] = d__[jlam]; + indxp[*k] = jlam; + +L110: + +/* Sort the eigenvalues and corresponding eigenvectors into DLAMDA */ +/* and Q2 respectively. The eigenvalues/vectors which were not */ +/* deflated go into the first K slots of DLAMDA and Q2 respectively, */ +/* while those which were deflated go into the last N - K slots. */ + + if (*icompq == 0) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + jp = indxp[j]; + dlamda[j] = d__[jp]; + perm[j] = indxq[indx[jp]]; +/* L120: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + jp = indxp[j]; + dlamda[j] = d__[jp]; + perm[j] = indxq[indx[jp]]; + dcopy_(qsiz, &q[perm[j] * q_dim1 + 1], &c__1, &q2[j * q2_dim1 + 1] + , &c__1); +/* L130: */ + } + } + +/* The deflated eigenvalues and their corresponding vectors go back */ +/* into the last N - K slots of D and Q respectively. */ + + if (*k < *n) { + if (*icompq == 0) { + i__1 = *n - *k; + dcopy_(&i__1, &dlamda[*k + 1], &c__1, &d__[*k + 1], &c__1); + } else { + i__1 = *n - *k; + dcopy_(&i__1, &dlamda[*k + 1], &c__1, &d__[*k + 1], &c__1); + i__1 = *n - *k; + dlacpy_((char *)"A", qsiz, &i__1, &q2[(*k + 1) * q2_dim1 + 1], ldq2, &q[(* + k + 1) * q_dim1 + 1], ldq, (ftnlen)1); + } + } + + return 0; + +/* End of DLAED8 */ + +} /* dlaed8_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dlaed9.cpp b/lib/linalg/dlaed9.cpp new file mode 100644 index 0000000000..f74b8f5bc0 --- /dev/null +++ b/lib/linalg/dlaed9.cpp @@ -0,0 +1,370 @@ +/* fortran/dlaed9.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +/* > \brief \b DLAED9 used by DSTEDC. Finds the roots of the secular equation and updates the eigenvectors. Us +ed when the original matrix is dense. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DLAED9 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DLAED9( K, KSTART, KSTOP, N, D, Q, LDQ, RHO, DLAMDA, W, */ +/* S, LDS, INFO ) */ + +/* .. Scalar Arguments .. */ +/* INTEGER INFO, K, KSTART, KSTOP, LDQ, LDS, N */ +/* DOUBLE PRECISION RHO */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION D( * ), DLAMDA( * ), Q( LDQ, * ), S( LDS, * ), */ +/* $ W( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DLAED9 finds the roots of the secular equation, as defined by the */ +/* > values in D, Z, and RHO, between KSTART and KSTOP. It makes the */ +/* > appropriate calls to DLAED4 and then stores the new matrix of */ +/* > eigenvectors for use in calculating the next level of Z vectors. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] K */ +/* > \verbatim */ +/* > K is INTEGER */ +/* > The number of terms in the rational function to be solved by */ +/* > DLAED4. K >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] KSTART */ +/* > \verbatim */ +/* > KSTART is INTEGER */ +/* > \endverbatim */ +/* > */ +/* > \param[in] KSTOP */ +/* > \verbatim */ +/* > KSTOP is INTEGER */ +/* > The updated eigenvalues Lambda(I), KSTART <= I <= KSTOP */ +/* > are to be computed. 1 <= KSTART <= KSTOP <= K. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of rows and columns in the Q matrix. */ +/* > N >= K (delation may result in N > K). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] D */ +/* > \verbatim */ +/* > D is DOUBLE PRECISION array, dimension (N) */ +/* > D(I) contains the updated eigenvalues */ +/* > for KSTART <= I <= KSTOP. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] Q */ +/* > \verbatim */ +/* > Q is DOUBLE PRECISION array, dimension (LDQ,N) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDQ */ +/* > \verbatim */ +/* > LDQ is INTEGER */ +/* > The leading dimension of the array Q. LDQ >= max( 1, N ). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] RHO */ +/* > \verbatim */ +/* > RHO is DOUBLE PRECISION */ +/* > The value of the parameter in the rank one update equation. */ +/* > RHO >= 0 required. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DLAMDA */ +/* > \verbatim */ +/* > DLAMDA is DOUBLE PRECISION array, dimension (K) */ +/* > The first K elements of this array contain the old roots */ +/* > of the deflated updating problem. These are the poles */ +/* > of the secular equation. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] W */ +/* > \verbatim */ +/* > W is DOUBLE PRECISION array, dimension (K) */ +/* > The first K elements of this array contain the components */ +/* > of the deflation-adjusted updating vector. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] S */ +/* > \verbatim */ +/* > S is DOUBLE PRECISION array, dimension (LDS, K) */ +/* > Will contain the eigenvectors of the repaired matrix which */ +/* > will be stored for subsequent Z vector calculation and */ +/* > multiplied by the previously accumulated eigenvectors */ +/* > to update the system. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDS */ +/* > \verbatim */ +/* > LDS is INTEGER */ +/* > The leading dimension of S. LDS >= max( 1, K ). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit. */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > > 0: if INFO = 1, an eigenvalue did not converge */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup auxOTHERcomputational */ + +/* > \par Contributors: */ +/* ================== */ +/* > */ +/* > Jeff Rutter, Computer Science Division, University of California */ +/* > at Berkeley, USA */ + +/* ===================================================================== */ +/* Subroutine */ int dlaed9_(integer *k, integer *kstart, integer *kstop, + integer *n, doublereal *d__, doublereal *q, integer *ldq, doublereal * + rho, doublereal *dlamda, doublereal *w, doublereal *s, integer *lds, + integer *info) +{ + /* System generated locals */ + integer q_dim1, q_offset, s_dim1, s_offset, i__1, i__2; + doublereal d__1; + + /* Builtin functions */ + double sqrt(doublereal), d_sign(doublereal *, doublereal *); + + /* Local variables */ + integer i__, j; + doublereal temp; + extern doublereal dnrm2_(integer *, doublereal *, integer *); + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *), dlaed4_(integer *, integer *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, integer *); + extern doublereal dlamc3_(doublereal *, doublereal *); + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + + +/* -- LAPACK computational routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + --d__; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --dlamda; + --w; + s_dim1 = *lds; + s_offset = 1 + s_dim1; + s -= s_offset; + + /* Function Body */ + *info = 0; + + if (*k < 0) { + *info = -1; + } else if (*kstart < 1 || *kstart > max(1,*k)) { + *info = -2; + } else if (max(1,*kstop) < *kstart || *kstop > max(1,*k)) { + *info = -3; + } else if (*n < *k) { + *info = -4; + } else if (*ldq < max(1,*k)) { + *info = -7; + } else if (*lds < max(1,*k)) { + *info = -12; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DLAED9", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + if (*k == 0) { + return 0; + } + +/* Modify values DLAMDA(i) to make sure all DLAMDA(i)-DLAMDA(j) can */ +/* be computed with high relative accuracy (barring over/underflow). */ +/* This is a problem on machines without a guard digit in */ +/* add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). */ +/* The following code replaces DLAMDA(I) by 2*DLAMDA(I)-DLAMDA(I), */ +/* which on any of these machines zeros out the bottommost */ +/* bit of DLAMDA(I) if it is 1; this makes the subsequent */ +/* subtractions DLAMDA(I)-DLAMDA(J) unproblematic when cancellation */ +/* occurs. On binary machines with a guard digit (almost all */ +/* machines) it does not change DLAMDA(I) at all. On hexadecimal */ +/* and decimal machines with a guard digit, it slightly */ +/* changes the bottommost bits of DLAMDA(I). It does not account */ +/* for hexadecimal or decimal machines without guard digits */ +/* (we know of none). We use a subroutine call to compute */ +/* 2*DLAMBDA(I) to prevent optimizing compilers from eliminating */ +/* this code. */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + dlamda[i__] = dlamc3_(&dlamda[i__], &dlamda[i__]) - dlamda[i__]; +/* L10: */ + } + + i__1 = *kstop; + for (j = *kstart; j <= i__1; ++j) { + dlaed4_(k, &j, &dlamda[1], &w[1], &q[j * q_dim1 + 1], rho, &d__[j], + info); + +/* If the zero finder fails, the computation is terminated. */ + + if (*info != 0) { + goto L120; + } +/* L20: */ + } + + if (*k == 1 || *k == 2) { + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = *k; + for (j = 1; j <= i__2; ++j) { + s[j + i__ * s_dim1] = q[j + i__ * q_dim1]; +/* L30: */ + } +/* L40: */ + } + goto L120; + } + +/* Compute updated W. */ + + dcopy_(k, &w[1], &c__1, &s[s_offset], &c__1); + +/* Initialize W(I) = Q(I,I) */ + + i__1 = *ldq + 1; + dcopy_(k, &q[q_offset], &i__1, &w[1], &c__1); + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + w[i__] *= q[i__ + j * q_dim1] / (dlamda[i__] - dlamda[j]); +/* L50: */ + } + i__2 = *k; + for (i__ = j + 1; i__ <= i__2; ++i__) { + w[i__] *= q[i__ + j * q_dim1] / (dlamda[i__] - dlamda[j]); +/* L60: */ + } +/* L70: */ + } + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + d__1 = sqrt(-w[i__]); + w[i__] = d_sign(&d__1, &s[i__ + s_dim1]); +/* L80: */ + } + +/* Compute eigenvectors of the modified rank-1 modification. */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = *k; + for (i__ = 1; i__ <= i__2; ++i__) { + q[i__ + j * q_dim1] = w[i__] / q[i__ + j * q_dim1]; +/* L90: */ + } + temp = dnrm2_(k, &q[j * q_dim1 + 1], &c__1); + i__2 = *k; + for (i__ = 1; i__ <= i__2; ++i__) { + s[i__ + j * s_dim1] = q[i__ + j * q_dim1] / temp; +/* L100: */ + } +/* L110: */ + } + +L120: + return 0; + +/* End of DLAED9 */ + +} /* dlaed9_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dlaeda.cpp b/lib/linalg/dlaeda.cpp new file mode 100644 index 0000000000..f487609f82 --- /dev/null +++ b/lib/linalg/dlaeda.cpp @@ -0,0 +1,386 @@ +/* fortran/dlaeda.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static integer c__2 = 2; +static integer c__1 = 1; +static doublereal c_b24 = 1.; +static doublereal c_b26 = 0.; + +/* > \brief \b DLAEDA used by DSTEDC. Computes the Z vector determining the rank-one modification of the diago +nal matrix. Used when the original matrix is dense. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DLAEDA + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DLAEDA( N, TLVLS, CURLVL, CURPBM, PRMPTR, PERM, GIVPTR, */ +/* GIVCOL, GIVNUM, Q, QPTR, Z, ZTEMP, INFO ) */ + +/* .. Scalar Arguments .. */ +/* INTEGER CURLVL, CURPBM, INFO, N, TLVLS */ +/* .. */ +/* .. Array Arguments .. */ +/* INTEGER GIVCOL( 2, * ), GIVPTR( * ), PERM( * ), */ +/* $ PRMPTR( * ), QPTR( * ) */ +/* DOUBLE PRECISION GIVNUM( 2, * ), Q( * ), Z( * ), ZTEMP( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DLAEDA computes the Z vector corresponding to the merge step in the */ +/* > CURLVLth step of the merge process with TLVLS steps for the CURPBMth */ +/* > problem. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The dimension of the symmetric tridiagonal matrix. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TLVLS */ +/* > \verbatim */ +/* > TLVLS is INTEGER */ +/* > The total number of merging levels in the overall divide and */ +/* > conquer tree. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] CURLVL */ +/* > \verbatim */ +/* > CURLVL is INTEGER */ +/* > The current level in the overall merge routine, */ +/* > 0 <= curlvl <= tlvls. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] CURPBM */ +/* > \verbatim */ +/* > CURPBM is INTEGER */ +/* > The current problem in the current level in the overall */ +/* > merge routine (counting from upper left to lower right). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] PRMPTR */ +/* > \verbatim */ +/* > PRMPTR is INTEGER array, dimension (N lg N) */ +/* > Contains a list of pointers which indicate where in PERM a */ +/* > level's permutation is stored. PRMPTR(i+1) - PRMPTR(i) */ +/* > indicates the size of the permutation and incidentally the */ +/* > size of the full, non-deflated problem. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] PERM */ +/* > \verbatim */ +/* > PERM is INTEGER array, dimension (N lg N) */ +/* > Contains the permutations (from deflation and sorting) to be */ +/* > applied to each eigenblock. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] GIVPTR */ +/* > \verbatim */ +/* > GIVPTR is INTEGER array, dimension (N lg N) */ +/* > Contains a list of pointers which indicate where in GIVCOL a */ +/* > level's Givens rotations are stored. GIVPTR(i+1) - GIVPTR(i) */ +/* > indicates the number of Givens rotations. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] GIVCOL */ +/* > \verbatim */ +/* > GIVCOL is INTEGER array, dimension (2, N lg N) */ +/* > Each pair of numbers indicates a pair of columns to take place */ +/* > in a Givens rotation. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] GIVNUM */ +/* > \verbatim */ +/* > GIVNUM is DOUBLE PRECISION array, dimension (2, N lg N) */ +/* > Each number indicates the S value to be used in the */ +/* > corresponding Givens rotation. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] Q */ +/* > \verbatim */ +/* > Q is DOUBLE PRECISION array, dimension (N**2) */ +/* > Contains the square eigenblocks from previous levels, the */ +/* > starting positions for blocks are given by QPTR. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] QPTR */ +/* > \verbatim */ +/* > QPTR is INTEGER array, dimension (N+2) */ +/* > Contains a list of pointers which indicate where in Q an */ +/* > eigenblock is stored. SQRT( QPTR(i+1) - QPTR(i) ) indicates */ +/* > the size of the block. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] Z */ +/* > \verbatim */ +/* > Z is DOUBLE PRECISION array, dimension (N) */ +/* > On output this vector contains the updating vector (the last */ +/* > row of the first sub-eigenvector matrix and the first row of */ +/* > the second sub-eigenvector matrix). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] ZTEMP */ +/* > \verbatim */ +/* > ZTEMP is DOUBLE PRECISION array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit. */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup auxOTHERcomputational */ + +/* > \par Contributors: */ +/* ================== */ +/* > */ +/* > Jeff Rutter, Computer Science Division, University of California */ +/* > at Berkeley, USA */ + +/* ===================================================================== */ +/* Subroutine */ int dlaeda_(integer *n, integer *tlvls, integer *curlvl, + integer *curpbm, integer *prmptr, integer *perm, integer *givptr, + integer *givcol, doublereal *givnum, doublereal *q, integer *qptr, + doublereal *z__, doublereal *ztemp, integer *info) +{ + /* System generated locals */ + integer i__1, i__2, i__3; + + /* Builtin functions */ + integer pow_ii(integer *, integer *); + double sqrt(doublereal); + + /* Local variables */ + integer i__, k, mid, ptr; + extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *); + integer curr, bsiz1, bsiz2, psiz1, psiz2, zptr1; + extern /* Subroutine */ int dgemv_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *, ftnlen), dcopy_(integer *, + doublereal *, integer *, doublereal *, integer *), xerbla_(char *, + integer *, ftnlen); + + +/* -- LAPACK computational routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + --ztemp; + --z__; + --qptr; + --q; + givnum -= 3; + givcol -= 3; + --givptr; + --perm; + --prmptr; + + /* Function Body */ + *info = 0; + + if (*n < 0) { + *info = -1; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DLAEDA", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Determine location of first number in second half. */ + + mid = *n / 2 + 1; + +/* Gather last/first rows of appropriate eigenblocks into center of Z */ + + ptr = 1; + +/* Determine location of lowest level subproblem in the full storage */ +/* scheme */ + + i__1 = *curlvl - 1; + curr = ptr + *curpbm * pow_ii(&c__2, curlvl) + pow_ii(&c__2, &i__1) - 1; + +/* Determine size of these matrices. We add HALF to the value of */ +/* the SQRT in case the machine underestimates one of these square */ +/* roots. */ + + bsiz1 = (integer) (sqrt((doublereal) (qptr[curr + 1] - qptr[curr])) + .5); + bsiz2 = (integer) (sqrt((doublereal) (qptr[curr + 2] - qptr[curr + 1])) + + .5); + i__1 = mid - bsiz1 - 1; + for (k = 1; k <= i__1; ++k) { + z__[k] = 0.; +/* L10: */ + } + dcopy_(&bsiz1, &q[qptr[curr] + bsiz1 - 1], &bsiz1, &z__[mid - bsiz1], & + c__1); + dcopy_(&bsiz2, &q[qptr[curr + 1]], &bsiz2, &z__[mid], &c__1); + i__1 = *n; + for (k = mid + bsiz2; k <= i__1; ++k) { + z__[k] = 0.; +/* L20: */ + } + +/* Loop through remaining levels 1 -> CURLVL applying the Givens */ +/* rotations and permutation and then multiplying the center matrices */ +/* against the current Z. */ + + ptr = pow_ii(&c__2, tlvls) + 1; + i__1 = *curlvl - 1; + for (k = 1; k <= i__1; ++k) { + i__2 = *curlvl - k; + i__3 = *curlvl - k - 1; + curr = ptr + *curpbm * pow_ii(&c__2, &i__2) + pow_ii(&c__2, &i__3) - + 1; + psiz1 = prmptr[curr + 1] - prmptr[curr]; + psiz2 = prmptr[curr + 2] - prmptr[curr + 1]; + zptr1 = mid - psiz1; + +/* Apply Givens at CURR and CURR+1 */ + + i__2 = givptr[curr + 1] - 1; + for (i__ = givptr[curr]; i__ <= i__2; ++i__) { + drot_(&c__1, &z__[zptr1 + givcol[(i__ << 1) + 1] - 1], &c__1, & + z__[zptr1 + givcol[(i__ << 1) + 2] - 1], &c__1, &givnum[( + i__ << 1) + 1], &givnum[(i__ << 1) + 2]); +/* L30: */ + } + i__2 = givptr[curr + 2] - 1; + for (i__ = givptr[curr + 1]; i__ <= i__2; ++i__) { + drot_(&c__1, &z__[mid - 1 + givcol[(i__ << 1) + 1]], &c__1, &z__[ + mid - 1 + givcol[(i__ << 1) + 2]], &c__1, &givnum[(i__ << + 1) + 1], &givnum[(i__ << 1) + 2]); +/* L40: */ + } + psiz1 = prmptr[curr + 1] - prmptr[curr]; + psiz2 = prmptr[curr + 2] - prmptr[curr + 1]; + i__2 = psiz1 - 1; + for (i__ = 0; i__ <= i__2; ++i__) { + ztemp[i__ + 1] = z__[zptr1 + perm[prmptr[curr] + i__] - 1]; +/* L50: */ + } + i__2 = psiz2 - 1; + for (i__ = 0; i__ <= i__2; ++i__) { + ztemp[psiz1 + i__ + 1] = z__[mid + perm[prmptr[curr + 1] + i__] - + 1]; +/* L60: */ + } + +/* Multiply Blocks at CURR and CURR+1 */ + +/* Determine size of these matrices. We add HALF to the value of */ +/* the SQRT in case the machine underestimates one of these */ +/* square roots. */ + + bsiz1 = (integer) (sqrt((doublereal) (qptr[curr + 1] - qptr[curr])) + + .5); + bsiz2 = (integer) (sqrt((doublereal) (qptr[curr + 2] - qptr[curr + 1]) + ) + .5); + if (bsiz1 > 0) { + dgemv_((char *)"T", &bsiz1, &bsiz1, &c_b24, &q[qptr[curr]], &bsiz1, & + ztemp[1], &c__1, &c_b26, &z__[zptr1], &c__1, (ftnlen)1); + } + i__2 = psiz1 - bsiz1; + dcopy_(&i__2, &ztemp[bsiz1 + 1], &c__1, &z__[zptr1 + bsiz1], &c__1); + if (bsiz2 > 0) { + dgemv_((char *)"T", &bsiz2, &bsiz2, &c_b24, &q[qptr[curr + 1]], &bsiz2, & + ztemp[psiz1 + 1], &c__1, &c_b26, &z__[mid], &c__1, ( + ftnlen)1); + } + i__2 = psiz2 - bsiz2; + dcopy_(&i__2, &ztemp[psiz1 + bsiz2 + 1], &c__1, &z__[mid + bsiz2], & + c__1); + + i__2 = *tlvls - k; + ptr += pow_ii(&c__2, &i__2); +/* L70: */ + } + + return 0; + +/* End of DLAEDA */ + +} /* dlaeda_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dlaev2.cpp b/lib/linalg/dlaev2.cpp new file mode 100644 index 0000000000..32f4eb5424 --- /dev/null +++ b/lib/linalg/dlaev2.cpp @@ -0,0 +1,263 @@ +/* fortran/dlaev2.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* > \brief \b DLAEV2 computes the eigenvalues and eigenvectors of a 2-by-2 symmetric/Hermitian matrix. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DLAEV2 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DLAEV2( A, B, C, RT1, RT2, CS1, SN1 ) */ + +/* .. Scalar Arguments .. */ +/* DOUBLE PRECISION A, B, C, CS1, RT1, RT2, SN1 */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DLAEV2 computes the eigendecomposition of a 2-by-2 symmetric matrix */ +/* > [ A B ] */ +/* > [ B C ]. */ +/* > On return, RT1 is the eigenvalue of larger absolute value, RT2 is the */ +/* > eigenvalue of smaller absolute value, and (CS1,SN1) is the unit right */ +/* > eigenvector for RT1, giving the decomposition */ +/* > */ +/* > [ CS1 SN1 ] [ A B ] [ CS1 -SN1 ] = [ RT1 0 ] */ +/* > [-SN1 CS1 ] [ B C ] [ SN1 CS1 ] [ 0 RT2 ]. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION */ +/* > The (1,1) element of the 2-by-2 matrix. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] B */ +/* > \verbatim */ +/* > B is DOUBLE PRECISION */ +/* > The (1,2) element and the conjugate of the (2,1) element of */ +/* > the 2-by-2 matrix. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] C */ +/* > \verbatim */ +/* > C is DOUBLE PRECISION */ +/* > The (2,2) element of the 2-by-2 matrix. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RT1 */ +/* > \verbatim */ +/* > RT1 is DOUBLE PRECISION */ +/* > The eigenvalue of larger absolute value. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RT2 */ +/* > \verbatim */ +/* > RT2 is DOUBLE PRECISION */ +/* > The eigenvalue of smaller absolute value. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] CS1 */ +/* > \verbatim */ +/* > CS1 is DOUBLE PRECISION */ +/* > \endverbatim */ +/* > */ +/* > \param[out] SN1 */ +/* > \verbatim */ +/* > SN1 is DOUBLE PRECISION */ +/* > The vector (CS1, SN1) is a unit right eigenvector for RT1. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup OTHERauxiliary */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > RT1 is accurate to a few ulps barring over/underflow. */ +/* > */ +/* > RT2 may be inaccurate if there is massive cancellation in the */ +/* > determinant A*C-B*B; higher precision or correctly rounded or */ +/* > correctly truncated arithmetic would be needed to compute RT2 */ +/* > accurately in all cases. */ +/* > */ +/* > CS1 and SN1 are accurate to a few ulps barring over/underflow. */ +/* > */ +/* > Overflow is possible only if RT1 is within a factor of 5 of overflow. */ +/* > Underflow is harmless if the input data is 0 or exceeds */ +/* > underflow_threshold / macheps. */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int dlaev2_(doublereal *a, doublereal *b, doublereal *c__, + doublereal *rt1, doublereal *rt2, doublereal *cs1, doublereal *sn1) +{ + /* System generated locals */ + doublereal d__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + doublereal ab, df, cs, ct, tb, sm, tn, rt, adf, acs; + integer sgn1, sgn2; + doublereal acmn, acmx; + + +/* -- LAPACK auxiliary routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Compute the eigenvalues */ + + sm = *a + *c__; + df = *a - *c__; + adf = abs(df); + tb = *b + *b; + ab = abs(tb); + if (abs(*a) > abs(*c__)) { + acmx = *a; + acmn = *c__; + } else { + acmx = *c__; + acmn = *a; + } + if (adf > ab) { +/* Computing 2nd power */ + d__1 = ab / adf; + rt = adf * sqrt(d__1 * d__1 + 1.); + } else if (adf < ab) { +/* Computing 2nd power */ + d__1 = adf / ab; + rt = ab * sqrt(d__1 * d__1 + 1.); + } else { + +/* Includes case AB=ADF=0 */ + + rt = ab * sqrt(2.); + } + if (sm < 0.) { + *rt1 = (sm - rt) * .5; + sgn1 = -1; + +/* Order of execution important. */ +/* To get fully accurate smaller eigenvalue, */ +/* next line needs to be executed in higher precision. */ + + *rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b; + } else if (sm > 0.) { + *rt1 = (sm + rt) * .5; + sgn1 = 1; + +/* Order of execution important. */ +/* To get fully accurate smaller eigenvalue, */ +/* next line needs to be executed in higher precision. */ + + *rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b; + } else { + +/* Includes case RT1 = RT2 = 0 */ + + *rt1 = rt * .5; + *rt2 = rt * -.5; + sgn1 = 1; + } + +/* Compute the eigenvector */ + + if (df >= 0.) { + cs = df + rt; + sgn2 = 1; + } else { + cs = df - rt; + sgn2 = -1; + } + acs = abs(cs); + if (acs > ab) { + ct = -tb / cs; + *sn1 = 1. / sqrt(ct * ct + 1.); + *cs1 = ct * *sn1; + } else { + if (ab == 0.) { + *cs1 = 1.; + *sn1 = 0.; + } else { + tn = -cs / tb; + *cs1 = 1. / sqrt(tn * tn + 1.); + *sn1 = tn * *cs1; + } + } + if (sgn1 == sgn2) { + tn = *cs1; + *cs1 = -(*sn1); + *sn1 = tn; + } + return 0; + +/* End of DLAEV2 */ + +} /* dlaev2_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dlals0.cpp b/lib/linalg/dlals0.cpp new file mode 100644 index 0000000000..ce36d7d553 --- /dev/null +++ b/lib/linalg/dlals0.cpp @@ -0,0 +1,602 @@ +/* fortran/dlals0.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static doublereal c_b5 = -1.; +static integer c__1 = 1; +static doublereal c_b11 = 1.; +static doublereal c_b13 = 0.; +static integer c__0 = 0; + +/* > \brief \b DLALS0 applies back multiplying factors in solving the least squares problem using divide and c +onquer SVD approach. Used by sgelsd. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DLALS0 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DLALS0( ICOMPQ, NL, NR, SQRE, NRHS, B, LDB, BX, LDBX, */ +/* PERM, GIVPTR, GIVCOL, LDGCOL, GIVNUM, LDGNUM, */ +/* POLES, DIFL, DIFR, Z, K, C, S, WORK, INFO ) */ + +/* .. Scalar Arguments .. */ +/* INTEGER GIVPTR, ICOMPQ, INFO, K, LDB, LDBX, LDGCOL, */ +/* $ LDGNUM, NL, NR, NRHS, SQRE */ +/* DOUBLE PRECISION C, S */ +/* .. */ +/* .. Array Arguments .. */ +/* INTEGER GIVCOL( LDGCOL, * ), PERM( * ) */ +/* DOUBLE PRECISION B( LDB, * ), BX( LDBX, * ), DIFL( * ), */ +/* $ DIFR( LDGNUM, * ), GIVNUM( LDGNUM, * ), */ +/* $ POLES( LDGNUM, * ), WORK( * ), Z( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DLALS0 applies back the multiplying factors of either the left or the */ +/* > right singular vector matrix of a diagonal matrix appended by a row */ +/* > to the right hand side matrix B in solving the least squares problem */ +/* > using the divide-and-conquer SVD approach. */ +/* > */ +/* > For the left singular vector matrix, three types of orthogonal */ +/* > matrices are involved: */ +/* > */ +/* > (1L) Givens rotations: the number of such rotations is GIVPTR; the */ +/* > pairs of columns/rows they were applied to are stored in GIVCOL; */ +/* > and the C- and S-values of these rotations are stored in GIVNUM. */ +/* > */ +/* > (2L) Permutation. The (NL+1)-st row of B is to be moved to the first */ +/* > row, and for J=2:N, PERM(J)-th row of B is to be moved to the */ +/* > J-th row. */ +/* > */ +/* > (3L) The left singular vector matrix of the remaining matrix. */ +/* > */ +/* > For the right singular vector matrix, four types of orthogonal */ +/* > matrices are involved: */ +/* > */ +/* > (1R) The right singular vector matrix of the remaining matrix. */ +/* > */ +/* > (2R) If SQRE = 1, one extra Givens rotation to generate the right */ +/* > null space. */ +/* > */ +/* > (3R) The inverse transformation of (2L). */ +/* > */ +/* > (4R) The inverse transformation of (1L). */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] ICOMPQ */ +/* > \verbatim */ +/* > ICOMPQ is INTEGER */ +/* > Specifies whether singular vectors are to be computed in */ +/* > factored form: */ +/* > = 0: Left singular vector matrix. */ +/* > = 1: Right singular vector matrix. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NL */ +/* > \verbatim */ +/* > NL is INTEGER */ +/* > The row dimension of the upper block. NL >= 1. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NR */ +/* > \verbatim */ +/* > NR is INTEGER */ +/* > The row dimension of the lower block. NR >= 1. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] SQRE */ +/* > \verbatim */ +/* > SQRE is INTEGER */ +/* > = 0: the lower block is an NR-by-NR square matrix. */ +/* > = 1: the lower block is an NR-by-(NR+1) rectangular matrix. */ +/* > */ +/* > The bidiagonal matrix has row dimension N = NL + NR + 1, */ +/* > and column dimension M = N + SQRE. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of columns of B and BX. NRHS must be at least 1. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is DOUBLE PRECISION array, dimension ( LDB, NRHS ) */ +/* > On input, B contains the right hand sides of the least */ +/* > squares problem in rows 1 through M. On output, B contains */ +/* > the solution X in rows 1 through N. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of B. LDB must be at least */ +/* > max(1,MAX( M, N ) ). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] BX */ +/* > \verbatim */ +/* > BX is DOUBLE PRECISION array, dimension ( LDBX, NRHS ) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDBX */ +/* > \verbatim */ +/* > LDBX is INTEGER */ +/* > The leading dimension of BX. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] PERM */ +/* > \verbatim */ +/* > PERM is INTEGER array, dimension ( N ) */ +/* > The permutations (from deflation and sorting) applied */ +/* > to the two blocks. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] GIVPTR */ +/* > \verbatim */ +/* > GIVPTR is INTEGER */ +/* > The number of Givens rotations which took place in this */ +/* > subproblem. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] GIVCOL */ +/* > \verbatim */ +/* > GIVCOL is INTEGER array, dimension ( LDGCOL, 2 ) */ +/* > Each pair of numbers indicates a pair of rows/columns */ +/* > involved in a Givens rotation. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDGCOL */ +/* > \verbatim */ +/* > LDGCOL is INTEGER */ +/* > The leading dimension of GIVCOL, must be at least N. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] GIVNUM */ +/* > \verbatim */ +/* > GIVNUM is DOUBLE PRECISION array, dimension ( LDGNUM, 2 ) */ +/* > Each number indicates the C or S value used in the */ +/* > corresponding Givens rotation. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDGNUM */ +/* > \verbatim */ +/* > LDGNUM is INTEGER */ +/* > The leading dimension of arrays DIFR, POLES and */ +/* > GIVNUM, must be at least K. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] POLES */ +/* > \verbatim */ +/* > POLES is DOUBLE PRECISION array, dimension ( LDGNUM, 2 ) */ +/* > On entry, POLES(1:K, 1) contains the new singular */ +/* > values obtained from solving the secular equation, and */ +/* > POLES(1:K, 2) is an array containing the poles in the secular */ +/* > equation. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DIFL */ +/* > \verbatim */ +/* > DIFL is DOUBLE PRECISION array, dimension ( K ). */ +/* > On entry, DIFL(I) is the distance between I-th updated */ +/* > (undeflated) singular value and the I-th (undeflated) old */ +/* > singular value. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DIFR */ +/* > \verbatim */ +/* > DIFR is DOUBLE PRECISION array, dimension ( LDGNUM, 2 ). */ +/* > On entry, DIFR(I, 1) contains the distances between I-th */ +/* > updated (undeflated) singular value and the I+1-th */ +/* > (undeflated) old singular value. And DIFR(I, 2) is the */ +/* > normalizing factor for the I-th right singular vector. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] Z */ +/* > \verbatim */ +/* > Z is DOUBLE PRECISION array, dimension ( K ) */ +/* > Contain the components of the deflation-adjusted updating row */ +/* > vector. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] K */ +/* > \verbatim */ +/* > K is INTEGER */ +/* > Contains the dimension of the non-deflated matrix, */ +/* > This is the order of the related secular equation. 1 <= K <=N. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] C */ +/* > \verbatim */ +/* > C is DOUBLE PRECISION */ +/* > C contains garbage if SQRE =0 and the C-value of a Givens */ +/* > rotation related to the right null space if SQRE = 1. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] S */ +/* > \verbatim */ +/* > S is DOUBLE PRECISION */ +/* > S contains garbage if SQRE =0 and the S-value of a Givens */ +/* > rotation related to the right null space if SQRE = 1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension ( K ) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit. */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup doubleOTHERcomputational */ + +/* > \par Contributors: */ +/* ================== */ +/* > */ +/* > Ming Gu and Ren-Cang Li, Computer Science Division, University of */ +/* > California at Berkeley, USA \n */ +/* > Osni Marques, LBNL/NERSC, USA \n */ + +/* ===================================================================== */ +/* Subroutine */ int dlals0_(integer *icompq, integer *nl, integer *nr, + integer *sqre, integer *nrhs, doublereal *b, integer *ldb, doublereal + *bx, integer *ldbx, integer *perm, integer *givptr, integer *givcol, + integer *ldgcol, doublereal *givnum, integer *ldgnum, doublereal * + poles, doublereal *difl, doublereal *difr, doublereal *z__, integer * + k, doublereal *c__, doublereal *s, doublereal *work, integer *info) +{ + /* System generated locals */ + integer givcol_dim1, givcol_offset, b_dim1, b_offset, bx_dim1, bx_offset, + difr_dim1, difr_offset, givnum_dim1, givnum_offset, poles_dim1, + poles_offset, i__1, i__2; + doublereal d__1; + + /* Local variables */ + integer i__, j, m, n; + doublereal dj; + integer nlp1; + doublereal temp; + extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *); + extern doublereal dnrm2_(integer *, doublereal *, integer *); + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *); + doublereal diflj, difrj, dsigj; + extern /* Subroutine */ int dgemv_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *, ftnlen), dcopy_(integer *, + doublereal *, integer *, doublereal *, integer *); + extern doublereal dlamc3_(doublereal *, doublereal *); + extern /* Subroutine */ int dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *, ftnlen), dlacpy_(char *, integer *, integer + *, doublereal *, integer *, doublereal *, integer *, ftnlen), + xerbla_(char *, integer *, ftnlen); + doublereal dsigjp; + + +/* -- LAPACK computational routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + bx_dim1 = *ldbx; + bx_offset = 1 + bx_dim1; + bx -= bx_offset; + --perm; + givcol_dim1 = *ldgcol; + givcol_offset = 1 + givcol_dim1; + givcol -= givcol_offset; + difr_dim1 = *ldgnum; + difr_offset = 1 + difr_dim1; + difr -= difr_offset; + poles_dim1 = *ldgnum; + poles_offset = 1 + poles_dim1; + poles -= poles_offset; + givnum_dim1 = *ldgnum; + givnum_offset = 1 + givnum_dim1; + givnum -= givnum_offset; + --difl; + --z__; + --work; + + /* Function Body */ + *info = 0; + n = *nl + *nr + 1; + + if (*icompq < 0 || *icompq > 1) { + *info = -1; + } else if (*nl < 1) { + *info = -2; + } else if (*nr < 1) { + *info = -3; + } else if (*sqre < 0 || *sqre > 1) { + *info = -4; + } else if (*nrhs < 1) { + *info = -5; + } else if (*ldb < n) { + *info = -7; + } else if (*ldbx < n) { + *info = -9; + } else if (*givptr < 0) { + *info = -11; + } else if (*ldgcol < n) { + *info = -13; + } else if (*ldgnum < n) { + *info = -15; + } else if (*k < 1) { + *info = -20; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DLALS0", &i__1, (ftnlen)6); + return 0; + } + + m = n + *sqre; + nlp1 = *nl + 1; + + if (*icompq == 0) { + +/* Apply back orthogonal transformations from the left. */ + +/* Step (1L): apply back the Givens rotations performed. */ + + i__1 = *givptr; + for (i__ = 1; i__ <= i__1; ++i__) { + drot_(nrhs, &b[givcol[i__ + (givcol_dim1 << 1)] + b_dim1], ldb, & + b[givcol[i__ + givcol_dim1] + b_dim1], ldb, &givnum[i__ + + (givnum_dim1 << 1)], &givnum[i__ + givnum_dim1]); +/* L10: */ + } + +/* Step (2L): permute rows of B. */ + + dcopy_(nrhs, &b[nlp1 + b_dim1], ldb, &bx[bx_dim1 + 1], ldbx); + i__1 = n; + for (i__ = 2; i__ <= i__1; ++i__) { + dcopy_(nrhs, &b[perm[i__] + b_dim1], ldb, &bx[i__ + bx_dim1], + ldbx); +/* L20: */ + } + +/* Step (3L): apply the inverse of the left singular vector */ +/* matrix to BX. */ + + if (*k == 1) { + dcopy_(nrhs, &bx[bx_offset], ldbx, &b[b_offset], ldb); + if (z__[1] < 0.) { + dscal_(nrhs, &c_b5, &b[b_offset], ldb); + } + } else { + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + diflj = difl[j]; + dj = poles[j + poles_dim1]; + dsigj = -poles[j + (poles_dim1 << 1)]; + if (j < *k) { + difrj = -difr[j + difr_dim1]; + dsigjp = -poles[j + 1 + (poles_dim1 << 1)]; + } + if (z__[j] == 0. || poles[j + (poles_dim1 << 1)] == 0.) { + work[j] = 0.; + } else { + work[j] = -poles[j + (poles_dim1 << 1)] * z__[j] / diflj / + (poles[j + (poles_dim1 << 1)] + dj); + } + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + if (z__[i__] == 0. || poles[i__ + (poles_dim1 << 1)] == + 0.) { + work[i__] = 0.; + } else { + work[i__] = poles[i__ + (poles_dim1 << 1)] * z__[i__] + / (dlamc3_(&poles[i__ + (poles_dim1 << 1)], & + dsigj) - diflj) / (poles[i__ + (poles_dim1 << + 1)] + dj); + } +/* L30: */ + } + i__2 = *k; + for (i__ = j + 1; i__ <= i__2; ++i__) { + if (z__[i__] == 0. || poles[i__ + (poles_dim1 << 1)] == + 0.) { + work[i__] = 0.; + } else { + work[i__] = poles[i__ + (poles_dim1 << 1)] * z__[i__] + / (dlamc3_(&poles[i__ + (poles_dim1 << 1)], & + dsigjp) + difrj) / (poles[i__ + (poles_dim1 << + 1)] + dj); + } +/* L40: */ + } + work[1] = -1.; + temp = dnrm2_(k, &work[1], &c__1); + dgemv_((char *)"T", k, nrhs, &c_b11, &bx[bx_offset], ldbx, &work[1], & + c__1, &c_b13, &b[j + b_dim1], ldb, (ftnlen)1); + dlascl_((char *)"G", &c__0, &c__0, &temp, &c_b11, &c__1, nrhs, &b[j + + b_dim1], ldb, info, (ftnlen)1); +/* L50: */ + } + } + +/* Move the deflated rows of BX to B also. */ + + if (*k < max(m,n)) { + i__1 = n - *k; + dlacpy_((char *)"A", &i__1, nrhs, &bx[*k + 1 + bx_dim1], ldbx, &b[*k + 1 + + b_dim1], ldb, (ftnlen)1); + } + } else { + +/* Apply back the right orthogonal transformations. */ + +/* Step (1R): apply back the new right singular vector matrix */ +/* to B. */ + + if (*k == 1) { + dcopy_(nrhs, &b[b_offset], ldb, &bx[bx_offset], ldbx); + } else { + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + dsigj = poles[j + (poles_dim1 << 1)]; + if (z__[j] == 0.) { + work[j] = 0.; + } else { + work[j] = -z__[j] / difl[j] / (dsigj + poles[j + + poles_dim1]) / difr[j + (difr_dim1 << 1)]; + } + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + if (z__[j] == 0.) { + work[i__] = 0.; + } else { + d__1 = -poles[i__ + 1 + (poles_dim1 << 1)]; + work[i__] = z__[j] / (dlamc3_(&dsigj, &d__1) - difr[ + i__ + difr_dim1]) / (dsigj + poles[i__ + + poles_dim1]) / difr[i__ + (difr_dim1 << 1)]; + } +/* L60: */ + } + i__2 = *k; + for (i__ = j + 1; i__ <= i__2; ++i__) { + if (z__[j] == 0.) { + work[i__] = 0.; + } else { + d__1 = -poles[i__ + (poles_dim1 << 1)]; + work[i__] = z__[j] / (dlamc3_(&dsigj, &d__1) - difl[ + i__]) / (dsigj + poles[i__ + poles_dim1]) / + difr[i__ + (difr_dim1 << 1)]; + } +/* L70: */ + } + dgemv_((char *)"T", k, nrhs, &c_b11, &b[b_offset], ldb, &work[1], & + c__1, &c_b13, &bx[j + bx_dim1], ldbx, (ftnlen)1); +/* L80: */ + } + } + +/* Step (2R): if SQRE = 1, apply back the rotation that is */ +/* related to the right null space of the subproblem. */ + + if (*sqre == 1) { + dcopy_(nrhs, &b[m + b_dim1], ldb, &bx[m + bx_dim1], ldbx); + drot_(nrhs, &bx[bx_dim1 + 1], ldbx, &bx[m + bx_dim1], ldbx, c__, + s); + } + if (*k < max(m,n)) { + i__1 = n - *k; + dlacpy_((char *)"A", &i__1, nrhs, &b[*k + 1 + b_dim1], ldb, &bx[*k + 1 + + bx_dim1], ldbx, (ftnlen)1); + } + +/* Step (3R): permute rows of B. */ + + dcopy_(nrhs, &bx[bx_dim1 + 1], ldbx, &b[nlp1 + b_dim1], ldb); + if (*sqre == 1) { + dcopy_(nrhs, &bx[m + bx_dim1], ldbx, &b[m + b_dim1], ldb); + } + i__1 = n; + for (i__ = 2; i__ <= i__1; ++i__) { + dcopy_(nrhs, &bx[i__ + bx_dim1], ldbx, &b[perm[i__] + b_dim1], + ldb); +/* L90: */ + } + +/* Step (4R): apply back the Givens rotations performed. */ + + for (i__ = *givptr; i__ >= 1; --i__) { + d__1 = -givnum[i__ + givnum_dim1]; + drot_(nrhs, &b[givcol[i__ + (givcol_dim1 << 1)] + b_dim1], ldb, & + b[givcol[i__ + givcol_dim1] + b_dim1], ldb, &givnum[i__ + + (givnum_dim1 << 1)], &d__1); +/* L100: */ + } + } + + return 0; + +/* End of DLALS0 */ + +} /* dlals0_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dlalsa.cpp b/lib/linalg/dlalsa.cpp new file mode 100644 index 0000000000..7c09a46de3 --- /dev/null +++ b/lib/linalg/dlalsa.cpp @@ -0,0 +1,598 @@ +/* fortran/dlalsa.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static doublereal c_b7 = 1.; +static doublereal c_b8 = 0.; +static integer c__2 = 2; + +/* > \brief \b DLALSA computes the SVD of the coefficient matrix in compact form. Used by sgelsd. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DLALSA + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DLALSA( ICOMPQ, SMLSIZ, N, NRHS, B, LDB, BX, LDBX, U, */ +/* LDU, VT, K, DIFL, DIFR, Z, POLES, GIVPTR, */ +/* GIVCOL, LDGCOL, PERM, GIVNUM, C, S, WORK, */ +/* IWORK, INFO ) */ + +/* .. Scalar Arguments .. */ +/* INTEGER ICOMPQ, INFO, LDB, LDBX, LDGCOL, LDU, N, NRHS, */ +/* $ SMLSIZ */ +/* .. */ +/* .. Array Arguments .. */ +/* INTEGER GIVCOL( LDGCOL, * ), GIVPTR( * ), IWORK( * ), */ +/* $ K( * ), PERM( LDGCOL, * ) */ +/* DOUBLE PRECISION B( LDB, * ), BX( LDBX, * ), C( * ), */ +/* $ DIFL( LDU, * ), DIFR( LDU, * ), */ +/* $ GIVNUM( LDU, * ), POLES( LDU, * ), S( * ), */ +/* $ U( LDU, * ), VT( LDU, * ), WORK( * ), */ +/* $ Z( LDU, * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DLALSA is an itermediate step in solving the least squares problem */ +/* > by computing the SVD of the coefficient matrix in compact form (The */ +/* > singular vectors are computed as products of simple orthorgonal */ +/* > matrices.). */ +/* > */ +/* > If ICOMPQ = 0, DLALSA applies the inverse of the left singular vector */ +/* > matrix of an upper bidiagonal matrix to the right hand side; and if */ +/* > ICOMPQ = 1, DLALSA applies the right singular vector matrix to the */ +/* > right hand side. The singular vector matrices were generated in */ +/* > compact form by DLALSA. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] ICOMPQ */ +/* > \verbatim */ +/* > ICOMPQ is INTEGER */ +/* > Specifies whether the left or the right singular vector */ +/* > matrix is involved. */ +/* > = 0: Left singular vector matrix */ +/* > = 1: Right singular vector matrix */ +/* > \endverbatim */ +/* > */ +/* > \param[in] SMLSIZ */ +/* > \verbatim */ +/* > SMLSIZ is INTEGER */ +/* > The maximum size of the subproblems at the bottom of the */ +/* > computation tree. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The row and column dimensions of the upper bidiagonal matrix. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of columns of B and BX. NRHS must be at least 1. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is DOUBLE PRECISION array, dimension ( LDB, NRHS ) */ +/* > On input, B contains the right hand sides of the least */ +/* > squares problem in rows 1 through M. */ +/* > On output, B contains the solution X in rows 1 through N. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of B in the calling subprogram. */ +/* > LDB must be at least max(1,MAX( M, N ) ). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] BX */ +/* > \verbatim */ +/* > BX is DOUBLE PRECISION array, dimension ( LDBX, NRHS ) */ +/* > On exit, the result of applying the left or right singular */ +/* > vector matrix to B. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDBX */ +/* > \verbatim */ +/* > LDBX is INTEGER */ +/* > The leading dimension of BX. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] U */ +/* > \verbatim */ +/* > U is DOUBLE PRECISION array, dimension ( LDU, SMLSIZ ). */ +/* > On entry, U contains the left singular vector matrices of all */ +/* > subproblems at the bottom level. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDU */ +/* > \verbatim */ +/* > LDU is INTEGER, LDU = > N. */ +/* > The leading dimension of arrays U, VT, DIFL, DIFR, */ +/* > POLES, GIVNUM, and Z. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] VT */ +/* > \verbatim */ +/* > VT is DOUBLE PRECISION array, dimension ( LDU, SMLSIZ+1 ). */ +/* > On entry, VT**T contains the right singular vector matrices of */ +/* > all subproblems at the bottom level. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] K */ +/* > \verbatim */ +/* > K is INTEGER array, dimension ( N ). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DIFL */ +/* > \verbatim */ +/* > DIFL is DOUBLE PRECISION array, dimension ( LDU, NLVL ). */ +/* > where NLVL = INT(log_2 (N/(SMLSIZ+1))) + 1. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DIFR */ +/* > \verbatim */ +/* > DIFR is DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ). */ +/* > On entry, DIFL(*, I) and DIFR(*, 2 * I -1) record */ +/* > distances between singular values on the I-th level and */ +/* > singular values on the (I -1)-th level, and DIFR(*, 2 * I) */ +/* > record the normalizing factors of the right singular vectors */ +/* > matrices of subproblems on I-th level. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] Z */ +/* > \verbatim */ +/* > Z is DOUBLE PRECISION array, dimension ( LDU, NLVL ). */ +/* > On entry, Z(1, I) contains the components of the deflation- */ +/* > adjusted updating row vector for subproblems on the I-th */ +/* > level. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] POLES */ +/* > \verbatim */ +/* > POLES is DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ). */ +/* > On entry, POLES(*, 2 * I -1: 2 * I) contains the new and old */ +/* > singular values involved in the secular equations on the I-th */ +/* > level. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] GIVPTR */ +/* > \verbatim */ +/* > GIVPTR is INTEGER array, dimension ( N ). */ +/* > On entry, GIVPTR( I ) records the number of Givens */ +/* > rotations performed on the I-th problem on the computation */ +/* > tree. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] GIVCOL */ +/* > \verbatim */ +/* > GIVCOL is INTEGER array, dimension ( LDGCOL, 2 * NLVL ). */ +/* > On entry, for each I, GIVCOL(*, 2 * I - 1: 2 * I) records the */ +/* > locations of Givens rotations performed on the I-th level on */ +/* > the computation tree. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDGCOL */ +/* > \verbatim */ +/* > LDGCOL is INTEGER, LDGCOL = > N. */ +/* > The leading dimension of arrays GIVCOL and PERM. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] PERM */ +/* > \verbatim */ +/* > PERM is INTEGER array, dimension ( LDGCOL, NLVL ). */ +/* > On entry, PERM(*, I) records permutations done on the I-th */ +/* > level of the computation tree. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] GIVNUM */ +/* > \verbatim */ +/* > GIVNUM is DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ). */ +/* > On entry, GIVNUM(*, 2 *I -1 : 2 * I) records the C- and S- */ +/* > values of Givens rotations performed on the I-th level on the */ +/* > computation tree. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] C */ +/* > \verbatim */ +/* > C is DOUBLE PRECISION array, dimension ( N ). */ +/* > On entry, if the I-th subproblem is not square, */ +/* > C( I ) contains the C-value of a Givens rotation related to */ +/* > the right null space of the I-th subproblem. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] S */ +/* > \verbatim */ +/* > S is DOUBLE PRECISION array, dimension ( N ). */ +/* > On entry, if the I-th subproblem is not square, */ +/* > S( I ) contains the S-value of a Givens rotation related to */ +/* > the right null space of the I-th subproblem. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (3*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit. */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup doubleOTHERcomputational */ + +/* > \par Contributors: */ +/* ================== */ +/* > */ +/* > Ming Gu and Ren-Cang Li, Computer Science Division, University of */ +/* > California at Berkeley, USA \n */ +/* > Osni Marques, LBNL/NERSC, USA \n */ + +/* ===================================================================== */ +/* Subroutine */ int dlalsa_(integer *icompq, integer *smlsiz, integer *n, + integer *nrhs, doublereal *b, integer *ldb, doublereal *bx, integer * + ldbx, doublereal *u, integer *ldu, doublereal *vt, integer *k, + doublereal *difl, doublereal *difr, doublereal *z__, doublereal * + poles, integer *givptr, integer *givcol, integer *ldgcol, integer * + perm, doublereal *givnum, doublereal *c__, doublereal *s, doublereal * + work, integer *iwork, integer *info) +{ + /* System generated locals */ + integer givcol_dim1, givcol_offset, perm_dim1, perm_offset, b_dim1, + b_offset, bx_dim1, bx_offset, difl_dim1, difl_offset, difr_dim1, + difr_offset, givnum_dim1, givnum_offset, poles_dim1, poles_offset, + u_dim1, u_offset, vt_dim1, vt_offset, z_dim1, z_offset, i__1, + i__2; + + /* Builtin functions */ + integer pow_ii(integer *, integer *); + + /* Local variables */ + integer i__, j, i1, ic, lf, nd, ll, nl, nr, im1, nlf, nrf, lvl, ndb1, + nlp1, lvl2, nrp1, nlvl, sqre; + extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, ftnlen, ftnlen); + integer inode, ndiml, ndimr; + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *), dlals0_(integer *, integer *, integer *, + integer *, integer *, doublereal *, integer *, doublereal *, + integer *, integer *, integer *, integer *, integer *, doublereal + *, integer *, doublereal *, doublereal *, doublereal *, + doublereal *, integer *, doublereal *, doublereal *, doublereal *, + integer *), dlasdt_(integer *, integer *, integer *, integer *, + integer *, integer *, integer *), xerbla_(char *, integer *, + ftnlen); + + +/* -- LAPACK computational routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + bx_dim1 = *ldbx; + bx_offset = 1 + bx_dim1; + bx -= bx_offset; + givnum_dim1 = *ldu; + givnum_offset = 1 + givnum_dim1; + givnum -= givnum_offset; + poles_dim1 = *ldu; + poles_offset = 1 + poles_dim1; + poles -= poles_offset; + z_dim1 = *ldu; + z_offset = 1 + z_dim1; + z__ -= z_offset; + difr_dim1 = *ldu; + difr_offset = 1 + difr_dim1; + difr -= difr_offset; + difl_dim1 = *ldu; + difl_offset = 1 + difl_dim1; + difl -= difl_offset; + vt_dim1 = *ldu; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + --k; + --givptr; + perm_dim1 = *ldgcol; + perm_offset = 1 + perm_dim1; + perm -= perm_offset; + givcol_dim1 = *ldgcol; + givcol_offset = 1 + givcol_dim1; + givcol -= givcol_offset; + --c__; + --s; + --work; + --iwork; + + /* Function Body */ + *info = 0; + + if (*icompq < 0 || *icompq > 1) { + *info = -1; + } else if (*smlsiz < 3) { + *info = -2; + } else if (*n < *smlsiz) { + *info = -3; + } else if (*nrhs < 1) { + *info = -4; + } else if (*ldb < *n) { + *info = -6; + } else if (*ldbx < *n) { + *info = -8; + } else if (*ldu < *n) { + *info = -10; + } else if (*ldgcol < *n) { + *info = -19; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DLALSA", &i__1, (ftnlen)6); + return 0; + } + +/* Book-keeping and setting up the computation tree. */ + + inode = 1; + ndiml = inode + *n; + ndimr = ndiml + *n; + + dlasdt_(n, &nlvl, &nd, &iwork[inode], &iwork[ndiml], &iwork[ndimr], + smlsiz); + +/* The following code applies back the left singular vector factors. */ +/* For applying back the right singular vector factors, go to 50. */ + + if (*icompq == 1) { + goto L50; + } + +/* The nodes on the bottom level of the tree were solved */ +/* by DLASDQ. The corresponding left and right singular vector */ +/* matrices are in explicit form. First apply back the left */ +/* singular vector matrices. */ + + ndb1 = (nd + 1) / 2; + i__1 = nd; + for (i__ = ndb1; i__ <= i__1; ++i__) { + +/* IC : center row of each node */ +/* NL : number of rows of left subproblem */ +/* NR : number of rows of right subproblem */ +/* NLF: starting row of the left subproblem */ +/* NRF: starting row of the right subproblem */ + + i1 = i__ - 1; + ic = iwork[inode + i1]; + nl = iwork[ndiml + i1]; + nr = iwork[ndimr + i1]; + nlf = ic - nl; + nrf = ic + 1; + dgemm_((char *)"T", (char *)"N", &nl, nrhs, &nl, &c_b7, &u[nlf + u_dim1], ldu, &b[nlf + + b_dim1], ldb, &c_b8, &bx[nlf + bx_dim1], ldbx, (ftnlen)1, ( + ftnlen)1); + dgemm_((char *)"T", (char *)"N", &nr, nrhs, &nr, &c_b7, &u[nrf + u_dim1], ldu, &b[nrf + + b_dim1], ldb, &c_b8, &bx[nrf + bx_dim1], ldbx, (ftnlen)1, ( + ftnlen)1); +/* L10: */ + } + +/* Next copy the rows of B that correspond to unchanged rows */ +/* in the bidiagonal matrix to BX. */ + + i__1 = nd; + for (i__ = 1; i__ <= i__1; ++i__) { + ic = iwork[inode + i__ - 1]; + dcopy_(nrhs, &b[ic + b_dim1], ldb, &bx[ic + bx_dim1], ldbx); +/* L20: */ + } + +/* Finally go through the left singular vector matrices of all */ +/* the other subproblems bottom-up on the tree. */ + + j = pow_ii(&c__2, &nlvl); + sqre = 0; + + for (lvl = nlvl; lvl >= 1; --lvl) { + lvl2 = (lvl << 1) - 1; + +/* find the first node LF and last node LL on */ +/* the current level LVL */ + + if (lvl == 1) { + lf = 1; + ll = 1; + } else { + i__1 = lvl - 1; + lf = pow_ii(&c__2, &i__1); + ll = (lf << 1) - 1; + } + i__1 = ll; + for (i__ = lf; i__ <= i__1; ++i__) { + im1 = i__ - 1; + ic = iwork[inode + im1]; + nl = iwork[ndiml + im1]; + nr = iwork[ndimr + im1]; + nlf = ic - nl; + nrf = ic + 1; + --j; + dlals0_(icompq, &nl, &nr, &sqre, nrhs, &bx[nlf + bx_dim1], ldbx, & + b[nlf + b_dim1], ldb, &perm[nlf + lvl * perm_dim1], & + givptr[j], &givcol[nlf + lvl2 * givcol_dim1], ldgcol, & + givnum[nlf + lvl2 * givnum_dim1], ldu, &poles[nlf + lvl2 * + poles_dim1], &difl[nlf + lvl * difl_dim1], &difr[nlf + + lvl2 * difr_dim1], &z__[nlf + lvl * z_dim1], &k[j], &c__[ + j], &s[j], &work[1], info); +/* L30: */ + } +/* L40: */ + } + goto L90; + +/* ICOMPQ = 1: applying back the right singular vector factors. */ + +L50: + +/* First now go through the right singular vector matrices of all */ +/* the tree nodes top-down. */ + + j = 0; + i__1 = nlvl; + for (lvl = 1; lvl <= i__1; ++lvl) { + lvl2 = (lvl << 1) - 1; + +/* Find the first node LF and last node LL on */ +/* the current level LVL. */ + + if (lvl == 1) { + lf = 1; + ll = 1; + } else { + i__2 = lvl - 1; + lf = pow_ii(&c__2, &i__2); + ll = (lf << 1) - 1; + } + i__2 = lf; + for (i__ = ll; i__ >= i__2; --i__) { + im1 = i__ - 1; + ic = iwork[inode + im1]; + nl = iwork[ndiml + im1]; + nr = iwork[ndimr + im1]; + nlf = ic - nl; + nrf = ic + 1; + if (i__ == ll) { + sqre = 0; + } else { + sqre = 1; + } + ++j; + dlals0_(icompq, &nl, &nr, &sqre, nrhs, &b[nlf + b_dim1], ldb, &bx[ + nlf + bx_dim1], ldbx, &perm[nlf + lvl * perm_dim1], & + givptr[j], &givcol[nlf + lvl2 * givcol_dim1], ldgcol, & + givnum[nlf + lvl2 * givnum_dim1], ldu, &poles[nlf + lvl2 * + poles_dim1], &difl[nlf + lvl * difl_dim1], &difr[nlf + + lvl2 * difr_dim1], &z__[nlf + lvl * z_dim1], &k[j], &c__[ + j], &s[j], &work[1], info); +/* L60: */ + } +/* L70: */ + } + +/* The nodes on the bottom level of the tree were solved */ +/* by DLASDQ. The corresponding right singular vector */ +/* matrices are in explicit form. Apply them back. */ + + ndb1 = (nd + 1) / 2; + i__1 = nd; + for (i__ = ndb1; i__ <= i__1; ++i__) { + i1 = i__ - 1; + ic = iwork[inode + i1]; + nl = iwork[ndiml + i1]; + nr = iwork[ndimr + i1]; + nlp1 = nl + 1; + if (i__ == nd) { + nrp1 = nr; + } else { + nrp1 = nr + 1; + } + nlf = ic - nl; + nrf = ic + 1; + dgemm_((char *)"T", (char *)"N", &nlp1, nrhs, &nlp1, &c_b7, &vt[nlf + vt_dim1], ldu, & + b[nlf + b_dim1], ldb, &c_b8, &bx[nlf + bx_dim1], ldbx, ( + ftnlen)1, (ftnlen)1); + dgemm_((char *)"T", (char *)"N", &nrp1, nrhs, &nrp1, &c_b7, &vt[nrf + vt_dim1], ldu, & + b[nrf + b_dim1], ldb, &c_b8, &bx[nrf + bx_dim1], ldbx, ( + ftnlen)1, (ftnlen)1); +/* L80: */ + } + +L90: + + return 0; + +/* End of DLALSA */ + +} /* dlalsa_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dlalsd.cpp b/lib/linalg/dlalsd.cpp new file mode 100644 index 0000000000..07c3ca57f0 --- /dev/null +++ b/lib/linalg/dlalsd.cpp @@ -0,0 +1,632 @@ +/* fortran/dlalsd.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static doublereal c_b6 = 0.; +static integer c__0 = 0; +static doublereal c_b11 = 1.; + +/* > \brief \b DLALSD uses the singular value decomposition of A to solve the least squares problem. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DLALSD + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DLALSD( UPLO, SMLSIZ, N, NRHS, D, E, B, LDB, RCOND, */ +/* RANK, WORK, IWORK, INFO ) */ + +/* .. Scalar Arguments .. */ +/* CHARACTER UPLO */ +/* INTEGER INFO, LDB, N, NRHS, RANK, SMLSIZ */ +/* DOUBLE PRECISION RCOND */ +/* .. */ +/* .. Array Arguments .. */ +/* INTEGER IWORK( * ) */ +/* DOUBLE PRECISION B( LDB, * ), D( * ), E( * ), WORK( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DLALSD uses the singular value decomposition of A to solve the least */ +/* > squares problem of finding X to minimize the Euclidean norm of each */ +/* > column of A*X-B, where A is N-by-N upper bidiagonal, and X and B */ +/* > are N-by-NRHS. The solution X overwrites B. */ +/* > */ +/* > The singular values of A smaller than RCOND times the largest */ +/* > singular value are treated as zero in solving the least squares */ +/* > problem; in this case a minimum norm solution is returned. */ +/* > The actual singular values are returned in D in ascending order. */ +/* > */ +/* > This code makes very mild assumptions about floating point */ +/* > arithmetic. It will work on machines with a guard digit in */ +/* > add/subtract, or on those binary machines without guard digits */ +/* > which subtract like the Cray XMP, Cray YMP, Cray C 90, or Cray 2. */ +/* > It could conceivably fail on hexadecimal or decimal machines */ +/* > without guard digits, but we know of none. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': D and E define an upper bidiagonal matrix. */ +/* > = 'L': D and E define a lower bidiagonal matrix. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] SMLSIZ */ +/* > \verbatim */ +/* > SMLSIZ is INTEGER */ +/* > The maximum size of the subproblems at the bottom of the */ +/* > computation tree. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The dimension of the bidiagonal matrix. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of columns of B. NRHS must be at least 1. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] D */ +/* > \verbatim */ +/* > D is DOUBLE PRECISION array, dimension (N) */ +/* > On entry D contains the main diagonal of the bidiagonal */ +/* > matrix. On exit, if INFO = 0, D contains its singular values. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] E */ +/* > \verbatim */ +/* > E is DOUBLE PRECISION array, dimension (N-1) */ +/* > Contains the super-diagonal entries of the bidiagonal matrix. */ +/* > On exit, E has been destroyed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* > On input, B contains the right hand sides of the least */ +/* > squares problem. On output, B contains the solution X. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of B in the calling subprogram. */ +/* > LDB must be at least max(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] RCOND */ +/* > \verbatim */ +/* > RCOND is DOUBLE PRECISION */ +/* > The singular values of A less than or equal to RCOND times */ +/* > the largest singular value are treated as zero in solving */ +/* > the least squares problem. If RCOND is negative, */ +/* > machine precision is used instead. */ +/* > For example, if diag(S)*X=B were the least squares problem, */ +/* > where diag(S) is a diagonal matrix of singular values, the */ +/* > solution would be X(i) = B(i) / S(i) if S(i) is greater than */ +/* > RCOND*max(S), and X(i) = 0 if S(i) is less than or equal to */ +/* > RCOND*max(S). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RANK */ +/* > \verbatim */ +/* > RANK is INTEGER */ +/* > The number of singular values of A greater than RCOND times */ +/* > the largest singular value. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension at least */ +/* > (9*N + 2*N*SMLSIZ + 8*N*NLVL + N*NRHS + (SMLSIZ+1)**2), */ +/* > where NLVL = max(0, INT(log_2 (N/(SMLSIZ+1))) + 1). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension at least */ +/* > (3*N*NLVL + 11*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit. */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > > 0: The algorithm failed to compute a singular value while */ +/* > working on the submatrix lying in rows and columns */ +/* > INFO/(N+1) through MOD(INFO,N+1). */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup doubleOTHERcomputational */ + +/* > \par Contributors: */ +/* ================== */ +/* > */ +/* > Ming Gu and Ren-Cang Li, Computer Science Division, University of */ +/* > California at Berkeley, USA \n */ +/* > Osni Marques, LBNL/NERSC, USA \n */ + +/* ===================================================================== */ +/* Subroutine */ int dlalsd_(char *uplo, integer *smlsiz, integer *n, integer + *nrhs, doublereal *d__, doublereal *e, doublereal *b, integer *ldb, + doublereal *rcond, integer *rank, doublereal *work, integer *iwork, + integer *info, ftnlen uplo_len) +{ + /* System generated locals */ + integer b_dim1, b_offset, i__1, i__2; + doublereal d__1; + + /* Builtin functions */ + double log(doublereal), d_sign(doublereal *, doublereal *); + + /* Local variables */ + integer c__, i__, j, k; + doublereal r__; + integer s, u, z__; + doublereal cs; + integer bx; + doublereal sn; + integer st, vt, nm1, st1; + doublereal eps; + integer iwk; + doublereal tol; + integer difl, difr; + doublereal rcnd; + integer perm, nsub; + extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *); + integer nlvl, sqre, bxst; + extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, ftnlen, ftnlen), + dcopy_(integer *, doublereal *, integer *, doublereal *, integer + *); + integer poles, sizei, nsize, nwork, icmpq1, icmpq2; + extern doublereal dlamch_(char *, ftnlen); + extern /* Subroutine */ int dlasda_(integer *, integer *, integer *, + integer *, doublereal *, doublereal *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *, doublereal *, + doublereal *, integer *, integer *, integer *, integer *, + doublereal *, doublereal *, doublereal *, doublereal *, integer *, + integer *), dlalsa_(integer *, integer *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + doublereal *, doublereal *, integer *, integer *, integer *, + integer *, doublereal *, doublereal *, doublereal *, doublereal *, + integer *, integer *), dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *, ftnlen); + extern integer idamax_(integer *, doublereal *, integer *); + extern /* Subroutine */ int dlasdq_(char *, integer *, integer *, integer + *, integer *, integer *, doublereal *, doublereal *, doublereal *, + integer *, doublereal *, integer *, doublereal *, integer *, + doublereal *, integer *, ftnlen), dlacpy_(char *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *, + ftnlen), dlartg_(doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *), dlaset_(char *, integer *, integer *, + doublereal *, doublereal *, doublereal *, integer *, ftnlen), + xerbla_(char *, integer *, ftnlen); + integer givcol; + extern doublereal dlanst_(char *, integer *, doublereal *, doublereal *, + ftnlen); + extern /* Subroutine */ int dlasrt_(char *, integer *, doublereal *, + integer *, ftnlen); + doublereal orgnrm; + integer givnum, givptr, smlszp; + + +/* -- LAPACK computational routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + --d__; + --e; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + --work; + --iwork; + + /* Function Body */ + *info = 0; + + if (*n < 0) { + *info = -3; + } else if (*nrhs < 1) { + *info = -4; + } else if (*ldb < 1 || *ldb < *n) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DLALSD", &i__1, (ftnlen)6); + return 0; + } + + eps = dlamch_((char *)"Epsilon", (ftnlen)7); + +/* Set up the tolerance. */ + + if (*rcond <= 0. || *rcond >= 1.) { + rcnd = eps; + } else { + rcnd = *rcond; + } + + *rank = 0; + +/* Quick return if possible. */ + + if (*n == 0) { + return 0; + } else if (*n == 1) { + if (d__[1] == 0.) { + dlaset_((char *)"A", &c__1, nrhs, &c_b6, &c_b6, &b[b_offset], ldb, ( + ftnlen)1); + } else { + *rank = 1; + dlascl_((char *)"G", &c__0, &c__0, &d__[1], &c_b11, &c__1, nrhs, &b[ + b_offset], ldb, info, (ftnlen)1); + d__[1] = abs(d__[1]); + } + return 0; + } + +/* Rotate the matrix if it is lower bidiagonal. */ + + if (*(unsigned char *)uplo == 'L') { + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + dlartg_(&d__[i__], &e[i__], &cs, &sn, &r__); + d__[i__] = r__; + e[i__] = sn * d__[i__ + 1]; + d__[i__ + 1] = cs * d__[i__ + 1]; + if (*nrhs == 1) { + drot_(&c__1, &b[i__ + b_dim1], &c__1, &b[i__ + 1 + b_dim1], & + c__1, &cs, &sn); + } else { + work[(i__ << 1) - 1] = cs; + work[i__ * 2] = sn; + } +/* L10: */ + } + if (*nrhs > 1) { + i__1 = *nrhs; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = *n - 1; + for (j = 1; j <= i__2; ++j) { + cs = work[(j << 1) - 1]; + sn = work[j * 2]; + drot_(&c__1, &b[j + i__ * b_dim1], &c__1, &b[j + 1 + i__ * + b_dim1], &c__1, &cs, &sn); +/* L20: */ + } +/* L30: */ + } + } + } + +/* Scale. */ + + nm1 = *n - 1; + orgnrm = dlanst_((char *)"M", n, &d__[1], &e[1], (ftnlen)1); + if (orgnrm == 0.) { + dlaset_((char *)"A", n, nrhs, &c_b6, &c_b6, &b[b_offset], ldb, (ftnlen)1); + return 0; + } + + dlascl_((char *)"G", &c__0, &c__0, &orgnrm, &c_b11, n, &c__1, &d__[1], n, info, ( + ftnlen)1); + dlascl_((char *)"G", &c__0, &c__0, &orgnrm, &c_b11, &nm1, &c__1, &e[1], &nm1, + info, (ftnlen)1); + +/* If N is smaller than the minimum divide size SMLSIZ, then solve */ +/* the problem with another solver. */ + + if (*n <= *smlsiz) { + nwork = *n * *n + 1; + dlaset_((char *)"A", n, n, &c_b6, &c_b11, &work[1], n, (ftnlen)1); + dlasdq_((char *)"U", &c__0, n, n, &c__0, nrhs, &d__[1], &e[1], &work[1], n, & + work[1], n, &b[b_offset], ldb, &work[nwork], info, (ftnlen)1); + if (*info != 0) { + return 0; + } + tol = rcnd * (d__1 = d__[idamax_(n, &d__[1], &c__1)], abs(d__1)); + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if (d__[i__] <= tol) { + dlaset_((char *)"A", &c__1, nrhs, &c_b6, &c_b6, &b[i__ + b_dim1], ldb, + (ftnlen)1); + } else { + dlascl_((char *)"G", &c__0, &c__0, &d__[i__], &c_b11, &c__1, nrhs, &b[ + i__ + b_dim1], ldb, info, (ftnlen)1); + ++(*rank); + } +/* L40: */ + } + dgemm_((char *)"T", (char *)"N", n, nrhs, n, &c_b11, &work[1], n, &b[b_offset], ldb, & + c_b6, &work[nwork], n, (ftnlen)1, (ftnlen)1); + dlacpy_((char *)"A", n, nrhs, &work[nwork], n, &b[b_offset], ldb, (ftnlen)1); + +/* Unscale. */ + + dlascl_((char *)"G", &c__0, &c__0, &c_b11, &orgnrm, n, &c__1, &d__[1], n, + info, (ftnlen)1); + dlasrt_((char *)"D", n, &d__[1], info, (ftnlen)1); + dlascl_((char *)"G", &c__0, &c__0, &orgnrm, &c_b11, n, nrhs, &b[b_offset], + ldb, info, (ftnlen)1); + + return 0; + } + +/* Book-keeping and setting up some constants. */ + + nlvl = (integer) (log((doublereal) (*n) / (doublereal) (*smlsiz + 1)) / + log(2.)) + 1; + + smlszp = *smlsiz + 1; + + u = 1; + vt = *smlsiz * *n + 1; + difl = vt + smlszp * *n; + difr = difl + nlvl * *n; + z__ = difr + (nlvl * *n << 1); + c__ = z__ + nlvl * *n; + s = c__ + *n; + poles = s + *n; + givnum = poles + (nlvl << 1) * *n; + bx = givnum + (nlvl << 1) * *n; + nwork = bx + *n * *nrhs; + + sizei = *n + 1; + k = sizei + *n; + givptr = k + *n; + perm = givptr + *n; + givcol = perm + nlvl * *n; + iwk = givcol + (nlvl * *n << 1); + + st = 1; + sqre = 0; + icmpq1 = 1; + icmpq2 = 0; + nsub = 0; + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if ((d__1 = d__[i__], abs(d__1)) < eps) { + d__[i__] = d_sign(&eps, &d__[i__]); + } +/* L50: */ + } + + i__1 = nm1; + for (i__ = 1; i__ <= i__1; ++i__) { + if ((d__1 = e[i__], abs(d__1)) < eps || i__ == nm1) { + ++nsub; + iwork[nsub] = st; + +/* Subproblem found. First determine its size and then */ +/* apply divide and conquer on it. */ + + if (i__ < nm1) { + +/* A subproblem with E(I) small for I < NM1. */ + + nsize = i__ - st + 1; + iwork[sizei + nsub - 1] = nsize; + } else if ((d__1 = e[i__], abs(d__1)) >= eps) { + +/* A subproblem with E(NM1) not too small but I = NM1. */ + + nsize = *n - st + 1; + iwork[sizei + nsub - 1] = nsize; + } else { + +/* A subproblem with E(NM1) small. This implies an */ +/* 1-by-1 subproblem at D(N), which is not solved */ +/* explicitly. */ + + nsize = i__ - st + 1; + iwork[sizei + nsub - 1] = nsize; + ++nsub; + iwork[nsub] = *n; + iwork[sizei + nsub - 1] = 1; + dcopy_(nrhs, &b[*n + b_dim1], ldb, &work[bx + nm1], n); + } + st1 = st - 1; + if (nsize == 1) { + +/* This is a 1-by-1 subproblem and is not solved */ +/* explicitly. */ + + dcopy_(nrhs, &b[st + b_dim1], ldb, &work[bx + st1], n); + } else if (nsize <= *smlsiz) { + +/* This is a small subproblem and is solved by DLASDQ. */ + + dlaset_((char *)"A", &nsize, &nsize, &c_b6, &c_b11, &work[vt + st1], + n, (ftnlen)1); + dlasdq_((char *)"U", &c__0, &nsize, &nsize, &c__0, nrhs, &d__[st], &e[ + st], &work[vt + st1], n, &work[nwork], n, &b[st + + b_dim1], ldb, &work[nwork], info, (ftnlen)1); + if (*info != 0) { + return 0; + } + dlacpy_((char *)"A", &nsize, nrhs, &b[st + b_dim1], ldb, &work[bx + + st1], n, (ftnlen)1); + } else { + +/* A large problem. Solve it using divide and conquer. */ + + dlasda_(&icmpq1, smlsiz, &nsize, &sqre, &d__[st], &e[st], & + work[u + st1], n, &work[vt + st1], &iwork[k + st1], & + work[difl + st1], &work[difr + st1], &work[z__ + st1], + &work[poles + st1], &iwork[givptr + st1], &iwork[ + givcol + st1], n, &iwork[perm + st1], &work[givnum + + st1], &work[c__ + st1], &work[s + st1], &work[nwork], + &iwork[iwk], info); + if (*info != 0) { + return 0; + } + bxst = bx + st1; + dlalsa_(&icmpq2, smlsiz, &nsize, nrhs, &b[st + b_dim1], ldb, & + work[bxst], n, &work[u + st1], n, &work[vt + st1], & + iwork[k + st1], &work[difl + st1], &work[difr + st1], + &work[z__ + st1], &work[poles + st1], &iwork[givptr + + st1], &iwork[givcol + st1], n, &iwork[perm + st1], & + work[givnum + st1], &work[c__ + st1], &work[s + st1], + &work[nwork], &iwork[iwk], info); + if (*info != 0) { + return 0; + } + } + st = i__ + 1; + } +/* L60: */ + } + +/* Apply the singular values and treat the tiny ones as zero. */ + + tol = rcnd * (d__1 = d__[idamax_(n, &d__[1], &c__1)], abs(d__1)); + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Some of the elements in D can be negative because 1-by-1 */ +/* subproblems were not solved explicitly. */ + + if ((d__1 = d__[i__], abs(d__1)) <= tol) { + dlaset_((char *)"A", &c__1, nrhs, &c_b6, &c_b6, &work[bx + i__ - 1], n, ( + ftnlen)1); + } else { + ++(*rank); + dlascl_((char *)"G", &c__0, &c__0, &d__[i__], &c_b11, &c__1, nrhs, &work[ + bx + i__ - 1], n, info, (ftnlen)1); + } + d__[i__] = (d__1 = d__[i__], abs(d__1)); +/* L70: */ + } + +/* Now apply back the right singular vectors. */ + + icmpq2 = 1; + i__1 = nsub; + for (i__ = 1; i__ <= i__1; ++i__) { + st = iwork[i__]; + st1 = st - 1; + nsize = iwork[sizei + i__ - 1]; + bxst = bx + st1; + if (nsize == 1) { + dcopy_(nrhs, &work[bxst], n, &b[st + b_dim1], ldb); + } else if (nsize <= *smlsiz) { + dgemm_((char *)"T", (char *)"N", &nsize, nrhs, &nsize, &c_b11, &work[vt + st1], n, + &work[bxst], n, &c_b6, &b[st + b_dim1], ldb, (ftnlen)1, ( + ftnlen)1); + } else { + dlalsa_(&icmpq2, smlsiz, &nsize, nrhs, &work[bxst], n, &b[st + + b_dim1], ldb, &work[u + st1], n, &work[vt + st1], &iwork[ + k + st1], &work[difl + st1], &work[difr + st1], &work[z__ + + st1], &work[poles + st1], &iwork[givptr + st1], &iwork[ + givcol + st1], n, &iwork[perm + st1], &work[givnum + st1], + &work[c__ + st1], &work[s + st1], &work[nwork], &iwork[ + iwk], info); + if (*info != 0) { + return 0; + } + } +/* L80: */ + } + +/* Unscale and sort the singular values. */ + + dlascl_((char *)"G", &c__0, &c__0, &c_b11, &orgnrm, n, &c__1, &d__[1], n, info, ( + ftnlen)1); + dlasrt_((char *)"D", n, &d__[1], info, (ftnlen)1); + dlascl_((char *)"G", &c__0, &c__0, &orgnrm, &c_b11, n, nrhs, &b[b_offset], ldb, + info, (ftnlen)1); + + return 0; + +/* End of DLALSD */ + +} /* dlalsd_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dlamch.cpp b/lib/linalg/dlamch.cpp new file mode 100644 index 0000000000..3d616d95d2 --- /dev/null +++ b/lib/linalg/dlamch.cpp @@ -0,0 +1,45 @@ + +#include +#include + +extern "C" { + +#include "lmp_f2c.h" + +// undefine conflicting f2c macros +#undef min +#undef max + +doublereal dlamch_(const char *cmach) +{ + if (!cmach) return 0.0; + char select = toupper(*cmach); + + // BLAS assumes rounding not truncation => epsilon is half + const double eps = 0.5 * std::numeric_limits::epsilon(); + if (select == 'E') return eps; + + double min = std::numeric_limits::min(); + const double max = std::numeric_limits::max(); + double small = 1.0 / max; + if (small >= min) min = small * (1.0 + eps); + if (select == 'S') return min; + + const double radix = std::numeric_limits::radix; + if (select == 'B') return radix; + + if (select == 'P') return eps * radix; + + if (select == 'N') return std::numeric_limits::digits; + + if (select == 'M') return std::numeric_limits::min_exponent; + + if (select == 'U') return min; + + if (select == 'L') return std::numeric_limits::max_exponent; + + if (select == 'O') return max; + + return 0.0; +} +} diff --git a/lib/linalg/dlamrg.cpp b/lib/linalg/dlamrg.cpp new file mode 100644 index 0000000000..581a64fba7 --- /dev/null +++ b/lib/linalg/dlamrg.cpp @@ -0,0 +1,206 @@ +/* fortran/dlamrg.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* > \brief \b DLAMRG creates a permutation list to merge the entries of two independently sorted sets into a +single set sorted in ascending order. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DLAMRG + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DLAMRG( N1, N2, A, DTRD1, DTRD2, INDEX ) */ + +/* .. Scalar Arguments .. */ +/* INTEGER DTRD1, DTRD2, N1, N2 */ +/* .. */ +/* .. Array Arguments .. */ +/* INTEGER INDEX( * ) */ +/* DOUBLE PRECISION A( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DLAMRG will create a permutation list which will merge the elements */ +/* > of A (which is composed of two independently sorted sets) into a */ +/* > single set which is sorted in ascending order. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] N1 */ +/* > \verbatim */ +/* > N1 is INTEGER */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N2 */ +/* > \verbatim */ +/* > N2 is INTEGER */ +/* > These arguments contain the respective lengths of the two */ +/* > sorted lists to be merged. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (N1+N2) */ +/* > The first N1 elements of A contain a list of numbers which */ +/* > are sorted in either ascending or descending order. Likewise */ +/* > for the final N2 elements. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DTRD1 */ +/* > \verbatim */ +/* > DTRD1 is INTEGER */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DTRD2 */ +/* > \verbatim */ +/* > DTRD2 is INTEGER */ +/* > These are the strides to be taken through the array A. */ +/* > Allowable strides are 1 and -1. They indicate whether a */ +/* > subset of A is sorted in ascending (DTRDx = 1) or descending */ +/* > (DTRDx = -1) order. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INDEX */ +/* > \verbatim */ +/* > INDEX is INTEGER array, dimension (N1+N2) */ +/* > On exit this array will contain a permutation such that */ +/* > if B( I ) = A( INDEX( I ) ) for I=1,N1+N2, then B will be */ +/* > sorted in ascending order. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup auxOTHERcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int dlamrg_(integer *n1, integer *n2, doublereal *a, integer + *dtrd1, integer *dtrd2, integer *index) +{ + /* System generated locals */ + integer i__1; + + /* Local variables */ + integer i__, ind1, ind2, n1sv, n2sv; + + +/* -- LAPACK computational routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments */ + --index; + --a; + + /* Function Body */ + n1sv = *n1; + n2sv = *n2; + if (*dtrd1 > 0) { + ind1 = 1; + } else { + ind1 = *n1; + } + if (*dtrd2 > 0) { + ind2 = *n1 + 1; + } else { + ind2 = *n1 + *n2; + } + i__ = 1; +/* while ( (N1SV > 0) & (N2SV > 0) ) */ +L10: + if (n1sv > 0 && n2sv > 0) { + if (a[ind1] <= a[ind2]) { + index[i__] = ind1; + ++i__; + ind1 += *dtrd1; + --n1sv; + } else { + index[i__] = ind2; + ++i__; + ind2 += *dtrd2; + --n2sv; + } + goto L10; + } +/* end while */ + if (n1sv == 0) { + i__1 = n2sv; + for (n1sv = 1; n1sv <= i__1; ++n1sv) { + index[i__] = ind2; + ++i__; + ind2 += *dtrd2; +/* L20: */ + } + } else { +/* N2SV .EQ. 0 */ + i__1 = n1sv; + for (n2sv = 1; n2sv <= i__1; ++n2sv) { + index[i__] = ind1; + ++i__; + ind1 += *dtrd1; +/* L30: */ + } + } + + return 0; + +/* End of DLAMRG */ + +} /* dlamrg_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dlange.cpp b/lib/linalg/dlange.cpp new file mode 100644 index 0000000000..1ccc6fae2b --- /dev/null +++ b/lib/linalg/dlange.cpp @@ -0,0 +1,277 @@ +/* fortran/dlange.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +/* > \brief \b DLANGE returns the value of the 1-norm, Frobenius norm, infinity-norm, or the largest absolute +value of any element of a general rectangular matrix. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DLANGE + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* DOUBLE PRECISION FUNCTION DLANGE( NORM, M, N, A, LDA, WORK ) */ + +/* .. Scalar Arguments .. */ +/* CHARACTER NORM */ +/* INTEGER LDA, M, N */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION A( LDA, * ), WORK( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DLANGE returns the value of the one norm, or the Frobenius norm, or */ +/* > the infinity norm, or the element of largest absolute value of a */ +/* > real matrix A. */ +/* > \endverbatim */ +/* > */ +/* > \return DLANGE */ +/* > \verbatim */ +/* > */ +/* > DLANGE = ( max(abs(A(i,j))), NORM = 'M' or 'm' */ +/* > ( */ +/* > ( norm1(A), NORM = '1', 'O' or 'o' */ +/* > ( */ +/* > ( normI(A), NORM = 'I' or 'i' */ +/* > ( */ +/* > ( normF(A), NORM = 'F', 'f', 'E' or 'e' */ +/* > */ +/* > where norm1 denotes the one norm of a matrix (maximum column sum), */ +/* > normI denotes the infinity norm of a matrix (maximum row sum) and */ +/* > normF denotes the Frobenius norm of a matrix (square root of sum of */ +/* > squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] NORM */ +/* > \verbatim */ +/* > NORM is CHARACTER*1 */ +/* > Specifies the value to be returned in DLANGE as described */ +/* > above. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. When M = 0, */ +/* > DLANGE is set to zero. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. N >= 0. When N = 0, */ +/* > DLANGE is set to zero. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > The m by n matrix A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= max(M,1). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)), */ +/* > where LWORK >= M when NORM = 'I'; otherwise, WORK is not */ +/* > referenced. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup doubleGEauxiliary */ + +/* ===================================================================== */ +doublereal dlange_(char *norm, integer *m, integer *n, doublereal *a, integer + *lda, doublereal *work, ftnlen norm_len) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + doublereal ret_val, d__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + integer i__, j; + doublereal sum, temp, scale; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + doublereal value; + extern logical disnan_(doublereal *); + extern /* Subroutine */ int dlassq_(integer *, doublereal *, integer *, + doublereal *, doublereal *); + + +/* -- LAPACK auxiliary routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --work; + + /* Function Body */ + if (min(*m,*n) == 0) { + value = 0.; + } else if (lsame_(norm, (char *)"M", (ftnlen)1, (ftnlen)1)) { + +/* Find max(abs(A(i,j))). */ + + value = 0.; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = (d__1 = a[i__ + j * a_dim1], abs(d__1)); + if (value < temp || disnan_(&temp)) { + value = temp; + } +/* L10: */ + } +/* L20: */ + } + } else if (lsame_(norm, (char *)"O", (ftnlen)1, (ftnlen)1) || *(unsigned char *) + norm == '1') { + +/* Find norm1(A). */ + + value = 0.; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = 0.; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + sum += (d__1 = a[i__ + j * a_dim1], abs(d__1)); +/* L30: */ + } + if (value < sum || disnan_(&sum)) { + value = sum; + } +/* L40: */ + } + } else if (lsame_(norm, (char *)"I", (ftnlen)1, (ftnlen)1)) { + +/* Find normI(A). */ + + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] = 0.; +/* L50: */ + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + work[i__] += (d__1 = a[i__ + j * a_dim1], abs(d__1)); +/* L60: */ + } +/* L70: */ + } + value = 0.; + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + temp = work[i__]; + if (value < temp || disnan_(&temp)) { + value = temp; + } +/* L80: */ + } + } else if (lsame_(norm, (char *)"F", (ftnlen)1, (ftnlen)1) || lsame_(norm, (char *)"E", ( + ftnlen)1, (ftnlen)1)) { + +/* Find normF(A). */ + + scale = 0.; + sum = 1.; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + dlassq_(m, &a[j * a_dim1 + 1], &c__1, &scale, &sum); +/* L90: */ + } + value = scale * sqrt(sum); + } + + ret_val = value; + return ret_val; + +/* End of DLANGE */ + +} /* dlange_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dlanst.cpp b/lib/linalg/dlanst.cpp new file mode 100644 index 0000000000..8a71bcd12d --- /dev/null +++ b/lib/linalg/dlanst.cpp @@ -0,0 +1,239 @@ +/* fortran/dlanst.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +/* > \brief \b DLANST returns the value of the 1-norm, or the Frobenius norm, or the infinity norm, or the ele +ment of largest absolute value of a real symmetric tridiagonal matrix. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DLANST + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* DOUBLE PRECISION FUNCTION DLANST( NORM, N, D, E ) */ + +/* .. Scalar Arguments .. */ +/* CHARACTER NORM */ +/* INTEGER N */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION D( * ), E( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DLANST returns the value of the one norm, or the Frobenius norm, or */ +/* > the infinity norm, or the element of largest absolute value of a */ +/* > real symmetric tridiagonal matrix A. */ +/* > \endverbatim */ +/* > */ +/* > \return DLANST */ +/* > \verbatim */ +/* > */ +/* > DLANST = ( max(abs(A(i,j))), NORM = 'M' or 'm' */ +/* > ( */ +/* > ( norm1(A), NORM = '1', 'O' or 'o' */ +/* > ( */ +/* > ( normI(A), NORM = 'I' or 'i' */ +/* > ( */ +/* > ( normF(A), NORM = 'F', 'f', 'E' or 'e' */ +/* > */ +/* > where norm1 denotes the one norm of a matrix (maximum column sum), */ +/* > normI denotes the infinity norm of a matrix (maximum row sum) and */ +/* > normF denotes the Frobenius norm of a matrix (square root of sum of */ +/* > squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] NORM */ +/* > \verbatim */ +/* > NORM is CHARACTER*1 */ +/* > Specifies the value to be returned in DLANST as described */ +/* > above. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. When N = 0, DLANST is */ +/* > set to zero. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] D */ +/* > \verbatim */ +/* > D is DOUBLE PRECISION array, dimension (N) */ +/* > The diagonal elements of A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] E */ +/* > \verbatim */ +/* > E is DOUBLE PRECISION array, dimension (N-1) */ +/* > The (n-1) sub-diagonal or super-diagonal elements of A. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup OTHERauxiliary */ + +/* ===================================================================== */ +doublereal dlanst_(char *norm, integer *n, doublereal *d__, doublereal *e, + ftnlen norm_len) +{ + /* System generated locals */ + integer i__1; + doublereal ret_val, d__1, d__2, d__3; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + integer i__; + doublereal sum, scale; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + doublereal anorm; + extern logical disnan_(doublereal *); + extern /* Subroutine */ int dlassq_(integer *, doublereal *, integer *, + doublereal *, doublereal *); + + +/* -- LAPACK auxiliary routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments */ + --e; + --d__; + + /* Function Body */ + if (*n <= 0) { + anorm = 0.; + } else if (lsame_(norm, (char *)"M", (ftnlen)1, (ftnlen)1)) { + +/* Find max(abs(A(i,j))). */ + + anorm = (d__1 = d__[*n], abs(d__1)); + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + sum = (d__1 = d__[i__], abs(d__1)); + if (anorm < sum || disnan_(&sum)) { + anorm = sum; + } + sum = (d__1 = e[i__], abs(d__1)); + if (anorm < sum || disnan_(&sum)) { + anorm = sum; + } +/* L10: */ + } + } else if (lsame_(norm, (char *)"O", (ftnlen)1, (ftnlen)1) || *(unsigned char *) + norm == '1' || lsame_(norm, (char *)"I", (ftnlen)1, (ftnlen)1)) { + +/* Find norm1(A). */ + + if (*n == 1) { + anorm = abs(d__[1]); + } else { + anorm = abs(d__[1]) + abs(e[1]); + sum = (d__1 = e[*n - 1], abs(d__1)) + (d__2 = d__[*n], abs(d__2)); + if (anorm < sum || disnan_(&sum)) { + anorm = sum; + } + i__1 = *n - 1; + for (i__ = 2; i__ <= i__1; ++i__) { + sum = (d__1 = d__[i__], abs(d__1)) + (d__2 = e[i__], abs(d__2) + ) + (d__3 = e[i__ - 1], abs(d__3)); + if (anorm < sum || disnan_(&sum)) { + anorm = sum; + } +/* L20: */ + } + } + } else if (lsame_(norm, (char *)"F", (ftnlen)1, (ftnlen)1) || lsame_(norm, (char *)"E", ( + ftnlen)1, (ftnlen)1)) { + +/* Find normF(A). */ + + scale = 0.; + sum = 1.; + if (*n > 1) { + i__1 = *n - 1; + dlassq_(&i__1, &e[1], &c__1, &scale, &sum); + sum *= 2; + } + dlassq_(n, &d__[1], &c__1, &scale, &sum); + anorm = scale * sqrt(sum); + } + + ret_val = anorm; + return ret_val; + +/* End of DLANST */ + +} /* dlanst_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dlansy.cpp b/lib/linalg/dlansy.cpp new file mode 100644 index 0000000000..1c5a1fa24c --- /dev/null +++ b/lib/linalg/dlansy.cpp @@ -0,0 +1,317 @@ +/* fortran/dlansy.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +/* > \brief \b DLANSY returns the value of the 1-norm, or the Frobenius norm, or the infinity norm, or the ele +ment of largest absolute value of a real symmetric matrix. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DLANSY + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* DOUBLE PRECISION FUNCTION DLANSY( NORM, UPLO, N, A, LDA, WORK ) */ + +/* .. Scalar Arguments .. */ +/* CHARACTER NORM, UPLO */ +/* INTEGER LDA, N */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION A( LDA, * ), WORK( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DLANSY returns the value of the one norm, or the Frobenius norm, or */ +/* > the infinity norm, or the element of largest absolute value of a */ +/* > real symmetric matrix A. */ +/* > \endverbatim */ +/* > */ +/* > \return DLANSY */ +/* > \verbatim */ +/* > */ +/* > DLANSY = ( max(abs(A(i,j))), NORM = 'M' or 'm' */ +/* > ( */ +/* > ( norm1(A), NORM = '1', 'O' or 'o' */ +/* > ( */ +/* > ( normI(A), NORM = 'I' or 'i' */ +/* > ( */ +/* > ( normF(A), NORM = 'F', 'f', 'E' or 'e' */ +/* > */ +/* > where norm1 denotes the one norm of a matrix (maximum column sum), */ +/* > normI denotes the infinity norm of a matrix (maximum row sum) and */ +/* > normF denotes the Frobenius norm of a matrix (square root of sum of */ +/* > squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] NORM */ +/* > \verbatim */ +/* > NORM is CHARACTER*1 */ +/* > Specifies the value to be returned in DLANSY as described */ +/* > above. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > Specifies whether the upper or lower triangular part of the */ +/* > symmetric matrix A is to be referenced. */ +/* > = 'U': Upper triangular part of A is referenced */ +/* > = 'L': Lower triangular part of A is referenced */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. When N = 0, DLANSY is */ +/* > set to zero. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > The symmetric matrix A. If UPLO = 'U', the leading n by n */ +/* > upper triangular part of A contains the upper triangular part */ +/* > of the matrix A, and the strictly lower triangular part of A */ +/* > is not referenced. If UPLO = 'L', the leading n by n lower */ +/* > triangular part of A contains the lower triangular part of */ +/* > the matrix A, and the strictly upper triangular part of A is */ +/* > not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= max(N,1). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)), */ +/* > where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise, */ +/* > WORK is not referenced. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup doubleSYauxiliary */ + +/* ===================================================================== */ +doublereal dlansy_(char *norm, char *uplo, integer *n, doublereal *a, integer + *lda, doublereal *work, ftnlen norm_len, ftnlen uplo_len) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + doublereal ret_val, d__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + integer i__, j; + doublereal sum, absa, scale; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + doublereal value; + extern logical disnan_(doublereal *); + extern /* Subroutine */ int dlassq_(integer *, doublereal *, integer *, + doublereal *, doublereal *); + + +/* -- LAPACK auxiliary routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --work; + + /* Function Body */ + if (*n == 0) { + value = 0.; + } else if (lsame_(norm, (char *)"M", (ftnlen)1, (ftnlen)1)) { + +/* Find max(abs(A(i,j))). */ + + value = 0.; + if (lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1)) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + sum = (d__1 = a[i__ + j * a_dim1], abs(d__1)); + if (value < sum || disnan_(&sum)) { + value = sum; + } +/* L10: */ + } +/* L20: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + sum = (d__1 = a[i__ + j * a_dim1], abs(d__1)); + if (value < sum || disnan_(&sum)) { + value = sum; + } +/* L30: */ + } +/* L40: */ + } + } + } else if (lsame_(norm, (char *)"I", (ftnlen)1, (ftnlen)1) || lsame_(norm, (char *)"O", ( + ftnlen)1, (ftnlen)1) || *(unsigned char *)norm == '1') { + +/* Find normI(A) ( = norm1(A), since A is symmetric). */ + + value = 0.; + if (lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1)) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = 0.; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + absa = (d__1 = a[i__ + j * a_dim1], abs(d__1)); + sum += absa; + work[i__] += absa; +/* L50: */ + } + work[j] = sum + (d__1 = a[j + j * a_dim1], abs(d__1)); +/* L60: */ + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + sum = work[i__]; + if (value < sum || disnan_(&sum)) { + value = sum; + } +/* L70: */ + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] = 0.; +/* L80: */ + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = work[j] + (d__1 = a[j + j * a_dim1], abs(d__1)); + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + absa = (d__1 = a[i__ + j * a_dim1], abs(d__1)); + sum += absa; + work[i__] += absa; +/* L90: */ + } + if (value < sum || disnan_(&sum)) { + value = sum; + } +/* L100: */ + } + } + } else if (lsame_(norm, (char *)"F", (ftnlen)1, (ftnlen)1) || lsame_(norm, (char *)"E", ( + ftnlen)1, (ftnlen)1)) { + +/* Find normF(A). */ + + scale = 0.; + sum = 1.; + if (lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1)) { + i__1 = *n; + for (j = 2; j <= i__1; ++j) { + i__2 = j - 1; + dlassq_(&i__2, &a[j * a_dim1 + 1], &c__1, &scale, &sum); +/* L110: */ + } + } else { + i__1 = *n - 1; + for (j = 1; j <= i__1; ++j) { + i__2 = *n - j; + dlassq_(&i__2, &a[j + 1 + j * a_dim1], &c__1, &scale, &sum); +/* L120: */ + } + } + sum *= 2; + i__1 = *lda + 1; + dlassq_(n, &a[a_offset], &i__1, &scale, &sum); + value = scale * sqrt(sum); + } + + ret_val = value; + return ret_val; + +/* End of DLANSY */ + +} /* dlansy_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dlapy2.cpp b/lib/linalg/dlapy2.cpp new file mode 100644 index 0000000000..6a5443d8df --- /dev/null +++ b/lib/linalg/dlapy2.cpp @@ -0,0 +1,150 @@ +/* fortran/dlapy2.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* > \brief \b DLAPY2 returns sqrt(x2+y2). */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DLAPY2 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* DOUBLE PRECISION FUNCTION DLAPY2( X, Y ) */ + +/* .. Scalar Arguments .. */ +/* DOUBLE PRECISION X, Y */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DLAPY2 returns sqrt(x**2+y**2), taking care not to cause unnecessary */ +/* > overflow and unnecessary underflow. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] X */ +/* > \verbatim */ +/* > X is DOUBLE PRECISION */ +/* > \endverbatim */ +/* > */ +/* > \param[in] Y */ +/* > \verbatim */ +/* > Y is DOUBLE PRECISION */ +/* > X and Y specify the values x and y. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup OTHERauxiliary */ + +/* ===================================================================== */ +doublereal dlapy2_(doublereal *x, doublereal *y) +{ + /* System generated locals */ + doublereal ret_val, d__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + logical x_is_nan__, y_is_nan__; + doublereal w, z__, xabs, yabs; + extern doublereal dlamch_(char *, ftnlen); + extern logical disnan_(doublereal *); + doublereal hugeval; + + +/* -- LAPACK auxiliary routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + x_is_nan__ = disnan_(x); + y_is_nan__ = disnan_(y); + if (x_is_nan__) { + ret_val = *x; + } + if (y_is_nan__) { + ret_val = *y; + } + hugeval = dlamch_((char *)"Overflow", (ftnlen)8); + + if (! (x_is_nan__ || y_is_nan__)) { + xabs = abs(*x); + yabs = abs(*y); + w = max(xabs,yabs); + z__ = min(xabs,yabs); + if (z__ == 0. || w > hugeval) { + ret_val = w; + } else { +/* Computing 2nd power */ + d__1 = z__ / w; + ret_val = w * sqrt(d__1 * d__1 + 1.); + } + } + return ret_val; + +/* End of DLAPY2 */ + +} /* dlapy2_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dlapy3.cpp b/lib/linalg/dlapy3.cpp new file mode 100644 index 0000000000..528afbf3d8 --- /dev/null +++ b/lib/linalg/dlapy3.cpp @@ -0,0 +1,149 @@ +/* fortran/dlapy3.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* > \brief \b DLAPY3 returns sqrt(x2+y2+z2). */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DLAPY3 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* DOUBLE PRECISION FUNCTION DLAPY3( X, Y, Z ) */ + +/* .. Scalar Arguments .. */ +/* DOUBLE PRECISION X, Y, Z */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DLAPY3 returns sqrt(x**2+y**2+z**2), taking care not to cause */ +/* > unnecessary overflow and unnecessary underflow. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] X */ +/* > \verbatim */ +/* > X is DOUBLE PRECISION */ +/* > \endverbatim */ +/* > */ +/* > \param[in] Y */ +/* > \verbatim */ +/* > Y is DOUBLE PRECISION */ +/* > \endverbatim */ +/* > */ +/* > \param[in] Z */ +/* > \verbatim */ +/* > Z is DOUBLE PRECISION */ +/* > X, Y and Z specify the values x, y and z. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup OTHERauxiliary */ + +/* ===================================================================== */ +doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) +{ + /* System generated locals */ + doublereal ret_val, d__1, d__2, d__3; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + doublereal w, xabs, yabs, zabs; + extern doublereal dlamch_(char *, ftnlen); + doublereal hugeval; + + +/* -- LAPACK auxiliary routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + hugeval = dlamch_((char *)"Overflow", (ftnlen)8); + xabs = abs(*x); + yabs = abs(*y); + zabs = abs(*z__); +/* Computing MAX */ + d__1 = max(xabs,yabs); + w = max(d__1,zabs); + if (w == 0. || w > hugeval) { +/* W can be zero for max(0,nan,0) */ +/* adding all three entries together will make sure */ +/* NaN will not disappear. */ + ret_val = xabs + yabs + zabs; + } else { +/* Computing 2nd power */ + d__1 = xabs / w; +/* Computing 2nd power */ + d__2 = yabs / w; +/* Computing 2nd power */ + d__3 = zabs / w; + ret_val = w * sqrt(d__1 * d__1 + d__2 * d__2 + d__3 * d__3); + } + return ret_val; + +/* End of DLAPY3 */ + +} /* dlapy3_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dlarf.cpp b/lib/linalg/dlarf.cpp new file mode 100644 index 0000000000..5534559e63 --- /dev/null +++ b/lib/linalg/dlarf.cpp @@ -0,0 +1,275 @@ +/* fortran/dlarf.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static doublereal c_b4 = 1.; +static doublereal c_b5 = 0.; +static integer c__1 = 1; + +/* > \brief \b DLARF applies an elementary reflector to a general rectangular matrix. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DLARF + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DLARF( SIDE, M, N, V, INCV, TAU, C, LDC, WORK ) */ + +/* .. Scalar Arguments .. */ +/* CHARACTER SIDE */ +/* INTEGER INCV, LDC, M, N */ +/* DOUBLE PRECISION TAU */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION C( LDC, * ), V( * ), WORK( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DLARF applies a real elementary reflector H to a real m by n matrix */ +/* > C, from either the left or the right. H is represented in the form */ +/* > */ +/* > H = I - tau * v * v**T */ +/* > */ +/* > where tau is a real scalar and v is a real vector. */ +/* > */ +/* > If tau = 0, then H is taken to be the unit matrix. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] SIDE */ +/* > \verbatim */ +/* > SIDE is CHARACTER*1 */ +/* > = 'L': form H * C */ +/* > = 'R': form C * H */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix C. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix C. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] V */ +/* > \verbatim */ +/* > V is DOUBLE PRECISION array, dimension */ +/* > (1 + (M-1)*abs(INCV)) if SIDE = 'L' */ +/* > or (1 + (N-1)*abs(INCV)) if SIDE = 'R' */ +/* > The vector v in the representation of H. V is not used if */ +/* > TAU = 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] INCV */ +/* > \verbatim */ +/* > INCV is INTEGER */ +/* > The increment between elements of v. INCV <> 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TAU */ +/* > \verbatim */ +/* > TAU is DOUBLE PRECISION */ +/* > The value tau in the representation of H. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] C */ +/* > \verbatim */ +/* > C is DOUBLE PRECISION array, dimension (LDC,N) */ +/* > On entry, the m by n matrix C. */ +/* > On exit, C is overwritten by the matrix H * C if SIDE = 'L', */ +/* > or C * H if SIDE = 'R'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDC */ +/* > \verbatim */ +/* > LDC is INTEGER */ +/* > The leading dimension of the array C. LDC >= max(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension */ +/* > (N) if SIDE = 'L' */ +/* > or (M) if SIDE = 'R' */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup doubleOTHERauxiliary */ + +/* ===================================================================== */ +/* Subroutine */ int dlarf_(char *side, integer *m, integer *n, doublereal *v, + integer *incv, doublereal *tau, doublereal *c__, integer *ldc, + doublereal *work, ftnlen side_len) +{ + /* System generated locals */ + integer c_dim1, c_offset; + doublereal d__1; + + /* Local variables */ + integer i__; + logical applyleft; + extern /* Subroutine */ int dger_(integer *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *); + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern /* Subroutine */ int dgemv_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *, ftnlen); + integer lastc, lastv; + extern integer iladlc_(integer *, integer *, doublereal *, integer *), + iladlr_(integer *, integer *, doublereal *, integer *); + + +/* -- LAPACK auxiliary routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments */ + --v; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + applyleft = lsame_(side, (char *)"L", (ftnlen)1, (ftnlen)1); + lastv = 0; + lastc = 0; + if (*tau != 0.) { +/* Set up variables for scanning V. LASTV begins pointing to the end */ +/* of V. */ + if (applyleft) { + lastv = *m; + } else { + lastv = *n; + } + if (*incv > 0) { + i__ = (lastv - 1) * *incv + 1; + } else { + i__ = 1; + } +/* Look for the last non-zero row in V. */ + while(lastv > 0 && v[i__] == 0.) { + --lastv; + i__ -= *incv; + } + if (applyleft) { +/* Scan for the last non-zero column in C(1:lastv,:). */ + lastc = iladlc_(&lastv, n, &c__[c_offset], ldc); + } else { +/* Scan for the last non-zero row in C(:,1:lastv). */ + lastc = iladlr_(m, &lastv, &c__[c_offset], ldc); + } + } +/* Note that lastc.eq.0 renders the BLAS operations null; no special */ +/* case is needed at this level. */ + if (applyleft) { + +/* Form H * C */ + + if (lastv > 0) { + +/* w(1:lastc,1) := C(1:lastv,1:lastc)**T * v(1:lastv,1) */ + + dgemv_((char *)"Transpose", &lastv, &lastc, &c_b4, &c__[c_offset], ldc, & + v[1], incv, &c_b5, &work[1], &c__1, (ftnlen)9); + +/* C(1:lastv,1:lastc) := C(...) - v(1:lastv,1) * w(1:lastc,1)**T */ + + d__1 = -(*tau); + dger_(&lastv, &lastc, &d__1, &v[1], incv, &work[1], &c__1, &c__[ + c_offset], ldc); + } + } else { + +/* Form C * H */ + + if (lastv > 0) { + +/* w(1:lastc,1) := C(1:lastc,1:lastv) * v(1:lastv,1) */ + + dgemv_((char *)"No transpose", &lastc, &lastv, &c_b4, &c__[c_offset], ldc, + &v[1], incv, &c_b5, &work[1], &c__1, (ftnlen)12); + +/* C(1:lastc,1:lastv) := C(...) - w(1:lastc,1) * v(1:lastv,1)**T */ + + d__1 = -(*tau); + dger_(&lastc, &lastv, &d__1, &work[1], &c__1, &v[1], incv, &c__[ + c_offset], ldc); + } + } + return 0; + +/* End of DLARF */ + +} /* dlarf_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dlarfb.cpp b/lib/linalg/dlarfb.cpp new file mode 100644 index 0000000000..df15d11900 --- /dev/null +++ b/lib/linalg/dlarfb.cpp @@ -0,0 +1,886 @@ +/* fortran/dlarfb.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static doublereal c_b14 = 1.; +static doublereal c_b25 = -1.; + +/* > \brief \b DLARFB applies a block reflector or its transpose to a general rectangular matrix. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DLARFB + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DLARFB( SIDE, TRANS, DIRECT, STOREV, M, N, K, V, LDV, */ +/* T, LDT, C, LDC, WORK, LDWORK ) */ + +/* .. Scalar Arguments .. */ +/* CHARACTER DIRECT, SIDE, STOREV, TRANS */ +/* INTEGER K, LDC, LDT, LDV, LDWORK, M, N */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION C( LDC, * ), T( LDT, * ), V( LDV, * ), */ +/* $ WORK( LDWORK, * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DLARFB applies a real block reflector H or its transpose H**T to a */ +/* > real m by n matrix C, from either the left or the right. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] SIDE */ +/* > \verbatim */ +/* > SIDE is CHARACTER*1 */ +/* > = 'L': apply H or H**T from the Left */ +/* > = 'R': apply H or H**T from the Right */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TRANS */ +/* > \verbatim */ +/* > TRANS is CHARACTER*1 */ +/* > = 'N': apply H (No transpose) */ +/* > = 'T': apply H**T (Transpose) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DIRECT */ +/* > \verbatim */ +/* > DIRECT is CHARACTER*1 */ +/* > Indicates how H is formed from a product of elementary */ +/* > reflectors */ +/* > = 'F': H = H(1) H(2) . . . H(k) (Forward) */ +/* > = 'B': H = H(k) . . . H(2) H(1) (Backward) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] STOREV */ +/* > \verbatim */ +/* > STOREV is CHARACTER*1 */ +/* > Indicates how the vectors which define the elementary */ +/* > reflectors are stored: */ +/* > = 'C': Columnwise */ +/* > = 'R': Rowwise */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix C. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix C. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] K */ +/* > \verbatim */ +/* > K is INTEGER */ +/* > The order of the matrix T (= the number of elementary */ +/* > reflectors whose product defines the block reflector). */ +/* > If SIDE = 'L', M >= K >= 0; */ +/* > if SIDE = 'R', N >= K >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] V */ +/* > \verbatim */ +/* > V is DOUBLE PRECISION array, dimension */ +/* > (LDV,K) if STOREV = 'C' */ +/* > (LDV,M) if STOREV = 'R' and SIDE = 'L' */ +/* > (LDV,N) if STOREV = 'R' and SIDE = 'R' */ +/* > The matrix V. See Further Details. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDV */ +/* > \verbatim */ +/* > LDV is INTEGER */ +/* > The leading dimension of the array V. */ +/* > If STOREV = 'C' and SIDE = 'L', LDV >= max(1,M); */ +/* > if STOREV = 'C' and SIDE = 'R', LDV >= max(1,N); */ +/* > if STOREV = 'R', LDV >= K. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] T */ +/* > \verbatim */ +/* > T is DOUBLE PRECISION array, dimension (LDT,K) */ +/* > The triangular k by k matrix T in the representation of the */ +/* > block reflector. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDT */ +/* > \verbatim */ +/* > LDT is INTEGER */ +/* > The leading dimension of the array T. LDT >= K. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] C */ +/* > \verbatim */ +/* > C is DOUBLE PRECISION array, dimension (LDC,N) */ +/* > On entry, the m by n matrix C. */ +/* > On exit, C is overwritten by H*C or H**T*C or C*H or C*H**T. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDC */ +/* > \verbatim */ +/* > LDC is INTEGER */ +/* > The leading dimension of the array C. LDC >= max(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (LDWORK,K) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDWORK */ +/* > \verbatim */ +/* > LDWORK is INTEGER */ +/* > The leading dimension of the array WORK. */ +/* > If SIDE = 'L', LDWORK >= max(1,N); */ +/* > if SIDE = 'R', LDWORK >= max(1,M). */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup doubleOTHERauxiliary */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The shape of the matrix V and the storage of the vectors which define */ +/* > the H(i) is best illustrated by the following example with n = 5 and */ +/* > k = 3. The elements equal to 1 are not stored; the corresponding */ +/* > array elements are modified but restored on exit. The rest of the */ +/* > array is not used. */ +/* > */ +/* > DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': */ +/* > */ +/* > V = ( 1 ) V = ( 1 v1 v1 v1 v1 ) */ +/* > ( v1 1 ) ( 1 v2 v2 v2 ) */ +/* > ( v1 v2 1 ) ( 1 v3 v3 ) */ +/* > ( v1 v2 v3 ) */ +/* > ( v1 v2 v3 ) */ +/* > */ +/* > DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': */ +/* > */ +/* > V = ( v1 v2 v3 ) V = ( v1 v1 1 ) */ +/* > ( v1 v2 v3 ) ( v2 v2 v2 1 ) */ +/* > ( 1 v2 v3 ) ( v3 v3 v3 v3 1 ) */ +/* > ( 1 v3 ) */ +/* > ( 1 ) */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int dlarfb_(char *side, char *trans, char *direct, char * + storev, integer *m, integer *n, integer *k, doublereal *v, integer * + ldv, doublereal *t, integer *ldt, doublereal *c__, integer *ldc, + doublereal *work, integer *ldwork, ftnlen side_len, ftnlen trans_len, + ftnlen direct_len, ftnlen storev_len) +{ + /* System generated locals */ + integer c_dim1, c_offset, t_dim1, t_offset, v_dim1, v_offset, work_dim1, + work_offset, i__1, i__2; + + /* Local variables */ + integer i__, j; + extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, ftnlen, ftnlen); + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *), dtrmm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *, ftnlen, ftnlen, ftnlen, ftnlen); + char transt[1]; + + +/* -- LAPACK auxiliary routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Quick return if possible */ + + /* Parameter adjustments */ + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + work_dim1 = *ldwork; + work_offset = 1 + work_dim1; + work -= work_offset; + + /* Function Body */ + if (*m <= 0 || *n <= 0) { + return 0; + } + + if (lsame_(trans, (char *)"N", (ftnlen)1, (ftnlen)1)) { + *(unsigned char *)transt = 'T'; + } else { + *(unsigned char *)transt = 'N'; + } + + if (lsame_(storev, (char *)"C", (ftnlen)1, (ftnlen)1)) { + + if (lsame_(direct, (char *)"F", (ftnlen)1, (ftnlen)1)) { + +/* Let V = ( V1 ) (first K rows) */ +/* ( V2 ) */ +/* where V1 is unit lower triangular. */ + + if (lsame_(side, (char *)"L", (ftnlen)1, (ftnlen)1)) { + +/* Form H * C or H**T * C where C = ( C1 ) */ +/* ( C2 ) */ + +/* W := C**T * V = (C1**T * V1 + C2**T * V2) (stored in WORK) */ + +/* W := C1**T */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + dcopy_(n, &c__[j + c_dim1], ldc, &work[j * work_dim1 + 1], + &c__1); +/* L10: */ + } + +/* W := W * V1 */ + + dtrmm_((char *)"Right", (char *)"Lower", (char *)"No transpose", (char *)"Unit", n, k, &c_b14, + &v[v_offset], ldv, &work[work_offset], ldwork, ( + ftnlen)5, (ftnlen)5, (ftnlen)12, (ftnlen)4); + if (*m > *k) { + +/* W := W + C2**T * V2 */ + + i__1 = *m - *k; + dgemm_((char *)"Transpose", (char *)"No transpose", n, k, &i__1, &c_b14, & + c__[*k + 1 + c_dim1], ldc, &v[*k + 1 + v_dim1], + ldv, &c_b14, &work[work_offset], ldwork, (ftnlen) + 9, (ftnlen)12); + } + +/* W := W * T**T or W * T */ + + dtrmm_((char *)"Right", (char *)"Upper", transt, (char *)"Non-unit", n, k, &c_b14, &t[ + t_offset], ldt, &work[work_offset], ldwork, (ftnlen)5, + (ftnlen)5, (ftnlen)1, (ftnlen)8); + +/* C := C - V * W**T */ + + if (*m > *k) { + +/* C2 := C2 - V2 * W**T */ + + i__1 = *m - *k; + dgemm_((char *)"No transpose", (char *)"Transpose", &i__1, n, k, &c_b25, & + v[*k + 1 + v_dim1], ldv, &work[work_offset], + ldwork, &c_b14, &c__[*k + 1 + c_dim1], ldc, ( + ftnlen)12, (ftnlen)9); + } + +/* W := W * V1**T */ + + dtrmm_((char *)"Right", (char *)"Lower", (char *)"Transpose", (char *)"Unit", n, k, &c_b14, & + v[v_offset], ldv, &work[work_offset], ldwork, (ftnlen) + 5, (ftnlen)5, (ftnlen)9, (ftnlen)4); + +/* C1 := C1 - W**T */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[j + i__ * c_dim1] -= work[i__ + j * work_dim1]; +/* L20: */ + } +/* L30: */ + } + + } else if (lsame_(side, (char *)"R", (ftnlen)1, (ftnlen)1)) { + +/* Form C * H or C * H**T where C = ( C1 C2 ) */ + +/* W := C * V = (C1*V1 + C2*V2) (stored in WORK) */ + +/* W := C1 */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + dcopy_(m, &c__[j * c_dim1 + 1], &c__1, &work[j * + work_dim1 + 1], &c__1); +/* L40: */ + } + +/* W := W * V1 */ + + dtrmm_((char *)"Right", (char *)"Lower", (char *)"No transpose", (char *)"Unit", m, k, &c_b14, + &v[v_offset], ldv, &work[work_offset], ldwork, ( + ftnlen)5, (ftnlen)5, (ftnlen)12, (ftnlen)4); + if (*n > *k) { + +/* W := W + C2 * V2 */ + + i__1 = *n - *k; + dgemm_((char *)"No transpose", (char *)"No transpose", m, k, &i__1, & + c_b14, &c__[(*k + 1) * c_dim1 + 1], ldc, &v[*k + + 1 + v_dim1], ldv, &c_b14, &work[work_offset], + ldwork, (ftnlen)12, (ftnlen)12); + } + +/* W := W * T or W * T**T */ + + dtrmm_((char *)"Right", (char *)"Upper", trans, (char *)"Non-unit", m, k, &c_b14, &t[ + t_offset], ldt, &work[work_offset], ldwork, (ftnlen)5, + (ftnlen)5, (ftnlen)1, (ftnlen)8); + +/* C := C - W * V**T */ + + if (*n > *k) { + +/* C2 := C2 - W * V2**T */ + + i__1 = *n - *k; + dgemm_((char *)"No transpose", (char *)"Transpose", m, &i__1, k, &c_b25, & + work[work_offset], ldwork, &v[*k + 1 + v_dim1], + ldv, &c_b14, &c__[(*k + 1) * c_dim1 + 1], ldc, ( + ftnlen)12, (ftnlen)9); + } + +/* W := W * V1**T */ + + dtrmm_((char *)"Right", (char *)"Lower", (char *)"Transpose", (char *)"Unit", m, k, &c_b14, & + v[v_offset], ldv, &work[work_offset], ldwork, (ftnlen) + 5, (ftnlen)5, (ftnlen)9, (ftnlen)4); + +/* C1 := C1 - W */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] -= work[i__ + j * work_dim1]; +/* L50: */ + } +/* L60: */ + } + } + + } else { + +/* Let V = ( V1 ) */ +/* ( V2 ) (last K rows) */ +/* where V2 is unit upper triangular. */ + + if (lsame_(side, (char *)"L", (ftnlen)1, (ftnlen)1)) { + +/* Form H * C or H**T * C where C = ( C1 ) */ +/* ( C2 ) */ + +/* W := C**T * V = (C1**T * V1 + C2**T * V2) (stored in WORK) */ + +/* W := C2**T */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + dcopy_(n, &c__[*m - *k + j + c_dim1], ldc, &work[j * + work_dim1 + 1], &c__1); +/* L70: */ + } + +/* W := W * V2 */ + + dtrmm_((char *)"Right", (char *)"Upper", (char *)"No transpose", (char *)"Unit", n, k, &c_b14, + &v[*m - *k + 1 + v_dim1], ldv, &work[work_offset], + ldwork, (ftnlen)5, (ftnlen)5, (ftnlen)12, (ftnlen)4); + if (*m > *k) { + +/* W := W + C1**T * V1 */ + + i__1 = *m - *k; + dgemm_((char *)"Transpose", (char *)"No transpose", n, k, &i__1, &c_b14, & + c__[c_offset], ldc, &v[v_offset], ldv, &c_b14, & + work[work_offset], ldwork, (ftnlen)9, (ftnlen)12); + } + +/* W := W * T**T or W * T */ + + dtrmm_((char *)"Right", (char *)"Lower", transt, (char *)"Non-unit", n, k, &c_b14, &t[ + t_offset], ldt, &work[work_offset], ldwork, (ftnlen)5, + (ftnlen)5, (ftnlen)1, (ftnlen)8); + +/* C := C - V * W**T */ + + if (*m > *k) { + +/* C1 := C1 - V1 * W**T */ + + i__1 = *m - *k; + dgemm_((char *)"No transpose", (char *)"Transpose", &i__1, n, k, &c_b25, & + v[v_offset], ldv, &work[work_offset], ldwork, & + c_b14, &c__[c_offset], ldc, (ftnlen)12, (ftnlen)9) + ; + } + +/* W := W * V2**T */ + + dtrmm_((char *)"Right", (char *)"Upper", (char *)"Transpose", (char *)"Unit", n, k, &c_b14, & + v[*m - *k + 1 + v_dim1], ldv, &work[work_offset], + ldwork, (ftnlen)5, (ftnlen)5, (ftnlen)9, (ftnlen)4); + +/* C2 := C2 - W**T */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[*m - *k + j + i__ * c_dim1] -= work[i__ + j * + work_dim1]; +/* L80: */ + } +/* L90: */ + } + + } else if (lsame_(side, (char *)"R", (ftnlen)1, (ftnlen)1)) { + +/* Form C * H or C * H**T where C = ( C1 C2 ) */ + +/* W := C * V = (C1*V1 + C2*V2) (stored in WORK) */ + +/* W := C2 */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + dcopy_(m, &c__[(*n - *k + j) * c_dim1 + 1], &c__1, &work[ + j * work_dim1 + 1], &c__1); +/* L100: */ + } + +/* W := W * V2 */ + + dtrmm_((char *)"Right", (char *)"Upper", (char *)"No transpose", (char *)"Unit", m, k, &c_b14, + &v[*n - *k + 1 + v_dim1], ldv, &work[work_offset], + ldwork, (ftnlen)5, (ftnlen)5, (ftnlen)12, (ftnlen)4); + if (*n > *k) { + +/* W := W + C1 * V1 */ + + i__1 = *n - *k; + dgemm_((char *)"No transpose", (char *)"No transpose", m, k, &i__1, & + c_b14, &c__[c_offset], ldc, &v[v_offset], ldv, & + c_b14, &work[work_offset], ldwork, (ftnlen)12, ( + ftnlen)12); + } + +/* W := W * T or W * T**T */ + + dtrmm_((char *)"Right", (char *)"Lower", trans, (char *)"Non-unit", m, k, &c_b14, &t[ + t_offset], ldt, &work[work_offset], ldwork, (ftnlen)5, + (ftnlen)5, (ftnlen)1, (ftnlen)8); + +/* C := C - W * V**T */ + + if (*n > *k) { + +/* C1 := C1 - W * V1**T */ + + i__1 = *n - *k; + dgemm_((char *)"No transpose", (char *)"Transpose", m, &i__1, k, &c_b25, & + work[work_offset], ldwork, &v[v_offset], ldv, & + c_b14, &c__[c_offset], ldc, (ftnlen)12, (ftnlen)9) + ; + } + +/* W := W * V2**T */ + + dtrmm_((char *)"Right", (char *)"Upper", (char *)"Transpose", (char *)"Unit", m, k, &c_b14, & + v[*n - *k + 1 + v_dim1], ldv, &work[work_offset], + ldwork, (ftnlen)5, (ftnlen)5, (ftnlen)9, (ftnlen)4); + +/* C2 := C2 - W */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + (*n - *k + j) * c_dim1] -= work[i__ + j * + work_dim1]; +/* L110: */ + } +/* L120: */ + } + } + } + + } else if (lsame_(storev, (char *)"R", (ftnlen)1, (ftnlen)1)) { + + if (lsame_(direct, (char *)"F", (ftnlen)1, (ftnlen)1)) { + +/* Let V = ( V1 V2 ) (V1: first K columns) */ +/* where V1 is unit upper triangular. */ + + if (lsame_(side, (char *)"L", (ftnlen)1, (ftnlen)1)) { + +/* Form H * C or H**T * C where C = ( C1 ) */ +/* ( C2 ) */ + +/* W := C**T * V**T = (C1**T * V1**T + C2**T * V2**T) (stored in WORK) */ + +/* W := C1**T */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + dcopy_(n, &c__[j + c_dim1], ldc, &work[j * work_dim1 + 1], + &c__1); +/* L130: */ + } + +/* W := W * V1**T */ + + dtrmm_((char *)"Right", (char *)"Upper", (char *)"Transpose", (char *)"Unit", n, k, &c_b14, & + v[v_offset], ldv, &work[work_offset], ldwork, (ftnlen) + 5, (ftnlen)5, (ftnlen)9, (ftnlen)4); + if (*m > *k) { + +/* W := W + C2**T * V2**T */ + + i__1 = *m - *k; + dgemm_((char *)"Transpose", (char *)"Transpose", n, k, &i__1, &c_b14, & + c__[*k + 1 + c_dim1], ldc, &v[(*k + 1) * v_dim1 + + 1], ldv, &c_b14, &work[work_offset], ldwork, ( + ftnlen)9, (ftnlen)9); + } + +/* W := W * T**T or W * T */ + + dtrmm_((char *)"Right", (char *)"Upper", transt, (char *)"Non-unit", n, k, &c_b14, &t[ + t_offset], ldt, &work[work_offset], ldwork, (ftnlen)5, + (ftnlen)5, (ftnlen)1, (ftnlen)8); + +/* C := C - V**T * W**T */ + + if (*m > *k) { + +/* C2 := C2 - V2**T * W**T */ + + i__1 = *m - *k; + dgemm_((char *)"Transpose", (char *)"Transpose", &i__1, n, k, &c_b25, &v[( + *k + 1) * v_dim1 + 1], ldv, &work[work_offset], + ldwork, &c_b14, &c__[*k + 1 + c_dim1], ldc, ( + ftnlen)9, (ftnlen)9); + } + +/* W := W * V1 */ + + dtrmm_((char *)"Right", (char *)"Upper", (char *)"No transpose", (char *)"Unit", n, k, &c_b14, + &v[v_offset], ldv, &work[work_offset], ldwork, ( + ftnlen)5, (ftnlen)5, (ftnlen)12, (ftnlen)4); + +/* C1 := C1 - W**T */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[j + i__ * c_dim1] -= work[i__ + j * work_dim1]; +/* L140: */ + } +/* L150: */ + } + + } else if (lsame_(side, (char *)"R", (ftnlen)1, (ftnlen)1)) { + +/* Form C * H or C * H**T where C = ( C1 C2 ) */ + +/* W := C * V**T = (C1*V1**T + C2*V2**T) (stored in WORK) */ + +/* W := C1 */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + dcopy_(m, &c__[j * c_dim1 + 1], &c__1, &work[j * + work_dim1 + 1], &c__1); +/* L160: */ + } + +/* W := W * V1**T */ + + dtrmm_((char *)"Right", (char *)"Upper", (char *)"Transpose", (char *)"Unit", m, k, &c_b14, & + v[v_offset], ldv, &work[work_offset], ldwork, (ftnlen) + 5, (ftnlen)5, (ftnlen)9, (ftnlen)4); + if (*n > *k) { + +/* W := W + C2 * V2**T */ + + i__1 = *n - *k; + dgemm_((char *)"No transpose", (char *)"Transpose", m, k, &i__1, &c_b14, & + c__[(*k + 1) * c_dim1 + 1], ldc, &v[(*k + 1) * + v_dim1 + 1], ldv, &c_b14, &work[work_offset], + ldwork, (ftnlen)12, (ftnlen)9); + } + +/* W := W * T or W * T**T */ + + dtrmm_((char *)"Right", (char *)"Upper", trans, (char *)"Non-unit", m, k, &c_b14, &t[ + t_offset], ldt, &work[work_offset], ldwork, (ftnlen)5, + (ftnlen)5, (ftnlen)1, (ftnlen)8); + +/* C := C - W * V */ + + if (*n > *k) { + +/* C2 := C2 - W * V2 */ + + i__1 = *n - *k; + dgemm_((char *)"No transpose", (char *)"No transpose", m, &i__1, k, & + c_b25, &work[work_offset], ldwork, &v[(*k + 1) * + v_dim1 + 1], ldv, &c_b14, &c__[(*k + 1) * c_dim1 + + 1], ldc, (ftnlen)12, (ftnlen)12); + } + +/* W := W * V1 */ + + dtrmm_((char *)"Right", (char *)"Upper", (char *)"No transpose", (char *)"Unit", m, k, &c_b14, + &v[v_offset], ldv, &work[work_offset], ldwork, ( + ftnlen)5, (ftnlen)5, (ftnlen)12, (ftnlen)4); + +/* C1 := C1 - W */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] -= work[i__ + j * work_dim1]; +/* L170: */ + } +/* L180: */ + } + + } + + } else { + +/* Let V = ( V1 V2 ) (V2: last K columns) */ +/* where V2 is unit lower triangular. */ + + if (lsame_(side, (char *)"L", (ftnlen)1, (ftnlen)1)) { + +/* Form H * C or H**T * C where C = ( C1 ) */ +/* ( C2 ) */ + +/* W := C**T * V**T = (C1**T * V1**T + C2**T * V2**T) (stored in WORK) */ + +/* W := C2**T */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + dcopy_(n, &c__[*m - *k + j + c_dim1], ldc, &work[j * + work_dim1 + 1], &c__1); +/* L190: */ + } + +/* W := W * V2**T */ + + dtrmm_((char *)"Right", (char *)"Lower", (char *)"Transpose", (char *)"Unit", n, k, &c_b14, & + v[(*m - *k + 1) * v_dim1 + 1], ldv, &work[work_offset] + , ldwork, (ftnlen)5, (ftnlen)5, (ftnlen)9, (ftnlen)4); + if (*m > *k) { + +/* W := W + C1**T * V1**T */ + + i__1 = *m - *k; + dgemm_((char *)"Transpose", (char *)"Transpose", n, k, &i__1, &c_b14, & + c__[c_offset], ldc, &v[v_offset], ldv, &c_b14, & + work[work_offset], ldwork, (ftnlen)9, (ftnlen)9); + } + +/* W := W * T**T or W * T */ + + dtrmm_((char *)"Right", (char *)"Lower", transt, (char *)"Non-unit", n, k, &c_b14, &t[ + t_offset], ldt, &work[work_offset], ldwork, (ftnlen)5, + (ftnlen)5, (ftnlen)1, (ftnlen)8); + +/* C := C - V**T * W**T */ + + if (*m > *k) { + +/* C1 := C1 - V1**T * W**T */ + + i__1 = *m - *k; + dgemm_((char *)"Transpose", (char *)"Transpose", &i__1, n, k, &c_b25, &v[ + v_offset], ldv, &work[work_offset], ldwork, & + c_b14, &c__[c_offset], ldc, (ftnlen)9, (ftnlen)9); + } + +/* W := W * V2 */ + + dtrmm_((char *)"Right", (char *)"Lower", (char *)"No transpose", (char *)"Unit", n, k, &c_b14, + &v[(*m - *k + 1) * v_dim1 + 1], ldv, &work[ + work_offset], ldwork, (ftnlen)5, (ftnlen)5, (ftnlen) + 12, (ftnlen)4); + +/* C2 := C2 - W**T */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[*m - *k + j + i__ * c_dim1] -= work[i__ + j * + work_dim1]; +/* L200: */ + } +/* L210: */ + } + + } else if (lsame_(side, (char *)"R", (ftnlen)1, (ftnlen)1)) { + +/* Form C * H or C * H' where C = ( C1 C2 ) */ + +/* W := C * V**T = (C1*V1**T + C2*V2**T) (stored in WORK) */ + +/* W := C2 */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + dcopy_(m, &c__[(*n - *k + j) * c_dim1 + 1], &c__1, &work[ + j * work_dim1 + 1], &c__1); +/* L220: */ + } + +/* W := W * V2**T */ + + dtrmm_((char *)"Right", (char *)"Lower", (char *)"Transpose", (char *)"Unit", m, k, &c_b14, & + v[(*n - *k + 1) * v_dim1 + 1], ldv, &work[work_offset] + , ldwork, (ftnlen)5, (ftnlen)5, (ftnlen)9, (ftnlen)4); + if (*n > *k) { + +/* W := W + C1 * V1**T */ + + i__1 = *n - *k; + dgemm_((char *)"No transpose", (char *)"Transpose", m, k, &i__1, &c_b14, & + c__[c_offset], ldc, &v[v_offset], ldv, &c_b14, & + work[work_offset], ldwork, (ftnlen)12, (ftnlen)9); + } + +/* W := W * T or W * T**T */ + + dtrmm_((char *)"Right", (char *)"Lower", trans, (char *)"Non-unit", m, k, &c_b14, &t[ + t_offset], ldt, &work[work_offset], ldwork, (ftnlen)5, + (ftnlen)5, (ftnlen)1, (ftnlen)8); + +/* C := C - W * V */ + + if (*n > *k) { + +/* C1 := C1 - W * V1 */ + + i__1 = *n - *k; + dgemm_((char *)"No transpose", (char *)"No transpose", m, &i__1, k, & + c_b25, &work[work_offset], ldwork, &v[v_offset], + ldv, &c_b14, &c__[c_offset], ldc, (ftnlen)12, ( + ftnlen)12); + } + +/* W := W * V2 */ + + dtrmm_((char *)"Right", (char *)"Lower", (char *)"No transpose", (char *)"Unit", m, k, &c_b14, + &v[(*n - *k + 1) * v_dim1 + 1], ldv, &work[ + work_offset], ldwork, (ftnlen)5, (ftnlen)5, (ftnlen) + 12, (ftnlen)4); + +/* C1 := C1 - W */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + (*n - *k + j) * c_dim1] -= work[i__ + j * + work_dim1]; +/* L230: */ + } +/* L240: */ + } + + } + + } + } + + return 0; + +/* End of DLARFB */ + +} /* dlarfb_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dlarfg.cpp b/lib/linalg/dlarfg.cpp new file mode 100644 index 0000000000..cb544ccf18 --- /dev/null +++ b/lib/linalg/dlarfg.cpp @@ -0,0 +1,240 @@ +/* fortran/dlarfg.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* > \brief \b DLARFG generates an elementary reflector (Householder matrix). */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DLARFG + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DLARFG( N, ALPHA, X, INCX, TAU ) */ + +/* .. Scalar Arguments .. */ +/* INTEGER INCX, N */ +/* DOUBLE PRECISION ALPHA, TAU */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION X( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DLARFG generates a real elementary reflector H of order n, such */ +/* > that */ +/* > */ +/* > H * ( alpha ) = ( beta ), H**T * H = I. */ +/* > ( x ) ( 0 ) */ +/* > */ +/* > where alpha and beta are scalars, and x is an (n-1)-element real */ +/* > vector. H is represented in the form */ +/* > */ +/* > H = I - tau * ( 1 ) * ( 1 v**T ) , */ +/* > ( v ) */ +/* > */ +/* > where tau is a real scalar and v is a real (n-1)-element */ +/* > vector. */ +/* > */ +/* > If the elements of x are all zero, then tau = 0 and H is taken to be */ +/* > the unit matrix. */ +/* > */ +/* > Otherwise 1 <= tau <= 2. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the elementary reflector. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] ALPHA */ +/* > \verbatim */ +/* > ALPHA is DOUBLE PRECISION */ +/* > On entry, the value alpha. */ +/* > On exit, it is overwritten with the value beta. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] X */ +/* > \verbatim */ +/* > X is DOUBLE PRECISION array, dimension */ +/* > (1+(N-2)*abs(INCX)) */ +/* > On entry, the vector x. */ +/* > On exit, it is overwritten with the vector v. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] INCX */ +/* > \verbatim */ +/* > INCX is INTEGER */ +/* > The increment between elements of X. INCX > 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] TAU */ +/* > \verbatim */ +/* > TAU is DOUBLE PRECISION */ +/* > The value tau. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup doubleOTHERauxiliary */ + +/* ===================================================================== */ +/* Subroutine */ int dlarfg_(integer *n, doublereal *alpha, doublereal *x, + integer *incx, doublereal *tau) +{ + /* System generated locals */ + integer i__1; + doublereal d__1; + + /* Builtin functions */ + double d_sign(doublereal *, doublereal *); + + /* Local variables */ + integer j, knt; + doublereal beta; + extern doublereal dnrm2_(integer *, doublereal *, integer *); + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *); + doublereal xnorm; + extern doublereal dlapy2_(doublereal *, doublereal *), dlamch_(char *, + ftnlen); + doublereal safmin, rsafmn; + + +/* -- LAPACK auxiliary routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments */ + --x; + + /* Function Body */ + if (*n <= 1) { + *tau = 0.; + return 0; + } + + i__1 = *n - 1; + xnorm = dnrm2_(&i__1, &x[1], incx); + + if (xnorm == 0.) { + +/* H = I */ + + *tau = 0.; + } else { + +/* general case */ + + d__1 = dlapy2_(alpha, &xnorm); + beta = -d_sign(&d__1, alpha); + safmin = dlamch_((char *)"S", (ftnlen)1) / dlamch_((char *)"E", (ftnlen)1); + knt = 0; + if (abs(beta) < safmin) { + +/* XNORM, BETA may be inaccurate; scale X and recompute them */ + + rsafmn = 1. / safmin; +L10: + ++knt; + i__1 = *n - 1; + dscal_(&i__1, &rsafmn, &x[1], incx); + beta *= rsafmn; + *alpha *= rsafmn; + if (abs(beta) < safmin && knt < 20) { + goto L10; + } + +/* New BETA is at most 1, at least SAFMIN */ + + i__1 = *n - 1; + xnorm = dnrm2_(&i__1, &x[1], incx); + d__1 = dlapy2_(alpha, &xnorm); + beta = -d_sign(&d__1, alpha); + } + *tau = (beta - *alpha) / beta; + i__1 = *n - 1; + d__1 = 1. / (*alpha - beta); + dscal_(&i__1, &d__1, &x[1], incx); + +/* If ALPHA is subnormal, it may lose relative accuracy */ + + i__1 = knt; + for (j = 1; j <= i__1; ++j) { + beta *= safmin; +/* L20: */ + } + *alpha = beta; + } + + return 0; + +/* End of DLARFG */ + +} /* dlarfg_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dlarft.cpp b/lib/linalg/dlarft.cpp new file mode 100644 index 0000000000..220c9f0082 --- /dev/null +++ b/lib/linalg/dlarft.cpp @@ -0,0 +1,417 @@ +/* static/dlarft.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static doublereal c_b7 = 1.; + +/* > \brief \b DLARFT forms the triangular factor T of a block reflector H = I - vtvH */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DLARFT + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) */ + +/* .. Scalar Arguments .. */ +/* CHARACTER DIRECT, STOREV */ +/* INTEGER K, LDT, LDV, N */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION T( LDT, * ), TAU( * ), V( LDV, * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DLARFT forms the triangular factor T of a real block reflector H */ +/* > of order n, which is defined as a product of k elementary reflectors. */ +/* > */ +/* > If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; */ +/* > */ +/* > If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. */ +/* > */ +/* > If STOREV = 'C', the vector which defines the elementary reflector */ +/* > H(i) is stored in the i-th column of the array V, and */ +/* > */ +/* > H = I - V * T * V**T */ +/* > */ +/* > If STOREV = 'R', the vector which defines the elementary reflector */ +/* > H(i) is stored in the i-th row of the array V, and */ +/* > */ +/* > H = I - V**T * T * V */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] DIRECT */ +/* > \verbatim */ +/* > DIRECT is CHARACTER*1 */ +/* > Specifies the order in which the elementary reflectors are */ +/* > multiplied to form the block reflector: */ +/* > = 'F': H = H(1) H(2) . . . H(k) (Forward) */ +/* > = 'B': H = H(k) . . . H(2) H(1) (Backward) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] STOREV */ +/* > \verbatim */ +/* > STOREV is CHARACTER*1 */ +/* > Specifies how the vectors which define the elementary */ +/* > reflectors are stored (see also Further Details): */ +/* > = 'C': columnwise */ +/* > = 'R': rowwise */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the block reflector H. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] K */ +/* > \verbatim */ +/* > K is INTEGER */ +/* > The order of the triangular factor T (= the number of */ +/* > elementary reflectors). K >= 1. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] V */ +/* > \verbatim */ +/* > V is DOUBLE PRECISION array, dimension */ +/* > (LDV,K) if STOREV = 'C' */ +/* > (LDV,N) if STOREV = 'R' */ +/* > The matrix V. See further details. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDV */ +/* > \verbatim */ +/* > LDV is INTEGER */ +/* > The leading dimension of the array V. */ +/* > If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TAU */ +/* > \verbatim */ +/* > TAU is DOUBLE PRECISION array, dimension (K) */ +/* > TAU(i) must contain the scalar factor of the elementary */ +/* > reflector H(i). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] T */ +/* > \verbatim */ +/* > T is DOUBLE PRECISION array, dimension (LDT,K) */ +/* > The k by k triangular factor T of the block reflector. */ +/* > If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is */ +/* > lower triangular. The rest of the array is not used. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDT */ +/* > \verbatim */ +/* > LDT is INTEGER */ +/* > The leading dimension of the array T. LDT >= K. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup doubleOTHERauxiliary */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The shape of the matrix V and the storage of the vectors which define */ +/* > the H(i) is best illustrated by the following example with n = 5 and */ +/* > k = 3. The elements equal to 1 are not stored. */ +/* > */ +/* > DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': */ +/* > */ +/* > V = ( 1 ) V = ( 1 v1 v1 v1 v1 ) */ +/* > ( v1 1 ) ( 1 v2 v2 v2 ) */ +/* > ( v1 v2 1 ) ( 1 v3 v3 ) */ +/* > ( v1 v2 v3 ) */ +/* > ( v1 v2 v3 ) */ +/* > */ +/* > DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': */ +/* > */ +/* > V = ( v1 v2 v3 ) V = ( v1 v1 1 ) */ +/* > ( v1 v2 v3 ) ( v2 v2 v2 1 ) */ +/* > ( 1 v2 v3 ) ( v3 v3 v3 v3 1 ) */ +/* > ( 1 v3 ) */ +/* > ( 1 ) */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int dlarft_(char *direct, char *storev, integer *n, integer * + k, doublereal *v, integer *ldv, doublereal *tau, doublereal *t, + integer *ldt, ftnlen direct_len, ftnlen storev_len) +{ + /* System generated locals */ + integer t_dim1, t_offset, v_dim1, v_offset, i__1, i__2, i__3; + doublereal d__1; + + /* Local variables */ + integer i__, j, prevlastv; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern /* Subroutine */ int dgemv_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *, ftnlen); + integer lastv; + extern /* Subroutine */ int dtrmv_(char *, char *, char *, integer *, + doublereal *, integer *, doublereal *, integer *, ftnlen, ftnlen, + ftnlen); + + +/* -- LAPACK auxiliary routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Quick return if possible */ + + /* Parameter adjustments */ + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + --tau; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + + /* Function Body */ + if (*n == 0) { + return 0; + } + + if (lsame_(direct, (char *)"F", (ftnlen)1, (ftnlen)1)) { + prevlastv = *n; + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + prevlastv = max(i__,prevlastv); + if (tau[i__] == 0.) { + +/* H(i) = I */ + + i__2 = i__; + for (j = 1; j <= i__2; ++j) { + t[j + i__ * t_dim1] = 0.; + } + } else { + +/* general case */ + + if (lsame_(storev, (char *)"C", (ftnlen)1, (ftnlen)1)) { +/* Skip any trailing zeros. */ + i__2 = i__ + 1; + for (lastv = *n; lastv >= i__2; --lastv) { + if (v[lastv + i__ * v_dim1] != 0.) { + goto L219; + } + } +L219: + i__2 = i__ - 1; + for (j = 1; j <= i__2; ++j) { + t[j + i__ * t_dim1] = -tau[i__] * v[i__ + j * v_dim1]; + } + j = min(lastv,prevlastv); + +/* T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)**T * V(i:j,i) */ + + i__2 = j - i__; + i__3 = i__ - 1; + d__1 = -tau[i__]; + dgemv_((char *)"Transpose", &i__2, &i__3, &d__1, &v[i__ + 1 + + v_dim1], ldv, &v[i__ + 1 + i__ * v_dim1], &c__1, & + c_b7, &t[i__ * t_dim1 + 1], &c__1, (ftnlen)9); + } else { +/* Skip any trailing zeros. */ + i__2 = i__ + 1; + for (lastv = *n; lastv >= i__2; --lastv) { + if (v[i__ + lastv * v_dim1] != 0.) { + goto L235; + } + } +L235: + i__2 = i__ - 1; + for (j = 1; j <= i__2; ++j) { + t[j + i__ * t_dim1] = -tau[i__] * v[j + i__ * v_dim1]; + } + j = min(lastv,prevlastv); + +/* T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)**T */ + + i__2 = i__ - 1; + i__3 = j - i__; + d__1 = -tau[i__]; + dgemv_((char *)"No transpose", &i__2, &i__3, &d__1, &v[(i__ + 1) * + v_dim1 + 1], ldv, &v[i__ + (i__ + 1) * v_dim1], + ldv, &c_b7, &t[i__ * t_dim1 + 1], &c__1, (ftnlen) + 12); + } + +/* T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) */ + + i__2 = i__ - 1; + dtrmv_((char *)"Upper", (char *)"No transpose", (char *)"Non-unit", &i__2, &t[ + t_offset], ldt, &t[i__ * t_dim1 + 1], &c__1, (ftnlen) + 5, (ftnlen)12, (ftnlen)8); + t[i__ + i__ * t_dim1] = tau[i__]; + if (i__ > 1) { + prevlastv = max(prevlastv,lastv); + } else { + prevlastv = lastv; + } + } + } + } else { + prevlastv = 1; + for (i__ = *k; i__ >= 1; --i__) { + if (tau[i__] == 0.) { + +/* H(i) = I */ + + i__1 = *k; + for (j = i__; j <= i__1; ++j) { + t[j + i__ * t_dim1] = 0.; + } + } else { + +/* general case */ + + if (i__ < *k) { + if (lsame_(storev, (char *)"C", (ftnlen)1, (ftnlen)1)) { +/* Skip any leading zeros. */ + i__1 = i__ - 1; + for (lastv = 1; lastv <= i__1; ++lastv) { + if (v[lastv + i__ * v_dim1] != 0.) { + goto L280; + } + } +L280: + i__1 = *k; + for (j = i__ + 1; j <= i__1; ++j) { + t[j + i__ * t_dim1] = -tau[i__] * v[*n - *k + i__ + + j * v_dim1]; + } + j = max(lastv,prevlastv); + +/* T(i+1:k,i) = -tau(i) * V(j:n-k+i,i+1:k)**T * V(j:n-k+i,i) */ + + i__1 = *n - *k + i__ - j; + i__2 = *k - i__; + d__1 = -tau[i__]; + dgemv_((char *)"Transpose", &i__1, &i__2, &d__1, &v[j + (i__ + + 1) * v_dim1], ldv, &v[j + i__ * v_dim1], & + c__1, &c_b7, &t[i__ + 1 + i__ * t_dim1], & + c__1, (ftnlen)9); + } else { +/* Skip any leading zeros. */ + i__1 = i__ - 1; + for (lastv = 1; lastv <= i__1; ++lastv) { + if (v[i__ + lastv * v_dim1] != 0.) { + goto L296; + } + } +L296: + i__1 = *k; + for (j = i__ + 1; j <= i__1; ++j) { + t[j + i__ * t_dim1] = -tau[i__] * v[j + (*n - *k + + i__) * v_dim1]; + } + j = max(lastv,prevlastv); + +/* T(i+1:k,i) = -tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)**T */ + + i__1 = *k - i__; + i__2 = *n - *k + i__ - j; + d__1 = -tau[i__]; + dgemv_((char *)"No transpose", &i__1, &i__2, &d__1, &v[i__ + + 1 + j * v_dim1], ldv, &v[i__ + j * v_dim1], + ldv, &c_b7, &t[i__ + 1 + i__ * t_dim1], &c__1, + (ftnlen)12); + } + +/* T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) */ + + i__1 = *k - i__; + dtrmv_((char *)"Lower", (char *)"No transpose", (char *)"Non-unit", &i__1, &t[i__ + + 1 + (i__ + 1) * t_dim1], ldt, &t[i__ + 1 + i__ * + t_dim1], &c__1, (ftnlen)5, (ftnlen)12, (ftnlen)8) + ; + if (i__ > 1) { + prevlastv = min(prevlastv,lastv); + } else { + prevlastv = lastv; + } + } + t[i__ + i__ * t_dim1] = tau[i__]; + } + } + } + return 0; + +/* End of DLARFT */ + +} /* dlarft_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dlartg.cpp b/lib/linalg/dlartg.cpp new file mode 100644 index 0000000000..118d7865f6 --- /dev/null +++ b/lib/linalg/dlartg.cpp @@ -0,0 +1,258 @@ +/* fortran/dlartg.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* > \brief \b DLARTG generates a plane rotation with real cosine and real sine. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DLARTG + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DLARTG( F, G, CS, SN, R ) */ + +/* .. Scalar Arguments .. */ +/* DOUBLE PRECISION CS, F, G, R, SN */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DLARTG generate a plane rotation so that */ +/* > */ +/* > [ CS SN ] . [ F ] = [ R ] where CS**2 + SN**2 = 1. */ +/* > [ -SN CS ] [ G ] [ 0 ] */ +/* > */ +/* > This is a slower, more accurate version of the BLAS1 routine DROTG, */ +/* > with the following other differences: */ +/* > F and G are unchanged on return. */ +/* > If G=0, then CS=1 and SN=0. */ +/* > If F=0 and (G .ne. 0), then CS=0 and SN=1 without doing any */ +/* > floating point operations (saves work in DBDSQR when */ +/* > there are zeros on the diagonal). */ +/* > */ +/* > If F exceeds G in magnitude, CS will be positive. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] F */ +/* > \verbatim */ +/* > F is DOUBLE PRECISION */ +/* > The first component of vector to be rotated. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] G */ +/* > \verbatim */ +/* > G is DOUBLE PRECISION */ +/* > The second component of vector to be rotated. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] CS */ +/* > \verbatim */ +/* > CS is DOUBLE PRECISION */ +/* > The cosine of the rotation. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] SN */ +/* > \verbatim */ +/* > SN is DOUBLE PRECISION */ +/* > The sine of the rotation. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] R */ +/* > \verbatim */ +/* > R is DOUBLE PRECISION */ +/* > The nonzero component of the rotated vector. */ +/* > */ +/* > This version has a few statements commented out for thread safety */ +/* > (machine parameters are computed on each entry). 10 feb 03, SJH. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup OTHERauxiliary */ + +/* ===================================================================== */ +/* Subroutine */ int dlartg_(doublereal *f, doublereal *g, doublereal *cs, + doublereal *sn, doublereal *r__) +{ + /* System generated locals */ + integer i__1; + doublereal d__1, d__2; + + /* Builtin functions */ + double log(doublereal), pow_di(doublereal *, integer *), sqrt(doublereal); + + /* Local variables */ + integer i__; + doublereal f1, g1, eps, scale; + integer count; + doublereal safmn2, safmx2; + extern doublereal dlamch_(char *, ftnlen); + doublereal safmin; + + +/* -- LAPACK auxiliary routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + +/* .. Scalar Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* LOGICAL FIRST */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Save statement .. */ +/* SAVE FIRST, SAFMX2, SAFMIN, SAFMN2 */ +/* .. */ +/* .. Data statements .. */ +/* DATA FIRST / .TRUE. / */ +/* .. */ +/* .. Executable Statements .. */ + +/* IF( FIRST ) THEN */ + safmin = dlamch_((char *)"S", (ftnlen)1); + eps = dlamch_((char *)"E", (ftnlen)1); + d__1 = dlamch_((char *)"B", (ftnlen)1); + i__1 = (integer) (log(safmin / eps) / log(dlamch_((char *)"B", (ftnlen)1)) / 2.); + safmn2 = pow_di(&d__1, &i__1); + safmx2 = 1. / safmn2; +/* FIRST = .FALSE. */ +/* END IF */ + if (*g == 0.) { + *cs = 1.; + *sn = 0.; + *r__ = *f; + } else if (*f == 0.) { + *cs = 0.; + *sn = 1.; + *r__ = *g; + } else { + f1 = *f; + g1 = *g; +/* Computing MAX */ + d__1 = abs(f1), d__2 = abs(g1); + scale = max(d__1,d__2); + if (scale >= safmx2) { + count = 0; +L10: + ++count; + f1 *= safmn2; + g1 *= safmn2; +/* Computing MAX */ + d__1 = abs(f1), d__2 = abs(g1); + scale = max(d__1,d__2); + if (scale >= safmx2) { + goto L10; + } +/* Computing 2nd power */ + d__1 = f1; +/* Computing 2nd power */ + d__2 = g1; + *r__ = sqrt(d__1 * d__1 + d__2 * d__2); + *cs = f1 / *r__; + *sn = g1 / *r__; + i__1 = count; + for (i__ = 1; i__ <= i__1; ++i__) { + *r__ *= safmx2; +/* L20: */ + } + } else if (scale <= safmn2) { + count = 0; +L30: + ++count; + f1 *= safmx2; + g1 *= safmx2; +/* Computing MAX */ + d__1 = abs(f1), d__2 = abs(g1); + scale = max(d__1,d__2); + if (scale <= safmn2) { + goto L30; + } +/* Computing 2nd power */ + d__1 = f1; +/* Computing 2nd power */ + d__2 = g1; + *r__ = sqrt(d__1 * d__1 + d__2 * d__2); + *cs = f1 / *r__; + *sn = g1 / *r__; + i__1 = count; + for (i__ = 1; i__ <= i__1; ++i__) { + *r__ *= safmn2; +/* L40: */ + } + } else { +/* Computing 2nd power */ + d__1 = f1; +/* Computing 2nd power */ + d__2 = g1; + *r__ = sqrt(d__1 * d__1 + d__2 * d__2); + *cs = f1 / *r__; + *sn = g1 / *r__; + } + if (abs(*f) > abs(*g) && *cs < 0.) { + *cs = -(*cs); + *sn = -(*sn); + *r__ = -(*r__); + } + } + return 0; + +/* End of DLARTG */ + +} /* dlartg_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dlas2.cpp b/lib/linalg/dlas2.cpp new file mode 100644 index 0000000000..e3a53a9bac --- /dev/null +++ b/lib/linalg/dlas2.cpp @@ -0,0 +1,212 @@ +/* fortran/dlas2.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* > \brief \b DLAS2 computes singular values of a 2-by-2 triangular matrix. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DLAS2 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DLAS2( F, G, H, SSMIN, SSMAX ) */ + +/* .. Scalar Arguments .. */ +/* DOUBLE PRECISION F, G, H, SSMAX, SSMIN */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DLAS2 computes the singular values of the 2-by-2 matrix */ +/* > [ F G ] */ +/* > [ 0 H ]. */ +/* > On return, SSMIN is the smaller singular value and SSMAX is the */ +/* > larger singular value. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] F */ +/* > \verbatim */ +/* > F is DOUBLE PRECISION */ +/* > The (1,1) element of the 2-by-2 matrix. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] G */ +/* > \verbatim */ +/* > G is DOUBLE PRECISION */ +/* > The (1,2) element of the 2-by-2 matrix. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] H */ +/* > \verbatim */ +/* > H is DOUBLE PRECISION */ +/* > The (2,2) element of the 2-by-2 matrix. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] SSMIN */ +/* > \verbatim */ +/* > SSMIN is DOUBLE PRECISION */ +/* > The smaller singular value. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] SSMAX */ +/* > \verbatim */ +/* > SSMAX is DOUBLE PRECISION */ +/* > The larger singular value. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup OTHERauxiliary */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > Barring over/underflow, all output quantities are correct to within */ +/* > a few units in the last place (ulps), even in the absence of a guard */ +/* > digit in addition/subtraction. */ +/* > */ +/* > In IEEE arithmetic, the code works correctly if one matrix element is */ +/* > infinite. */ +/* > */ +/* > Overflow will not occur unless the largest singular value itself */ +/* > overflows, or is within a few ulps of overflow. (On machines with */ +/* > partial overflow, like the Cray, overflow may occur if the largest */ +/* > singular value is within a factor of 2 of overflow.) */ +/* > */ +/* > Underflow is harmless if underflow is gradual. Otherwise, results */ +/* > may correspond to a matrix modified by perturbations of size near */ +/* > the underflow threshold. */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int dlas2_(doublereal *f, doublereal *g, doublereal *h__, + doublereal *ssmin, doublereal *ssmax) +{ + /* System generated locals */ + doublereal d__1, d__2; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + doublereal c__, fa, ga, ha, as, at, au, fhmn, fhmx; + + +/* -- LAPACK auxiliary routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ + +/* ==================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + fa = abs(*f); + ga = abs(*g); + ha = abs(*h__); + fhmn = min(fa,ha); + fhmx = max(fa,ha); + if (fhmn == 0.) { + *ssmin = 0.; + if (fhmx == 0.) { + *ssmax = ga; + } else { +/* Computing 2nd power */ + d__1 = min(fhmx,ga) / max(fhmx,ga); + *ssmax = max(fhmx,ga) * sqrt(d__1 * d__1 + 1.); + } + } else { + if (ga < fhmx) { + as = fhmn / fhmx + 1.; + at = (fhmx - fhmn) / fhmx; +/* Computing 2nd power */ + d__1 = ga / fhmx; + au = d__1 * d__1; + c__ = 2. / (sqrt(as * as + au) + sqrt(at * at + au)); + *ssmin = fhmn * c__; + *ssmax = fhmx / c__; + } else { + au = fhmx / ga; + if (au == 0.) { + +/* Avoid possible harmful underflow if exponent range */ +/* asymmetric (true SSMIN may not underflow even if */ +/* AU underflows) */ + + *ssmin = fhmn * fhmx / ga; + *ssmax = ga; + } else { + as = fhmn / fhmx + 1.; + at = (fhmx - fhmn) / fhmx; +/* Computing 2nd power */ + d__1 = as * au; +/* Computing 2nd power */ + d__2 = at * au; + c__ = 1. / (sqrt(d__1 * d__1 + 1.) + sqrt(d__2 * d__2 + 1.)); + *ssmin = fhmn * c__ * au; + *ssmin += *ssmin; + *ssmax = ga / (c__ + c__); + } + } + } + return 0; + +/* End of DLAS2 */ + +} /* dlas2_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dlascl.cpp b/lib/linalg/dlascl.cpp new file mode 100644 index 0000000000..932e1c63cb --- /dev/null +++ b/lib/linalg/dlascl.cpp @@ -0,0 +1,448 @@ +/* fortran/dlascl.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* > \brief \b DLASCL multiplies a general rectangular matrix by a real scalar defined as cto/cfrom. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DLASCL + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DLASCL( TYPE, KL, KU, CFROM, CTO, M, N, A, LDA, INFO ) */ + +/* .. Scalar Arguments .. */ +/* CHARACTER TYPE */ +/* INTEGER INFO, KL, KU, LDA, M, N */ +/* DOUBLE PRECISION CFROM, CTO */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION A( LDA, * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DLASCL multiplies the M by N real matrix A by the real scalar */ +/* > CTO/CFROM. This is done without over/underflow as long as the final */ +/* > result CTO*A(I,J)/CFROM does not over/underflow. TYPE specifies that */ +/* > A may be full, upper triangular, lower triangular, upper Hessenberg, */ +/* > or banded. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] TYPE */ +/* > \verbatim */ +/* > TYPE is CHARACTER*1 */ +/* > TYPE indices the storage type of the input matrix. */ +/* > = 'G': A is a full matrix. */ +/* > = 'L': A is a lower triangular matrix. */ +/* > = 'U': A is an upper triangular matrix. */ +/* > = 'H': A is an upper Hessenberg matrix. */ +/* > = 'B': A is a symmetric band matrix with lower bandwidth KL */ +/* > and upper bandwidth KU and with the only the lower */ +/* > half stored. */ +/* > = 'Q': A is a symmetric band matrix with lower bandwidth KL */ +/* > and upper bandwidth KU and with the only the upper */ +/* > half stored. */ +/* > = 'Z': A is a band matrix with lower bandwidth KL and upper */ +/* > bandwidth KU. See DGBTRF for storage details. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] KL */ +/* > \verbatim */ +/* > KL is INTEGER */ +/* > The lower bandwidth of A. Referenced only if TYPE = 'B', */ +/* > 'Q' or 'Z'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] KU */ +/* > \verbatim */ +/* > KU is INTEGER */ +/* > The upper bandwidth of A. Referenced only if TYPE = 'B', */ +/* > 'Q' or 'Z'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] CFROM */ +/* > \verbatim */ +/* > CFROM is DOUBLE PRECISION */ +/* > \endverbatim */ +/* > */ +/* > \param[in] CTO */ +/* > \verbatim */ +/* > CTO is DOUBLE PRECISION */ +/* > */ +/* > The matrix A is multiplied by CTO/CFROM. A(I,J) is computed */ +/* > without over/underflow if the final result CTO*A(I,J)/CFROM */ +/* > can be represented without over/underflow. CFROM must be */ +/* > nonzero. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > The matrix to be multiplied by CTO/CFROM. See TYPE for the */ +/* > storage type. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. */ +/* > If TYPE = 'G', 'L', 'U', 'H', LDA >= max(1,M); */ +/* > TYPE = 'B', LDA >= KL+1; */ +/* > TYPE = 'Q', LDA >= KU+1; */ +/* > TYPE = 'Z', LDA >= 2*KL+KU+1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > 0 - successful exit */ +/* > <0 - if INFO = -i, the i-th argument had an illegal value. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup OTHERauxiliary */ + +/* ===================================================================== */ +/* Subroutine */ int dlascl_(char *type__, integer *kl, integer *ku, + doublereal *cfrom, doublereal *cto, integer *m, integer *n, + doublereal *a, integer *lda, integer *info, ftnlen type_len) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + + /* Local variables */ + integer i__, j, k1, k2, k3, k4; + doublereal mul, cto1; + logical done; + doublereal ctoc; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + integer itype; + doublereal cfrom1; + extern doublereal dlamch_(char *, ftnlen); + doublereal cfromc; + extern logical disnan_(doublereal *); + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + doublereal bignum, smlnum; + + +/* -- LAPACK auxiliary routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + + if (lsame_(type__, (char *)"G", (ftnlen)1, (ftnlen)1)) { + itype = 0; + } else if (lsame_(type__, (char *)"L", (ftnlen)1, (ftnlen)1)) { + itype = 1; + } else if (lsame_(type__, (char *)"U", (ftnlen)1, (ftnlen)1)) { + itype = 2; + } else if (lsame_(type__, (char *)"H", (ftnlen)1, (ftnlen)1)) { + itype = 3; + } else if (lsame_(type__, (char *)"B", (ftnlen)1, (ftnlen)1)) { + itype = 4; + } else if (lsame_(type__, (char *)"Q", (ftnlen)1, (ftnlen)1)) { + itype = 5; + } else if (lsame_(type__, (char *)"Z", (ftnlen)1, (ftnlen)1)) { + itype = 6; + } else { + itype = -1; + } + + if (itype == -1) { + *info = -1; + } else if (*cfrom == 0. || disnan_(cfrom)) { + *info = -4; + } else if (disnan_(cto)) { + *info = -5; + } else if (*m < 0) { + *info = -6; + } else if (*n < 0 || itype == 4 && *n != *m || itype == 5 && *n != *m) { + *info = -7; + } else if (itype <= 3 && *lda < max(1,*m)) { + *info = -9; + } else if (itype >= 4) { +/* Computing MAX */ + i__1 = *m - 1; + if (*kl < 0 || *kl > max(i__1,0)) { + *info = -2; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = *n - 1; + if (*ku < 0 || *ku > max(i__1,0) || (itype == 4 || itype == 5) && + *kl != *ku) { + *info = -3; + } else if (itype == 4 && *lda < *kl + 1 || itype == 5 && *lda < * + ku + 1 || itype == 6 && *lda < (*kl << 1) + *ku + 1) { + *info = -9; + } + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DLASCL", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0 || *m == 0) { + return 0; + } + +/* Get machine parameters */ + + smlnum = dlamch_((char *)"S", (ftnlen)1); + bignum = 1. / smlnum; + + cfromc = *cfrom; + ctoc = *cto; + +L10: + cfrom1 = cfromc * smlnum; + if (cfrom1 == cfromc) { +/* CFROMC is an inf. Multiply by a correctly signed zero for */ +/* finite CTOC, or a NaN if CTOC is infinite. */ + mul = ctoc / cfromc; + done = TRUE_; + cto1 = ctoc; + } else { + cto1 = ctoc / bignum; + if (cto1 == ctoc) { +/* CTOC is either 0 or an inf. In both cases, CTOC itself */ +/* serves as the correct multiplication factor. */ + mul = ctoc; + done = TRUE_; + cfromc = 1.; + } else if (abs(cfrom1) > abs(ctoc) && ctoc != 0.) { + mul = smlnum; + done = FALSE_; + cfromc = cfrom1; + } else if (abs(cto1) > abs(cfromc)) { + mul = bignum; + done = FALSE_; + ctoc = cto1; + } else { + mul = ctoc / cfromc; + done = TRUE_; + if (mul == 1.) { + return 0; + } + } + } + + if (itype == 0) { + +/* Full matrix */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] *= mul; +/* L20: */ + } +/* L30: */ + } + + } else if (itype == 1) { + +/* Lower triangular matrix */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = j; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] *= mul; +/* L40: */ + } +/* L50: */ + } + + } else if (itype == 2) { + +/* Upper triangular matrix */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = min(j,*m); + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] *= mul; +/* L60: */ + } +/* L70: */ + } + + } else if (itype == 3) { + +/* Upper Hessenberg matrix */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + i__3 = j + 1; + i__2 = min(i__3,*m); + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] *= mul; +/* L80: */ + } +/* L90: */ + } + + } else if (itype == 4) { + +/* Lower half of a symmetric band matrix */ + + k3 = *kl + 1; + k4 = *n + 1; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + i__3 = k3, i__4 = k4 - j; + i__2 = min(i__3,i__4); + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] *= mul; +/* L100: */ + } +/* L110: */ + } + + } else if (itype == 5) { + +/* Upper half of a symmetric band matrix */ + + k1 = *ku + 2; + k3 = *ku + 1; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MAX */ + i__2 = k1 - j; + i__3 = k3; + for (i__ = max(i__2,1); i__ <= i__3; ++i__) { + a[i__ + j * a_dim1] *= mul; +/* L120: */ + } +/* L130: */ + } + + } else if (itype == 6) { + +/* Band matrix */ + + k1 = *kl + *ku + 2; + k2 = *kl + 1; + k3 = (*kl << 1) + *ku + 1; + k4 = *kl + *ku + 1 + *m; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MAX */ + i__3 = k1 - j; +/* Computing MIN */ + i__4 = k3, i__5 = k4 - j; + i__2 = min(i__4,i__5); + for (i__ = max(i__3,k2); i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] *= mul; +/* L140: */ + } +/* L150: */ + } + + } + + if (! done) { + goto L10; + } + + return 0; + +/* End of DLASCL */ + +} /* dlascl_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dlasd4.cpp b/lib/linalg/dlasd4.cpp new file mode 100644 index 0000000000..b0d8d1f7fe --- /dev/null +++ b/lib/linalg/dlasd4.cpp @@ -0,0 +1,1198 @@ +/* fortran/dlasd4.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* > \brief \b DLASD4 computes the square root of the i-th updated eigenvalue of a positive symmetric rank-one + modification to a positive diagonal matrix. Used by dbdsdc. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DLASD4 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DLASD4( N, I, D, Z, DELTA, RHO, SIGMA, WORK, INFO ) */ + +/* .. Scalar Arguments .. */ +/* INTEGER I, INFO, N */ +/* DOUBLE PRECISION RHO, SIGMA */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION D( * ), DELTA( * ), WORK( * ), Z( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > This subroutine computes the square root of the I-th updated */ +/* > eigenvalue of a positive symmetric rank-one modification to */ +/* > a positive diagonal matrix whose entries are given as the squares */ +/* > of the corresponding entries in the array d, and that */ +/* > */ +/* > 0 <= D(i) < D(j) for i < j */ +/* > */ +/* > and that RHO > 0. This is arranged by the calling routine, and is */ +/* > no loss in generality. The rank-one modified system is thus */ +/* > */ +/* > diag( D ) * diag( D ) + RHO * Z * Z_transpose. */ +/* > */ +/* > where we assume the Euclidean norm of Z is 1. */ +/* > */ +/* > The method consists of approximating the rational functions in the */ +/* > secular equation by simpler interpolating rational functions. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The length of all arrays. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] I */ +/* > \verbatim */ +/* > I is INTEGER */ +/* > The index of the eigenvalue to be computed. 1 <= I <= N. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] D */ +/* > \verbatim */ +/* > D is DOUBLE PRECISION array, dimension ( N ) */ +/* > The original eigenvalues. It is assumed that they are in */ +/* > order, 0 <= D(I) < D(J) for I < J. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] Z */ +/* > \verbatim */ +/* > Z is DOUBLE PRECISION array, dimension ( N ) */ +/* > The components of the updating vector. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] DELTA */ +/* > \verbatim */ +/* > DELTA is DOUBLE PRECISION array, dimension ( N ) */ +/* > If N .ne. 1, DELTA contains (D(j) - sigma_I) in its j-th */ +/* > component. If N = 1, then DELTA(1) = 1. The vector DELTA */ +/* > contains the information necessary to construct the */ +/* > (singular) eigenvectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] RHO */ +/* > \verbatim */ +/* > RHO is DOUBLE PRECISION */ +/* > The scalar in the symmetric updating formula. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] SIGMA */ +/* > \verbatim */ +/* > SIGMA is DOUBLE PRECISION */ +/* > The computed sigma_I, the I-th updated eigenvalue. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension ( N ) */ +/* > If N .ne. 1, WORK contains (D(j) + sigma_I) in its j-th */ +/* > component. If N = 1, then WORK( 1 ) = 1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > > 0: if INFO = 1, the updating process failed. */ +/* > \endverbatim */ + +/* > \par Internal Parameters: */ +/* ========================= */ +/* > */ +/* > \verbatim */ +/* > Logical variable ORGATI (origin-at-i?) is used for distinguishing */ +/* > whether D(i) or D(i+1) is treated as the origin. */ +/* > */ +/* > ORGATI = .true. origin at i */ +/* > ORGATI = .false. origin at i+1 */ +/* > */ +/* > Logical variable SWTCH3 (switch-for-3-poles?) is for noting */ +/* > if we are working with THREE poles! */ +/* > */ +/* > MAXIT is the maximum number of iterations allowed for each */ +/* > eigenvalue. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup OTHERauxiliary */ + +/* > \par Contributors: */ +/* ================== */ +/* > */ +/* > Ren-Cang Li, Computer Science Division, University of California */ +/* > at Berkeley, USA */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int dlasd4_(integer *n, integer *i__, doublereal *d__, + doublereal *z__, doublereal *delta, doublereal *rho, doublereal * + sigma, doublereal *work, integer *info) +{ + /* System generated locals */ + integer i__1; + doublereal d__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + doublereal a, b, c__; + integer j; + doublereal w, dd[3]; + integer ii; + doublereal dw, zz[3]; + integer ip1; + doublereal sq2, eta, phi, eps, tau, psi; + integer iim1, iip1; + doublereal tau2, dphi, sglb, dpsi, sgub; + integer iter; + doublereal temp, prew, temp1, temp2, dtiim, delsq, dtiip; + integer niter; + doublereal dtisq; + logical swtch; + doublereal dtnsq; + extern /* Subroutine */ int dlaed6_(integer *, logical *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, integer *) + , dlasd5_(integer *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *); + doublereal delsq2, dtnsq1; + logical swtch3; + extern doublereal dlamch_(char *, ftnlen); + logical orgati; + doublereal erretm, dtipsq, rhoinv; + logical geomavg; + + +/* -- LAPACK auxiliary routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Local Arrays .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Since this routine is called in an inner loop, we do no argument */ +/* checking. */ + +/* Quick return for N=1 and 2. */ + + /* Parameter adjustments */ + --work; + --delta; + --z__; + --d__; + + /* Function Body */ + *info = 0; + if (*n == 1) { + +/* Presumably, I=1 upon entry */ + + *sigma = sqrt(d__[1] * d__[1] + *rho * z__[1] * z__[1]); + delta[1] = 1.; + work[1] = 1.; + return 0; + } + if (*n == 2) { + dlasd5_(i__, &d__[1], &z__[1], &delta[1], rho, sigma, &work[1]); + return 0; + } + +/* Compute machine epsilon */ + + eps = dlamch_((char *)"Epsilon", (ftnlen)7); + rhoinv = 1. / *rho; + tau2 = 0.; + +/* The case I = N */ + + if (*i__ == *n) { + +/* Initialize some basic variables */ + + ii = *n - 1; + niter = 1; + +/* Calculate initial guess */ + + temp = *rho / 2.; + +/* If ||Z||_2 is not one, then TEMP should be set to */ +/* RHO * ||Z||_2^2 / TWO */ + + temp1 = temp / (d__[*n] + sqrt(d__[*n] * d__[*n] + temp)); + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + work[j] = d__[j] + d__[*n] + temp1; + delta[j] = d__[j] - d__[*n] - temp1; +/* L10: */ + } + + psi = 0.; + i__1 = *n - 2; + for (j = 1; j <= i__1; ++j) { + psi += z__[j] * z__[j] / (delta[j] * work[j]); +/* L20: */ + } + + c__ = rhoinv + psi; + w = c__ + z__[ii] * z__[ii] / (delta[ii] * work[ii]) + z__[*n] * z__[* + n] / (delta[*n] * work[*n]); + + if (w <= 0.) { + temp1 = sqrt(d__[*n] * d__[*n] + *rho); + temp = z__[*n - 1] * z__[*n - 1] / ((d__[*n - 1] + temp1) * (d__[* + n] - d__[*n - 1] + *rho / (d__[*n] + temp1))) + z__[*n] * + z__[*n] / *rho; + +/* The following TAU2 is to approximate */ +/* SIGMA_n^2 - D( N )*D( N ) */ + + if (c__ <= temp) { + tau = *rho; + } else { + delsq = (d__[*n] - d__[*n - 1]) * (d__[*n] + d__[*n - 1]); + a = -c__ * delsq + z__[*n - 1] * z__[*n - 1] + z__[*n] * z__[* + n]; + b = z__[*n] * z__[*n] * delsq; + if (a < 0.) { + tau2 = b * 2. / (sqrt(a * a + b * 4. * c__) - a); + } else { + tau2 = (a + sqrt(a * a + b * 4. * c__)) / (c__ * 2.); + } + tau = tau2 / (d__[*n] + sqrt(d__[*n] * d__[*n] + tau2)); + } + +/* It can be proved that */ +/* D(N)^2+RHO/2 <= SIGMA_n^2 < D(N)^2+TAU2 <= D(N)^2+RHO */ + + } else { + delsq = (d__[*n] - d__[*n - 1]) * (d__[*n] + d__[*n - 1]); + a = -c__ * delsq + z__[*n - 1] * z__[*n - 1] + z__[*n] * z__[*n]; + b = z__[*n] * z__[*n] * delsq; + +/* The following TAU2 is to approximate */ +/* SIGMA_n^2 - D( N )*D( N ) */ + + if (a < 0.) { + tau2 = b * 2. / (sqrt(a * a + b * 4. * c__) - a); + } else { + tau2 = (a + sqrt(a * a + b * 4. * c__)) / (c__ * 2.); + } + tau = tau2 / (d__[*n] + sqrt(d__[*n] * d__[*n] + tau2)); + +/* It can be proved that */ +/* D(N)^2 < D(N)^2+TAU2 < SIGMA(N)^2 < D(N)^2+RHO/2 */ + + } + +/* The following TAU is to approximate SIGMA_n - D( N ) */ + +/* TAU = TAU2 / ( D( N )+SQRT( D( N )*D( N )+TAU2 ) ) */ + + *sigma = d__[*n] + tau; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] = d__[j] - d__[*n] - tau; + work[j] = d__[j] + d__[*n] + tau; +/* L30: */ + } + +/* Evaluate PSI and the derivative DPSI */ + + dpsi = 0.; + psi = 0.; + erretm = 0.; + i__1 = ii; + for (j = 1; j <= i__1; ++j) { + temp = z__[j] / (delta[j] * work[j]); + psi += z__[j] * temp; + dpsi += temp * temp; + erretm += psi; +/* L40: */ + } + erretm = abs(erretm); + +/* Evaluate PHI and the derivative DPHI */ + + temp = z__[*n] / (delta[*n] * work[*n]); + phi = z__[*n] * temp; + dphi = temp * temp; + erretm = (-phi - psi) * 8. + erretm - phi + rhoinv; +/* $ + ABS( TAU2 )*( DPSI+DPHI ) */ + + w = rhoinv + phi + psi; + +/* Test for convergence */ + + if (abs(w) <= eps * erretm) { + goto L240; + } + +/* Calculate the new step */ + + ++niter; + dtnsq1 = work[*n - 1] * delta[*n - 1]; + dtnsq = work[*n] * delta[*n]; + c__ = w - dtnsq1 * dpsi - dtnsq * dphi; + a = (dtnsq + dtnsq1) * w - dtnsq * dtnsq1 * (dpsi + dphi); + b = dtnsq * dtnsq1 * w; + if (c__ < 0.) { + c__ = abs(c__); + } + if (c__ == 0.) { + eta = *rho - *sigma * *sigma; + } else if (a >= 0.) { + eta = (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / (c__ + * 2.); + } else { + eta = b * 2. / (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1))) + ); + } + +/* Note, eta should be positive if w is negative, and */ +/* eta should be negative otherwise. However, */ +/* if for some reason caused by roundoff, eta*w > 0, */ +/* we simply use one Newton step instead. This way */ +/* will guarantee eta*w < 0. */ + + if (w * eta > 0.) { + eta = -w / (dpsi + dphi); + } + temp = eta - dtnsq; + if (temp > *rho) { + eta = *rho + dtnsq; + } + + eta /= *sigma + sqrt(eta + *sigma * *sigma); + tau += eta; + *sigma += eta; + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] -= eta; + work[j] += eta; +/* L50: */ + } + +/* Evaluate PSI and the derivative DPSI */ + + dpsi = 0.; + psi = 0.; + erretm = 0.; + i__1 = ii; + for (j = 1; j <= i__1; ++j) { + temp = z__[j] / (work[j] * delta[j]); + psi += z__[j] * temp; + dpsi += temp * temp; + erretm += psi; +/* L60: */ + } + erretm = abs(erretm); + +/* Evaluate PHI and the derivative DPHI */ + + tau2 = work[*n] * delta[*n]; + temp = z__[*n] / tau2; + phi = z__[*n] * temp; + dphi = temp * temp; + erretm = (-phi - psi) * 8. + erretm - phi + rhoinv; +/* $ + ABS( TAU2 )*( DPSI+DPHI ) */ + + w = rhoinv + phi + psi; + +/* Main loop to update the values of the array DELTA */ + + iter = niter + 1; + + for (niter = iter; niter <= 400; ++niter) { + +/* Test for convergence */ + + if (abs(w) <= eps * erretm) { + goto L240; + } + +/* Calculate the new step */ + + dtnsq1 = work[*n - 1] * delta[*n - 1]; + dtnsq = work[*n] * delta[*n]; + c__ = w - dtnsq1 * dpsi - dtnsq * dphi; + a = (dtnsq + dtnsq1) * w - dtnsq1 * dtnsq * (dpsi + dphi); + b = dtnsq1 * dtnsq * w; + if (a >= 0.) { + eta = (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / ( + c__ * 2.); + } else { + eta = b * 2. / (a - sqrt((d__1 = a * a - b * 4. * c__, abs( + d__1)))); + } + +/* Note, eta should be positive if w is negative, and */ +/* eta should be negative otherwise. However, */ +/* if for some reason caused by roundoff, eta*w > 0, */ +/* we simply use one Newton step instead. This way */ +/* will guarantee eta*w < 0. */ + + if (w * eta > 0.) { + eta = -w / (dpsi + dphi); + } + temp = eta - dtnsq; + if (temp <= 0.) { + eta /= 2.; + } + + eta /= *sigma + sqrt(eta + *sigma * *sigma); + tau += eta; + *sigma += eta; + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] -= eta; + work[j] += eta; +/* L70: */ + } + +/* Evaluate PSI and the derivative DPSI */ + + dpsi = 0.; + psi = 0.; + erretm = 0.; + i__1 = ii; + for (j = 1; j <= i__1; ++j) { + temp = z__[j] / (work[j] * delta[j]); + psi += z__[j] * temp; + dpsi += temp * temp; + erretm += psi; +/* L80: */ + } + erretm = abs(erretm); + +/* Evaluate PHI and the derivative DPHI */ + + tau2 = work[*n] * delta[*n]; + temp = z__[*n] / tau2; + phi = z__[*n] * temp; + dphi = temp * temp; + erretm = (-phi - psi) * 8. + erretm - phi + rhoinv; +/* $ + ABS( TAU2 )*( DPSI+DPHI ) */ + + w = rhoinv + phi + psi; +/* L90: */ + } + +/* Return with INFO = 1, NITER = MAXIT and not converged */ + + *info = 1; + goto L240; + +/* End for the case I = N */ + + } else { + +/* The case for I < N */ + + niter = 1; + ip1 = *i__ + 1; + +/* Calculate initial guess */ + + delsq = (d__[ip1] - d__[*i__]) * (d__[ip1] + d__[*i__]); + delsq2 = delsq / 2.; + sq2 = sqrt((d__[*i__] * d__[*i__] + d__[ip1] * d__[ip1]) / 2.); + temp = delsq2 / (d__[*i__] + sq2); + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + work[j] = d__[j] + d__[*i__] + temp; + delta[j] = d__[j] - d__[*i__] - temp; +/* L100: */ + } + + psi = 0.; + i__1 = *i__ - 1; + for (j = 1; j <= i__1; ++j) { + psi += z__[j] * z__[j] / (work[j] * delta[j]); +/* L110: */ + } + + phi = 0.; + i__1 = *i__ + 2; + for (j = *n; j >= i__1; --j) { + phi += z__[j] * z__[j] / (work[j] * delta[j]); +/* L120: */ + } + c__ = rhoinv + psi + phi; + w = c__ + z__[*i__] * z__[*i__] / (work[*i__] * delta[*i__]) + z__[ + ip1] * z__[ip1] / (work[ip1] * delta[ip1]); + + geomavg = FALSE_; + if (w > 0.) { + +/* d(i)^2 < the ith sigma^2 < (d(i)^2+d(i+1)^2)/2 */ + +/* We choose d(i) as origin. */ + + orgati = TRUE_; + ii = *i__; + sglb = 0.; + sgub = delsq2 / (d__[*i__] + sq2); + a = c__ * delsq + z__[*i__] * z__[*i__] + z__[ip1] * z__[ip1]; + b = z__[*i__] * z__[*i__] * delsq; + if (a > 0.) { + tau2 = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, abs( + d__1)))); + } else { + tau2 = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / + (c__ * 2.); + } + +/* TAU2 now is an estimation of SIGMA^2 - D( I )^2. The */ +/* following, however, is the corresponding estimation of */ +/* SIGMA - D( I ). */ + + tau = tau2 / (d__[*i__] + sqrt(d__[*i__] * d__[*i__] + tau2)); + temp = sqrt(eps); + if (d__[*i__] <= temp * d__[ip1] && (d__1 = z__[*i__], abs(d__1)) + <= temp && d__[*i__] > 0.) { +/* Computing MIN */ + d__1 = d__[*i__] * 10.; + tau = min(d__1,sgub); + geomavg = TRUE_; + } + } else { + +/* (d(i)^2+d(i+1)^2)/2 <= the ith sigma^2 < d(i+1)^2/2 */ + +/* We choose d(i+1) as origin. */ + + orgati = FALSE_; + ii = ip1; + sglb = -delsq2 / (d__[ii] + sq2); + sgub = 0.; + a = c__ * delsq - z__[*i__] * z__[*i__] - z__[ip1] * z__[ip1]; + b = z__[ip1] * z__[ip1] * delsq; + if (a < 0.) { + tau2 = b * 2. / (a - sqrt((d__1 = a * a + b * 4. * c__, abs( + d__1)))); + } else { + tau2 = -(a + sqrt((d__1 = a * a + b * 4. * c__, abs(d__1)))) / + (c__ * 2.); + } + +/* TAU2 now is an estimation of SIGMA^2 - D( IP1 )^2. The */ +/* following, however, is the corresponding estimation of */ +/* SIGMA - D( IP1 ). */ + + tau = tau2 / (d__[ip1] + sqrt((d__1 = d__[ip1] * d__[ip1] + tau2, + abs(d__1)))); + } + + *sigma = d__[ii] + tau; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + work[j] = d__[j] + d__[ii] + tau; + delta[j] = d__[j] - d__[ii] - tau; +/* L130: */ + } + iim1 = ii - 1; + iip1 = ii + 1; + +/* Evaluate PSI and the derivative DPSI */ + + dpsi = 0.; + psi = 0.; + erretm = 0.; + i__1 = iim1; + for (j = 1; j <= i__1; ++j) { + temp = z__[j] / (work[j] * delta[j]); + psi += z__[j] * temp; + dpsi += temp * temp; + erretm += psi; +/* L150: */ + } + erretm = abs(erretm); + +/* Evaluate PHI and the derivative DPHI */ + + dphi = 0.; + phi = 0.; + i__1 = iip1; + for (j = *n; j >= i__1; --j) { + temp = z__[j] / (work[j] * delta[j]); + phi += z__[j] * temp; + dphi += temp * temp; + erretm += phi; +/* L160: */ + } + + w = rhoinv + phi + psi; + +/* W is the value of the secular function with */ +/* its ii-th element removed. */ + + swtch3 = FALSE_; + if (orgati) { + if (w < 0.) { + swtch3 = TRUE_; + } + } else { + if (w > 0.) { + swtch3 = TRUE_; + } + } + if (ii == 1 || ii == *n) { + swtch3 = FALSE_; + } + + temp = z__[ii] / (work[ii] * delta[ii]); + dw = dpsi + dphi + temp * temp; + temp = z__[ii] * temp; + w += temp; + erretm = (phi - psi) * 8. + erretm + rhoinv * 2. + abs(temp) * 3.; +/* $ + ABS( TAU2 )*DW */ + +/* Test for convergence */ + + if (abs(w) <= eps * erretm) { + goto L240; + } + + if (w <= 0.) { + sglb = max(sglb,tau); + } else { + sgub = min(sgub,tau); + } + +/* Calculate the new step */ + + ++niter; + if (! swtch3) { + dtipsq = work[ip1] * delta[ip1]; + dtisq = work[*i__] * delta[*i__]; + if (orgati) { +/* Computing 2nd power */ + d__1 = z__[*i__] / dtisq; + c__ = w - dtipsq * dw + delsq * (d__1 * d__1); + } else { +/* Computing 2nd power */ + d__1 = z__[ip1] / dtipsq; + c__ = w - dtisq * dw - delsq * (d__1 * d__1); + } + a = (dtipsq + dtisq) * w - dtipsq * dtisq * dw; + b = dtipsq * dtisq * w; + if (c__ == 0.) { + if (a == 0.) { + if (orgati) { + a = z__[*i__] * z__[*i__] + dtipsq * dtipsq * (dpsi + + dphi); + } else { + a = z__[ip1] * z__[ip1] + dtisq * dtisq * (dpsi + + dphi); + } + } + eta = b / a; + } else if (a <= 0.) { + eta = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / ( + c__ * 2.); + } else { + eta = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, abs( + d__1)))); + } + } else { + +/* Interpolation using THREE most relevant poles */ + + dtiim = work[iim1] * delta[iim1]; + dtiip = work[iip1] * delta[iip1]; + temp = rhoinv + psi + phi; + if (orgati) { + temp1 = z__[iim1] / dtiim; + temp1 *= temp1; + c__ = temp - dtiip * (dpsi + dphi) - (d__[iim1] - d__[iip1]) * + (d__[iim1] + d__[iip1]) * temp1; + zz[0] = z__[iim1] * z__[iim1]; + if (dpsi < temp1) { + zz[2] = dtiip * dtiip * dphi; + } else { + zz[2] = dtiip * dtiip * (dpsi - temp1 + dphi); + } + } else { + temp1 = z__[iip1] / dtiip; + temp1 *= temp1; + c__ = temp - dtiim * (dpsi + dphi) - (d__[iip1] - d__[iim1]) * + (d__[iim1] + d__[iip1]) * temp1; + if (dphi < temp1) { + zz[0] = dtiim * dtiim * dpsi; + } else { + zz[0] = dtiim * dtiim * (dpsi + (dphi - temp1)); + } + zz[2] = z__[iip1] * z__[iip1]; + } + zz[1] = z__[ii] * z__[ii]; + dd[0] = dtiim; + dd[1] = delta[ii] * work[ii]; + dd[2] = dtiip; + dlaed6_(&niter, &orgati, &c__, dd, zz, &w, &eta, info); + + if (*info != 0) { + +/* If INFO is not 0, i.e., DLAED6 failed, switch back */ +/* to 2 pole interpolation. */ + + swtch3 = FALSE_; + *info = 0; + dtipsq = work[ip1] * delta[ip1]; + dtisq = work[*i__] * delta[*i__]; + if (orgati) { +/* Computing 2nd power */ + d__1 = z__[*i__] / dtisq; + c__ = w - dtipsq * dw + delsq * (d__1 * d__1); + } else { +/* Computing 2nd power */ + d__1 = z__[ip1] / dtipsq; + c__ = w - dtisq * dw - delsq * (d__1 * d__1); + } + a = (dtipsq + dtisq) * w - dtipsq * dtisq * dw; + b = dtipsq * dtisq * w; + if (c__ == 0.) { + if (a == 0.) { + if (orgati) { + a = z__[*i__] * z__[*i__] + dtipsq * dtipsq * ( + dpsi + dphi); + } else { + a = z__[ip1] * z__[ip1] + dtisq * dtisq * (dpsi + + dphi); + } + } + eta = b / a; + } else if (a <= 0.) { + eta = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) + / (c__ * 2.); + } else { + eta = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, + abs(d__1)))); + } + } + } + +/* Note, eta should be positive if w is negative, and */ +/* eta should be negative otherwise. However, */ +/* if for some reason caused by roundoff, eta*w > 0, */ +/* we simply use one Newton step instead. This way */ +/* will guarantee eta*w < 0. */ + + if (w * eta >= 0.) { + eta = -w / dw; + } + + eta /= *sigma + sqrt(*sigma * *sigma + eta); + temp = tau + eta; + if (temp > sgub || temp < sglb) { + if (w < 0.) { + eta = (sgub - tau) / 2.; + } else { + eta = (sglb - tau) / 2.; + } + if (geomavg) { + if (w < 0.) { + if (tau > 0.) { + eta = sqrt(sgub * tau) - tau; + } + } else { + if (sglb > 0.) { + eta = sqrt(sglb * tau) - tau; + } + } + } + } + + prew = w; + + tau += eta; + *sigma += eta; + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + work[j] += eta; + delta[j] -= eta; +/* L170: */ + } + +/* Evaluate PSI and the derivative DPSI */ + + dpsi = 0.; + psi = 0.; + erretm = 0.; + i__1 = iim1; + for (j = 1; j <= i__1; ++j) { + temp = z__[j] / (work[j] * delta[j]); + psi += z__[j] * temp; + dpsi += temp * temp; + erretm += psi; +/* L180: */ + } + erretm = abs(erretm); + +/* Evaluate PHI and the derivative DPHI */ + + dphi = 0.; + phi = 0.; + i__1 = iip1; + for (j = *n; j >= i__1; --j) { + temp = z__[j] / (work[j] * delta[j]); + phi += z__[j] * temp; + dphi += temp * temp; + erretm += phi; +/* L190: */ + } + + tau2 = work[ii] * delta[ii]; + temp = z__[ii] / tau2; + dw = dpsi + dphi + temp * temp; + temp = z__[ii] * temp; + w = rhoinv + phi + psi + temp; + erretm = (phi - psi) * 8. + erretm + rhoinv * 2. + abs(temp) * 3.; +/* $ + ABS( TAU2 )*DW */ + + swtch = FALSE_; + if (orgati) { + if (-w > abs(prew) / 10.) { + swtch = TRUE_; + } + } else { + if (w > abs(prew) / 10.) { + swtch = TRUE_; + } + } + +/* Main loop to update the values of the array DELTA and WORK */ + + iter = niter + 1; + + for (niter = iter; niter <= 400; ++niter) { + +/* Test for convergence */ + + if (abs(w) <= eps * erretm) { +/* $ .OR. (SGUB-SGLB).LE.EIGHT*ABS(SGUB+SGLB) ) THEN */ + goto L240; + } + + if (w <= 0.) { + sglb = max(sglb,tau); + } else { + sgub = min(sgub,tau); + } + +/* Calculate the new step */ + + if (! swtch3) { + dtipsq = work[ip1] * delta[ip1]; + dtisq = work[*i__] * delta[*i__]; + if (! swtch) { + if (orgati) { +/* Computing 2nd power */ + d__1 = z__[*i__] / dtisq; + c__ = w - dtipsq * dw + delsq * (d__1 * d__1); + } else { +/* Computing 2nd power */ + d__1 = z__[ip1] / dtipsq; + c__ = w - dtisq * dw - delsq * (d__1 * d__1); + } + } else { + temp = z__[ii] / (work[ii] * delta[ii]); + if (orgati) { + dpsi += temp * temp; + } else { + dphi += temp * temp; + } + c__ = w - dtisq * dpsi - dtipsq * dphi; + } + a = (dtipsq + dtisq) * w - dtipsq * dtisq * dw; + b = dtipsq * dtisq * w; + if (c__ == 0.) { + if (a == 0.) { + if (! swtch) { + if (orgati) { + a = z__[*i__] * z__[*i__] + dtipsq * dtipsq * + (dpsi + dphi); + } else { + a = z__[ip1] * z__[ip1] + dtisq * dtisq * ( + dpsi + dphi); + } + } else { + a = dtisq * dtisq * dpsi + dtipsq * dtipsq * dphi; + } + } + eta = b / a; + } else if (a <= 0.) { + eta = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) + / (c__ * 2.); + } else { + eta = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, + abs(d__1)))); + } + } else { + +/* Interpolation using THREE most relevant poles */ + + dtiim = work[iim1] * delta[iim1]; + dtiip = work[iip1] * delta[iip1]; + temp = rhoinv + psi + phi; + if (swtch) { + c__ = temp - dtiim * dpsi - dtiip * dphi; + zz[0] = dtiim * dtiim * dpsi; + zz[2] = dtiip * dtiip * dphi; + } else { + if (orgati) { + temp1 = z__[iim1] / dtiim; + temp1 *= temp1; + temp2 = (d__[iim1] - d__[iip1]) * (d__[iim1] + d__[ + iip1]) * temp1; + c__ = temp - dtiip * (dpsi + dphi) - temp2; + zz[0] = z__[iim1] * z__[iim1]; + if (dpsi < temp1) { + zz[2] = dtiip * dtiip * dphi; + } else { + zz[2] = dtiip * dtiip * (dpsi - temp1 + dphi); + } + } else { + temp1 = z__[iip1] / dtiip; + temp1 *= temp1; + temp2 = (d__[iip1] - d__[iim1]) * (d__[iim1] + d__[ + iip1]) * temp1; + c__ = temp - dtiim * (dpsi + dphi) - temp2; + if (dphi < temp1) { + zz[0] = dtiim * dtiim * dpsi; + } else { + zz[0] = dtiim * dtiim * (dpsi + (dphi - temp1)); + } + zz[2] = z__[iip1] * z__[iip1]; + } + } + dd[0] = dtiim; + dd[1] = delta[ii] * work[ii]; + dd[2] = dtiip; + dlaed6_(&niter, &orgati, &c__, dd, zz, &w, &eta, info); + + if (*info != 0) { + +/* If INFO is not 0, i.e., DLAED6 failed, switch */ +/* back to two pole interpolation */ + + swtch3 = FALSE_; + *info = 0; + dtipsq = work[ip1] * delta[ip1]; + dtisq = work[*i__] * delta[*i__]; + if (! swtch) { + if (orgati) { +/* Computing 2nd power */ + d__1 = z__[*i__] / dtisq; + c__ = w - dtipsq * dw + delsq * (d__1 * d__1); + } else { +/* Computing 2nd power */ + d__1 = z__[ip1] / dtipsq; + c__ = w - dtisq * dw - delsq * (d__1 * d__1); + } + } else { + temp = z__[ii] / (work[ii] * delta[ii]); + if (orgati) { + dpsi += temp * temp; + } else { + dphi += temp * temp; + } + c__ = w - dtisq * dpsi - dtipsq * dphi; + } + a = (dtipsq + dtisq) * w - dtipsq * dtisq * dw; + b = dtipsq * dtisq * w; + if (c__ == 0.) { + if (a == 0.) { + if (! swtch) { + if (orgati) { + a = z__[*i__] * z__[*i__] + dtipsq * + dtipsq * (dpsi + dphi); + } else { + a = z__[ip1] * z__[ip1] + dtisq * dtisq * + (dpsi + dphi); + } + } else { + a = dtisq * dtisq * dpsi + dtipsq * dtipsq * + dphi; + } + } + eta = b / a; + } else if (a <= 0.) { + eta = (a - sqrt((d__1 = a * a - b * 4. * c__, abs( + d__1)))) / (c__ * 2.); + } else { + eta = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, + abs(d__1)))); + } + } + } + +/* Note, eta should be positive if w is negative, and */ +/* eta should be negative otherwise. However, */ +/* if for some reason caused by roundoff, eta*w > 0, */ +/* we simply use one Newton step instead. This way */ +/* will guarantee eta*w < 0. */ + + if (w * eta >= 0.) { + eta = -w / dw; + } + + eta /= *sigma + sqrt(*sigma * *sigma + eta); + temp = tau + eta; + if (temp > sgub || temp < sglb) { + if (w < 0.) { + eta = (sgub - tau) / 2.; + } else { + eta = (sglb - tau) / 2.; + } + if (geomavg) { + if (w < 0.) { + if (tau > 0.) { + eta = sqrt(sgub * tau) - tau; + } + } else { + if (sglb > 0.) { + eta = sqrt(sglb * tau) - tau; + } + } + } + } + + prew = w; + + tau += eta; + *sigma += eta; + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + work[j] += eta; + delta[j] -= eta; +/* L200: */ + } + +/* Evaluate PSI and the derivative DPSI */ + + dpsi = 0.; + psi = 0.; + erretm = 0.; + i__1 = iim1; + for (j = 1; j <= i__1; ++j) { + temp = z__[j] / (work[j] * delta[j]); + psi += z__[j] * temp; + dpsi += temp * temp; + erretm += psi; +/* L210: */ + } + erretm = abs(erretm); + +/* Evaluate PHI and the derivative DPHI */ + + dphi = 0.; + phi = 0.; + i__1 = iip1; + for (j = *n; j >= i__1; --j) { + temp = z__[j] / (work[j] * delta[j]); + phi += z__[j] * temp; + dphi += temp * temp; + erretm += phi; +/* L220: */ + } + + tau2 = work[ii] * delta[ii]; + temp = z__[ii] / tau2; + dw = dpsi + dphi + temp * temp; + temp = z__[ii] * temp; + w = rhoinv + phi + psi + temp; + erretm = (phi - psi) * 8. + erretm + rhoinv * 2. + abs(temp) * 3.; +/* $ + ABS( TAU2 )*DW */ + + if (w * prew > 0. && abs(w) > abs(prew) / 10.) { + swtch = ! swtch; + } + +/* L230: */ + } + +/* Return with INFO = 1, NITER = MAXIT and not converged */ + + *info = 1; + + } + +L240: + return 0; + +/* End of DLASD4 */ + +} /* dlasd4_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dlasd5.cpp b/lib/linalg/dlasd5.cpp new file mode 100644 index 0000000000..a789475cad --- /dev/null +++ b/lib/linalg/dlasd5.cpp @@ -0,0 +1,264 @@ +/* fortran/dlasd5.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* > \brief \b DLASD5 computes the square root of the i-th eigenvalue of a positive symmetric rank-one modific +ation of a 2-by-2 diagonal matrix. Used by sbdsdc. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DLASD5 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DLASD5( I, D, Z, DELTA, RHO, DSIGMA, WORK ) */ + +/* .. Scalar Arguments .. */ +/* INTEGER I */ +/* DOUBLE PRECISION DSIGMA, RHO */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION D( 2 ), DELTA( 2 ), WORK( 2 ), Z( 2 ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > This subroutine computes the square root of the I-th eigenvalue */ +/* > of a positive symmetric rank-one modification of a 2-by-2 diagonal */ +/* > matrix */ +/* > */ +/* > diag( D ) * diag( D ) + RHO * Z * transpose(Z) . */ +/* > */ +/* > The diagonal entries in the array D are assumed to satisfy */ +/* > */ +/* > 0 <= D(i) < D(j) for i < j . */ +/* > */ +/* > We also assume RHO > 0 and that the Euclidean norm of the vector */ +/* > Z is one. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] I */ +/* > \verbatim */ +/* > I is INTEGER */ +/* > The index of the eigenvalue to be computed. I = 1 or I = 2. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] D */ +/* > \verbatim */ +/* > D is DOUBLE PRECISION array, dimension ( 2 ) */ +/* > The original eigenvalues. We assume 0 <= D(1) < D(2). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] Z */ +/* > \verbatim */ +/* > Z is DOUBLE PRECISION array, dimension ( 2 ) */ +/* > The components of the updating vector. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] DELTA */ +/* > \verbatim */ +/* > DELTA is DOUBLE PRECISION array, dimension ( 2 ) */ +/* > Contains (D(j) - sigma_I) in its j-th component. */ +/* > The vector DELTA contains the information necessary */ +/* > to construct the eigenvectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] RHO */ +/* > \verbatim */ +/* > RHO is DOUBLE PRECISION */ +/* > The scalar in the symmetric updating formula. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] DSIGMA */ +/* > \verbatim */ +/* > DSIGMA is DOUBLE PRECISION */ +/* > The computed sigma_I, the I-th updated eigenvalue. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension ( 2 ) */ +/* > WORK contains (D(j) + sigma_I) in its j-th component. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup OTHERauxiliary */ + +/* > \par Contributors: */ +/* ================== */ +/* > */ +/* > Ren-Cang Li, Computer Science Division, University of California */ +/* > at Berkeley, USA */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int dlasd5_(integer *i__, doublereal *d__, doublereal *z__, + doublereal *delta, doublereal *rho, doublereal *dsigma, doublereal * + work) +{ + /* System generated locals */ + doublereal d__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + doublereal b, c__, w, del, tau, delsq; + + +/* -- LAPACK auxiliary routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments */ + --work; + --delta; + --z__; + --d__; + + /* Function Body */ + del = d__[2] - d__[1]; + delsq = del * (d__[2] + d__[1]); + if (*i__ == 1) { + w = *rho * 4. * (z__[2] * z__[2] / (d__[1] + d__[2] * 3.) - z__[1] * + z__[1] / (d__[1] * 3. + d__[2])) / del + 1.; + if (w > 0.) { + b = delsq + *rho * (z__[1] * z__[1] + z__[2] * z__[2]); + c__ = *rho * z__[1] * z__[1] * delsq; + +/* B > ZERO, always */ + +/* The following TAU is DSIGMA * DSIGMA - D( 1 ) * D( 1 ) */ + + tau = c__ * 2. / (b + sqrt((d__1 = b * b - c__ * 4., abs(d__1)))); + +/* The following TAU is DSIGMA - D( 1 ) */ + + tau /= d__[1] + sqrt(d__[1] * d__[1] + tau); + *dsigma = d__[1] + tau; + delta[1] = -tau; + delta[2] = del - tau; + work[1] = d__[1] * 2. + tau; + work[2] = d__[1] + tau + d__[2]; +/* DELTA( 1 ) = -Z( 1 ) / TAU */ +/* DELTA( 2 ) = Z( 2 ) / ( DEL-TAU ) */ + } else { + b = -delsq + *rho * (z__[1] * z__[1] + z__[2] * z__[2]); + c__ = *rho * z__[2] * z__[2] * delsq; + +/* The following TAU is DSIGMA * DSIGMA - D( 2 ) * D( 2 ) */ + + if (b > 0.) { + tau = c__ * -2. / (b + sqrt(b * b + c__ * 4.)); + } else { + tau = (b - sqrt(b * b + c__ * 4.)) / 2.; + } + +/* The following TAU is DSIGMA - D( 2 ) */ + + tau /= d__[2] + sqrt((d__1 = d__[2] * d__[2] + tau, abs(d__1))); + *dsigma = d__[2] + tau; + delta[1] = -(del + tau); + delta[2] = -tau; + work[1] = d__[1] + tau + d__[2]; + work[2] = d__[2] * 2. + tau; +/* DELTA( 1 ) = -Z( 1 ) / ( DEL+TAU ) */ +/* DELTA( 2 ) = -Z( 2 ) / TAU */ + } +/* TEMP = SQRT( DELTA( 1 )*DELTA( 1 )+DELTA( 2 )*DELTA( 2 ) ) */ +/* DELTA( 1 ) = DELTA( 1 ) / TEMP */ +/* DELTA( 2 ) = DELTA( 2 ) / TEMP */ + } else { + +/* Now I=2 */ + + b = -delsq + *rho * (z__[1] * z__[1] + z__[2] * z__[2]); + c__ = *rho * z__[2] * z__[2] * delsq; + +/* The following TAU is DSIGMA * DSIGMA - D( 2 ) * D( 2 ) */ + + if (b > 0.) { + tau = (b + sqrt(b * b + c__ * 4.)) / 2.; + } else { + tau = c__ * 2. / (-b + sqrt(b * b + c__ * 4.)); + } + +/* The following TAU is DSIGMA - D( 2 ) */ + + tau /= d__[2] + sqrt(d__[2] * d__[2] + tau); + *dsigma = d__[2] + tau; + delta[1] = -(del + tau); + delta[2] = -tau; + work[1] = d__[1] + tau + d__[2]; + work[2] = d__[2] * 2. + tau; +/* DELTA( 1 ) = -Z( 1 ) / ( DEL+TAU ) */ +/* DELTA( 2 ) = -Z( 2 ) / TAU */ +/* TEMP = SQRT( DELTA( 1 )*DELTA( 1 )+DELTA( 2 )*DELTA( 2 ) ) */ +/* DELTA( 1 ) = DELTA( 1 ) / TEMP */ +/* DELTA( 2 ) = DELTA( 2 ) / TEMP */ + } + return 0; + +/* End of DLASD5 */ + +} /* dlasd5_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dlasd6.cpp b/lib/linalg/dlasd6.cpp new file mode 100644 index 0000000000..d3b6cddd86 --- /dev/null +++ b/lib/linalg/dlasd6.cpp @@ -0,0 +1,514 @@ +/* fortran/dlasd6.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static integer c__0 = 0; +static doublereal c_b7 = 1.; +static integer c__1 = 1; +static integer c_n1 = -1; + +/* > \brief \b DLASD6 computes the SVD of an updated upper bidiagonal matrix obtained by merging two smaller o +nes by appending a row. Used by sbdsdc. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DLASD6 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DLASD6( ICOMPQ, NL, NR, SQRE, D, VF, VL, ALPHA, BETA, */ +/* IDXQ, PERM, GIVPTR, GIVCOL, LDGCOL, GIVNUM, */ +/* LDGNUM, POLES, DIFL, DIFR, Z, K, C, S, WORK, */ +/* IWORK, INFO ) */ + +/* .. Scalar Arguments .. */ +/* INTEGER GIVPTR, ICOMPQ, INFO, K, LDGCOL, LDGNUM, NL, */ +/* $ NR, SQRE */ +/* DOUBLE PRECISION ALPHA, BETA, C, S */ +/* .. */ +/* .. Array Arguments .. */ +/* INTEGER GIVCOL( LDGCOL, * ), IDXQ( * ), IWORK( * ), */ +/* $ PERM( * ) */ +/* DOUBLE PRECISION D( * ), DIFL( * ), DIFR( * ), */ +/* $ GIVNUM( LDGNUM, * ), POLES( LDGNUM, * ), */ +/* $ VF( * ), VL( * ), WORK( * ), Z( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DLASD6 computes the SVD of an updated upper bidiagonal matrix B */ +/* > obtained by merging two smaller ones by appending a row. This */ +/* > routine is used only for the problem which requires all singular */ +/* > values and optionally singular vector matrices in factored form. */ +/* > B is an N-by-M matrix with N = NL + NR + 1 and M = N + SQRE. */ +/* > A related subroutine, DLASD1, handles the case in which all singular */ +/* > values and singular vectors of the bidiagonal matrix are desired. */ +/* > */ +/* > DLASD6 computes the SVD as follows: */ +/* > */ +/* > ( D1(in) 0 0 0 ) */ +/* > B = U(in) * ( Z1**T a Z2**T b ) * VT(in) */ +/* > ( 0 0 D2(in) 0 ) */ +/* > */ +/* > = U(out) * ( D(out) 0) * VT(out) */ +/* > */ +/* > where Z**T = (Z1**T a Z2**T b) = u**T VT**T, and u is a vector of dimension M */ +/* > with ALPHA and BETA in the NL+1 and NL+2 th entries and zeros */ +/* > elsewhere; and the entry b is empty if SQRE = 0. */ +/* > */ +/* > The singular values of B can be computed using D1, D2, the first */ +/* > components of all the right singular vectors of the lower block, and */ +/* > the last components of all the right singular vectors of the upper */ +/* > block. These components are stored and updated in VF and VL, */ +/* > respectively, in DLASD6. Hence U and VT are not explicitly */ +/* > referenced. */ +/* > */ +/* > The singular values are stored in D. The algorithm consists of two */ +/* > stages: */ +/* > */ +/* > The first stage consists of deflating the size of the problem */ +/* > when there are multiple singular values or if there is a zero */ +/* > in the Z vector. For each such occurrence the dimension of the */ +/* > secular equation problem is reduced by one. This stage is */ +/* > performed by the routine DLASD7. */ +/* > */ +/* > The second stage consists of calculating the updated */ +/* > singular values. This is done by finding the roots of the */ +/* > secular equation via the routine DLASD4 (as called by DLASD8). */ +/* > This routine also updates VF and VL and computes the distances */ +/* > between the updated singular values and the old singular */ +/* > values. */ +/* > */ +/* > DLASD6 is called from DLASDA. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] ICOMPQ */ +/* > \verbatim */ +/* > ICOMPQ is INTEGER */ +/* > Specifies whether singular vectors are to be computed in */ +/* > factored form: */ +/* > = 0: Compute singular values only. */ +/* > = 1: Compute singular vectors in factored form as well. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NL */ +/* > \verbatim */ +/* > NL is INTEGER */ +/* > The row dimension of the upper block. NL >= 1. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NR */ +/* > \verbatim */ +/* > NR is INTEGER */ +/* > The row dimension of the lower block. NR >= 1. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] SQRE */ +/* > \verbatim */ +/* > SQRE is INTEGER */ +/* > = 0: the lower block is an NR-by-NR square matrix. */ +/* > = 1: the lower block is an NR-by-(NR+1) rectangular matrix. */ +/* > */ +/* > The bidiagonal matrix has row dimension N = NL + NR + 1, */ +/* > and column dimension M = N + SQRE. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] D */ +/* > \verbatim */ +/* > D is DOUBLE PRECISION array, dimension ( NL+NR+1 ). */ +/* > On entry D(1:NL,1:NL) contains the singular values of the */ +/* > upper block, and D(NL+2:N) contains the singular values */ +/* > of the lower block. On exit D(1:N) contains the singular */ +/* > values of the modified matrix. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] VF */ +/* > \verbatim */ +/* > VF is DOUBLE PRECISION array, dimension ( M ) */ +/* > On entry, VF(1:NL+1) contains the first components of all */ +/* > right singular vectors of the upper block; and VF(NL+2:M) */ +/* > contains the first components of all right singular vectors */ +/* > of the lower block. On exit, VF contains the first components */ +/* > of all right singular vectors of the bidiagonal matrix. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] VL */ +/* > \verbatim */ +/* > VL is DOUBLE PRECISION array, dimension ( M ) */ +/* > On entry, VL(1:NL+1) contains the last components of all */ +/* > right singular vectors of the upper block; and VL(NL+2:M) */ +/* > contains the last components of all right singular vectors of */ +/* > the lower block. On exit, VL contains the last components of */ +/* > all right singular vectors of the bidiagonal matrix. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] ALPHA */ +/* > \verbatim */ +/* > ALPHA is DOUBLE PRECISION */ +/* > Contains the diagonal element associated with the added row. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] BETA */ +/* > \verbatim */ +/* > BETA is DOUBLE PRECISION */ +/* > Contains the off-diagonal element associated with the added */ +/* > row. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] IDXQ */ +/* > \verbatim */ +/* > IDXQ is INTEGER array, dimension ( N ) */ +/* > This contains the permutation which will reintegrate the */ +/* > subproblem just solved back into sorted order, i.e. */ +/* > D( IDXQ( I = 1, N ) ) will be in ascending order. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] PERM */ +/* > \verbatim */ +/* > PERM is INTEGER array, dimension ( N ) */ +/* > The permutations (from deflation and sorting) to be applied */ +/* > to each block. Not referenced if ICOMPQ = 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] GIVPTR */ +/* > \verbatim */ +/* > GIVPTR is INTEGER */ +/* > The number of Givens rotations which took place in this */ +/* > subproblem. Not referenced if ICOMPQ = 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] GIVCOL */ +/* > \verbatim */ +/* > GIVCOL is INTEGER array, dimension ( LDGCOL, 2 ) */ +/* > Each pair of numbers indicates a pair of columns to take place */ +/* > in a Givens rotation. Not referenced if ICOMPQ = 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDGCOL */ +/* > \verbatim */ +/* > LDGCOL is INTEGER */ +/* > leading dimension of GIVCOL, must be at least N. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] GIVNUM */ +/* > \verbatim */ +/* > GIVNUM is DOUBLE PRECISION array, dimension ( LDGNUM, 2 ) */ +/* > Each number indicates the C or S value to be used in the */ +/* > corresponding Givens rotation. Not referenced if ICOMPQ = 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDGNUM */ +/* > \verbatim */ +/* > LDGNUM is INTEGER */ +/* > The leading dimension of GIVNUM and POLES, must be at least N. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] POLES */ +/* > \verbatim */ +/* > POLES is DOUBLE PRECISION array, dimension ( LDGNUM, 2 ) */ +/* > On exit, POLES(1,*) is an array containing the new singular */ +/* > values obtained from solving the secular equation, and */ +/* > POLES(2,*) is an array containing the poles in the secular */ +/* > equation. Not referenced if ICOMPQ = 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] DIFL */ +/* > \verbatim */ +/* > DIFL is DOUBLE PRECISION array, dimension ( N ) */ +/* > On exit, DIFL(I) is the distance between I-th updated */ +/* > (undeflated) singular value and the I-th (undeflated) old */ +/* > singular value. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] DIFR */ +/* > \verbatim */ +/* > DIFR is DOUBLE PRECISION array, */ +/* > dimension ( LDDIFR, 2 ) if ICOMPQ = 1 and */ +/* > dimension ( K ) if ICOMPQ = 0. */ +/* > On exit, DIFR(I,1) = D(I) - DSIGMA(I+1), DIFR(K,1) is not */ +/* > defined and will not be referenced. */ +/* > */ +/* > If ICOMPQ = 1, DIFR(1:K,2) is an array containing the */ +/* > normalizing factors for the right singular vector matrix. */ +/* > */ +/* > See DLASD8 for details on DIFL and DIFR. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] Z */ +/* > \verbatim */ +/* > Z is DOUBLE PRECISION array, dimension ( M ) */ +/* > The first elements of this array contain the components */ +/* > of the deflation-adjusted updating row vector. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] K */ +/* > \verbatim */ +/* > K is INTEGER */ +/* > Contains the dimension of the non-deflated matrix, */ +/* > This is the order of the related secular equation. 1 <= K <=N. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] C */ +/* > \verbatim */ +/* > C is DOUBLE PRECISION */ +/* > C contains garbage if SQRE =0 and the C-value of a Givens */ +/* > rotation related to the right null space if SQRE = 1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] S */ +/* > \verbatim */ +/* > S is DOUBLE PRECISION */ +/* > S contains garbage if SQRE =0 and the S-value of a Givens */ +/* > rotation related to the right null space if SQRE = 1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension ( 4 * M ) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension ( 3 * N ) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit. */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > > 0: if INFO = 1, a singular value did not converge */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup OTHERauxiliary */ + +/* > \par Contributors: */ +/* ================== */ +/* > */ +/* > Ming Gu and Huan Ren, Computer Science Division, University of */ +/* > California at Berkeley, USA */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int dlasd6_(integer *icompq, integer *nl, integer *nr, + integer *sqre, doublereal *d__, doublereal *vf, doublereal *vl, + doublereal *alpha, doublereal *beta, integer *idxq, integer *perm, + integer *givptr, integer *givcol, integer *ldgcol, doublereal *givnum, + integer *ldgnum, doublereal *poles, doublereal *difl, doublereal * + difr, doublereal *z__, integer *k, doublereal *c__, doublereal *s, + doublereal *work, integer *iwork, integer *info) +{ + /* System generated locals */ + integer givcol_dim1, givcol_offset, givnum_dim1, givnum_offset, + poles_dim1, poles_offset, i__1; + doublereal d__1, d__2; + + /* Local variables */ + integer i__, m, n, n1, n2, iw, idx, idxc, idxp, ivfw, ivlw; + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *), dlasd7_(integer *, integer *, integer *, + integer *, integer *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, integer *, integer *, + integer *, integer *, integer *, integer *, integer *, doublereal + *, integer *, doublereal *, doublereal *, integer *), dlasd8_( + integer *, integer *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, integer *, doublereal *, + doublereal *, integer *), dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *, ftnlen), dlamrg_(integer *, integer *, + doublereal *, integer *, integer *, integer *); + integer isigma; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + doublereal orgnrm; + + +/* -- LAPACK auxiliary routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + --d__; + --vf; + --vl; + --idxq; + --perm; + givcol_dim1 = *ldgcol; + givcol_offset = 1 + givcol_dim1; + givcol -= givcol_offset; + poles_dim1 = *ldgnum; + poles_offset = 1 + poles_dim1; + poles -= poles_offset; + givnum_dim1 = *ldgnum; + givnum_offset = 1 + givnum_dim1; + givnum -= givnum_offset; + --difl; + --difr; + --z__; + --work; + --iwork; + + /* Function Body */ + *info = 0; + n = *nl + *nr + 1; + m = n + *sqre; + + if (*icompq < 0 || *icompq > 1) { + *info = -1; + } else if (*nl < 1) { + *info = -2; + } else if (*nr < 1) { + *info = -3; + } else if (*sqre < 0 || *sqre > 1) { + *info = -4; + } else if (*ldgcol < n) { + *info = -14; + } else if (*ldgnum < n) { + *info = -16; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DLASD6", &i__1, (ftnlen)6); + return 0; + } + +/* The following values are for bookkeeping purposes only. They are */ +/* integer pointers which indicate the portion of the workspace */ +/* used by a particular array in DLASD7 and DLASD8. */ + + isigma = 1; + iw = isigma + n; + ivfw = iw + m; + ivlw = ivfw + m; + + idx = 1; + idxc = idx + n; + idxp = idxc + n; + +/* Scale. */ + +/* Computing MAX */ + d__1 = abs(*alpha), d__2 = abs(*beta); + orgnrm = max(d__1,d__2); + d__[*nl + 1] = 0.; + i__1 = n; + for (i__ = 1; i__ <= i__1; ++i__) { + if ((d__1 = d__[i__], abs(d__1)) > orgnrm) { + orgnrm = (d__1 = d__[i__], abs(d__1)); + } +/* L10: */ + } + dlascl_((char *)"G", &c__0, &c__0, &orgnrm, &c_b7, &n, &c__1, &d__[1], &n, info, ( + ftnlen)1); + *alpha /= orgnrm; + *beta /= orgnrm; + +/* Sort and Deflate singular values. */ + + dlasd7_(icompq, nl, nr, sqre, k, &d__[1], &z__[1], &work[iw], &vf[1], & + work[ivfw], &vl[1], &work[ivlw], alpha, beta, &work[isigma], & + iwork[idx], &iwork[idxp], &idxq[1], &perm[1], givptr, &givcol[ + givcol_offset], ldgcol, &givnum[givnum_offset], ldgnum, c__, s, + info); + +/* Solve Secular Equation, compute DIFL, DIFR, and update VF, VL. */ + + dlasd8_(icompq, k, &d__[1], &z__[1], &vf[1], &vl[1], &difl[1], &difr[1], + ldgnum, &work[isigma], &work[iw], info); + +/* Report the possible convergence failure. */ + + if (*info != 0) { + return 0; + } + +/* Save the poles if ICOMPQ = 1. */ + + if (*icompq == 1) { + dcopy_(k, &d__[1], &c__1, &poles[poles_dim1 + 1], &c__1); + dcopy_(k, &work[isigma], &c__1, &poles[(poles_dim1 << 1) + 1], &c__1); + } + +/* Unscale. */ + + dlascl_((char *)"G", &c__0, &c__0, &c_b7, &orgnrm, &n, &c__1, &d__[1], &n, info, ( + ftnlen)1); + +/* Prepare the IDXQ sorting permutation. */ + + n1 = *k; + n2 = n - *k; + dlamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &idxq[1]); + + return 0; + +/* End of DLASD6 */ + +} /* dlasd6_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dlasd7.cpp b/lib/linalg/dlasd7.cpp new file mode 100644 index 0000000000..29169c7a84 --- /dev/null +++ b/lib/linalg/dlasd7.cpp @@ -0,0 +1,663 @@ +/* fortran/dlasd7.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +/* > \brief \b DLASD7 merges the two sets of singular values together into a single sorted set. Then it tries +to deflate the size of the problem. Used by sbdsdc. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DLASD7 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DLASD7( ICOMPQ, NL, NR, SQRE, K, D, Z, ZW, VF, VFW, VL, */ +/* VLW, ALPHA, BETA, DSIGMA, IDX, IDXP, IDXQ, */ +/* PERM, GIVPTR, GIVCOL, LDGCOL, GIVNUM, LDGNUM, */ +/* C, S, INFO ) */ + +/* .. Scalar Arguments .. */ +/* INTEGER GIVPTR, ICOMPQ, INFO, K, LDGCOL, LDGNUM, NL, */ +/* $ NR, SQRE */ +/* DOUBLE PRECISION ALPHA, BETA, C, S */ +/* .. */ +/* .. Array Arguments .. */ +/* INTEGER GIVCOL( LDGCOL, * ), IDX( * ), IDXP( * ), */ +/* $ IDXQ( * ), PERM( * ) */ +/* DOUBLE PRECISION D( * ), DSIGMA( * ), GIVNUM( LDGNUM, * ), */ +/* $ VF( * ), VFW( * ), VL( * ), VLW( * ), Z( * ), */ +/* $ ZW( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DLASD7 merges the two sets of singular values together into a single */ +/* > sorted set. Then it tries to deflate the size of the problem. There */ +/* > are two ways in which deflation can occur: when two or more singular */ +/* > values are close together or if there is a tiny entry in the Z */ +/* > vector. For each such occurrence the order of the related */ +/* > secular equation problem is reduced by one. */ +/* > */ +/* > DLASD7 is called from DLASD6. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] ICOMPQ */ +/* > \verbatim */ +/* > ICOMPQ is INTEGER */ +/* > Specifies whether singular vectors are to be computed */ +/* > in compact form, as follows: */ +/* > = 0: Compute singular values only. */ +/* > = 1: Compute singular vectors of upper */ +/* > bidiagonal matrix in compact form. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NL */ +/* > \verbatim */ +/* > NL is INTEGER */ +/* > The row dimension of the upper block. NL >= 1. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NR */ +/* > \verbatim */ +/* > NR is INTEGER */ +/* > The row dimension of the lower block. NR >= 1. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] SQRE */ +/* > \verbatim */ +/* > SQRE is INTEGER */ +/* > = 0: the lower block is an NR-by-NR square matrix. */ +/* > = 1: the lower block is an NR-by-(NR+1) rectangular matrix. */ +/* > */ +/* > The bidiagonal matrix has */ +/* > N = NL + NR + 1 rows and */ +/* > M = N + SQRE >= N columns. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] K */ +/* > \verbatim */ +/* > K is INTEGER */ +/* > Contains the dimension of the non-deflated matrix, this is */ +/* > the order of the related secular equation. 1 <= K <=N. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] D */ +/* > \verbatim */ +/* > D is DOUBLE PRECISION array, dimension ( N ) */ +/* > On entry D contains the singular values of the two submatrices */ +/* > to be combined. On exit D contains the trailing (N-K) updated */ +/* > singular values (those which were deflated) sorted into */ +/* > increasing order. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] Z */ +/* > \verbatim */ +/* > Z is DOUBLE PRECISION array, dimension ( M ) */ +/* > On exit Z contains the updating row vector in the secular */ +/* > equation. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] ZW */ +/* > \verbatim */ +/* > ZW is DOUBLE PRECISION array, dimension ( M ) */ +/* > Workspace for Z. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] VF */ +/* > \verbatim */ +/* > VF is DOUBLE PRECISION array, dimension ( M ) */ +/* > On entry, VF(1:NL+1) contains the first components of all */ +/* > right singular vectors of the upper block; and VF(NL+2:M) */ +/* > contains the first components of all right singular vectors */ +/* > of the lower block. On exit, VF contains the first components */ +/* > of all right singular vectors of the bidiagonal matrix. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] VFW */ +/* > \verbatim */ +/* > VFW is DOUBLE PRECISION array, dimension ( M ) */ +/* > Workspace for VF. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] VL */ +/* > \verbatim */ +/* > VL is DOUBLE PRECISION array, dimension ( M ) */ +/* > On entry, VL(1:NL+1) contains the last components of all */ +/* > right singular vectors of the upper block; and VL(NL+2:M) */ +/* > contains the last components of all right singular vectors */ +/* > of the lower block. On exit, VL contains the last components */ +/* > of all right singular vectors of the bidiagonal matrix. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] VLW */ +/* > \verbatim */ +/* > VLW is DOUBLE PRECISION array, dimension ( M ) */ +/* > Workspace for VL. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] ALPHA */ +/* > \verbatim */ +/* > ALPHA is DOUBLE PRECISION */ +/* > Contains the diagonal element associated with the added row. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] BETA */ +/* > \verbatim */ +/* > BETA is DOUBLE PRECISION */ +/* > Contains the off-diagonal element associated with the added */ +/* > row. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] DSIGMA */ +/* > \verbatim */ +/* > DSIGMA is DOUBLE PRECISION array, dimension ( N ) */ +/* > Contains a copy of the diagonal elements (K-1 singular values */ +/* > and one zero) in the secular equation. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IDX */ +/* > \verbatim */ +/* > IDX is INTEGER array, dimension ( N ) */ +/* > This will contain the permutation used to sort the contents of */ +/* > D into ascending order. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IDXP */ +/* > \verbatim */ +/* > IDXP is INTEGER array, dimension ( N ) */ +/* > This will contain the permutation used to place deflated */ +/* > values of D at the end of the array. On output IDXP(2:K) */ +/* > points to the nondeflated D-values and IDXP(K+1:N) */ +/* > points to the deflated singular values. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IDXQ */ +/* > \verbatim */ +/* > IDXQ is INTEGER array, dimension ( N ) */ +/* > This contains the permutation which separately sorts the two */ +/* > sub-problems in D into ascending order. Note that entries in */ +/* > the first half of this permutation must first be moved one */ +/* > position backward; and entries in the second half */ +/* > must first have NL+1 added to their values. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] PERM */ +/* > \verbatim */ +/* > PERM is INTEGER array, dimension ( N ) */ +/* > The permutations (from deflation and sorting) to be applied */ +/* > to each singular block. Not referenced if ICOMPQ = 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] GIVPTR */ +/* > \verbatim */ +/* > GIVPTR is INTEGER */ +/* > The number of Givens rotations which took place in this */ +/* > subproblem. Not referenced if ICOMPQ = 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] GIVCOL */ +/* > \verbatim */ +/* > GIVCOL is INTEGER array, dimension ( LDGCOL, 2 ) */ +/* > Each pair of numbers indicates a pair of columns to take place */ +/* > in a Givens rotation. Not referenced if ICOMPQ = 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDGCOL */ +/* > \verbatim */ +/* > LDGCOL is INTEGER */ +/* > The leading dimension of GIVCOL, must be at least N. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] GIVNUM */ +/* > \verbatim */ +/* > GIVNUM is DOUBLE PRECISION array, dimension ( LDGNUM, 2 ) */ +/* > Each number indicates the C or S value to be used in the */ +/* > corresponding Givens rotation. Not referenced if ICOMPQ = 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDGNUM */ +/* > \verbatim */ +/* > LDGNUM is INTEGER */ +/* > The leading dimension of GIVNUM, must be at least N. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] C */ +/* > \verbatim */ +/* > C is DOUBLE PRECISION */ +/* > C contains garbage if SQRE =0 and the C-value of a Givens */ +/* > rotation related to the right null space if SQRE = 1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] S */ +/* > \verbatim */ +/* > S is DOUBLE PRECISION */ +/* > S contains garbage if SQRE =0 and the S-value of a Givens */ +/* > rotation related to the right null space if SQRE = 1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit. */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup OTHERauxiliary */ + +/* > \par Contributors: */ +/* ================== */ +/* > */ +/* > Ming Gu and Huan Ren, Computer Science Division, University of */ +/* > California at Berkeley, USA */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int dlasd7_(integer *icompq, integer *nl, integer *nr, + integer *sqre, integer *k, doublereal *d__, doublereal *z__, + doublereal *zw, doublereal *vf, doublereal *vfw, doublereal *vl, + doublereal *vlw, doublereal *alpha, doublereal *beta, doublereal * + dsigma, integer *idx, integer *idxp, integer *idxq, integer *perm, + integer *givptr, integer *givcol, integer *ldgcol, doublereal *givnum, + integer *ldgnum, doublereal *c__, doublereal *s, integer *info) +{ + /* System generated locals */ + integer givcol_dim1, givcol_offset, givnum_dim1, givnum_offset, i__1; + doublereal d__1, d__2; + + /* Local variables */ + integer i__, j, m, n, k2; + doublereal z1; + integer jp; + doublereal eps, tau, tol; + integer nlp1, nlp2, idxi, idxj; + extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *); + integer idxjp; + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *); + integer jprev; + extern doublereal dlapy2_(doublereal *, doublereal *), dlamch_(char *, + ftnlen); + extern /* Subroutine */ int dlamrg_(integer *, integer *, doublereal *, + integer *, integer *, integer *), xerbla_(char *, integer *, + ftnlen); + doublereal hlftol; + + +/* -- LAPACK auxiliary routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ + +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + --d__; + --z__; + --zw; + --vf; + --vfw; + --vl; + --vlw; + --dsigma; + --idx; + --idxp; + --idxq; + --perm; + givcol_dim1 = *ldgcol; + givcol_offset = 1 + givcol_dim1; + givcol -= givcol_offset; + givnum_dim1 = *ldgnum; + givnum_offset = 1 + givnum_dim1; + givnum -= givnum_offset; + + /* Function Body */ + *info = 0; + n = *nl + *nr + 1; + m = n + *sqre; + + if (*icompq < 0 || *icompq > 1) { + *info = -1; + } else if (*nl < 1) { + *info = -2; + } else if (*nr < 1) { + *info = -3; + } else if (*sqre < 0 || *sqre > 1) { + *info = -4; + } else if (*ldgcol < n) { + *info = -22; + } else if (*ldgnum < n) { + *info = -24; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DLASD7", &i__1, (ftnlen)6); + return 0; + } + + nlp1 = *nl + 1; + nlp2 = *nl + 2; + if (*icompq == 1) { + *givptr = 0; + } + +/* Generate the first part of the vector Z and move the singular */ +/* values in the first part of D one position backward. */ + + z1 = *alpha * vl[nlp1]; + vl[nlp1] = 0.; + tau = vf[nlp1]; + for (i__ = *nl; i__ >= 1; --i__) { + z__[i__ + 1] = *alpha * vl[i__]; + vl[i__] = 0.; + vf[i__ + 1] = vf[i__]; + d__[i__ + 1] = d__[i__]; + idxq[i__ + 1] = idxq[i__] + 1; +/* L10: */ + } + vf[1] = tau; + +/* Generate the second part of the vector Z. */ + + i__1 = m; + for (i__ = nlp2; i__ <= i__1; ++i__) { + z__[i__] = *beta * vf[i__]; + vf[i__] = 0.; +/* L20: */ + } + +/* Sort the singular values into increasing order */ + + i__1 = n; + for (i__ = nlp2; i__ <= i__1; ++i__) { + idxq[i__] += nlp1; +/* L30: */ + } + +/* DSIGMA, IDXC, IDXC, and ZW are used as storage space. */ + + i__1 = n; + for (i__ = 2; i__ <= i__1; ++i__) { + dsigma[i__] = d__[idxq[i__]]; + zw[i__] = z__[idxq[i__]]; + vfw[i__] = vf[idxq[i__]]; + vlw[i__] = vl[idxq[i__]]; +/* L40: */ + } + + dlamrg_(nl, nr, &dsigma[2], &c__1, &c__1, &idx[2]); + + i__1 = n; + for (i__ = 2; i__ <= i__1; ++i__) { + idxi = idx[i__] + 1; + d__[i__] = dsigma[idxi]; + z__[i__] = zw[idxi]; + vf[i__] = vfw[idxi]; + vl[i__] = vlw[idxi]; +/* L50: */ + } + +/* Calculate the allowable deflation tolerance */ + + eps = dlamch_((char *)"Epsilon", (ftnlen)7); +/* Computing MAX */ + d__1 = abs(*alpha), d__2 = abs(*beta); + tol = max(d__1,d__2); +/* Computing MAX */ + d__2 = (d__1 = d__[n], abs(d__1)); + tol = eps * 64. * max(d__2,tol); + +/* There are 2 kinds of deflation -- first a value in the z-vector */ +/* is small, second two (or more) singular values are very close */ +/* together (their difference is small). */ + +/* If the value in the z-vector is small, we simply permute the */ +/* array so that the corresponding singular value is moved to the */ +/* end. */ + +/* If two values in the D-vector are close, we perform a two-sided */ +/* rotation designed to make one of the corresponding z-vector */ +/* entries zero, and then permute the array so that the deflated */ +/* singular value is moved to the end. */ + +/* If there are multiple singular values then the problem deflates. */ +/* Here the number of equal singular values are found. As each equal */ +/* singular value is found, an elementary reflector is computed to */ +/* rotate the corresponding singular subspace so that the */ +/* corresponding components of Z are zero in this new basis. */ + + *k = 1; + k2 = n + 1; + i__1 = n; + for (j = 2; j <= i__1; ++j) { + if ((d__1 = z__[j], abs(d__1)) <= tol) { + +/* Deflate due to small z component. */ + + --k2; + idxp[k2] = j; + if (j == n) { + goto L100; + } + } else { + jprev = j; + goto L70; + } +/* L60: */ + } +L70: + j = jprev; +L80: + ++j; + if (j > n) { + goto L90; + } + if ((d__1 = z__[j], abs(d__1)) <= tol) { + +/* Deflate due to small z component. */ + + --k2; + idxp[k2] = j; + } else { + +/* Check if singular values are close enough to allow deflation. */ + + if ((d__1 = d__[j] - d__[jprev], abs(d__1)) <= tol) { + +/* Deflation is possible. */ + + *s = z__[jprev]; + *c__ = z__[j]; + +/* Find sqrt(a**2+b**2) without overflow or */ +/* destructive underflow. */ + + tau = dlapy2_(c__, s); + z__[j] = tau; + z__[jprev] = 0.; + *c__ /= tau; + *s = -(*s) / tau; + +/* Record the appropriate Givens rotation */ + + if (*icompq == 1) { + ++(*givptr); + idxjp = idxq[idx[jprev] + 1]; + idxj = idxq[idx[j] + 1]; + if (idxjp <= nlp1) { + --idxjp; + } + if (idxj <= nlp1) { + --idxj; + } + givcol[*givptr + (givcol_dim1 << 1)] = idxjp; + givcol[*givptr + givcol_dim1] = idxj; + givnum[*givptr + (givnum_dim1 << 1)] = *c__; + givnum[*givptr + givnum_dim1] = *s; + } + drot_(&c__1, &vf[jprev], &c__1, &vf[j], &c__1, c__, s); + drot_(&c__1, &vl[jprev], &c__1, &vl[j], &c__1, c__, s); + --k2; + idxp[k2] = jprev; + jprev = j; + } else { + ++(*k); + zw[*k] = z__[jprev]; + dsigma[*k] = d__[jprev]; + idxp[*k] = jprev; + jprev = j; + } + } + goto L80; +L90: + +/* Record the last singular value. */ + + ++(*k); + zw[*k] = z__[jprev]; + dsigma[*k] = d__[jprev]; + idxp[*k] = jprev; + +L100: + +/* Sort the singular values into DSIGMA. The singular values which */ +/* were not deflated go into the first K slots of DSIGMA, except */ +/* that DSIGMA(1) is treated separately. */ + + i__1 = n; + for (j = 2; j <= i__1; ++j) { + jp = idxp[j]; + dsigma[j] = d__[jp]; + vfw[j] = vf[jp]; + vlw[j] = vl[jp]; +/* L110: */ + } + if (*icompq == 1) { + i__1 = n; + for (j = 2; j <= i__1; ++j) { + jp = idxp[j]; + perm[j] = idxq[idx[jp] + 1]; + if (perm[j] <= nlp1) { + --perm[j]; + } +/* L120: */ + } + } + +/* The deflated singular values go back into the last N - K slots of */ +/* D. */ + + i__1 = n - *k; + dcopy_(&i__1, &dsigma[*k + 1], &c__1, &d__[*k + 1], &c__1); + +/* Determine DSIGMA(1), DSIGMA(2), Z(1), VF(1), VL(1), VF(M), and */ +/* VL(M). */ + + dsigma[1] = 0.; + hlftol = tol / 2.; + if (abs(dsigma[2]) <= hlftol) { + dsigma[2] = hlftol; + } + if (m > n) { + z__[1] = dlapy2_(&z1, &z__[m]); + if (z__[1] <= tol) { + *c__ = 1.; + *s = 0.; + z__[1] = tol; + } else { + *c__ = z1 / z__[1]; + *s = -z__[m] / z__[1]; + } + drot_(&c__1, &vf[m], &c__1, &vf[1], &c__1, c__, s); + drot_(&c__1, &vl[m], &c__1, &vl[1], &c__1, c__, s); + } else { + if (abs(z1) <= tol) { + z__[1] = tol; + } else { + z__[1] = z1; + } + } + +/* Restore Z, VF, and VL. */ + + i__1 = *k - 1; + dcopy_(&i__1, &zw[2], &c__1, &z__[2], &c__1); + i__1 = n - 1; + dcopy_(&i__1, &vfw[2], &c__1, &vf[2], &c__1); + i__1 = n - 1; + dcopy_(&i__1, &vlw[2], &c__1, &vl[2], &c__1); + + return 0; + +/* End of DLASD7 */ + +} /* dlasd7_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dlasd8.cpp b/lib/linalg/dlasd8.cpp new file mode 100644 index 0000000000..b95d5b11cc --- /dev/null +++ b/lib/linalg/dlasd8.cpp @@ -0,0 +1,419 @@ +/* fortran/dlasd8.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c__0 = 0; +static doublereal c_b8 = 1.; + +/* > \brief \b DLASD8 finds the square roots of the roots of the secular equation, and stores, for each elemen +t in D, the distance to its two nearest poles. Used by sbdsdc. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DLASD8 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DLASD8( ICOMPQ, K, D, Z, VF, VL, DIFL, DIFR, LDDIFR, */ +/* DSIGMA, WORK, INFO ) */ + +/* .. Scalar Arguments .. */ +/* INTEGER ICOMPQ, INFO, K, LDDIFR */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION D( * ), DIFL( * ), DIFR( LDDIFR, * ), */ +/* $ DSIGMA( * ), VF( * ), VL( * ), WORK( * ), */ +/* $ Z( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DLASD8 finds the square roots of the roots of the secular equation, */ +/* > as defined by the values in DSIGMA and Z. It makes the appropriate */ +/* > calls to DLASD4, and stores, for each element in D, the distance */ +/* > to its two nearest poles (elements in DSIGMA). It also updates */ +/* > the arrays VF and VL, the first and last components of all the */ +/* > right singular vectors of the original bidiagonal matrix. */ +/* > */ +/* > DLASD8 is called from DLASD6. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] ICOMPQ */ +/* > \verbatim */ +/* > ICOMPQ is INTEGER */ +/* > Specifies whether singular vectors are to be computed in */ +/* > factored form in the calling routine: */ +/* > = 0: Compute singular values only. */ +/* > = 1: Compute singular vectors in factored form as well. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] K */ +/* > \verbatim */ +/* > K is INTEGER */ +/* > The number of terms in the rational function to be solved */ +/* > by DLASD4. K >= 1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] D */ +/* > \verbatim */ +/* > D is DOUBLE PRECISION array, dimension ( K ) */ +/* > On output, D contains the updated singular values. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] Z */ +/* > \verbatim */ +/* > Z is DOUBLE PRECISION array, dimension ( K ) */ +/* > On entry, the first K elements of this array contain the */ +/* > components of the deflation-adjusted updating row vector. */ +/* > On exit, Z is updated. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] VF */ +/* > \verbatim */ +/* > VF is DOUBLE PRECISION array, dimension ( K ) */ +/* > On entry, VF contains information passed through DBEDE8. */ +/* > On exit, VF contains the first K components of the first */ +/* > components of all right singular vectors of the bidiagonal */ +/* > matrix. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] VL */ +/* > \verbatim */ +/* > VL is DOUBLE PRECISION array, dimension ( K ) */ +/* > On entry, VL contains information passed through DBEDE8. */ +/* > On exit, VL contains the first K components of the last */ +/* > components of all right singular vectors of the bidiagonal */ +/* > matrix. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] DIFL */ +/* > \verbatim */ +/* > DIFL is DOUBLE PRECISION array, dimension ( K ) */ +/* > On exit, DIFL(I) = D(I) - DSIGMA(I). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] DIFR */ +/* > \verbatim */ +/* > DIFR is DOUBLE PRECISION array, */ +/* > dimension ( LDDIFR, 2 ) if ICOMPQ = 1 and */ +/* > dimension ( K ) if ICOMPQ = 0. */ +/* > On exit, DIFR(I,1) = D(I) - DSIGMA(I+1), DIFR(K,1) is not */ +/* > defined and will not be referenced. */ +/* > */ +/* > If ICOMPQ = 1, DIFR(1:K,2) is an array containing the */ +/* > normalizing factors for the right singular vector matrix. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDDIFR */ +/* > \verbatim */ +/* > LDDIFR is INTEGER */ +/* > The leading dimension of DIFR, must be at least K. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] DSIGMA */ +/* > \verbatim */ +/* > DSIGMA is DOUBLE PRECISION array, dimension ( K ) */ +/* > On entry, the first K elements of this array contain the old */ +/* > roots of the deflated updating problem. These are the poles */ +/* > of the secular equation. */ +/* > On exit, the elements of DSIGMA may be very slightly altered */ +/* > in value. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (3*K) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit. */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > > 0: if INFO = 1, a singular value did not converge */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup OTHERauxiliary */ + +/* > \par Contributors: */ +/* ================== */ +/* > */ +/* > Ming Gu and Huan Ren, Computer Science Division, University of */ +/* > California at Berkeley, USA */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int dlasd8_(integer *icompq, integer *k, doublereal *d__, + doublereal *z__, doublereal *vf, doublereal *vl, doublereal *difl, + doublereal *difr, integer *lddifr, doublereal *dsigma, doublereal * + work, integer *info) +{ + /* System generated locals */ + integer difr_dim1, difr_offset, i__1, i__2; + doublereal d__1, d__2; + + /* Builtin functions */ + double sqrt(doublereal), d_sign(doublereal *, doublereal *); + + /* Local variables */ + integer i__, j; + doublereal dj, rho; + integer iwk1, iwk2, iwk3; + extern doublereal ddot_(integer *, doublereal *, integer *, doublereal *, + integer *); + doublereal temp; + extern doublereal dnrm2_(integer *, doublereal *, integer *); + integer iwk2i, iwk3i; + doublereal diflj, difrj, dsigj; + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *); + extern doublereal dlamc3_(doublereal *, doublereal *); + extern /* Subroutine */ int dlasd4_(integer *, integer *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, integer *), dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *, ftnlen), dlaset_(char *, integer *, integer + *, doublereal *, doublereal *, doublereal *, integer *, ftnlen), + xerbla_(char *, integer *, ftnlen); + doublereal dsigjp; + + +/* -- LAPACK auxiliary routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + --d__; + --z__; + --vf; + --vl; + --difl; + difr_dim1 = *lddifr; + difr_offset = 1 + difr_dim1; + difr -= difr_offset; + --dsigma; + --work; + + /* Function Body */ + *info = 0; + + if (*icompq < 0 || *icompq > 1) { + *info = -1; + } else if (*k < 1) { + *info = -2; + } else if (*lddifr < *k) { + *info = -9; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DLASD8", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + if (*k == 1) { + d__[1] = abs(z__[1]); + difl[1] = d__[1]; + if (*icompq == 1) { + difl[2] = 1.; + difr[(difr_dim1 << 1) + 1] = 1.; + } + return 0; + } + +/* Modify values DSIGMA(i) to make sure all DSIGMA(i)-DSIGMA(j) can */ +/* be computed with high relative accuracy (barring over/underflow). */ +/* This is a problem on machines without a guard digit in */ +/* add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). */ +/* The following code replaces DSIGMA(I) by 2*DSIGMA(I)-DSIGMA(I), */ +/* which on any of these machines zeros out the bottommost */ +/* bit of DSIGMA(I) if it is 1; this makes the subsequent */ +/* subtractions DSIGMA(I)-DSIGMA(J) unproblematic when cancellation */ +/* occurs. On binary machines with a guard digit (almost all */ +/* machines) it does not change DSIGMA(I) at all. On hexadecimal */ +/* and decimal machines with a guard digit, it slightly */ +/* changes the bottommost bits of DSIGMA(I). It does not account */ +/* for hexadecimal or decimal machines without guard digits */ +/* (we know of none). We use a subroutine call to compute */ +/* 2*DLAMBDA(I) to prevent optimizing compilers from eliminating */ +/* this code. */ + + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + dsigma[i__] = dlamc3_(&dsigma[i__], &dsigma[i__]) - dsigma[i__]; +/* L10: */ + } + +/* Book keeping. */ + + iwk1 = 1; + iwk2 = iwk1 + *k; + iwk3 = iwk2 + *k; + iwk2i = iwk2 - 1; + iwk3i = iwk3 - 1; + +/* Normalize Z. */ + + rho = dnrm2_(k, &z__[1], &c__1); + dlascl_((char *)"G", &c__0, &c__0, &rho, &c_b8, k, &c__1, &z__[1], k, info, ( + ftnlen)1); + rho *= rho; + +/* Initialize WORK(IWK3). */ + + dlaset_((char *)"A", k, &c__1, &c_b8, &c_b8, &work[iwk3], k, (ftnlen)1); + +/* Compute the updated singular values, the arrays DIFL, DIFR, */ +/* and the updated Z. */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + dlasd4_(k, &j, &dsigma[1], &z__[1], &work[iwk1], &rho, &d__[j], &work[ + iwk2], info); + +/* If the root finder fails, report the convergence failure. */ + + if (*info != 0) { + return 0; + } + work[iwk3i + j] = work[iwk3i + j] * work[j] * work[iwk2i + j]; + difl[j] = -work[j]; + difr[j + difr_dim1] = -work[j + 1]; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + work[iwk3i + i__] = work[iwk3i + i__] * work[i__] * work[iwk2i + + i__] / (dsigma[i__] - dsigma[j]) / (dsigma[i__] + dsigma[ + j]); +/* L20: */ + } + i__2 = *k; + for (i__ = j + 1; i__ <= i__2; ++i__) { + work[iwk3i + i__] = work[iwk3i + i__] * work[i__] * work[iwk2i + + i__] / (dsigma[i__] - dsigma[j]) / (dsigma[i__] + dsigma[ + j]); +/* L30: */ + } +/* L40: */ + } + +/* Compute updated Z. */ + + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + d__2 = sqrt((d__1 = work[iwk3i + i__], abs(d__1))); + z__[i__] = d_sign(&d__2, &z__[i__]); +/* L50: */ + } + +/* Update VF and VL. */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + diflj = difl[j]; + dj = d__[j]; + dsigj = -dsigma[j]; + if (j < *k) { + difrj = -difr[j + difr_dim1]; + dsigjp = -dsigma[j + 1]; + } + work[j] = -z__[j] / diflj / (dsigma[j] + dj); + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + work[i__] = z__[i__] / (dlamc3_(&dsigma[i__], &dsigj) - diflj) / ( + dsigma[i__] + dj); +/* L60: */ + } + i__2 = *k; + for (i__ = j + 1; i__ <= i__2; ++i__) { + work[i__] = z__[i__] / (dlamc3_(&dsigma[i__], &dsigjp) + difrj) / + (dsigma[i__] + dj); +/* L70: */ + } + temp = dnrm2_(k, &work[1], &c__1); + work[iwk2i + j] = ddot_(k, &work[1], &c__1, &vf[1], &c__1) / temp; + work[iwk3i + j] = ddot_(k, &work[1], &c__1, &vl[1], &c__1) / temp; + if (*icompq == 1) { + difr[j + (difr_dim1 << 1)] = temp; + } +/* L80: */ + } + + dcopy_(k, &work[iwk2], &c__1, &vf[1], &c__1); + dcopy_(k, &work[iwk3], &c__1, &vl[1], &c__1); + + return 0; + +/* End of DLASD8 */ + +} /* dlasd8_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dlasda.cpp b/lib/linalg/dlasda.cpp new file mode 100644 index 0000000000..a63dc14dec --- /dev/null +++ b/lib/linalg/dlasda.cpp @@ -0,0 +1,624 @@ +/* fortran/dlasda.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static integer c__0 = 0; +static doublereal c_b11 = 0.; +static doublereal c_b12 = 1.; +static integer c__1 = 1; +static integer c__2 = 2; + +/* > \brief \b DLASDA computes the singular value decomposition (SVD) of a real upper bidiagonal matrix with d +iagonal d and off-diagonal e. Used by sbdsdc. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DLASDA + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DLASDA( ICOMPQ, SMLSIZ, N, SQRE, D, E, U, LDU, VT, K, */ +/* DIFL, DIFR, Z, POLES, GIVPTR, GIVCOL, LDGCOL, */ +/* PERM, GIVNUM, C, S, WORK, IWORK, INFO ) */ + +/* .. Scalar Arguments .. */ +/* INTEGER ICOMPQ, INFO, LDGCOL, LDU, N, SMLSIZ, SQRE */ +/* .. */ +/* .. Array Arguments .. */ +/* INTEGER GIVCOL( LDGCOL, * ), GIVPTR( * ), IWORK( * ), */ +/* $ K( * ), PERM( LDGCOL, * ) */ +/* DOUBLE PRECISION C( * ), D( * ), DIFL( LDU, * ), DIFR( LDU, * ), */ +/* $ E( * ), GIVNUM( LDU, * ), POLES( LDU, * ), */ +/* $ S( * ), U( LDU, * ), VT( LDU, * ), WORK( * ), */ +/* $ Z( LDU, * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > Using a divide and conquer approach, DLASDA computes the singular */ +/* > value decomposition (SVD) of a real upper bidiagonal N-by-M matrix */ +/* > B with diagonal D and offdiagonal E, where M = N + SQRE. The */ +/* > algorithm computes the singular values in the SVD B = U * S * VT. */ +/* > The orthogonal matrices U and VT are optionally computed in */ +/* > compact form. */ +/* > */ +/* > A related subroutine, DLASD0, computes the singular values and */ +/* > the singular vectors in explicit form. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] ICOMPQ */ +/* > \verbatim */ +/* > ICOMPQ is INTEGER */ +/* > Specifies whether singular vectors are to be computed */ +/* > in compact form, as follows */ +/* > = 0: Compute singular values only. */ +/* > = 1: Compute singular vectors of upper bidiagonal */ +/* > matrix in compact form. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] SMLSIZ */ +/* > \verbatim */ +/* > SMLSIZ is INTEGER */ +/* > The maximum size of the subproblems at the bottom of the */ +/* > computation tree. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The row dimension of the upper bidiagonal matrix. This is */ +/* > also the dimension of the main diagonal array D. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] SQRE */ +/* > \verbatim */ +/* > SQRE is INTEGER */ +/* > Specifies the column dimension of the bidiagonal matrix. */ +/* > = 0: The bidiagonal matrix has column dimension M = N; */ +/* > = 1: The bidiagonal matrix has column dimension M = N + 1. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] D */ +/* > \verbatim */ +/* > D is DOUBLE PRECISION array, dimension ( N ) */ +/* > On entry D contains the main diagonal of the bidiagonal */ +/* > matrix. On exit D, if INFO = 0, contains its singular values. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] E */ +/* > \verbatim */ +/* > E is DOUBLE PRECISION array, dimension ( M-1 ) */ +/* > Contains the subdiagonal entries of the bidiagonal matrix. */ +/* > On exit, E has been destroyed. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] U */ +/* > \verbatim */ +/* > U is DOUBLE PRECISION array, */ +/* > dimension ( LDU, SMLSIZ ) if ICOMPQ = 1, and not referenced */ +/* > if ICOMPQ = 0. If ICOMPQ = 1, on exit, U contains the left */ +/* > singular vector matrices of all subproblems at the bottom */ +/* > level. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDU */ +/* > \verbatim */ +/* > LDU is INTEGER, LDU = > N. */ +/* > The leading dimension of arrays U, VT, DIFL, DIFR, POLES, */ +/* > GIVNUM, and Z. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] VT */ +/* > \verbatim */ +/* > VT is DOUBLE PRECISION array, */ +/* > dimension ( LDU, SMLSIZ+1 ) if ICOMPQ = 1, and not referenced */ +/* > if ICOMPQ = 0. If ICOMPQ = 1, on exit, VT**T contains the right */ +/* > singular vector matrices of all subproblems at the bottom */ +/* > level. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] K */ +/* > \verbatim */ +/* > K is INTEGER array, */ +/* > dimension ( N ) if ICOMPQ = 1 and dimension 1 if ICOMPQ = 0. */ +/* > If ICOMPQ = 1, on exit, K(I) is the dimension of the I-th */ +/* > secular equation on the computation tree. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] DIFL */ +/* > \verbatim */ +/* > DIFL is DOUBLE PRECISION array, dimension ( LDU, NLVL ), */ +/* > where NLVL = floor(log_2 (N/SMLSIZ))). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] DIFR */ +/* > \verbatim */ +/* > DIFR is DOUBLE PRECISION array, */ +/* > dimension ( LDU, 2 * NLVL ) if ICOMPQ = 1 and */ +/* > dimension ( N ) if ICOMPQ = 0. */ +/* > If ICOMPQ = 1, on exit, DIFL(1:N, I) and DIFR(1:N, 2 * I - 1) */ +/* > record distances between singular values on the I-th */ +/* > level and singular values on the (I -1)-th level, and */ +/* > DIFR(1:N, 2 * I ) contains the normalizing factors for */ +/* > the right singular vector matrix. See DLASD8 for details. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] Z */ +/* > \verbatim */ +/* > Z is DOUBLE PRECISION array, */ +/* > dimension ( LDU, NLVL ) if ICOMPQ = 1 and */ +/* > dimension ( N ) if ICOMPQ = 0. */ +/* > The first K elements of Z(1, I) contain the components of */ +/* > the deflation-adjusted updating row vector for subproblems */ +/* > on the I-th level. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] POLES */ +/* > \verbatim */ +/* > POLES is DOUBLE PRECISION array, */ +/* > dimension ( LDU, 2 * NLVL ) if ICOMPQ = 1, and not referenced */ +/* > if ICOMPQ = 0. If ICOMPQ = 1, on exit, POLES(1, 2*I - 1) and */ +/* > POLES(1, 2*I) contain the new and old singular values */ +/* > involved in the secular equations on the I-th level. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] GIVPTR */ +/* > \verbatim */ +/* > GIVPTR is INTEGER array, */ +/* > dimension ( N ) if ICOMPQ = 1, and not referenced if */ +/* > ICOMPQ = 0. If ICOMPQ = 1, on exit, GIVPTR( I ) records */ +/* > the number of Givens rotations performed on the I-th */ +/* > problem on the computation tree. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] GIVCOL */ +/* > \verbatim */ +/* > GIVCOL is INTEGER array, */ +/* > dimension ( LDGCOL, 2 * NLVL ) if ICOMPQ = 1, and not */ +/* > referenced if ICOMPQ = 0. If ICOMPQ = 1, on exit, for each I, */ +/* > GIVCOL(1, 2 *I - 1) and GIVCOL(1, 2 *I) record the locations */ +/* > of Givens rotations performed on the I-th level on the */ +/* > computation tree. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDGCOL */ +/* > \verbatim */ +/* > LDGCOL is INTEGER, LDGCOL = > N. */ +/* > The leading dimension of arrays GIVCOL and PERM. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] PERM */ +/* > \verbatim */ +/* > PERM is INTEGER array, */ +/* > dimension ( LDGCOL, NLVL ) if ICOMPQ = 1, and not referenced */ +/* > if ICOMPQ = 0. If ICOMPQ = 1, on exit, PERM(1, I) records */ +/* > permutations done on the I-th level of the computation tree. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] GIVNUM */ +/* > \verbatim */ +/* > GIVNUM is DOUBLE PRECISION array, */ +/* > dimension ( LDU, 2 * NLVL ) if ICOMPQ = 1, and not */ +/* > referenced if ICOMPQ = 0. If ICOMPQ = 1, on exit, for each I, */ +/* > GIVNUM(1, 2 *I - 1) and GIVNUM(1, 2 *I) record the C- and S- */ +/* > values of Givens rotations performed on the I-th level on */ +/* > the computation tree. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] C */ +/* > \verbatim */ +/* > C is DOUBLE PRECISION array, */ +/* > dimension ( N ) if ICOMPQ = 1, and dimension 1 if ICOMPQ = 0. */ +/* > If ICOMPQ = 1 and the I-th subproblem is not square, on exit, */ +/* > C( I ) contains the C-value of a Givens rotation related to */ +/* > the right null space of the I-th subproblem. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] S */ +/* > \verbatim */ +/* > S is DOUBLE PRECISION array, dimension ( N ) if */ +/* > ICOMPQ = 1, and dimension 1 if ICOMPQ = 0. If ICOMPQ = 1 */ +/* > and the I-th subproblem is not square, on exit, S( I ) */ +/* > contains the S-value of a Givens rotation related to */ +/* > the right null space of the I-th subproblem. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension */ +/* > (6 * N + (SMLSIZ + 1)*(SMLSIZ + 1)). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (7*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit. */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > > 0: if INFO = 1, a singular value did not converge */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup OTHERauxiliary */ + +/* > \par Contributors: */ +/* ================== */ +/* > */ +/* > Ming Gu and Huan Ren, Computer Science Division, University of */ +/* > California at Berkeley, USA */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int dlasda_(integer *icompq, integer *smlsiz, integer *n, + integer *sqre, doublereal *d__, doublereal *e, doublereal *u, integer + *ldu, doublereal *vt, integer *k, doublereal *difl, doublereal *difr, + doublereal *z__, doublereal *poles, integer *givptr, integer *givcol, + integer *ldgcol, integer *perm, doublereal *givnum, doublereal *c__, + doublereal *s, doublereal *work, integer *iwork, integer *info) +{ + /* System generated locals */ + integer givcol_dim1, givcol_offset, perm_dim1, perm_offset, difl_dim1, + difl_offset, difr_dim1, difr_offset, givnum_dim1, givnum_offset, + poles_dim1, poles_offset, u_dim1, u_offset, vt_dim1, vt_offset, + z_dim1, z_offset, i__1, i__2; + + /* Builtin functions */ + integer pow_ii(integer *, integer *); + + /* Local variables */ + integer i__, j, m, i1, ic, lf, nd, ll, nl, vf, nr, vl, im1, ncc, nlf, nrf, + vfi, iwk, vli, lvl, nru, ndb1, nlp1, lvl2, nrp1; + doublereal beta; + integer idxq, nlvl; + doublereal alpha; + integer inode, ndiml, ndimr, idxqi, itemp; + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *); + integer sqrei; + extern /* Subroutine */ int dlasd6_(integer *, integer *, integer *, + integer *, doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, integer *, integer *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + doublereal *, doublereal *, integer *, doublereal *, doublereal *, + doublereal *, integer *, integer *); + integer nwork1, nwork2; + extern /* Subroutine */ int dlasdq_(char *, integer *, integer *, integer + *, integer *, integer *, doublereal *, doublereal *, doublereal *, + integer *, doublereal *, integer *, doublereal *, integer *, + doublereal *, integer *, ftnlen), dlasdt_(integer *, integer *, + integer *, integer *, integer *, integer *, integer *), dlaset_( + char *, integer *, integer *, doublereal *, doublereal *, + doublereal *, integer *, ftnlen), xerbla_(char *, integer *, + ftnlen); + integer smlszp; + + +/* -- LAPACK auxiliary routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + --d__; + --e; + givnum_dim1 = *ldu; + givnum_offset = 1 + givnum_dim1; + givnum -= givnum_offset; + poles_dim1 = *ldu; + poles_offset = 1 + poles_dim1; + poles -= poles_offset; + z_dim1 = *ldu; + z_offset = 1 + z_dim1; + z__ -= z_offset; + difr_dim1 = *ldu; + difr_offset = 1 + difr_dim1; + difr -= difr_offset; + difl_dim1 = *ldu; + difl_offset = 1 + difl_dim1; + difl -= difl_offset; + vt_dim1 = *ldu; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + --k; + --givptr; + perm_dim1 = *ldgcol; + perm_offset = 1 + perm_dim1; + perm -= perm_offset; + givcol_dim1 = *ldgcol; + givcol_offset = 1 + givcol_dim1; + givcol -= givcol_offset; + --c__; + --s; + --work; + --iwork; + + /* Function Body */ + *info = 0; + + if (*icompq < 0 || *icompq > 1) { + *info = -1; + } else if (*smlsiz < 3) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*sqre < 0 || *sqre > 1) { + *info = -4; + } else if (*ldu < *n + *sqre) { + *info = -8; + } else if (*ldgcol < *n) { + *info = -17; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DLASDA", &i__1, (ftnlen)6); + return 0; + } + + m = *n + *sqre; + +/* If the input matrix is too small, call DLASDQ to find the SVD. */ + + if (*n <= *smlsiz) { + if (*icompq == 0) { + dlasdq_((char *)"U", sqre, n, &c__0, &c__0, &c__0, &d__[1], &e[1], &vt[ + vt_offset], ldu, &u[u_offset], ldu, &u[u_offset], ldu, & + work[1], info, (ftnlen)1); + } else { + dlasdq_((char *)"U", sqre, n, &m, n, &c__0, &d__[1], &e[1], &vt[vt_offset] + , ldu, &u[u_offset], ldu, &u[u_offset], ldu, &work[1], + info, (ftnlen)1); + } + return 0; + } + +/* Book-keeping and set up the computation tree. */ + + inode = 1; + ndiml = inode + *n; + ndimr = ndiml + *n; + idxq = ndimr + *n; + iwk = idxq + *n; + + ncc = 0; + nru = 0; + + smlszp = *smlsiz + 1; + vf = 1; + vl = vf + m; + nwork1 = vl + m; + nwork2 = nwork1 + smlszp * smlszp; + + dlasdt_(n, &nlvl, &nd, &iwork[inode], &iwork[ndiml], &iwork[ndimr], + smlsiz); + +/* for the nodes on bottom level of the tree, solve */ +/* their subproblems by DLASDQ. */ + + ndb1 = (nd + 1) / 2; + i__1 = nd; + for (i__ = ndb1; i__ <= i__1; ++i__) { + +/* IC : center row of each node */ +/* NL : number of rows of left subproblem */ +/* NR : number of rows of right subproblem */ +/* NLF: starting row of the left subproblem */ +/* NRF: starting row of the right subproblem */ + + i1 = i__ - 1; + ic = iwork[inode + i1]; + nl = iwork[ndiml + i1]; + nlp1 = nl + 1; + nr = iwork[ndimr + i1]; + nlf = ic - nl; + nrf = ic + 1; + idxqi = idxq + nlf - 2; + vfi = vf + nlf - 1; + vli = vl + nlf - 1; + sqrei = 1; + if (*icompq == 0) { + dlaset_((char *)"A", &nlp1, &nlp1, &c_b11, &c_b12, &work[nwork1], &smlszp, + (ftnlen)1); + dlasdq_((char *)"U", &sqrei, &nl, &nlp1, &nru, &ncc, &d__[nlf], &e[nlf], & + work[nwork1], &smlszp, &work[nwork2], &nl, &work[nwork2], + &nl, &work[nwork2], info, (ftnlen)1); + itemp = nwork1 + nl * smlszp; + dcopy_(&nlp1, &work[nwork1], &c__1, &work[vfi], &c__1); + dcopy_(&nlp1, &work[itemp], &c__1, &work[vli], &c__1); + } else { + dlaset_((char *)"A", &nl, &nl, &c_b11, &c_b12, &u[nlf + u_dim1], ldu, ( + ftnlen)1); + dlaset_((char *)"A", &nlp1, &nlp1, &c_b11, &c_b12, &vt[nlf + vt_dim1], + ldu, (ftnlen)1); + dlasdq_((char *)"U", &sqrei, &nl, &nlp1, &nl, &ncc, &d__[nlf], &e[nlf], & + vt[nlf + vt_dim1], ldu, &u[nlf + u_dim1], ldu, &u[nlf + + u_dim1], ldu, &work[nwork1], info, (ftnlen)1); + dcopy_(&nlp1, &vt[nlf + vt_dim1], &c__1, &work[vfi], &c__1); + dcopy_(&nlp1, &vt[nlf + nlp1 * vt_dim1], &c__1, &work[vli], &c__1) + ; + } + if (*info != 0) { + return 0; + } + i__2 = nl; + for (j = 1; j <= i__2; ++j) { + iwork[idxqi + j] = j; +/* L10: */ + } + if (i__ == nd && *sqre == 0) { + sqrei = 0; + } else { + sqrei = 1; + } + idxqi += nlp1; + vfi += nlp1; + vli += nlp1; + nrp1 = nr + sqrei; + if (*icompq == 0) { + dlaset_((char *)"A", &nrp1, &nrp1, &c_b11, &c_b12, &work[nwork1], &smlszp, + (ftnlen)1); + dlasdq_((char *)"U", &sqrei, &nr, &nrp1, &nru, &ncc, &d__[nrf], &e[nrf], & + work[nwork1], &smlszp, &work[nwork2], &nr, &work[nwork2], + &nr, &work[nwork2], info, (ftnlen)1); + itemp = nwork1 + (nrp1 - 1) * smlszp; + dcopy_(&nrp1, &work[nwork1], &c__1, &work[vfi], &c__1); + dcopy_(&nrp1, &work[itemp], &c__1, &work[vli], &c__1); + } else { + dlaset_((char *)"A", &nr, &nr, &c_b11, &c_b12, &u[nrf + u_dim1], ldu, ( + ftnlen)1); + dlaset_((char *)"A", &nrp1, &nrp1, &c_b11, &c_b12, &vt[nrf + vt_dim1], + ldu, (ftnlen)1); + dlasdq_((char *)"U", &sqrei, &nr, &nrp1, &nr, &ncc, &d__[nrf], &e[nrf], & + vt[nrf + vt_dim1], ldu, &u[nrf + u_dim1], ldu, &u[nrf + + u_dim1], ldu, &work[nwork1], info, (ftnlen)1); + dcopy_(&nrp1, &vt[nrf + vt_dim1], &c__1, &work[vfi], &c__1); + dcopy_(&nrp1, &vt[nrf + nrp1 * vt_dim1], &c__1, &work[vli], &c__1) + ; + } + if (*info != 0) { + return 0; + } + i__2 = nr; + for (j = 1; j <= i__2; ++j) { + iwork[idxqi + j] = j; +/* L20: */ + } +/* L30: */ + } + +/* Now conquer each subproblem bottom-up. */ + + j = pow_ii(&c__2, &nlvl); + for (lvl = nlvl; lvl >= 1; --lvl) { + lvl2 = (lvl << 1) - 1; + +/* Find the first node LF and last node LL on */ +/* the current level LVL. */ + + if (lvl == 1) { + lf = 1; + ll = 1; + } else { + i__1 = lvl - 1; + lf = pow_ii(&c__2, &i__1); + ll = (lf << 1) - 1; + } + i__1 = ll; + for (i__ = lf; i__ <= i__1; ++i__) { + im1 = i__ - 1; + ic = iwork[inode + im1]; + nl = iwork[ndiml + im1]; + nr = iwork[ndimr + im1]; + nlf = ic - nl; + nrf = ic + 1; + if (i__ == ll) { + sqrei = *sqre; + } else { + sqrei = 1; + } + vfi = vf + nlf - 1; + vli = vl + nlf - 1; + idxqi = idxq + nlf - 1; + alpha = d__[ic]; + beta = e[ic]; + if (*icompq == 0) { + dlasd6_(icompq, &nl, &nr, &sqrei, &d__[nlf], &work[vfi], & + work[vli], &alpha, &beta, &iwork[idxqi], &perm[ + perm_offset], &givptr[1], &givcol[givcol_offset], + ldgcol, &givnum[givnum_offset], ldu, &poles[ + poles_offset], &difl[difl_offset], &difr[difr_offset], + &z__[z_offset], &k[1], &c__[1], &s[1], &work[nwork1], + &iwork[iwk], info); + } else { + --j; + dlasd6_(icompq, &nl, &nr, &sqrei, &d__[nlf], &work[vfi], & + work[vli], &alpha, &beta, &iwork[idxqi], &perm[nlf + + lvl * perm_dim1], &givptr[j], &givcol[nlf + lvl2 * + givcol_dim1], ldgcol, &givnum[nlf + lvl2 * + givnum_dim1], ldu, &poles[nlf + lvl2 * poles_dim1], & + difl[nlf + lvl * difl_dim1], &difr[nlf + lvl2 * + difr_dim1], &z__[nlf + lvl * z_dim1], &k[j], &c__[j], + &s[j], &work[nwork1], &iwork[iwk], info); + } + if (*info != 0) { + return 0; + } +/* L40: */ + } +/* L50: */ + } + + return 0; + +/* End of DLASDA */ + +} /* dlasda_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dlasdq.cpp b/lib/linalg/dlasdq.cpp new file mode 100644 index 0000000000..efb468cef4 --- /dev/null +++ b/lib/linalg/dlasdq.cpp @@ -0,0 +1,486 @@ +/* fortran/dlasdq.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +/* > \brief \b DLASDQ computes the SVD of a real bidiagonal matrix with diagonal d and off-diagonal e. Used by + sbdsdc. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DLASDQ + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DLASDQ( UPLO, SQRE, N, NCVT, NRU, NCC, D, E, VT, LDVT, */ +/* U, LDU, C, LDC, WORK, INFO ) */ + +/* .. Scalar Arguments .. */ +/* CHARACTER UPLO */ +/* INTEGER INFO, LDC, LDU, LDVT, N, NCC, NCVT, NRU, SQRE */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION C( LDC, * ), D( * ), E( * ), U( LDU, * ), */ +/* $ VT( LDVT, * ), WORK( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DLASDQ computes the singular value decomposition (SVD) of a real */ +/* > (upper or lower) bidiagonal matrix with diagonal D and offdiagonal */ +/* > E, accumulating the transformations if desired. Letting B denote */ +/* > the input bidiagonal matrix, the algorithm computes orthogonal */ +/* > matrices Q and P such that B = Q * S * P**T (P**T denotes the transpose */ +/* > of P). The singular values S are overwritten on D. */ +/* > */ +/* > The input matrix U is changed to U * Q if desired. */ +/* > The input matrix VT is changed to P**T * VT if desired. */ +/* > The input matrix C is changed to Q**T * C if desired. */ +/* > */ +/* > See "Computing Small Singular Values of Bidiagonal Matrices With */ +/* > Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan, */ +/* > LAPACK Working Note #3, for a detailed description of the algorithm. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > On entry, UPLO specifies whether the input bidiagonal matrix */ +/* > is upper or lower bidiagonal, and whether it is square are */ +/* > not. */ +/* > UPLO = 'U' or 'u' B is upper bidiagonal. */ +/* > UPLO = 'L' or 'l' B is lower bidiagonal. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] SQRE */ +/* > \verbatim */ +/* > SQRE is INTEGER */ +/* > = 0: then the input matrix is N-by-N. */ +/* > = 1: then the input matrix is N-by-(N+1) if UPLU = 'U' and */ +/* > (N+1)-by-N if UPLU = 'L'. */ +/* > */ +/* > The bidiagonal matrix has */ +/* > N = NL + NR + 1 rows and */ +/* > M = N + SQRE >= N columns. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > On entry, N specifies the number of rows and columns */ +/* > in the matrix. N must be at least 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NCVT */ +/* > \verbatim */ +/* > NCVT is INTEGER */ +/* > On entry, NCVT specifies the number of columns of */ +/* > the matrix VT. NCVT must be at least 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRU */ +/* > \verbatim */ +/* > NRU is INTEGER */ +/* > On entry, NRU specifies the number of rows of */ +/* > the matrix U. NRU must be at least 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NCC */ +/* > \verbatim */ +/* > NCC is INTEGER */ +/* > On entry, NCC specifies the number of columns of */ +/* > the matrix C. NCC must be at least 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] D */ +/* > \verbatim */ +/* > D is DOUBLE PRECISION array, dimension (N) */ +/* > On entry, D contains the diagonal entries of the */ +/* > bidiagonal matrix whose SVD is desired. On normal exit, */ +/* > D contains the singular values in ascending order. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] E */ +/* > \verbatim */ +/* > E is DOUBLE PRECISION array. */ +/* > dimension is (N-1) if SQRE = 0 and N if SQRE = 1. */ +/* > On entry, the entries of E contain the offdiagonal entries */ +/* > of the bidiagonal matrix whose SVD is desired. On normal */ +/* > exit, E will contain 0. If the algorithm does not converge, */ +/* > D and E will contain the diagonal and superdiagonal entries */ +/* > of a bidiagonal matrix orthogonally equivalent to the one */ +/* > given as input. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] VT */ +/* > \verbatim */ +/* > VT is DOUBLE PRECISION array, dimension (LDVT, NCVT) */ +/* > On entry, contains a matrix which on exit has been */ +/* > premultiplied by P**T, dimension N-by-NCVT if SQRE = 0 */ +/* > and (N+1)-by-NCVT if SQRE = 1 (not referenced if NCVT=0). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDVT */ +/* > \verbatim */ +/* > LDVT is INTEGER */ +/* > On entry, LDVT specifies the leading dimension of VT as */ +/* > declared in the calling (sub) program. LDVT must be at */ +/* > least 1. If NCVT is nonzero LDVT must also be at least N. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] U */ +/* > \verbatim */ +/* > U is DOUBLE PRECISION array, dimension (LDU, N) */ +/* > On entry, contains a matrix which on exit has been */ +/* > postmultiplied by Q, dimension NRU-by-N if SQRE = 0 */ +/* > and NRU-by-(N+1) if SQRE = 1 (not referenced if NRU=0). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDU */ +/* > \verbatim */ +/* > LDU is INTEGER */ +/* > On entry, LDU specifies the leading dimension of U as */ +/* > declared in the calling (sub) program. LDU must be at */ +/* > least max( 1, NRU ) . */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] C */ +/* > \verbatim */ +/* > C is DOUBLE PRECISION array, dimension (LDC, NCC) */ +/* > On entry, contains an N-by-NCC matrix which on exit */ +/* > has been premultiplied by Q**T dimension N-by-NCC if SQRE = 0 */ +/* > and (N+1)-by-NCC if SQRE = 1 (not referenced if NCC=0). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDC */ +/* > \verbatim */ +/* > LDC is INTEGER */ +/* > On entry, LDC specifies the leading dimension of C as */ +/* > declared in the calling (sub) program. LDC must be at */ +/* > least 1. If NCC is nonzero, LDC must also be at least N. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (4*N) */ +/* > Workspace. Only referenced if one of NCVT, NRU, or NCC is */ +/* > nonzero, and if N is at least 2. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > On exit, a value of 0 indicates a successful exit. */ +/* > If INFO < 0, argument number -INFO is illegal. */ +/* > If INFO > 0, the algorithm did not converge, and INFO */ +/* > specifies how many superdiagonals did not converge. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup OTHERauxiliary */ + +/* > \par Contributors: */ +/* ================== */ +/* > */ +/* > Ming Gu and Huan Ren, Computer Science Division, University of */ +/* > California at Berkeley, USA */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int dlasdq_(char *uplo, integer *sqre, integer *n, integer * + ncvt, integer *nru, integer *ncc, doublereal *d__, doublereal *e, + doublereal *vt, integer *ldvt, doublereal *u, integer *ldu, + doublereal *c__, integer *ldc, doublereal *work, integer *info, + ftnlen uplo_len) +{ + /* System generated locals */ + integer c_dim1, c_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1, + i__2; + + /* Local variables */ + integer i__, j; + doublereal r__, cs, sn; + integer np1, isub; + doublereal smin; + integer sqre1; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern /* Subroutine */ int dlasr_(char *, char *, char *, integer *, + integer *, doublereal *, doublereal *, doublereal *, integer *, + ftnlen, ftnlen, ftnlen), dswap_(integer *, doublereal *, integer * + , doublereal *, integer *); + integer iuplo; + extern /* Subroutine */ int dlartg_(doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *), xerbla_(char *, + integer *, ftnlen), dbdsqr_(char *, integer *, integer *, integer + *, integer *, doublereal *, doublereal *, doublereal *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *, ftnlen); + logical rotate; + + +/* -- LAPACK auxiliary routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + --d__; + --e; + vt_dim1 = *ldvt; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + iuplo = 0; + if (lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1)) { + iuplo = 1; + } + if (lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + iuplo = 2; + } + if (iuplo == 0) { + *info = -1; + } else if (*sqre < 0 || *sqre > 1) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*ncvt < 0) { + *info = -4; + } else if (*nru < 0) { + *info = -5; + } else if (*ncc < 0) { + *info = -6; + } else if (*ncvt == 0 && *ldvt < 1 || *ncvt > 0 && *ldvt < max(1,*n)) { + *info = -10; + } else if (*ldu < max(1,*nru)) { + *info = -12; + } else if (*ncc == 0 && *ldc < 1 || *ncc > 0 && *ldc < max(1,*n)) { + *info = -14; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DLASDQ", &i__1, (ftnlen)6); + return 0; + } + if (*n == 0) { + return 0; + } + +/* ROTATE is true if any singular vectors desired, false otherwise */ + + rotate = *ncvt > 0 || *nru > 0 || *ncc > 0; + np1 = *n + 1; + sqre1 = *sqre; + +/* If matrix non-square upper bidiagonal, rotate to be lower */ +/* bidiagonal. The rotations are on the right. */ + + if (iuplo == 1 && sqre1 == 1) { + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + dlartg_(&d__[i__], &e[i__], &cs, &sn, &r__); + d__[i__] = r__; + e[i__] = sn * d__[i__ + 1]; + d__[i__ + 1] = cs * d__[i__ + 1]; + if (rotate) { + work[i__] = cs; + work[*n + i__] = sn; + } +/* L10: */ + } + dlartg_(&d__[*n], &e[*n], &cs, &sn, &r__); + d__[*n] = r__; + e[*n] = 0.; + if (rotate) { + work[*n] = cs; + work[*n + *n] = sn; + } + iuplo = 2; + sqre1 = 0; + +/* Update singular vectors if desired. */ + + if (*ncvt > 0) { + dlasr_((char *)"L", (char *)"V", (char *)"F", &np1, ncvt, &work[1], &work[np1], &vt[ + vt_offset], ldvt, (ftnlen)1, (ftnlen)1, (ftnlen)1); + } + } + +/* If matrix lower bidiagonal, rotate to be upper bidiagonal */ +/* by applying Givens rotations on the left. */ + + if (iuplo == 2) { + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + dlartg_(&d__[i__], &e[i__], &cs, &sn, &r__); + d__[i__] = r__; + e[i__] = sn * d__[i__ + 1]; + d__[i__ + 1] = cs * d__[i__ + 1]; + if (rotate) { + work[i__] = cs; + work[*n + i__] = sn; + } +/* L20: */ + } + +/* If matrix (N+1)-by-N lower bidiagonal, one additional */ +/* rotation is needed. */ + + if (sqre1 == 1) { + dlartg_(&d__[*n], &e[*n], &cs, &sn, &r__); + d__[*n] = r__; + if (rotate) { + work[*n] = cs; + work[*n + *n] = sn; + } + } + +/* Update singular vectors if desired. */ + + if (*nru > 0) { + if (sqre1 == 0) { + dlasr_((char *)"R", (char *)"V", (char *)"F", nru, n, &work[1], &work[np1], &u[ + u_offset], ldu, (ftnlen)1, (ftnlen)1, (ftnlen)1); + } else { + dlasr_((char *)"R", (char *)"V", (char *)"F", nru, &np1, &work[1], &work[np1], &u[ + u_offset], ldu, (ftnlen)1, (ftnlen)1, (ftnlen)1); + } + } + if (*ncc > 0) { + if (sqre1 == 0) { + dlasr_((char *)"L", (char *)"V", (char *)"F", n, ncc, &work[1], &work[np1], &c__[ + c_offset], ldc, (ftnlen)1, (ftnlen)1, (ftnlen)1); + } else { + dlasr_((char *)"L", (char *)"V", (char *)"F", &np1, ncc, &work[1], &work[np1], &c__[ + c_offset], ldc, (ftnlen)1, (ftnlen)1, (ftnlen)1); + } + } + } + +/* Call DBDSQR to compute the SVD of the reduced real */ +/* N-by-N upper bidiagonal matrix. */ + + dbdsqr_((char *)"U", n, ncvt, nru, ncc, &d__[1], &e[1], &vt[vt_offset], ldvt, &u[ + u_offset], ldu, &c__[c_offset], ldc, &work[1], info, (ftnlen)1); + +/* Sort the singular values into ascending order (insertion sort on */ +/* singular values, but only one transposition per singular vector) */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Scan for smallest D(I). */ + + isub = i__; + smin = d__[i__]; + i__2 = *n; + for (j = i__ + 1; j <= i__2; ++j) { + if (d__[j] < smin) { + isub = j; + smin = d__[j]; + } +/* L30: */ + } + if (isub != i__) { + +/* Swap singular values and vectors. */ + + d__[isub] = d__[i__]; + d__[i__] = smin; + if (*ncvt > 0) { + dswap_(ncvt, &vt[isub + vt_dim1], ldvt, &vt[i__ + vt_dim1], + ldvt); + } + if (*nru > 0) { + dswap_(nru, &u[isub * u_dim1 + 1], &c__1, &u[i__ * u_dim1 + 1] + , &c__1); + } + if (*ncc > 0) { + dswap_(ncc, &c__[isub + c_dim1], ldc, &c__[i__ + c_dim1], ldc) + ; + } + } +/* L40: */ + } + + return 0; + +/* End of DLASDQ */ + +} /* dlasdq_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dlasdt.cpp b/lib/linalg/dlasdt.cpp new file mode 100644 index 0000000000..0e507af8f2 --- /dev/null +++ b/lib/linalg/dlasdt.cpp @@ -0,0 +1,209 @@ +/* fortran/dlasdt.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* > \brief \b DLASDT creates a tree of subproblems for bidiagonal divide and conquer. Used by sbdsdc. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DLASDT + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DLASDT( N, LVL, ND, INODE, NDIML, NDIMR, MSUB ) */ + +/* .. Scalar Arguments .. */ +/* INTEGER LVL, MSUB, N, ND */ +/* .. */ +/* .. Array Arguments .. */ +/* INTEGER INODE( * ), NDIML( * ), NDIMR( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DLASDT creates a tree of subproblems for bidiagonal divide and */ +/* > conquer. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > On entry, the number of diagonal elements of the */ +/* > bidiagonal matrix. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] LVL */ +/* > \verbatim */ +/* > LVL is INTEGER */ +/* > On exit, the number of levels on the computation tree. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] ND */ +/* > \verbatim */ +/* > ND is INTEGER */ +/* > On exit, the number of nodes on the tree. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INODE */ +/* > \verbatim */ +/* > INODE is INTEGER array, dimension ( N ) */ +/* > On exit, centers of subproblems. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] NDIML */ +/* > \verbatim */ +/* > NDIML is INTEGER array, dimension ( N ) */ +/* > On exit, row dimensions of left children. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] NDIMR */ +/* > \verbatim */ +/* > NDIMR is INTEGER array, dimension ( N ) */ +/* > On exit, row dimensions of right children. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] MSUB */ +/* > \verbatim */ +/* > MSUB is INTEGER */ +/* > On entry, the maximum row dimension each subproblem at the */ +/* > bottom of the tree can be of. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup OTHERauxiliary */ + +/* > \par Contributors: */ +/* ================== */ +/* > */ +/* > Ming Gu and Huan Ren, Computer Science Division, University of */ +/* > California at Berkeley, USA */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int dlasdt_(integer *n, integer *lvl, integer *nd, integer * + inode, integer *ndiml, integer *ndimr, integer *msub) +{ + /* System generated locals */ + integer i__1, i__2; + + /* Builtin functions */ + double log(doublereal); + + /* Local variables */ + integer i__, il, ir, maxn; + doublereal temp; + integer nlvl, llst, ncrnt; + + +/* -- LAPACK auxiliary routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Find the number of levels on the tree. */ + + /* Parameter adjustments */ + --ndimr; + --ndiml; + --inode; + + /* Function Body */ + maxn = max(1,*n); + temp = log((doublereal) maxn / (doublereal) (*msub + 1)) / log(2.); + *lvl = (integer) temp + 1; + + i__ = *n / 2; + inode[1] = i__ + 1; + ndiml[1] = i__; + ndimr[1] = *n - i__ - 1; + il = 0; + ir = 1; + llst = 1; + i__1 = *lvl - 1; + for (nlvl = 1; nlvl <= i__1; ++nlvl) { + +/* Constructing the tree at (NLVL+1)-st level. The number of */ +/* nodes created on this level is LLST * 2. */ + + i__2 = llst - 1; + for (i__ = 0; i__ <= i__2; ++i__) { + il += 2; + ir += 2; + ncrnt = llst + i__; + ndiml[il] = ndiml[ncrnt] / 2; + ndimr[il] = ndiml[ncrnt] - ndiml[il] - 1; + inode[il] = inode[ncrnt] - ndimr[il] - 1; + ndiml[ir] = ndimr[ncrnt] / 2; + ndimr[ir] = ndimr[ncrnt] - ndiml[ir] - 1; + inode[ir] = inode[ncrnt] + ndiml[ir] + 1; +/* L10: */ + } + llst <<= 1; +/* L20: */ + } + *nd = (llst << 1) - 1; + + return 0; + +/* End of DLASDT */ + +} /* dlasdt_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dlaset.cpp b/lib/linalg/dlaset.cpp new file mode 100644 index 0000000000..072bbf1500 --- /dev/null +++ b/lib/linalg/dlaset.cpp @@ -0,0 +1,229 @@ +/* fortran/dlaset.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* > \brief \b DLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given val +ues. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DLASET + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DLASET( UPLO, M, N, ALPHA, BETA, A, LDA ) */ + +/* .. Scalar Arguments .. */ +/* CHARACTER UPLO */ +/* INTEGER LDA, M, N */ +/* DOUBLE PRECISION ALPHA, BETA */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION A( LDA, * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DLASET initializes an m-by-n matrix A to BETA on the diagonal and */ +/* > ALPHA on the offdiagonals. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > Specifies the part of the matrix A to be set. */ +/* > = 'U': Upper triangular part is set; the strictly lower */ +/* > triangular part of A is not changed. */ +/* > = 'L': Lower triangular part is set; the strictly upper */ +/* > triangular part of A is not changed. */ +/* > Otherwise: All of the matrix A is set. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] ALPHA */ +/* > \verbatim */ +/* > ALPHA is DOUBLE PRECISION */ +/* > The constant to which the offdiagonal elements are to be set. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] BETA */ +/* > \verbatim */ +/* > BETA is DOUBLE PRECISION */ +/* > The constant to which the diagonal elements are to be set. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > On exit, the leading m-by-n submatrix of A is set as follows: */ +/* > */ +/* > if UPLO = 'U', A(i,j) = ALPHA, 1<=i<=j-1, 1<=j<=n, */ +/* > if UPLO = 'L', A(i,j) = ALPHA, j+1<=i<=m, 1<=j<=n, */ +/* > otherwise, A(i,j) = ALPHA, 1<=i<=m, 1<=j<=n, i.ne.j, */ +/* > */ +/* > and, for all UPLO, A(i,i) = BETA, 1<=i<=min(m,n). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= max(1,M). */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup OTHERauxiliary */ + +/* ===================================================================== */ +/* Subroutine */ int dlaset_(char *uplo, integer *m, integer *n, doublereal * + alpha, doublereal *beta, doublereal *a, integer *lda, ftnlen uplo_len) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + integer i__, j; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + + +/* -- LAPACK auxiliary routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + if (lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1)) { + +/* Set the strictly upper triangular or trapezoidal part of the */ +/* array to ALPHA. */ + + i__1 = *n; + for (j = 2; j <= i__1; ++j) { +/* Computing MIN */ + i__3 = j - 1; + i__2 = min(i__3,*m); + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = *alpha; +/* L10: */ + } +/* L20: */ + } + + } else if (lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + +/* Set the strictly lower triangular or trapezoidal part of the */ +/* array to ALPHA. */ + + i__1 = min(*m,*n); + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = j + 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = *alpha; +/* L30: */ + } +/* L40: */ + } + + } else { + +/* Set the leading m-by-n submatrix to ALPHA. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = *alpha; +/* L50: */ + } +/* L60: */ + } + } + +/* Set the first min(M,N) diagonal elements to BETA. */ + + i__1 = min(*m,*n); + for (i__ = 1; i__ <= i__1; ++i__) { + a[i__ + i__ * a_dim1] = *beta; +/* L70: */ + } + + return 0; + +/* End of DLASET */ + +} /* dlaset_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dlasq1.cpp b/lib/linalg/dlasq1.cpp new file mode 100644 index 0000000000..090f680525 --- /dev/null +++ b/lib/linalg/dlasq1.cpp @@ -0,0 +1,298 @@ +/* fortran/dlasq1.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c__2 = 2; +static integer c__0 = 0; + +/* > \brief \b DLASQ1 computes the singular values of a real square bidiagonal matrix. Used by sbdsqr. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DLASQ1 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DLASQ1( N, D, E, WORK, INFO ) */ + +/* .. Scalar Arguments .. */ +/* INTEGER INFO, N */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION D( * ), E( * ), WORK( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DLASQ1 computes the singular values of a real N-by-N bidiagonal */ +/* > matrix with diagonal D and off-diagonal E. The singular values */ +/* > are computed to high relative accuracy, in the absence of */ +/* > denormalization, underflow and overflow. The algorithm was first */ +/* > presented in */ +/* > */ +/* > (char *)"Accurate singular values and differential qd algorithms" by K. V. */ +/* > Fernando and B. N. Parlett, Numer. Math., Vol-67, No. 2, pp. 191-230, */ +/* > 1994, */ +/* > */ +/* > and the present implementation is described in "An implementation of */ +/* > the dqds Algorithm (Positive Case)", LAPACK Working Note. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of rows and columns in the matrix. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] D */ +/* > \verbatim */ +/* > D is DOUBLE PRECISION array, dimension (N) */ +/* > On entry, D contains the diagonal elements of the */ +/* > bidiagonal matrix whose SVD is desired. On normal exit, */ +/* > D contains the singular values in decreasing order. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] E */ +/* > \verbatim */ +/* > E is DOUBLE PRECISION array, dimension (N) */ +/* > On entry, elements E(1:N-1) contain the off-diagonal elements */ +/* > of the bidiagonal matrix whose SVD is desired. */ +/* > On exit, E is overwritten. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (4*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: the algorithm failed */ +/* > = 1, a split was marked by a positive value in E */ +/* > = 2, current block of Z not diagonalized after 100*N */ +/* > iterations (in inner while loop) On exit D and E */ +/* > represent a matrix with the same singular values */ +/* > which the calling subroutine could use to finish the */ +/* > computation, or even feed back into DLASQ1 */ +/* > = 3, termination criterion of outer while loop not met */ +/* > (program created more than N unreduced blocks) */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup auxOTHERcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int dlasq1_(integer *n, doublereal *d__, doublereal *e, + doublereal *work, integer *info) +{ + /* System generated locals */ + integer i__1, i__2; + doublereal d__1, d__2, d__3; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + integer i__; + doublereal eps; + extern /* Subroutine */ int dlas2_(doublereal *, doublereal *, doublereal + *, doublereal *, doublereal *); + doublereal scale; + integer iinfo; + doublereal sigmn; + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *); + doublereal sigmx; + extern /* Subroutine */ int dlasq2_(integer *, doublereal *, integer *); + extern doublereal dlamch_(char *, ftnlen); + extern /* Subroutine */ int dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *, ftnlen); + doublereal safmin; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen), dlasrt_( + char *, integer *, doublereal *, integer *, ftnlen); + + +/* -- LAPACK computational routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments */ + --work; + --e; + --d__; + + /* Function Body */ + *info = 0; + if (*n < 0) { + *info = -1; + i__1 = -(*info); + xerbla_((char *)"DLASQ1", &i__1, (ftnlen)6); + return 0; + } else if (*n == 0) { + return 0; + } else if (*n == 1) { + d__[1] = abs(d__[1]); + return 0; + } else if (*n == 2) { + dlas2_(&d__[1], &e[1], &d__[2], &sigmn, &sigmx); + d__[1] = sigmx; + d__[2] = sigmn; + return 0; + } + +/* Estimate the largest singular value. */ + + sigmx = 0.; + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + d__[i__] = (d__1 = d__[i__], abs(d__1)); +/* Computing MAX */ + d__2 = sigmx, d__3 = (d__1 = e[i__], abs(d__1)); + sigmx = max(d__2,d__3); +/* L10: */ + } + d__[*n] = (d__1 = d__[*n], abs(d__1)); + +/* Early return if SIGMX is zero (matrix is already diagonal). */ + + if (sigmx == 0.) { + dlasrt_((char *)"D", n, &d__[1], &iinfo, (ftnlen)1); + return 0; + } + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MAX */ + d__1 = sigmx, d__2 = d__[i__]; + sigmx = max(d__1,d__2); +/* L20: */ + } + +/* Copy D and E into WORK (in the Z format) and scale (squaring the */ +/* input data makes scaling by a power of the radix pointless). */ + + eps = dlamch_((char *)"Precision", (ftnlen)9); + safmin = dlamch_((char *)"Safe minimum", (ftnlen)12); + scale = sqrt(eps / safmin); + dcopy_(n, &d__[1], &c__1, &work[1], &c__2); + i__1 = *n - 1; + dcopy_(&i__1, &e[1], &c__1, &work[2], &c__2); + i__1 = (*n << 1) - 1; + i__2 = (*n << 1) - 1; + dlascl_((char *)"G", &c__0, &c__0, &sigmx, &scale, &i__1, &c__1, &work[1], &i__2, + &iinfo, (ftnlen)1); + +/* Compute the q's and e's. */ + + i__1 = (*n << 1) - 1; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing 2nd power */ + d__1 = work[i__]; + work[i__] = d__1 * d__1; +/* L30: */ + } + work[*n * 2] = 0.; + + dlasq2_(n, &work[1], info); + + if (*info == 0) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + d__[i__] = sqrt(work[i__]); +/* L40: */ + } + dlascl_((char *)"G", &c__0, &c__0, &scale, &sigmx, n, &c__1, &d__[1], n, & + iinfo, (ftnlen)1); + } else if (*info == 2) { + +/* Maximum number of iterations exceeded. Move data from WORK */ +/* into D and E so the calling subroutine can try to finish */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + d__[i__] = sqrt(work[(i__ << 1) - 1]); + e[i__] = sqrt(work[i__ * 2]); + } + dlascl_((char *)"G", &c__0, &c__0, &scale, &sigmx, n, &c__1, &d__[1], n, & + iinfo, (ftnlen)1); + dlascl_((char *)"G", &c__0, &c__0, &scale, &sigmx, n, &c__1, &e[1], n, &iinfo, + (ftnlen)1); + } + + return 0; + +/* End of DLASQ1 */ + +} /* dlasq1_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dlasq2.cpp b/lib/linalg/dlasq2.cpp new file mode 100644 index 0000000000..a6362ecfdf --- /dev/null +++ b/lib/linalg/dlasq2.cpp @@ -0,0 +1,716 @@ +/* fortran/dlasq2.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c__2 = 2; +static integer c__10 = 10; +static integer c__3 = 3; +static integer c__4 = 4; + +/* > \brief \b DLASQ2 computes all the eigenvalues of the symmetric positive definite tridiagonal matrix assoc +iated with the qd Array Z to high relative accuracy. Used by sbdsqr and sstegr. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DLASQ2 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DLASQ2( N, Z, INFO ) */ + +/* .. Scalar Arguments .. */ +/* INTEGER INFO, N */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION Z( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DLASQ2 computes all the eigenvalues of the symmetric positive */ +/* > definite tridiagonal matrix associated with the qd array Z to high */ +/* > relative accuracy are computed to high relative accuracy, in the */ +/* > absence of denormalization, underflow and overflow. */ +/* > */ +/* > To see the relation of Z to the tridiagonal matrix, let L be a */ +/* > unit lower bidiagonal matrix with subdiagonals Z(2,4,6,,..) and */ +/* > let U be an upper bidiagonal matrix with 1's above and diagonal */ +/* > Z(1,3,5,,..). The tridiagonal is L*U or, if you prefer, the */ +/* > symmetric tridiagonal to which it is similar. */ +/* > */ +/* > Note : DLASQ2 defines a logical variable, IEEE, which is true */ +/* > on machines which follow ieee-754 floating-point standard in their */ +/* > handling of infinities and NaNs, and false otherwise. This variable */ +/* > is passed to DLASQ3. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of rows and columns in the matrix. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] Z */ +/* > \verbatim */ +/* > Z is DOUBLE PRECISION array, dimension ( 4*N ) */ +/* > On entry Z holds the qd array. On exit, entries 1 to N hold */ +/* > the eigenvalues in decreasing order, Z( 2*N+1 ) holds the */ +/* > trace, and Z( 2*N+2 ) holds the sum of the eigenvalues. If */ +/* > N > 2, then Z( 2*N+3 ) holds the iteration count, Z( 2*N+4 ) */ +/* > holds NDIVS/NIN^2, and Z( 2*N+5 ) holds the percentage of */ +/* > shifts that failed. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if the i-th argument is a scalar and had an illegal */ +/* > value, then INFO = -i, if the i-th argument is an */ +/* > array and the j-entry had an illegal value, then */ +/* > INFO = -(i*100+j) */ +/* > > 0: the algorithm failed */ +/* > = 1, a split was marked by a positive value in E */ +/* > = 2, current block of Z not diagonalized after 100*N */ +/* > iterations (in inner while loop). On exit Z holds */ +/* > a qd array with the same eigenvalues as the given Z. */ +/* > = 3, termination criterion of outer while loop not met */ +/* > (program created more than N unreduced blocks) */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup auxOTHERcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > Local Variables: I0:N0 defines a current unreduced segment of Z. */ +/* > The shifts are accumulated in SIGMA. Iteration count is in ITER. */ +/* > Ping-pong is controlled by PP (alternates between 0 and 1). */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int dlasq2_(integer *n, doublereal *z__, integer *info) +{ + /* System generated locals */ + integer i__1, i__2, i__3; + doublereal d__1, d__2; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + doublereal d__, e, g; + integer k; + doublereal s, t; + integer i0, i1, i4, n0, n1; + doublereal dn; + integer pp; + doublereal dn1, dn2, dee, eps, tau, tol; + integer ipn4; + doublereal tol2; + logical ieee; + integer nbig; + doublereal dmin__, emin, emax; + integer kmin, ndiv, iter; + doublereal qmin, temp, qmax, zmax; + integer splt; + doublereal dmin1, dmin2; + integer nfail; + doublereal desig, trace, sigma; + integer iinfo; + doublereal tempe, tempq; + integer ttype; + extern /* Subroutine */ int dlasq3_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, doublereal *, doublereal *, + integer *, integer *, integer *, logical *, integer *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *); + extern doublereal dlamch_(char *, ftnlen); + doublereal deemin; + integer iwhila, iwhilb; + doublereal oldemn, safmin; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int dlasrt_(char *, integer *, doublereal *, + integer *, ftnlen); + + +/* -- LAPACK computational routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input arguments. */ +/* (in case DLASQ2 is not called by DLASQ1) */ + + /* Parameter adjustments */ + --z__; + + /* Function Body */ + *info = 0; + eps = dlamch_((char *)"Precision", (ftnlen)9); + safmin = dlamch_((char *)"Safe minimum", (ftnlen)12); + tol = eps * 100.; +/* Computing 2nd power */ + d__1 = tol; + tol2 = d__1 * d__1; + + if (*n < 0) { + *info = -1; + xerbla_((char *)"DLASQ2", &c__1, (ftnlen)6); + return 0; + } else if (*n == 0) { + return 0; + } else if (*n == 1) { + +/* 1-by-1 case. */ + + if (z__[1] < 0.) { + *info = -201; + xerbla_((char *)"DLASQ2", &c__2, (ftnlen)6); + } + return 0; + } else if (*n == 2) { + +/* 2-by-2 case. */ + + if (z__[1] < 0.) { + *info = -201; + xerbla_((char *)"DLASQ2", &c__2, (ftnlen)6); + return 0; + } else if (z__[2] < 0.) { + *info = -202; + xerbla_((char *)"DLASQ2", &c__2, (ftnlen)6); + return 0; + } else if (z__[3] < 0.) { + *info = -203; + xerbla_((char *)"DLASQ2", &c__2, (ftnlen)6); + return 0; + } else if (z__[3] > z__[1]) { + d__ = z__[3]; + z__[3] = z__[1]; + z__[1] = d__; + } + z__[5] = z__[1] + z__[2] + z__[3]; + if (z__[2] > z__[3] * tol2) { + t = (z__[1] - z__[3] + z__[2]) * .5; + s = z__[3] * (z__[2] / t); + if (s <= t) { + s = z__[3] * (z__[2] / (t * (sqrt(s / t + 1.) + 1.))); + } else { + s = z__[3] * (z__[2] / (t + sqrt(t) * sqrt(t + s))); + } + t = z__[1] + (s + z__[2]); + z__[3] *= z__[1] / t; + z__[1] = t; + } + z__[2] = z__[3]; + z__[6] = z__[2] + z__[1]; + return 0; + } + +/* Check for negative data and compute sums of q's and e's. */ + + z__[*n * 2] = 0.; + emin = z__[2]; + qmax = 0.; + zmax = 0.; + d__ = 0.; + e = 0.; + + i__1 = *n - 1 << 1; + for (k = 1; k <= i__1; k += 2) { + if (z__[k] < 0.) { + *info = -(k + 200); + xerbla_((char *)"DLASQ2", &c__2, (ftnlen)6); + return 0; + } else if (z__[k + 1] < 0.) { + *info = -(k + 201); + xerbla_((char *)"DLASQ2", &c__2, (ftnlen)6); + return 0; + } + d__ += z__[k]; + e += z__[k + 1]; +/* Computing MAX */ + d__1 = qmax, d__2 = z__[k]; + qmax = max(d__1,d__2); +/* Computing MIN */ + d__1 = emin, d__2 = z__[k + 1]; + emin = min(d__1,d__2); +/* Computing MAX */ + d__1 = max(qmax,zmax), d__2 = z__[k + 1]; + zmax = max(d__1,d__2); +/* L10: */ + } + if (z__[(*n << 1) - 1] < 0.) { + *info = -((*n << 1) + 199); + xerbla_((char *)"DLASQ2", &c__2, (ftnlen)6); + return 0; + } + d__ += z__[(*n << 1) - 1]; +/* Computing MAX */ + d__1 = qmax, d__2 = z__[(*n << 1) - 1]; + qmax = max(d__1,d__2); + zmax = max(qmax,zmax); + +/* Check for diagonality. */ + + if (e == 0.) { + i__1 = *n; + for (k = 2; k <= i__1; ++k) { + z__[k] = z__[(k << 1) - 1]; +/* L20: */ + } + dlasrt_((char *)"D", n, &z__[1], &iinfo, (ftnlen)1); + z__[(*n << 1) - 1] = d__; + return 0; + } + + trace = d__ + e; + +/* Check for zero data. */ + + if (trace == 0.) { + z__[(*n << 1) - 1] = 0.; + return 0; + } + +/* Check whether the machine is IEEE conformable. */ + + ieee = ilaenv_(&c__10, (char *)"DLASQ2", (char *)"N", &c__1, &c__2, &c__3, &c__4, (ftnlen) + 6, (ftnlen)1) == 1; + +/* Rearrange data for locality: Z=(q1,qq1,e1,ee1,q2,qq2,e2,ee2,...). */ + + for (k = *n << 1; k >= 2; k += -2) { + z__[k * 2] = 0.; + z__[(k << 1) - 1] = z__[k]; + z__[(k << 1) - 2] = 0.; + z__[(k << 1) - 3] = z__[k - 1]; +/* L30: */ + } + + i0 = 1; + n0 = *n; + +/* Reverse the qd-array, if warranted. */ + + if (z__[(i0 << 2) - 3] * 1.5 < z__[(n0 << 2) - 3]) { + ipn4 = i0 + n0 << 2; + i__1 = i0 + n0 - 1 << 1; + for (i4 = i0 << 2; i4 <= i__1; i4 += 4) { + temp = z__[i4 - 3]; + z__[i4 - 3] = z__[ipn4 - i4 - 3]; + z__[ipn4 - i4 - 3] = temp; + temp = z__[i4 - 1]; + z__[i4 - 1] = z__[ipn4 - i4 - 5]; + z__[ipn4 - i4 - 5] = temp; +/* L40: */ + } + } + +/* Initial split checking via dqd and Li's test. */ + + pp = 0; + + for (k = 1; k <= 2; ++k) { + + d__ = z__[(n0 << 2) + pp - 3]; + i__1 = (i0 << 2) + pp; + for (i4 = (n0 - 1 << 2) + pp; i4 >= i__1; i4 += -4) { + if (z__[i4 - 1] <= tol2 * d__) { + z__[i4 - 1] = -0.; + d__ = z__[i4 - 3]; + } else { + d__ = z__[i4 - 3] * (d__ / (d__ + z__[i4 - 1])); + } +/* L50: */ + } + +/* dqd maps Z to ZZ plus Li's test. */ + + emin = z__[(i0 << 2) + pp + 1]; + d__ = z__[(i0 << 2) + pp - 3]; + i__1 = (n0 - 1 << 2) + pp; + for (i4 = (i0 << 2) + pp; i4 <= i__1; i4 += 4) { + z__[i4 - (pp << 1) - 2] = d__ + z__[i4 - 1]; + if (z__[i4 - 1] <= tol2 * d__) { + z__[i4 - 1] = -0.; + z__[i4 - (pp << 1) - 2] = d__; + z__[i4 - (pp << 1)] = 0.; + d__ = z__[i4 + 1]; + } else if (safmin * z__[i4 + 1] < z__[i4 - (pp << 1) - 2] && + safmin * z__[i4 - (pp << 1) - 2] < z__[i4 + 1]) { + temp = z__[i4 + 1] / z__[i4 - (pp << 1) - 2]; + z__[i4 - (pp << 1)] = z__[i4 - 1] * temp; + d__ *= temp; + } else { + z__[i4 - (pp << 1)] = z__[i4 + 1] * (z__[i4 - 1] / z__[i4 - ( + pp << 1) - 2]); + d__ = z__[i4 + 1] * (d__ / z__[i4 - (pp << 1) - 2]); + } +/* Computing MIN */ + d__1 = emin, d__2 = z__[i4 - (pp << 1)]; + emin = min(d__1,d__2); +/* L60: */ + } + z__[(n0 << 2) - pp - 2] = d__; + +/* Now find qmax. */ + + qmax = z__[(i0 << 2) - pp - 2]; + i__1 = (n0 << 2) - pp - 2; + for (i4 = (i0 << 2) - pp + 2; i4 <= i__1; i4 += 4) { +/* Computing MAX */ + d__1 = qmax, d__2 = z__[i4]; + qmax = max(d__1,d__2); +/* L70: */ + } + +/* Prepare for the next iteration on K. */ + + pp = 1 - pp; +/* L80: */ + } + +/* Initialise variables to pass to DLASQ3. */ + + ttype = 0; + dmin1 = 0.; + dmin2 = 0.; + dn = 0.; + dn1 = 0.; + dn2 = 0.; + g = 0.; + tau = 0.; + + iter = 2; + nfail = 0; + ndiv = n0 - i0 << 1; + + i__1 = *n + 1; + for (iwhila = 1; iwhila <= i__1; ++iwhila) { + if (n0 < 1) { + goto L170; + } + +/* While array unfinished do */ + +/* E(N0) holds the value of SIGMA when submatrix in I0:N0 */ +/* splits from the rest of the array, but is negated. */ + + desig = 0.; + if (n0 == *n) { + sigma = 0.; + } else { + sigma = -z__[(n0 << 2) - 1]; + } + if (sigma < 0.) { + *info = 1; + return 0; + } + +/* Find last unreduced submatrix's top index I0, find QMAX and */ +/* EMIN. Find Gershgorin-type bound if Q's much greater than E's. */ + + emax = 0.; + if (n0 > i0) { + emin = (d__1 = z__[(n0 << 2) - 5], abs(d__1)); + } else { + emin = 0.; + } + qmin = z__[(n0 << 2) - 3]; + qmax = qmin; + for (i4 = n0 << 2; i4 >= 8; i4 += -4) { + if (z__[i4 - 5] <= 0.) { + goto L100; + } + if (qmin >= emax * 4.) { +/* Computing MIN */ + d__1 = qmin, d__2 = z__[i4 - 3]; + qmin = min(d__1,d__2); +/* Computing MAX */ + d__1 = emax, d__2 = z__[i4 - 5]; + emax = max(d__1,d__2); + } +/* Computing MAX */ + d__1 = qmax, d__2 = z__[i4 - 7] + z__[i4 - 5]; + qmax = max(d__1,d__2); +/* Computing MIN */ + d__1 = emin, d__2 = z__[i4 - 5]; + emin = min(d__1,d__2); +/* L90: */ + } + i4 = 4; + +L100: + i0 = i4 / 4; + pp = 0; + + if (n0 - i0 > 1) { + dee = z__[(i0 << 2) - 3]; + deemin = dee; + kmin = i0; + i__2 = (n0 << 2) - 3; + for (i4 = (i0 << 2) + 1; i4 <= i__2; i4 += 4) { + dee = z__[i4] * (dee / (dee + z__[i4 - 2])); + if (dee <= deemin) { + deemin = dee; + kmin = (i4 + 3) / 4; + } +/* L110: */ + } + if (kmin - i0 << 1 < n0 - kmin && deemin <= z__[(n0 << 2) - 3] * + .5) { + ipn4 = i0 + n0 << 2; + pp = 2; + i__2 = i0 + n0 - 1 << 1; + for (i4 = i0 << 2; i4 <= i__2; i4 += 4) { + temp = z__[i4 - 3]; + z__[i4 - 3] = z__[ipn4 - i4 - 3]; + z__[ipn4 - i4 - 3] = temp; + temp = z__[i4 - 2]; + z__[i4 - 2] = z__[ipn4 - i4 - 2]; + z__[ipn4 - i4 - 2] = temp; + temp = z__[i4 - 1]; + z__[i4 - 1] = z__[ipn4 - i4 - 5]; + z__[ipn4 - i4 - 5] = temp; + temp = z__[i4]; + z__[i4] = z__[ipn4 - i4 - 4]; + z__[ipn4 - i4 - 4] = temp; +/* L120: */ + } + } + } + +/* Put -(initial shift) into DMIN. */ + +/* Computing MAX */ + d__1 = 0., d__2 = qmin - sqrt(qmin) * 2. * sqrt(emax); + dmin__ = -max(d__1,d__2); + +/* Now I0:N0 is unreduced. */ +/* PP = 0 for ping, PP = 1 for pong. */ +/* PP = 2 indicates that flipping was applied to the Z array and */ +/* and that the tests for deflation upon entry in DLASQ3 */ +/* should not be performed. */ + + nbig = (n0 - i0 + 1) * 100; + i__2 = nbig; + for (iwhilb = 1; iwhilb <= i__2; ++iwhilb) { + if (i0 > n0) { + goto L150; + } + +/* While submatrix unfinished take a good dqds step. */ + + dlasq3_(&i0, &n0, &z__[1], &pp, &dmin__, &sigma, &desig, &qmax, & + nfail, &iter, &ndiv, &ieee, &ttype, &dmin1, &dmin2, &dn, & + dn1, &dn2, &g, &tau); + + pp = 1 - pp; + +/* When EMIN is very small check for splits. */ + + if (pp == 0 && n0 - i0 >= 3) { + if (z__[n0 * 4] <= tol2 * qmax || z__[(n0 << 2) - 1] <= tol2 * + sigma) { + splt = i0 - 1; + qmax = z__[(i0 << 2) - 3]; + emin = z__[(i0 << 2) - 1]; + oldemn = z__[i0 * 4]; + i__3 = n0 - 3 << 2; + for (i4 = i0 << 2; i4 <= i__3; i4 += 4) { + if (z__[i4] <= tol2 * z__[i4 - 3] || z__[i4 - 1] <= + tol2 * sigma) { + z__[i4 - 1] = -sigma; + splt = i4 / 4; + qmax = 0.; + emin = z__[i4 + 3]; + oldemn = z__[i4 + 4]; + } else { +/* Computing MAX */ + d__1 = qmax, d__2 = z__[i4 + 1]; + qmax = max(d__1,d__2); +/* Computing MIN */ + d__1 = emin, d__2 = z__[i4 - 1]; + emin = min(d__1,d__2); +/* Computing MIN */ + d__1 = oldemn, d__2 = z__[i4]; + oldemn = min(d__1,d__2); + } +/* L130: */ + } + z__[(n0 << 2) - 1] = emin; + z__[n0 * 4] = oldemn; + i0 = splt + 1; + } + } + +/* L140: */ + } + + *info = 2; + +/* Maximum number of iterations exceeded, restore the shift */ +/* SIGMA and place the new d's and e's in a qd array. */ +/* This might need to be done for several blocks */ + + i1 = i0; + n1 = n0; +L145: + tempq = z__[(i0 << 2) - 3]; + z__[(i0 << 2) - 3] += sigma; + i__2 = n0; + for (k = i0 + 1; k <= i__2; ++k) { + tempe = z__[(k << 2) - 5]; + z__[(k << 2) - 5] *= tempq / z__[(k << 2) - 7]; + tempq = z__[(k << 2) - 3]; + z__[(k << 2) - 3] = z__[(k << 2) - 3] + sigma + tempe - z__[(k << + 2) - 5]; + } + +/* Prepare to do this on the previous block if there is one */ + + if (i1 > 1) { + n1 = i1 - 1; + while(i1 >= 2 && z__[(i1 << 2) - 5] >= 0.) { + --i1; + } + sigma = -z__[(n1 << 2) - 1]; + goto L145; + } + i__2 = *n; + for (k = 1; k <= i__2; ++k) { + z__[(k << 1) - 1] = z__[(k << 2) - 3]; + +/* Only the block 1..N0 is unfinished. The rest of the e's */ +/* must be essentially zero, although sometimes other data */ +/* has been stored in them. */ + + if (k < n0) { + z__[k * 2] = z__[(k << 2) - 1]; + } else { + z__[k * 2] = 0.; + } + } + return 0; + +/* end IWHILB */ + +L150: + +/* L160: */ + ; + } + + *info = 3; + return 0; + +/* end IWHILA */ + +L170: + +/* Move q's to the front. */ + + i__1 = *n; + for (k = 2; k <= i__1; ++k) { + z__[k] = z__[(k << 2) - 3]; +/* L180: */ + } + +/* Sort and compute sum of eigenvalues. */ + + dlasrt_((char *)"D", n, &z__[1], &iinfo, (ftnlen)1); + + e = 0.; + for (k = *n; k >= 1; --k) { + e += z__[k]; +/* L190: */ + } + +/* Store trace, sum(eigenvalues) and information on performance. */ + + z__[(*n << 1) + 1] = trace; + z__[(*n << 1) + 2] = e; + z__[(*n << 1) + 3] = (doublereal) iter; +/* Computing 2nd power */ + i__1 = *n; + z__[(*n << 1) + 4] = (doublereal) ndiv / (doublereal) (i__1 * i__1); + z__[(*n << 1) + 5] = nfail * 100. / (doublereal) iter; + return 0; + +/* End of DLASQ2 */ + +} /* dlasq2_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dlasq3.cpp b/lib/linalg/dlasq3.cpp new file mode 100644 index 0000000000..e5974545a6 --- /dev/null +++ b/lib/linalg/dlasq3.cpp @@ -0,0 +1,475 @@ +/* fortran/dlasq3.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* > \brief \b DLASQ3 checks for deflation, computes a shift and calls dqds. Used by sbdsqr. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DLASQ3 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DLASQ3( I0, N0, Z, PP, DMIN, SIGMA, DESIG, QMAX, NFAIL, */ +/* ITER, NDIV, IEEE, TTYPE, DMIN1, DMIN2, DN, DN1, */ +/* DN2, G, TAU ) */ + +/* .. Scalar Arguments .. */ +/* LOGICAL IEEE */ +/* INTEGER I0, ITER, N0, NDIV, NFAIL, PP */ +/* DOUBLE PRECISION DESIG, DMIN, DMIN1, DMIN2, DN, DN1, DN2, G, */ +/* $ QMAX, SIGMA, TAU */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION Z( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DLASQ3 checks for deflation, computes a shift (TAU) and calls dqds. */ +/* > In case of failure it changes shifts, and tries again until output */ +/* > is positive. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] I0 */ +/* > \verbatim */ +/* > I0 is INTEGER */ +/* > First index. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] N0 */ +/* > \verbatim */ +/* > N0 is INTEGER */ +/* > Last index. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] Z */ +/* > \verbatim */ +/* > Z is DOUBLE PRECISION array, dimension ( 4*N0 ) */ +/* > Z holds the qd array. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] PP */ +/* > \verbatim */ +/* > PP is INTEGER */ +/* > PP=0 for ping, PP=1 for pong. */ +/* > PP=2 indicates that flipping was applied to the Z array */ +/* > and that the initial tests for deflation should not be */ +/* > performed. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] DMIN */ +/* > \verbatim */ +/* > DMIN is DOUBLE PRECISION */ +/* > Minimum value of d. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] SIGMA */ +/* > \verbatim */ +/* > SIGMA is DOUBLE PRECISION */ +/* > Sum of shifts used in current segment. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] DESIG */ +/* > \verbatim */ +/* > DESIG is DOUBLE PRECISION */ +/* > Lower order part of SIGMA */ +/* > \endverbatim */ +/* > */ +/* > \param[in] QMAX */ +/* > \verbatim */ +/* > QMAX is DOUBLE PRECISION */ +/* > Maximum value of q. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] NFAIL */ +/* > \verbatim */ +/* > NFAIL is INTEGER */ +/* > Increment NFAIL by 1 each time the shift was too big. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] ITER */ +/* > \verbatim */ +/* > ITER is INTEGER */ +/* > Increment ITER by 1 for each iteration. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] NDIV */ +/* > \verbatim */ +/* > NDIV is INTEGER */ +/* > Increment NDIV by 1 for each division. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IEEE */ +/* > \verbatim */ +/* > IEEE is LOGICAL */ +/* > Flag for IEEE or non IEEE arithmetic (passed to DLASQ5). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] TTYPE */ +/* > \verbatim */ +/* > TTYPE is INTEGER */ +/* > Shift type. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] DMIN1 */ +/* > \verbatim */ +/* > DMIN1 is DOUBLE PRECISION */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] DMIN2 */ +/* > \verbatim */ +/* > DMIN2 is DOUBLE PRECISION */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] DN */ +/* > \verbatim */ +/* > DN is DOUBLE PRECISION */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] DN1 */ +/* > \verbatim */ +/* > DN1 is DOUBLE PRECISION */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] DN2 */ +/* > \verbatim */ +/* > DN2 is DOUBLE PRECISION */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] G */ +/* > \verbatim */ +/* > G is DOUBLE PRECISION */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] TAU */ +/* > \verbatim */ +/* > TAU is DOUBLE PRECISION */ +/* > */ +/* > These are passed as arguments in order to save their values */ +/* > between calls to DLASQ3. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup auxOTHERcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int dlasq3_(integer *i0, integer *n0, doublereal *z__, + integer *pp, doublereal *dmin__, doublereal *sigma, doublereal *desig, + doublereal *qmax, integer *nfail, integer *iter, integer *ndiv, + logical *ieee, integer *ttype, doublereal *dmin1, doublereal *dmin2, + doublereal *dn, doublereal *dn1, doublereal *dn2, doublereal *g, + doublereal *tau) +{ + /* System generated locals */ + integer i__1; + doublereal d__1, d__2; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + doublereal s, t; + integer j4, nn; + doublereal eps, tol; + integer n0in, ipn4; + doublereal tol2, temp; + extern /* Subroutine */ int dlasq4_(integer *, integer *, doublereal *, + integer *, integer *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, integer *, + doublereal *), dlasq5_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, logical * + , doublereal *), dlasq6_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *); + extern doublereal dlamch_(char *, ftnlen); + extern logical disnan_(doublereal *); + + +/* -- LAPACK computational routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. External Function .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments */ + --z__; + + /* Function Body */ + n0in = *n0; + eps = dlamch_((char *)"Precision", (ftnlen)9); + tol = eps * 100.; +/* Computing 2nd power */ + d__1 = tol; + tol2 = d__1 * d__1; + +/* Check for deflation. */ + +L10: + + if (*n0 < *i0) { + return 0; + } + if (*n0 == *i0) { + goto L20; + } + nn = (*n0 << 2) + *pp; + if (*n0 == *i0 + 1) { + goto L40; + } + +/* Check whether E(N0-1) is negligible, 1 eigenvalue. */ + + if (z__[nn - 5] > tol2 * (*sigma + z__[nn - 3]) && z__[nn - (*pp << 1) - + 4] > tol2 * z__[nn - 7]) { + goto L30; + } + +L20: + + z__[(*n0 << 2) - 3] = z__[(*n0 << 2) + *pp - 3] + *sigma; + --(*n0); + goto L10; + +/* Check whether E(N0-2) is negligible, 2 eigenvalues. */ + +L30: + + if (z__[nn - 9] > tol2 * *sigma && z__[nn - (*pp << 1) - 8] > tol2 * z__[ + nn - 11]) { + goto L50; + } + +L40: + + if (z__[nn - 3] > z__[nn - 7]) { + s = z__[nn - 3]; + z__[nn - 3] = z__[nn - 7]; + z__[nn - 7] = s; + } + t = (z__[nn - 7] - z__[nn - 3] + z__[nn - 5]) * .5; + if (z__[nn - 5] > z__[nn - 3] * tol2 && t != 0.) { + s = z__[nn - 3] * (z__[nn - 5] / t); + if (s <= t) { + s = z__[nn - 3] * (z__[nn - 5] / (t * (sqrt(s / t + 1.) + 1.))); + } else { + s = z__[nn - 3] * (z__[nn - 5] / (t + sqrt(t) * sqrt(t + s))); + } + t = z__[nn - 7] + (s + z__[nn - 5]); + z__[nn - 3] *= z__[nn - 7] / t; + z__[nn - 7] = t; + } + z__[(*n0 << 2) - 7] = z__[nn - 7] + *sigma; + z__[(*n0 << 2) - 3] = z__[nn - 3] + *sigma; + *n0 += -2; + goto L10; + +L50: + if (*pp == 2) { + *pp = 0; + } + +/* Reverse the qd-array, if warranted. */ + + if (*dmin__ <= 0. || *n0 < n0in) { + if (z__[(*i0 << 2) + *pp - 3] * 1.5 < z__[(*n0 << 2) + *pp - 3]) { + ipn4 = *i0 + *n0 << 2; + i__1 = *i0 + *n0 - 1 << 1; + for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { + temp = z__[j4 - 3]; + z__[j4 - 3] = z__[ipn4 - j4 - 3]; + z__[ipn4 - j4 - 3] = temp; + temp = z__[j4 - 2]; + z__[j4 - 2] = z__[ipn4 - j4 - 2]; + z__[ipn4 - j4 - 2] = temp; + temp = z__[j4 - 1]; + z__[j4 - 1] = z__[ipn4 - j4 - 5]; + z__[ipn4 - j4 - 5] = temp; + temp = z__[j4]; + z__[j4] = z__[ipn4 - j4 - 4]; + z__[ipn4 - j4 - 4] = temp; +/* L60: */ + } + if (*n0 - *i0 <= 4) { + z__[(*n0 << 2) + *pp - 1] = z__[(*i0 << 2) + *pp - 1]; + z__[(*n0 << 2) - *pp] = z__[(*i0 << 2) - *pp]; + } +/* Computing MIN */ + d__1 = *dmin2, d__2 = z__[(*n0 << 2) + *pp - 1]; + *dmin2 = min(d__1,d__2); +/* Computing MIN */ + d__1 = z__[(*n0 << 2) + *pp - 1], d__2 = z__[(*i0 << 2) + *pp - 1] + , d__1 = min(d__1,d__2), d__2 = z__[(*i0 << 2) + *pp + 3]; + z__[(*n0 << 2) + *pp - 1] = min(d__1,d__2); +/* Computing MIN */ + d__1 = z__[(*n0 << 2) - *pp], d__2 = z__[(*i0 << 2) - *pp], d__1 = + min(d__1,d__2), d__2 = z__[(*i0 << 2) - *pp + 4]; + z__[(*n0 << 2) - *pp] = min(d__1,d__2); +/* Computing MAX */ + d__1 = *qmax, d__2 = z__[(*i0 << 2) + *pp - 3], d__1 = max(d__1, + d__2), d__2 = z__[(*i0 << 2) + *pp + 1]; + *qmax = max(d__1,d__2); + *dmin__ = -0.; + } + } + +/* Choose a shift. */ + + dlasq4_(i0, n0, &z__[1], pp, &n0in, dmin__, dmin1, dmin2, dn, dn1, dn2, + tau, ttype, g); + +/* Call dqds until DMIN > 0. */ + +L70: + + dlasq5_(i0, n0, &z__[1], pp, tau, sigma, dmin__, dmin1, dmin2, dn, dn1, + dn2, ieee, &eps); + + *ndiv += *n0 - *i0 + 2; + ++(*iter); + +/* Check status. */ + + if (*dmin__ >= 0. && *dmin1 >= 0.) { + +/* Success. */ + + goto L90; + + } else if (*dmin__ < 0. && *dmin1 > 0. && z__[(*n0 - 1 << 2) - *pp] < tol + * (*sigma + *dn1) && abs(*dn) < tol * *sigma) { + +/* Convergence hidden by negative DN. */ + + z__[(*n0 - 1 << 2) - *pp + 2] = 0.; + *dmin__ = 0.; + goto L90; + } else if (*dmin__ < 0.) { + +/* TAU too big. Select new TAU and try again. */ + + ++(*nfail); + if (*ttype < -22) { + +/* Failed twice. Play it safe. */ + + *tau = 0.; + } else if (*dmin1 > 0.) { + +/* Late failure. Gives excellent shift. */ + + *tau = (*tau + *dmin__) * (1. - eps * 2.); + *ttype += -11; + } else { + +/* Early failure. Divide by 4. */ + + *tau *= .25; + *ttype += -12; + } + goto L70; + } else if (disnan_(dmin__)) { + +/* NaN. */ + + if (*tau == 0.) { + goto L80; + } else { + *tau = 0.; + goto L70; + } + } else { + +/* Possible underflow. Play it safe. */ + + goto L80; + } + +/* Risk of underflow. */ + +L80: + dlasq6_(i0, n0, &z__[1], pp, dmin__, dmin1, dmin2, dn, dn1, dn2); + *ndiv += *n0 - *i0 + 2; + ++(*iter); + *tau = 0.; + +L90: + if (*tau < *sigma) { + *desig += *tau; + t = *sigma + *desig; + *desig -= t - *sigma; + } else { + t = *sigma + *tau; + *desig = *sigma - (t - *tau) + *desig; + } + *sigma = t; + + return 0; + +/* End of DLASQ3 */ + +} /* dlasq3_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dlasq4.cpp b/lib/linalg/dlasq4.cpp new file mode 100644 index 0000000000..4841ada1d2 --- /dev/null +++ b/lib/linalg/dlasq4.cpp @@ -0,0 +1,501 @@ +/* fortran/dlasq4.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* > \brief \b DLASQ4 computes an approximation to the smallest eigenvalue using values of d from the previous + transform. Used by sbdsqr. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DLASQ4 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DLASQ4( I0, N0, Z, PP, N0IN, DMIN, DMIN1, DMIN2, DN, */ +/* DN1, DN2, TAU, TTYPE, G ) */ + +/* .. Scalar Arguments .. */ +/* INTEGER I0, N0, N0IN, PP, TTYPE */ +/* DOUBLE PRECISION DMIN, DMIN1, DMIN2, DN, DN1, DN2, G, TAU */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION Z( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DLASQ4 computes an approximation TAU to the smallest eigenvalue */ +/* > using values of d from the previous transform. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] I0 */ +/* > \verbatim */ +/* > I0 is INTEGER */ +/* > First index. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N0 */ +/* > \verbatim */ +/* > N0 is INTEGER */ +/* > Last index. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] Z */ +/* > \verbatim */ +/* > Z is DOUBLE PRECISION array, dimension ( 4*N0 ) */ +/* > Z holds the qd array. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] PP */ +/* > \verbatim */ +/* > PP is INTEGER */ +/* > PP=0 for ping, PP=1 for pong. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N0IN */ +/* > \verbatim */ +/* > N0IN is INTEGER */ +/* > The value of N0 at start of EIGTEST. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DMIN */ +/* > \verbatim */ +/* > DMIN is DOUBLE PRECISION */ +/* > Minimum value of d. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DMIN1 */ +/* > \verbatim */ +/* > DMIN1 is DOUBLE PRECISION */ +/* > Minimum value of d, excluding D( N0 ). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DMIN2 */ +/* > \verbatim */ +/* > DMIN2 is DOUBLE PRECISION */ +/* > Minimum value of d, excluding D( N0 ) and D( N0-1 ). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DN */ +/* > \verbatim */ +/* > DN is DOUBLE PRECISION */ +/* > d(N) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DN1 */ +/* > \verbatim */ +/* > DN1 is DOUBLE PRECISION */ +/* > d(N-1) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DN2 */ +/* > \verbatim */ +/* > DN2 is DOUBLE PRECISION */ +/* > d(N-2) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] TAU */ +/* > \verbatim */ +/* > TAU is DOUBLE PRECISION */ +/* > This is the shift. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] TTYPE */ +/* > \verbatim */ +/* > TTYPE is INTEGER */ +/* > Shift type. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] G */ +/* > \verbatim */ +/* > G is DOUBLE PRECISION */ +/* > G is passed as an argument in order to save its value between */ +/* > calls to DLASQ4. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup auxOTHERcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > CNST1 = 9/16 */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int dlasq4_(integer *i0, integer *n0, doublereal *z__, + integer *pp, integer *n0in, doublereal *dmin__, doublereal *dmin1, + doublereal *dmin2, doublereal *dn, doublereal *dn1, doublereal *dn2, + doublereal *tau, integer *ttype, doublereal *g) +{ + /* System generated locals */ + integer i__1; + doublereal d__1, d__2; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + doublereal s, a2, b1, b2; + integer i4, nn, np; + doublereal gam, gap1, gap2; + + +/* -- LAPACK computational routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* A negative DMIN forces the shift to take that absolute value */ +/* TTYPE records the type of shift. */ + + /* Parameter adjustments */ + --z__; + + /* Function Body */ + if (*dmin__ <= 0.) { + *tau = -(*dmin__); + *ttype = -1; + return 0; + } + + nn = (*n0 << 2) + *pp; + if (*n0in == *n0) { + +/* No eigenvalues deflated. */ + + if (*dmin__ == *dn || *dmin__ == *dn1) { + + b1 = sqrt(z__[nn - 3]) * sqrt(z__[nn - 5]); + b2 = sqrt(z__[nn - 7]) * sqrt(z__[nn - 9]); + a2 = z__[nn - 7] + z__[nn - 5]; + +/* Cases 2 and 3. */ + + if (*dmin__ == *dn && *dmin1 == *dn1) { + gap2 = *dmin2 - a2 - *dmin2 * .25; + if (gap2 > 0. && gap2 > b2) { + gap1 = a2 - *dn - b2 / gap2 * b2; + } else { + gap1 = a2 - *dn - (b1 + b2); + } + if (gap1 > 0. && gap1 > b1) { +/* Computing MAX */ + d__1 = *dn - b1 / gap1 * b1, d__2 = *dmin__ * .5; + s = max(d__1,d__2); + *ttype = -2; + } else { + s = 0.; + if (*dn > b1) { + s = *dn - b1; + } + if (a2 > b1 + b2) { +/* Computing MIN */ + d__1 = s, d__2 = a2 - (b1 + b2); + s = min(d__1,d__2); + } +/* Computing MAX */ + d__1 = s, d__2 = *dmin__ * .333; + s = max(d__1,d__2); + *ttype = -3; + } + } else { + +/* Case 4. */ + + *ttype = -4; + s = *dmin__ * .25; + if (*dmin__ == *dn) { + gam = *dn; + a2 = 0.; + if (z__[nn - 5] > z__[nn - 7]) { + return 0; + } + b2 = z__[nn - 5] / z__[nn - 7]; + np = nn - 9; + } else { + np = nn - (*pp << 1); + gam = *dn1; + if (z__[np - 4] > z__[np - 2]) { + return 0; + } + a2 = z__[np - 4] / z__[np - 2]; + if (z__[nn - 9] > z__[nn - 11]) { + return 0; + } + b2 = z__[nn - 9] / z__[nn - 11]; + np = nn - 13; + } + +/* Approximate contribution to norm squared from I < NN-1. */ + + a2 += b2; + i__1 = (*i0 << 2) - 1 + *pp; + for (i4 = np; i4 >= i__1; i4 += -4) { + if (b2 == 0.) { + goto L20; + } + b1 = b2; + if (z__[i4] > z__[i4 - 2]) { + return 0; + } + b2 *= z__[i4] / z__[i4 - 2]; + a2 += b2; + if (max(b2,b1) * 100. < a2 || .563 < a2) { + goto L20; + } +/* L10: */ + } +L20: + a2 *= 1.05; + +/* Rayleigh quotient residual bound. */ + + if (a2 < .563) { + s = gam * (1. - sqrt(a2)) / (a2 + 1.); + } + } + } else if (*dmin__ == *dn2) { + +/* Case 5. */ + + *ttype = -5; + s = *dmin__ * .25; + +/* Compute contribution to norm squared from I > NN-2. */ + + np = nn - (*pp << 1); + b1 = z__[np - 2]; + b2 = z__[np - 6]; + gam = *dn2; + if (z__[np - 8] > b2 || z__[np - 4] > b1) { + return 0; + } + a2 = z__[np - 8] / b2 * (z__[np - 4] / b1 + 1.); + +/* Approximate contribution to norm squared from I < NN-2. */ + + if (*n0 - *i0 > 2) { + b2 = z__[nn - 13] / z__[nn - 15]; + a2 += b2; + i__1 = (*i0 << 2) - 1 + *pp; + for (i4 = nn - 17; i4 >= i__1; i4 += -4) { + if (b2 == 0.) { + goto L40; + } + b1 = b2; + if (z__[i4] > z__[i4 - 2]) { + return 0; + } + b2 *= z__[i4] / z__[i4 - 2]; + a2 += b2; + if (max(b2,b1) * 100. < a2 || .563 < a2) { + goto L40; + } +/* L30: */ + } +L40: + a2 *= 1.05; + } + + if (a2 < .563) { + s = gam * (1. - sqrt(a2)) / (a2 + 1.); + } + } else { + +/* Case 6, no information to guide us. */ + + if (*ttype == -6) { + *g += (1. - *g) * .333; + } else if (*ttype == -18) { + *g = .083250000000000005; + } else { + *g = .25; + } + s = *g * *dmin__; + *ttype = -6; + } + + } else if (*n0in == *n0 + 1) { + +/* One eigenvalue just deflated. Use DMIN1, DN1 for DMIN and DN. */ + + if (*dmin1 == *dn1 && *dmin2 == *dn2) { + +/* Cases 7 and 8. */ + + *ttype = -7; + s = *dmin1 * .333; + if (z__[nn - 5] > z__[nn - 7]) { + return 0; + } + b1 = z__[nn - 5] / z__[nn - 7]; + b2 = b1; + if (b2 == 0.) { + goto L60; + } + i__1 = (*i0 << 2) - 1 + *pp; + for (i4 = (*n0 << 2) - 9 + *pp; i4 >= i__1; i4 += -4) { + a2 = b1; + if (z__[i4] > z__[i4 - 2]) { + return 0; + } + b1 *= z__[i4] / z__[i4 - 2]; + b2 += b1; + if (max(b1,a2) * 100. < b2) { + goto L60; + } +/* L50: */ + } +L60: + b2 = sqrt(b2 * 1.05); +/* Computing 2nd power */ + d__1 = b2; + a2 = *dmin1 / (d__1 * d__1 + 1.); + gap2 = *dmin2 * .5 - a2; + if (gap2 > 0. && gap2 > b2 * a2) { +/* Computing MAX */ + d__1 = s, d__2 = a2 * (1. - a2 * 1.01 * (b2 / gap2) * b2); + s = max(d__1,d__2); + } else { +/* Computing MAX */ + d__1 = s, d__2 = a2 * (1. - b2 * 1.01); + s = max(d__1,d__2); + *ttype = -8; + } + } else { + +/* Case 9. */ + + s = *dmin1 * .25; + if (*dmin1 == *dn1) { + s = *dmin1 * .5; + } + *ttype = -9; + } + + } else if (*n0in == *n0 + 2) { + +/* Two eigenvalues deflated. Use DMIN2, DN2 for DMIN and DN. */ + +/* Cases 10 and 11. */ + + if (*dmin2 == *dn2 && z__[nn - 5] * 2. < z__[nn - 7]) { + *ttype = -10; + s = *dmin2 * .333; + if (z__[nn - 5] > z__[nn - 7]) { + return 0; + } + b1 = z__[nn - 5] / z__[nn - 7]; + b2 = b1; + if (b2 == 0.) { + goto L80; + } + i__1 = (*i0 << 2) - 1 + *pp; + for (i4 = (*n0 << 2) - 9 + *pp; i4 >= i__1; i4 += -4) { + if (z__[i4] > z__[i4 - 2]) { + return 0; + } + b1 *= z__[i4] / z__[i4 - 2]; + b2 += b1; + if (b1 * 100. < b2) { + goto L80; + } +/* L70: */ + } +L80: + b2 = sqrt(b2 * 1.05); +/* Computing 2nd power */ + d__1 = b2; + a2 = *dmin2 / (d__1 * d__1 + 1.); + gap2 = z__[nn - 7] + z__[nn - 9] - sqrt(z__[nn - 11]) * sqrt(z__[ + nn - 9]) - a2; + if (gap2 > 0. && gap2 > b2 * a2) { +/* Computing MAX */ + d__1 = s, d__2 = a2 * (1. - a2 * 1.01 * (b2 / gap2) * b2); + s = max(d__1,d__2); + } else { +/* Computing MAX */ + d__1 = s, d__2 = a2 * (1. - b2 * 1.01); + s = max(d__1,d__2); + } + } else { + s = *dmin2 * .25; + *ttype = -11; + } + } else if (*n0in > *n0 + 2) { + +/* Case 12, more than two eigenvalues deflated. No information. */ + + s = 0.; + *ttype = -12; + } + + *tau = s; + return 0; + +/* End of DLASQ4 */ + +} /* dlasq4_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dlasq5.cpp b/lib/linalg/dlasq5.cpp new file mode 100644 index 0000000000..13ae0ed20c --- /dev/null +++ b/lib/linalg/dlasq5.cpp @@ -0,0 +1,480 @@ +/* fortran/dlasq5.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* > \brief \b DLASQ5 computes one dqds transform in ping-pong form. Used by sbdsqr and sstegr. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DLASQ5 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DLASQ5( I0, N0, Z, PP, TAU, SIGMA, DMIN, DMIN1, DMIN2, DN, */ +/* DNM1, DNM2, IEEE, EPS ) */ + +/* .. Scalar Arguments .. */ +/* LOGICAL IEEE */ +/* INTEGER I0, N0, PP */ +/* DOUBLE PRECISION DMIN, DMIN1, DMIN2, DN, DNM1, DNM2, TAU, SIGMA, EPS */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION Z( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DLASQ5 computes one dqds transform in ping-pong form, one */ +/* > version for IEEE machines another for non IEEE machines. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] I0 */ +/* > \verbatim */ +/* > I0 is INTEGER */ +/* > First index. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N0 */ +/* > \verbatim */ +/* > N0 is INTEGER */ +/* > Last index. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] Z */ +/* > \verbatim */ +/* > Z is DOUBLE PRECISION array, dimension ( 4*N ) */ +/* > Z holds the qd array. EMIN is stored in Z(4*N0) to avoid */ +/* > an extra argument. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] PP */ +/* > \verbatim */ +/* > PP is INTEGER */ +/* > PP=0 for ping, PP=1 for pong. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TAU */ +/* > \verbatim */ +/* > TAU is DOUBLE PRECISION */ +/* > This is the shift. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] SIGMA */ +/* > \verbatim */ +/* > SIGMA is DOUBLE PRECISION */ +/* > This is the accumulated shift up to this step. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] DMIN */ +/* > \verbatim */ +/* > DMIN is DOUBLE PRECISION */ +/* > Minimum value of d. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] DMIN1 */ +/* > \verbatim */ +/* > DMIN1 is DOUBLE PRECISION */ +/* > Minimum value of d, excluding D( N0 ). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] DMIN2 */ +/* > \verbatim */ +/* > DMIN2 is DOUBLE PRECISION */ +/* > Minimum value of d, excluding D( N0 ) and D( N0-1 ). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] DN */ +/* > \verbatim */ +/* > DN is DOUBLE PRECISION */ +/* > d(N0), the last value of d. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] DNM1 */ +/* > \verbatim */ +/* > DNM1 is DOUBLE PRECISION */ +/* > d(N0-1). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] DNM2 */ +/* > \verbatim */ +/* > DNM2 is DOUBLE PRECISION */ +/* > d(N0-2). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IEEE */ +/* > \verbatim */ +/* > IEEE is LOGICAL */ +/* > Flag for IEEE or non IEEE arithmetic. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] EPS */ +/* > \verbatim */ +/* > EPS is DOUBLE PRECISION */ +/* > This is the value of epsilon used. */ +/* > \endverbatim */ +/* > */ +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup auxOTHERcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int dlasq5_(integer *i0, integer *n0, doublereal *z__, + integer *pp, doublereal *tau, doublereal *sigma, doublereal *dmin__, + doublereal *dmin1, doublereal *dmin2, doublereal *dn, doublereal * + dnm1, doublereal *dnm2, logical *ieee, doublereal *eps) +{ + /* System generated locals */ + integer i__1; + doublereal d__1, d__2; + + /* Local variables */ + doublereal d__; + integer j4, j4p2; + doublereal emin, temp, dthresh; + + +/* -- LAPACK computational routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameter .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments */ + --z__; + + /* Function Body */ + if (*n0 - *i0 - 1 <= 0) { + return 0; + } + + dthresh = *eps * (*sigma + *tau); + if (*tau < dthresh * .5) { + *tau = 0.; + } + if (*tau != 0.) { + j4 = (*i0 << 2) + *pp - 3; + emin = z__[j4 + 4]; + d__ = z__[j4] - *tau; + *dmin__ = d__; + *dmin1 = -z__[j4]; + + if (*ieee) { + +/* Code for IEEE arithmetic. */ + + if (*pp == 0) { + i__1 = *n0 - 3 << 2; + for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { + z__[j4 - 2] = d__ + z__[j4 - 1]; + temp = z__[j4 + 1] / z__[j4 - 2]; + d__ = d__ * temp - *tau; + *dmin__ = min(*dmin__,d__); + z__[j4] = z__[j4 - 1] * temp; +/* Computing MIN */ + d__1 = z__[j4]; + emin = min(d__1,emin); +/* L10: */ + } + } else { + i__1 = *n0 - 3 << 2; + for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { + z__[j4 - 3] = d__ + z__[j4]; + temp = z__[j4 + 2] / z__[j4 - 3]; + d__ = d__ * temp - *tau; + *dmin__ = min(*dmin__,d__); + z__[j4 - 1] = z__[j4] * temp; +/* Computing MIN */ + d__1 = z__[j4 - 1]; + emin = min(d__1,emin); +/* L20: */ + } + } + +/* Unroll last two steps. */ + + *dnm2 = d__; + *dmin2 = *dmin__; + j4 = (*n0 - 2 << 2) - *pp; + j4p2 = j4 + (*pp << 1) - 1; + z__[j4 - 2] = *dnm2 + z__[j4p2]; + z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); + *dnm1 = z__[j4p2 + 2] * (*dnm2 / z__[j4 - 2]) - *tau; + *dmin__ = min(*dmin__,*dnm1); + + *dmin1 = *dmin__; + j4 += 4; + j4p2 = j4 + (*pp << 1) - 1; + z__[j4 - 2] = *dnm1 + z__[j4p2]; + z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); + *dn = z__[j4p2 + 2] * (*dnm1 / z__[j4 - 2]) - *tau; + *dmin__ = min(*dmin__,*dn); + + } else { + +/* Code for non IEEE arithmetic. */ + + if (*pp == 0) { + i__1 = *n0 - 3 << 2; + for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { + z__[j4 - 2] = d__ + z__[j4 - 1]; + if (d__ < 0.) { + return 0; + } else { + z__[j4] = z__[j4 + 1] * (z__[j4 - 1] / z__[j4 - 2]); + d__ = z__[j4 + 1] * (d__ / z__[j4 - 2]) - *tau; + } + *dmin__ = min(*dmin__,d__); +/* Computing MIN */ + d__1 = emin, d__2 = z__[j4]; + emin = min(d__1,d__2); +/* L30: */ + } + } else { + i__1 = *n0 - 3 << 2; + for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { + z__[j4 - 3] = d__ + z__[j4]; + if (d__ < 0.) { + return 0; + } else { + z__[j4 - 1] = z__[j4 + 2] * (z__[j4] / z__[j4 - 3]); + d__ = z__[j4 + 2] * (d__ / z__[j4 - 3]) - *tau; + } + *dmin__ = min(*dmin__,d__); +/* Computing MIN */ + d__1 = emin, d__2 = z__[j4 - 1]; + emin = min(d__1,d__2); +/* L40: */ + } + } + +/* Unroll last two steps. */ + + *dnm2 = d__; + *dmin2 = *dmin__; + j4 = (*n0 - 2 << 2) - *pp; + j4p2 = j4 + (*pp << 1) - 1; + z__[j4 - 2] = *dnm2 + z__[j4p2]; + if (*dnm2 < 0.) { + return 0; + } else { + z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); + *dnm1 = z__[j4p2 + 2] * (*dnm2 / z__[j4 - 2]) - *tau; + } + *dmin__ = min(*dmin__,*dnm1); + + *dmin1 = *dmin__; + j4 += 4; + j4p2 = j4 + (*pp << 1) - 1; + z__[j4 - 2] = *dnm1 + z__[j4p2]; + if (*dnm1 < 0.) { + return 0; + } else { + z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); + *dn = z__[j4p2 + 2] * (*dnm1 / z__[j4 - 2]) - *tau; + } + *dmin__ = min(*dmin__,*dn); + + } + } else { +/* This is the version that sets d's to zero if they are small enough */ + j4 = (*i0 << 2) + *pp - 3; + emin = z__[j4 + 4]; + d__ = z__[j4] - *tau; + *dmin__ = d__; + *dmin1 = -z__[j4]; + if (*ieee) { + +/* Code for IEEE arithmetic. */ + + if (*pp == 0) { + i__1 = *n0 - 3 << 2; + for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { + z__[j4 - 2] = d__ + z__[j4 - 1]; + temp = z__[j4 + 1] / z__[j4 - 2]; + d__ = d__ * temp - *tau; + if (d__ < dthresh) { + d__ = 0.; + } + *dmin__ = min(*dmin__,d__); + z__[j4] = z__[j4 - 1] * temp; +/* Computing MIN */ + d__1 = z__[j4]; + emin = min(d__1,emin); +/* L50: */ + } + } else { + i__1 = *n0 - 3 << 2; + for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { + z__[j4 - 3] = d__ + z__[j4]; + temp = z__[j4 + 2] / z__[j4 - 3]; + d__ = d__ * temp - *tau; + if (d__ < dthresh) { + d__ = 0.; + } + *dmin__ = min(*dmin__,d__); + z__[j4 - 1] = z__[j4] * temp; +/* Computing MIN */ + d__1 = z__[j4 - 1]; + emin = min(d__1,emin); +/* L60: */ + } + } + +/* Unroll last two steps. */ + + *dnm2 = d__; + *dmin2 = *dmin__; + j4 = (*n0 - 2 << 2) - *pp; + j4p2 = j4 + (*pp << 1) - 1; + z__[j4 - 2] = *dnm2 + z__[j4p2]; + z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); + *dnm1 = z__[j4p2 + 2] * (*dnm2 / z__[j4 - 2]) - *tau; + *dmin__ = min(*dmin__,*dnm1); + + *dmin1 = *dmin__; + j4 += 4; + j4p2 = j4 + (*pp << 1) - 1; + z__[j4 - 2] = *dnm1 + z__[j4p2]; + z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); + *dn = z__[j4p2 + 2] * (*dnm1 / z__[j4 - 2]) - *tau; + *dmin__ = min(*dmin__,*dn); + + } else { + +/* Code for non IEEE arithmetic. */ + + if (*pp == 0) { + i__1 = *n0 - 3 << 2; + for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { + z__[j4 - 2] = d__ + z__[j4 - 1]; + if (d__ < 0.) { + return 0; + } else { + z__[j4] = z__[j4 + 1] * (z__[j4 - 1] / z__[j4 - 2]); + d__ = z__[j4 + 1] * (d__ / z__[j4 - 2]) - *tau; + } + if (d__ < dthresh) { + d__ = 0.; + } + *dmin__ = min(*dmin__,d__); +/* Computing MIN */ + d__1 = emin, d__2 = z__[j4]; + emin = min(d__1,d__2); +/* L70: */ + } + } else { + i__1 = *n0 - 3 << 2; + for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { + z__[j4 - 3] = d__ + z__[j4]; + if (d__ < 0.) { + return 0; + } else { + z__[j4 - 1] = z__[j4 + 2] * (z__[j4] / z__[j4 - 3]); + d__ = z__[j4 + 2] * (d__ / z__[j4 - 3]) - *tau; + } + if (d__ < dthresh) { + d__ = 0.; + } + *dmin__ = min(*dmin__,d__); +/* Computing MIN */ + d__1 = emin, d__2 = z__[j4 - 1]; + emin = min(d__1,d__2); +/* L80: */ + } + } + +/* Unroll last two steps. */ + + *dnm2 = d__; + *dmin2 = *dmin__; + j4 = (*n0 - 2 << 2) - *pp; + j4p2 = j4 + (*pp << 1) - 1; + z__[j4 - 2] = *dnm2 + z__[j4p2]; + if (*dnm2 < 0.) { + return 0; + } else { + z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); + *dnm1 = z__[j4p2 + 2] * (*dnm2 / z__[j4 - 2]) - *tau; + } + *dmin__ = min(*dmin__,*dnm1); + + *dmin1 = *dmin__; + j4 += 4; + j4p2 = j4 + (*pp << 1) - 1; + z__[j4 - 2] = *dnm1 + z__[j4p2]; + if (*dnm1 < 0.) { + return 0; + } else { + z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); + *dn = z__[j4p2 + 2] * (*dnm1 / z__[j4 - 2]) - *tau; + } + *dmin__ = min(*dmin__,*dn); + + } + } + + z__[j4 + 2] = *dn; + z__[(*n0 << 2) - *pp] = emin; + return 0; + +/* End of DLASQ5 */ + +} /* dlasq5_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dlasq6.cpp b/lib/linalg/dlasq6.cpp new file mode 100644 index 0000000000..b06e9ad028 --- /dev/null +++ b/lib/linalg/dlasq6.cpp @@ -0,0 +1,291 @@ +/* fortran/dlasq6.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* > \brief \b DLASQ6 computes one dqd transform in ping-pong form. Used by sbdsqr and sstegr. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DLASQ6 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DLASQ6( I0, N0, Z, PP, DMIN, DMIN1, DMIN2, DN, */ +/* DNM1, DNM2 ) */ + +/* .. Scalar Arguments .. */ +/* INTEGER I0, N0, PP */ +/* DOUBLE PRECISION DMIN, DMIN1, DMIN2, DN, DNM1, DNM2 */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION Z( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DLASQ6 computes one dqd (shift equal to zero) transform in */ +/* > ping-pong form, with protection against underflow and overflow. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] I0 */ +/* > \verbatim */ +/* > I0 is INTEGER */ +/* > First index. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N0 */ +/* > \verbatim */ +/* > N0 is INTEGER */ +/* > Last index. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] Z */ +/* > \verbatim */ +/* > Z is DOUBLE PRECISION array, dimension ( 4*N ) */ +/* > Z holds the qd array. EMIN is stored in Z(4*N0) to avoid */ +/* > an extra argument. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] PP */ +/* > \verbatim */ +/* > PP is INTEGER */ +/* > PP=0 for ping, PP=1 for pong. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] DMIN */ +/* > \verbatim */ +/* > DMIN is DOUBLE PRECISION */ +/* > Minimum value of d. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] DMIN1 */ +/* > \verbatim */ +/* > DMIN1 is DOUBLE PRECISION */ +/* > Minimum value of d, excluding D( N0 ). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] DMIN2 */ +/* > \verbatim */ +/* > DMIN2 is DOUBLE PRECISION */ +/* > Minimum value of d, excluding D( N0 ) and D( N0-1 ). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] DN */ +/* > \verbatim */ +/* > DN is DOUBLE PRECISION */ +/* > d(N0), the last value of d. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] DNM1 */ +/* > \verbatim */ +/* > DNM1 is DOUBLE PRECISION */ +/* > d(N0-1). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] DNM2 */ +/* > \verbatim */ +/* > DNM2 is DOUBLE PRECISION */ +/* > d(N0-2). */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup auxOTHERcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int dlasq6_(integer *i0, integer *n0, doublereal *z__, + integer *pp, doublereal *dmin__, doublereal *dmin1, doublereal *dmin2, + doublereal *dn, doublereal *dnm1, doublereal *dnm2) +{ + /* System generated locals */ + integer i__1; + doublereal d__1, d__2; + + /* Local variables */ + doublereal d__; + integer j4, j4p2; + doublereal emin, temp; + extern doublereal dlamch_(char *, ftnlen); + doublereal safmin; + + +/* -- LAPACK computational routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameter .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Function .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments */ + --z__; + + /* Function Body */ + if (*n0 - *i0 - 1 <= 0) { + return 0; + } + + safmin = dlamch_((char *)"Safe minimum", (ftnlen)12); + j4 = (*i0 << 2) + *pp - 3; + emin = z__[j4 + 4]; + d__ = z__[j4]; + *dmin__ = d__; + + if (*pp == 0) { + i__1 = *n0 - 3 << 2; + for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { + z__[j4 - 2] = d__ + z__[j4 - 1]; + if (z__[j4 - 2] == 0.) { + z__[j4] = 0.; + d__ = z__[j4 + 1]; + *dmin__ = d__; + emin = 0.; + } else if (safmin * z__[j4 + 1] < z__[j4 - 2] && safmin * z__[j4 + - 2] < z__[j4 + 1]) { + temp = z__[j4 + 1] / z__[j4 - 2]; + z__[j4] = z__[j4 - 1] * temp; + d__ *= temp; + } else { + z__[j4] = z__[j4 + 1] * (z__[j4 - 1] / z__[j4 - 2]); + d__ = z__[j4 + 1] * (d__ / z__[j4 - 2]); + } + *dmin__ = min(*dmin__,d__); +/* Computing MIN */ + d__1 = emin, d__2 = z__[j4]; + emin = min(d__1,d__2); +/* L10: */ + } + } else { + i__1 = *n0 - 3 << 2; + for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { + z__[j4 - 3] = d__ + z__[j4]; + if (z__[j4 - 3] == 0.) { + z__[j4 - 1] = 0.; + d__ = z__[j4 + 2]; + *dmin__ = d__; + emin = 0.; + } else if (safmin * z__[j4 + 2] < z__[j4 - 3] && safmin * z__[j4 + - 3] < z__[j4 + 2]) { + temp = z__[j4 + 2] / z__[j4 - 3]; + z__[j4 - 1] = z__[j4] * temp; + d__ *= temp; + } else { + z__[j4 - 1] = z__[j4 + 2] * (z__[j4] / z__[j4 - 3]); + d__ = z__[j4 + 2] * (d__ / z__[j4 - 3]); + } + *dmin__ = min(*dmin__,d__); +/* Computing MIN */ + d__1 = emin, d__2 = z__[j4 - 1]; + emin = min(d__1,d__2); +/* L20: */ + } + } + +/* Unroll last two steps. */ + + *dnm2 = d__; + *dmin2 = *dmin__; + j4 = (*n0 - 2 << 2) - *pp; + j4p2 = j4 + (*pp << 1) - 1; + z__[j4 - 2] = *dnm2 + z__[j4p2]; + if (z__[j4 - 2] == 0.) { + z__[j4] = 0.; + *dnm1 = z__[j4p2 + 2]; + *dmin__ = *dnm1; + emin = 0.; + } else if (safmin * z__[j4p2 + 2] < z__[j4 - 2] && safmin * z__[j4 - 2] < + z__[j4p2 + 2]) { + temp = z__[j4p2 + 2] / z__[j4 - 2]; + z__[j4] = z__[j4p2] * temp; + *dnm1 = *dnm2 * temp; + } else { + z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); + *dnm1 = z__[j4p2 + 2] * (*dnm2 / z__[j4 - 2]); + } + *dmin__ = min(*dmin__,*dnm1); + + *dmin1 = *dmin__; + j4 += 4; + j4p2 = j4 + (*pp << 1) - 1; + z__[j4 - 2] = *dnm1 + z__[j4p2]; + if (z__[j4 - 2] == 0.) { + z__[j4] = 0.; + *dn = z__[j4p2 + 2]; + *dmin__ = *dn; + emin = 0.; + } else if (safmin * z__[j4p2 + 2] < z__[j4 - 2] && safmin * z__[j4 - 2] < + z__[j4p2 + 2]) { + temp = z__[j4p2 + 2] / z__[j4 - 2]; + z__[j4] = z__[j4p2] * temp; + *dn = *dnm1 * temp; + } else { + z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); + *dn = z__[j4p2 + 2] * (*dnm1 / z__[j4 - 2]); + } + *dmin__ = min(*dmin__,*dn); + + z__[j4 + 2] = *dn; + z__[(*n0 << 2) - *pp] = emin; + return 0; + +/* End of DLASQ6 */ + +} /* dlasq6_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dlasr.cpp b/lib/linalg/dlasr.cpp new file mode 100644 index 0000000000..c4de2f85a9 --- /dev/null +++ b/lib/linalg/dlasr.cpp @@ -0,0 +1,536 @@ +/* fortran/dlasr.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* > \brief \b DLASR applies a sequence of plane rotations to a general rectangular matrix. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DLASR + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DLASR( SIDE, PIVOT, DIRECT, M, N, C, S, A, LDA ) */ + +/* .. Scalar Arguments .. */ +/* CHARACTER DIRECT, PIVOT, SIDE */ +/* INTEGER LDA, M, N */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION A( LDA, * ), C( * ), S( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DLASR applies a sequence of plane rotations to a real matrix A, */ +/* > from either the left or the right. */ +/* > */ +/* > When SIDE = 'L', the transformation takes the form */ +/* > */ +/* > A := P*A */ +/* > */ +/* > and when SIDE = 'R', the transformation takes the form */ +/* > */ +/* > A := A*P**T */ +/* > */ +/* > where P is an orthogonal matrix consisting of a sequence of z plane */ +/* > rotations, with z = M when SIDE = 'L' and z = N when SIDE = 'R', */ +/* > and P**T is the transpose of P. */ +/* > */ +/* > When DIRECT = 'F' (Forward sequence), then */ +/* > */ +/* > P = P(z-1) * ... * P(2) * P(1) */ +/* > */ +/* > and when DIRECT = 'B' (Backward sequence), then */ +/* > */ +/* > P = P(1) * P(2) * ... * P(z-1) */ +/* > */ +/* > where P(k) is a plane rotation matrix defined by the 2-by-2 rotation */ +/* > */ +/* > R(k) = ( c(k) s(k) ) */ +/* > = ( -s(k) c(k) ). */ +/* > */ +/* > When PIVOT = 'V' (Variable pivot), the rotation is performed */ +/* > for the plane (k,k+1), i.e., P(k) has the form */ +/* > */ +/* > P(k) = ( 1 ) */ +/* > ( ... ) */ +/* > ( 1 ) */ +/* > ( c(k) s(k) ) */ +/* > ( -s(k) c(k) ) */ +/* > ( 1 ) */ +/* > ( ... ) */ +/* > ( 1 ) */ +/* > */ +/* > where R(k) appears as a rank-2 modification to the identity matrix in */ +/* > rows and columns k and k+1. */ +/* > */ +/* > When PIVOT = 'T' (Top pivot), the rotation is performed for the */ +/* > plane (1,k+1), so P(k) has the form */ +/* > */ +/* > P(k) = ( c(k) s(k) ) */ +/* > ( 1 ) */ +/* > ( ... ) */ +/* > ( 1 ) */ +/* > ( -s(k) c(k) ) */ +/* > ( 1 ) */ +/* > ( ... ) */ +/* > ( 1 ) */ +/* > */ +/* > where R(k) appears in rows and columns 1 and k+1. */ +/* > */ +/* > Similarly, when PIVOT = 'B' (Bottom pivot), the rotation is */ +/* > performed for the plane (k,z), giving P(k) the form */ +/* > */ +/* > P(k) = ( 1 ) */ +/* > ( ... ) */ +/* > ( 1 ) */ +/* > ( c(k) s(k) ) */ +/* > ( 1 ) */ +/* > ( ... ) */ +/* > ( 1 ) */ +/* > ( -s(k) c(k) ) */ +/* > */ +/* > where R(k) appears in rows and columns k and z. The rotations are */ +/* > performed without ever forming P(k) explicitly. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] SIDE */ +/* > \verbatim */ +/* > SIDE is CHARACTER*1 */ +/* > Specifies whether the plane rotation matrix P is applied to */ +/* > A on the left or the right. */ +/* > = 'L': Left, compute A := P*A */ +/* > = 'R': Right, compute A:= A*P**T */ +/* > \endverbatim */ +/* > */ +/* > \param[in] PIVOT */ +/* > \verbatim */ +/* > PIVOT is CHARACTER*1 */ +/* > Specifies the plane for which P(k) is a plane rotation */ +/* > matrix. */ +/* > = 'V': Variable pivot, the plane (k,k+1) */ +/* > = 'T': Top pivot, the plane (1,k+1) */ +/* > = 'B': Bottom pivot, the plane (k,z) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DIRECT */ +/* > \verbatim */ +/* > DIRECT is CHARACTER*1 */ +/* > Specifies whether P is a forward or backward sequence of */ +/* > plane rotations. */ +/* > = 'F': Forward, P = P(z-1)*...*P(2)*P(1) */ +/* > = 'B': Backward, P = P(1)*P(2)*...*P(z-1) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. If m <= 1, an immediate */ +/* > return is effected. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. If n <= 1, an */ +/* > immediate return is effected. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] C */ +/* > \verbatim */ +/* > C is DOUBLE PRECISION array, dimension */ +/* > (M-1) if SIDE = 'L' */ +/* > (N-1) if SIDE = 'R' */ +/* > The cosines c(k) of the plane rotations. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] S */ +/* > \verbatim */ +/* > S is DOUBLE PRECISION array, dimension */ +/* > (M-1) if SIDE = 'L' */ +/* > (N-1) if SIDE = 'R' */ +/* > The sines s(k) of the plane rotations. The 2-by-2 plane */ +/* > rotation part of the matrix P(k), R(k), has the form */ +/* > R(k) = ( c(k) s(k) ) */ +/* > ( -s(k) c(k) ). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > The M-by-N matrix A. On exit, A is overwritten by P*A if */ +/* > SIDE = 'L' or by A*P**T if SIDE = 'R'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= max(1,M). */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup OTHERauxiliary */ + +/* ===================================================================== */ +/* Subroutine */ int dlasr_(char *side, char *pivot, char *direct, integer *m, + integer *n, doublereal *c__, doublereal *s, doublereal *a, integer * + lda, ftnlen side_len, ftnlen pivot_len, ftnlen direct_len) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + integer i__, j, info; + doublereal temp; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + doublereal ctemp, stemp; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + + +/* -- LAPACK auxiliary routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters */ + + /* Parameter adjustments */ + --c__; + --s; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + info = 0; + if (! (lsame_(side, (char *)"L", (ftnlen)1, (ftnlen)1) || lsame_(side, (char *)"R", ( + ftnlen)1, (ftnlen)1))) { + info = 1; + } else if (! (lsame_(pivot, (char *)"V", (ftnlen)1, (ftnlen)1) || lsame_(pivot, + (char *)"T", (ftnlen)1, (ftnlen)1) || lsame_(pivot, (char *)"B", (ftnlen)1, ( + ftnlen)1))) { + info = 2; + } else if (! (lsame_(direct, (char *)"F", (ftnlen)1, (ftnlen)1) || lsame_(direct, + (char *)"B", (ftnlen)1, (ftnlen)1))) { + info = 3; + } else if (*m < 0) { + info = 4; + } else if (*n < 0) { + info = 5; + } else if (*lda < max(1,*m)) { + info = 9; + } + if (info != 0) { + xerbla_((char *)"DLASR ", &info, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + return 0; + } + if (lsame_(side, (char *)"L", (ftnlen)1, (ftnlen)1)) { + +/* Form P * A */ + + if (lsame_(pivot, (char *)"V", (ftnlen)1, (ftnlen)1)) { + if (lsame_(direct, (char *)"F", (ftnlen)1, (ftnlen)1)) { + i__1 = *m - 1; + for (j = 1; j <= i__1; ++j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1. || stemp != 0.) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = a[j + 1 + i__ * a_dim1]; + a[j + 1 + i__ * a_dim1] = ctemp * temp - stemp * + a[j + i__ * a_dim1]; + a[j + i__ * a_dim1] = stemp * temp + ctemp * a[j + + i__ * a_dim1]; +/* L10: */ + } + } +/* L20: */ + } + } else if (lsame_(direct, (char *)"B", (ftnlen)1, (ftnlen)1)) { + for (j = *m - 1; j >= 1; --j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1. || stemp != 0.) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + temp = a[j + 1 + i__ * a_dim1]; + a[j + 1 + i__ * a_dim1] = ctemp * temp - stemp * + a[j + i__ * a_dim1]; + a[j + i__ * a_dim1] = stemp * temp + ctemp * a[j + + i__ * a_dim1]; +/* L30: */ + } + } +/* L40: */ + } + } + } else if (lsame_(pivot, (char *)"T", (ftnlen)1, (ftnlen)1)) { + if (lsame_(direct, (char *)"F", (ftnlen)1, (ftnlen)1)) { + i__1 = *m; + for (j = 2; j <= i__1; ++j) { + ctemp = c__[j - 1]; + stemp = s[j - 1]; + if (ctemp != 1. || stemp != 0.) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = a[j + i__ * a_dim1]; + a[j + i__ * a_dim1] = ctemp * temp - stemp * a[ + i__ * a_dim1 + 1]; + a[i__ * a_dim1 + 1] = stemp * temp + ctemp * a[ + i__ * a_dim1 + 1]; +/* L50: */ + } + } +/* L60: */ + } + } else if (lsame_(direct, (char *)"B", (ftnlen)1, (ftnlen)1)) { + for (j = *m; j >= 2; --j) { + ctemp = c__[j - 1]; + stemp = s[j - 1]; + if (ctemp != 1. || stemp != 0.) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + temp = a[j + i__ * a_dim1]; + a[j + i__ * a_dim1] = ctemp * temp - stemp * a[ + i__ * a_dim1 + 1]; + a[i__ * a_dim1 + 1] = stemp * temp + ctemp * a[ + i__ * a_dim1 + 1]; +/* L70: */ + } + } +/* L80: */ + } + } + } else if (lsame_(pivot, (char *)"B", (ftnlen)1, (ftnlen)1)) { + if (lsame_(direct, (char *)"F", (ftnlen)1, (ftnlen)1)) { + i__1 = *m - 1; + for (j = 1; j <= i__1; ++j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1. || stemp != 0.) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = a[j + i__ * a_dim1]; + a[j + i__ * a_dim1] = stemp * a[*m + i__ * a_dim1] + + ctemp * temp; + a[*m + i__ * a_dim1] = ctemp * a[*m + i__ * + a_dim1] - stemp * temp; +/* L90: */ + } + } +/* L100: */ + } + } else if (lsame_(direct, (char *)"B", (ftnlen)1, (ftnlen)1)) { + for (j = *m - 1; j >= 1; --j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1. || stemp != 0.) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + temp = a[j + i__ * a_dim1]; + a[j + i__ * a_dim1] = stemp * a[*m + i__ * a_dim1] + + ctemp * temp; + a[*m + i__ * a_dim1] = ctemp * a[*m + i__ * + a_dim1] - stemp * temp; +/* L110: */ + } + } +/* L120: */ + } + } + } + } else if (lsame_(side, (char *)"R", (ftnlen)1, (ftnlen)1)) { + +/* Form A * P**T */ + + if (lsame_(pivot, (char *)"V", (ftnlen)1, (ftnlen)1)) { + if (lsame_(direct, (char *)"F", (ftnlen)1, (ftnlen)1)) { + i__1 = *n - 1; + for (j = 1; j <= i__1; ++j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1. || stemp != 0.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = a[i__ + (j + 1) * a_dim1]; + a[i__ + (j + 1) * a_dim1] = ctemp * temp - stemp * + a[i__ + j * a_dim1]; + a[i__ + j * a_dim1] = stemp * temp + ctemp * a[ + i__ + j * a_dim1]; +/* L130: */ + } + } +/* L140: */ + } + } else if (lsame_(direct, (char *)"B", (ftnlen)1, (ftnlen)1)) { + for (j = *n - 1; j >= 1; --j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1. || stemp != 0.) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + temp = a[i__ + (j + 1) * a_dim1]; + a[i__ + (j + 1) * a_dim1] = ctemp * temp - stemp * + a[i__ + j * a_dim1]; + a[i__ + j * a_dim1] = stemp * temp + ctemp * a[ + i__ + j * a_dim1]; +/* L150: */ + } + } +/* L160: */ + } + } + } else if (lsame_(pivot, (char *)"T", (ftnlen)1, (ftnlen)1)) { + if (lsame_(direct, (char *)"F", (ftnlen)1, (ftnlen)1)) { + i__1 = *n; + for (j = 2; j <= i__1; ++j) { + ctemp = c__[j - 1]; + stemp = s[j - 1]; + if (ctemp != 1. || stemp != 0.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = a[i__ + j * a_dim1]; + a[i__ + j * a_dim1] = ctemp * temp - stemp * a[ + i__ + a_dim1]; + a[i__ + a_dim1] = stemp * temp + ctemp * a[i__ + + a_dim1]; +/* L170: */ + } + } +/* L180: */ + } + } else if (lsame_(direct, (char *)"B", (ftnlen)1, (ftnlen)1)) { + for (j = *n; j >= 2; --j) { + ctemp = c__[j - 1]; + stemp = s[j - 1]; + if (ctemp != 1. || stemp != 0.) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + temp = a[i__ + j * a_dim1]; + a[i__ + j * a_dim1] = ctemp * temp - stemp * a[ + i__ + a_dim1]; + a[i__ + a_dim1] = stemp * temp + ctemp * a[i__ + + a_dim1]; +/* L190: */ + } + } +/* L200: */ + } + } + } else if (lsame_(pivot, (char *)"B", (ftnlen)1, (ftnlen)1)) { + if (lsame_(direct, (char *)"F", (ftnlen)1, (ftnlen)1)) { + i__1 = *n - 1; + for (j = 1; j <= i__1; ++j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1. || stemp != 0.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = a[i__ + j * a_dim1]; + a[i__ + j * a_dim1] = stemp * a[i__ + *n * a_dim1] + + ctemp * temp; + a[i__ + *n * a_dim1] = ctemp * a[i__ + *n * + a_dim1] - stemp * temp; +/* L210: */ + } + } +/* L220: */ + } + } else if (lsame_(direct, (char *)"B", (ftnlen)1, (ftnlen)1)) { + for (j = *n - 1; j >= 1; --j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1. || stemp != 0.) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + temp = a[i__ + j * a_dim1]; + a[i__ + j * a_dim1] = stemp * a[i__ + *n * a_dim1] + + ctemp * temp; + a[i__ + *n * a_dim1] = ctemp * a[i__ + *n * + a_dim1] - stemp * temp; +/* L230: */ + } + } +/* L240: */ + } + } + } + } + + return 0; + +/* End of DLASR */ + +} /* dlasr_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dlasrt.cpp b/lib/linalg/dlasrt.cpp new file mode 100644 index 0000000000..8c0eec5f2e --- /dev/null +++ b/lib/linalg/dlasrt.cpp @@ -0,0 +1,352 @@ +/* fortran/dlasrt.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* > \brief \b DLASRT sorts numbers in increasing or decreasing order. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DLASRT + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DLASRT( ID, N, D, INFO ) */ + +/* .. Scalar Arguments .. */ +/* CHARACTER ID */ +/* INTEGER INFO, N */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION D( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > Sort the numbers in D in increasing order (if ID = 'I') or */ +/* > in decreasing order (if ID = 'D' ). */ +/* > */ +/* > Use Quick Sort, reverting to Insertion sort on arrays of */ +/* > size <= 20. Dimension of STACK limits N to about 2**32. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] ID */ +/* > \verbatim */ +/* > ID is CHARACTER*1 */ +/* > = 'I': sort D in increasing order; */ +/* > = 'D': sort D in decreasing order. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The length of the array D. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] D */ +/* > \verbatim */ +/* > D is DOUBLE PRECISION array, dimension (N) */ +/* > On entry, the array to be sorted. */ +/* > On exit, D has been sorted into increasing order */ +/* > (D(1) <= ... <= D(N) ) or into decreasing order */ +/* > (D(1) >= ... >= D(N) ), depending on ID. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup auxOTHERcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int dlasrt_(char *id, integer *n, doublereal *d__, integer * + info, ftnlen id_len) +{ + /* System generated locals */ + integer i__1, i__2; + + /* Local variables */ + integer i__, j; + doublereal d1, d2, d3; + integer dir; + doublereal tmp; + integer endd; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + integer stack[64] /* was [2][32] */; + doublereal dmnmx; + integer start; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + integer stkpnt; + + +/* -- LAPACK computational routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Local Arrays .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + --d__; + + /* Function Body */ + *info = 0; + dir = -1; + if (lsame_(id, (char *)"D", (ftnlen)1, (ftnlen)1)) { + dir = 0; + } else if (lsame_(id, (char *)"I", (ftnlen)1, (ftnlen)1)) { + dir = 1; + } + if (dir == -1) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DLASRT", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + if (*n <= 1) { + return 0; + } + + stkpnt = 1; + stack[0] = 1; + stack[1] = *n; +L10: + start = stack[(stkpnt << 1) - 2]; + endd = stack[(stkpnt << 1) - 1]; + --stkpnt; + if (endd - start <= 20 && endd - start > 0) { + +/* Do Insertion sort on D( START:ENDD ) */ + + if (dir == 0) { + +/* Sort into decreasing order */ + + i__1 = endd; + for (i__ = start + 1; i__ <= i__1; ++i__) { + i__2 = start + 1; + for (j = i__; j >= i__2; --j) { + if (d__[j] > d__[j - 1]) { + dmnmx = d__[j]; + d__[j] = d__[j - 1]; + d__[j - 1] = dmnmx; + } else { + goto L30; + } +/* L20: */ + } +L30: + ; + } + + } else { + +/* Sort into increasing order */ + + i__1 = endd; + for (i__ = start + 1; i__ <= i__1; ++i__) { + i__2 = start + 1; + for (j = i__; j >= i__2; --j) { + if (d__[j] < d__[j - 1]) { + dmnmx = d__[j]; + d__[j] = d__[j - 1]; + d__[j - 1] = dmnmx; + } else { + goto L50; + } +/* L40: */ + } +L50: + ; + } + + } + + } else if (endd - start > 20) { + +/* Partition D( START:ENDD ) and stack parts, largest one first */ + +/* Choose partition entry as median of 3 */ + + d1 = d__[start]; + d2 = d__[endd]; + i__ = (start + endd) / 2; + d3 = d__[i__]; + if (d1 < d2) { + if (d3 < d1) { + dmnmx = d1; + } else if (d3 < d2) { + dmnmx = d3; + } else { + dmnmx = d2; + } + } else { + if (d3 < d2) { + dmnmx = d2; + } else if (d3 < d1) { + dmnmx = d3; + } else { + dmnmx = d1; + } + } + + if (dir == 0) { + +/* Sort into decreasing order */ + + i__ = start - 1; + j = endd + 1; +L60: +L70: + --j; + if (d__[j] < dmnmx) { + goto L70; + } +L80: + ++i__; + if (d__[i__] > dmnmx) { + goto L80; + } + if (i__ < j) { + tmp = d__[i__]; + d__[i__] = d__[j]; + d__[j] = tmp; + goto L60; + } + if (j - start > endd - j - 1) { + ++stkpnt; + stack[(stkpnt << 1) - 2] = start; + stack[(stkpnt << 1) - 1] = j; + ++stkpnt; + stack[(stkpnt << 1) - 2] = j + 1; + stack[(stkpnt << 1) - 1] = endd; + } else { + ++stkpnt; + stack[(stkpnt << 1) - 2] = j + 1; + stack[(stkpnt << 1) - 1] = endd; + ++stkpnt; + stack[(stkpnt << 1) - 2] = start; + stack[(stkpnt << 1) - 1] = j; + } + } else { + +/* Sort into increasing order */ + + i__ = start - 1; + j = endd + 1; +L90: +L100: + --j; + if (d__[j] > dmnmx) { + goto L100; + } +L110: + ++i__; + if (d__[i__] < dmnmx) { + goto L110; + } + if (i__ < j) { + tmp = d__[i__]; + d__[i__] = d__[j]; + d__[j] = tmp; + goto L90; + } + if (j - start > endd - j - 1) { + ++stkpnt; + stack[(stkpnt << 1) - 2] = start; + stack[(stkpnt << 1) - 1] = j; + ++stkpnt; + stack[(stkpnt << 1) - 2] = j + 1; + stack[(stkpnt << 1) - 1] = endd; + } else { + ++stkpnt; + stack[(stkpnt << 1) - 2] = j + 1; + stack[(stkpnt << 1) - 1] = endd; + ++stkpnt; + stack[(stkpnt << 1) - 2] = start; + stack[(stkpnt << 1) - 1] = j; + } + } + } + if (stkpnt > 0) { + goto L10; + } + return 0; + +/* End of DLASRT */ + +} /* dlasrt_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dlassq.cpp b/lib/linalg/dlassq.cpp new file mode 100644 index 0000000000..fe346d2247 --- /dev/null +++ b/lib/linalg/dlassq.cpp @@ -0,0 +1,191 @@ +/* fortran/dlassq.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* > \brief \b DLASSQ updates a sum of squares represented in scaled form. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DLASSQ + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DLASSQ( N, X, INCX, SCALE, SUMSQ ) */ + +/* .. Scalar Arguments .. */ +/* INTEGER INCX, N */ +/* DOUBLE PRECISION SCALE, SUMSQ */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION X( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DLASSQ returns the values scl and smsq such that */ +/* > */ +/* > ( scl**2 )*smsq = x( 1 )**2 +...+ x( n )**2 + ( scale**2 )*sumsq, */ +/* > */ +/* > where x( i ) = X( 1 + ( i - 1 )*INCX ). The value of sumsq is */ +/* > assumed to be non-negative and scl returns the value */ +/* > */ +/* > scl = max( scale, abs( x( i ) ) ). */ +/* > */ +/* > scale and sumsq must be supplied in SCALE and SUMSQ and */ +/* > scl and smsq are overwritten on SCALE and SUMSQ respectively. */ +/* > */ +/* > The routine makes only one pass through the vector x. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of elements to be used from the vector X. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] X */ +/* > \verbatim */ +/* > X is DOUBLE PRECISION array, dimension (N) */ +/* > The vector for which a scaled sum of squares is computed. */ +/* > x( i ) = X( 1 + ( i - 1 )*INCX ), 1 <= i <= n. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] INCX */ +/* > \verbatim */ +/* > INCX is INTEGER */ +/* > The increment between successive values of the vector X. */ +/* > INCX > 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] SCALE */ +/* > \verbatim */ +/* > SCALE is DOUBLE PRECISION */ +/* > On entry, the value scale in the equation above. */ +/* > On exit, SCALE is overwritten with scl , the scaling factor */ +/* > for the sum of squares. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] SUMSQ */ +/* > \verbatim */ +/* > SUMSQ is DOUBLE PRECISION */ +/* > On entry, the value sumsq in the equation above. */ +/* > On exit, SUMSQ is overwritten with smsq , the basic sum of */ +/* > squares from which scl has been factored out. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup OTHERauxiliary */ + +/* ===================================================================== */ +/* Subroutine */ int dlassq_(integer *n, doublereal *x, integer *incx, + doublereal *scale, doublereal *sumsq) +{ + /* System generated locals */ + integer i__1, i__2; + doublereal d__1; + + /* Local variables */ + integer ix; + doublereal absxi; + extern logical disnan_(doublereal *); + + +/* -- LAPACK auxiliary routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments */ + --x; + + /* Function Body */ + if (*n > 0) { + i__1 = (*n - 1) * *incx + 1; + i__2 = *incx; + for (ix = 1; i__2 < 0 ? ix >= i__1 : ix <= i__1; ix += i__2) { + absxi = (d__1 = x[ix], abs(d__1)); + if (absxi > 0. || disnan_(&absxi)) { + if (*scale < absxi) { +/* Computing 2nd power */ + d__1 = *scale / absxi; + *sumsq = *sumsq * (d__1 * d__1) + 1; + *scale = absxi; + } else { +/* Computing 2nd power */ + d__1 = absxi / *scale; + *sumsq += d__1 * d__1; + } + } +/* L10: */ + } + } + return 0; + +/* End of DLASSQ */ + +} /* dlassq_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dlasv2.cpp b/lib/linalg/dlasv2.cpp new file mode 100644 index 0000000000..4667b941d4 --- /dev/null +++ b/lib/linalg/dlasv2.cpp @@ -0,0 +1,356 @@ +/* fortran/dlasv2.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static doublereal c_b3 = 2.; +static doublereal c_b4 = 1.; + +/* > \brief \b DLASV2 computes the singular value decomposition of a 2-by-2 triangular matrix. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DLASV2 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DLASV2( F, G, H, SSMIN, SSMAX, SNR, CSR, SNL, CSL ) */ + +/* .. Scalar Arguments .. */ +/* DOUBLE PRECISION CSL, CSR, F, G, H, SNL, SNR, SSMAX, SSMIN */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DLASV2 computes the singular value decomposition of a 2-by-2 */ +/* > triangular matrix */ +/* > [ F G ] */ +/* > [ 0 H ]. */ +/* > On return, abs(SSMAX) is the larger singular value, abs(SSMIN) is the */ +/* > smaller singular value, and (CSL,SNL) and (CSR,SNR) are the left and */ +/* > right singular vectors for abs(SSMAX), giving the decomposition */ +/* > */ +/* > [ CSL SNL ] [ F G ] [ CSR -SNR ] = [ SSMAX 0 ] */ +/* > [-SNL CSL ] [ 0 H ] [ SNR CSR ] [ 0 SSMIN ]. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] F */ +/* > \verbatim */ +/* > F is DOUBLE PRECISION */ +/* > The (1,1) element of the 2-by-2 matrix. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] G */ +/* > \verbatim */ +/* > G is DOUBLE PRECISION */ +/* > The (1,2) element of the 2-by-2 matrix. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] H */ +/* > \verbatim */ +/* > H is DOUBLE PRECISION */ +/* > The (2,2) element of the 2-by-2 matrix. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] SSMIN */ +/* > \verbatim */ +/* > SSMIN is DOUBLE PRECISION */ +/* > abs(SSMIN) is the smaller singular value. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] SSMAX */ +/* > \verbatim */ +/* > SSMAX is DOUBLE PRECISION */ +/* > abs(SSMAX) is the larger singular value. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] SNL */ +/* > \verbatim */ +/* > SNL is DOUBLE PRECISION */ +/* > \endverbatim */ +/* > */ +/* > \param[out] CSL */ +/* > \verbatim */ +/* > CSL is DOUBLE PRECISION */ +/* > The vector (CSL, SNL) is a unit left singular vector for the */ +/* > singular value abs(SSMAX). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] SNR */ +/* > \verbatim */ +/* > SNR is DOUBLE PRECISION */ +/* > \endverbatim */ +/* > */ +/* > \param[out] CSR */ +/* > \verbatim */ +/* > CSR is DOUBLE PRECISION */ +/* > The vector (CSR, SNR) is a unit right singular vector for the */ +/* > singular value abs(SSMAX). */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup OTHERauxiliary */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > Any input parameter may be aliased with any output parameter. */ +/* > */ +/* > Barring over/underflow and assuming a guard digit in subtraction, all */ +/* > output quantities are correct to within a few units in the last */ +/* > place (ulps). */ +/* > */ +/* > In IEEE arithmetic, the code works correctly if one matrix element is */ +/* > infinite. */ +/* > */ +/* > Overflow will not occur unless the largest singular value itself */ +/* > overflows or is within a few ulps of overflow. (On machines with */ +/* > partial overflow, like the Cray, overflow may occur if the largest */ +/* > singular value is within a factor of 2 of overflow.) */ +/* > */ +/* > Underflow is harmless if underflow is gradual. Otherwise, results */ +/* > may correspond to a matrix modified by perturbations of size near */ +/* > the underflow threshold. */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int dlasv2_(doublereal *f, doublereal *g, doublereal *h__, + doublereal *ssmin, doublereal *ssmax, doublereal *snr, doublereal * + csr, doublereal *snl, doublereal *csl) +{ + /* System generated locals */ + doublereal d__1; + + /* Builtin functions */ + double sqrt(doublereal), d_sign(doublereal *, doublereal *); + + /* Local variables */ + doublereal a, d__, l, m, r__, s, t, fa, ga, ha, ft, gt, ht, mm, tt, clt, + crt, slt, srt; + integer pmax; + doublereal temp; + logical swap; + doublereal tsign; + extern doublereal dlamch_(char *, ftnlen); + logical gasmal; + + +/* -- LAPACK auxiliary routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + ft = *f; + fa = abs(ft); + ht = *h__; + ha = abs(*h__); + +/* PMAX points to the maximum absolute element of matrix */ +/* PMAX = 1 if F largest in absolute values */ +/* PMAX = 2 if G largest in absolute values */ +/* PMAX = 3 if H largest in absolute values */ + + pmax = 1; + swap = ha > fa; + if (swap) { + pmax = 3; + temp = ft; + ft = ht; + ht = temp; + temp = fa; + fa = ha; + ha = temp; + +/* Now FA .ge. HA */ + + } + gt = *g; + ga = abs(gt); + if (ga == 0.) { + +/* Diagonal matrix */ + + *ssmin = ha; + *ssmax = fa; + clt = 1.; + crt = 1.; + slt = 0.; + srt = 0.; + } else { + gasmal = TRUE_; + if (ga > fa) { + pmax = 2; + if (fa / ga < dlamch_((char *)"EPS", (ftnlen)3)) { + +/* Case of very large GA */ + + gasmal = FALSE_; + *ssmax = ga; + if (ha > 1.) { + *ssmin = fa / (ga / ha); + } else { + *ssmin = fa / ga * ha; + } + clt = 1.; + slt = ht / gt; + srt = 1.; + crt = ft / gt; + } + } + if (gasmal) { + +/* Normal case */ + + d__ = fa - ha; + if (d__ == fa) { + +/* Copes with infinite F or H */ + + l = 1.; + } else { + l = d__ / fa; + } + +/* Note that 0 .le. L .le. 1 */ + + m = gt / ft; + +/* Note that abs(M) .le. 1/macheps */ + + t = 2. - l; + +/* Note that T .ge. 1 */ + + mm = m * m; + tt = t * t; + s = sqrt(tt + mm); + +/* Note that 1 .le. S .le. 1 + 1/macheps */ + + if (l == 0.) { + r__ = abs(m); + } else { + r__ = sqrt(l * l + mm); + } + +/* Note that 0 .le. R .le. 1 + 1/macheps */ + + a = (s + r__) * .5; + +/* Note that 1 .le. A .le. 1 + abs(M) */ + + *ssmin = ha / a; + *ssmax = fa * a; + if (mm == 0.) { + +/* Note that M is very tiny */ + + if (l == 0.) { + t = d_sign(&c_b3, &ft) * d_sign(&c_b4, >); + } else { + t = gt / d_sign(&d__, &ft) + m / t; + } + } else { + t = (m / (s + t) + m / (r__ + l)) * (a + 1.); + } + l = sqrt(t * t + 4.); + crt = 2. / l; + srt = t / l; + clt = (crt + srt * m) / a; + slt = ht / ft * srt / a; + } + } + if (swap) { + *csl = srt; + *snl = crt; + *csr = slt; + *snr = clt; + } else { + *csl = clt; + *snl = slt; + *csr = crt; + *snr = srt; + } + +/* Correct signs of SSMAX and SSMIN */ + + if (pmax == 1) { + tsign = d_sign(&c_b4, csr) * d_sign(&c_b4, csl) * d_sign(&c_b4, f); + } + if (pmax == 2) { + tsign = d_sign(&c_b4, snr) * d_sign(&c_b4, csl) * d_sign(&c_b4, g); + } + if (pmax == 3) { + tsign = d_sign(&c_b4, snr) * d_sign(&c_b4, snl) * d_sign(&c_b4, h__); + } + *ssmax = d_sign(ssmax, &tsign); + d__1 = tsign * d_sign(&c_b4, f) * d_sign(&c_b4, h__); + *ssmin = d_sign(ssmin, &d__1); + return 0; + +/* End of DLASV2 */ + +} /* dlasv2_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dlaswp.cpp b/lib/linalg/dlaswp.cpp new file mode 100644 index 0000000000..db83b75947 --- /dev/null +++ b/lib/linalg/dlaswp.cpp @@ -0,0 +1,238 @@ +/* fortran/dlaswp.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* > \brief \b DLASWP performs a series of row interchanges on a general rectangular matrix. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DLASWP + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DLASWP( N, A, LDA, K1, K2, IPIV, INCX ) */ + +/* .. Scalar Arguments .. */ +/* INTEGER INCX, K1, K2, LDA, N */ +/* .. */ +/* .. Array Arguments .. */ +/* INTEGER IPIV( * ) */ +/* DOUBLE PRECISION A( LDA, * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DLASWP performs a series of row interchanges on the matrix A. */ +/* > One row interchange is initiated for each of rows K1 through K2 of A. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > On entry, the matrix of column dimension N to which the row */ +/* > interchanges will be applied. */ +/* > On exit, the permuted matrix. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] K1 */ +/* > \verbatim */ +/* > K1 is INTEGER */ +/* > The first element of IPIV for which a row interchange will */ +/* > be done. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] K2 */ +/* > \verbatim */ +/* > K2 is INTEGER */ +/* > (K2-K1+1) is the number of elements of IPIV for which a row */ +/* > interchange will be done. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IPIV */ +/* > \verbatim */ +/* > IPIV is INTEGER array, dimension (K1+(K2-K1)*abs(INCX)) */ +/* > The vector of pivot indices. Only the elements in positions */ +/* > K1 through K1+(K2-K1)*abs(INCX) of IPIV are accessed. */ +/* > IPIV(K1+(K-K1)*abs(INCX)) = L implies rows K and L are to be */ +/* > interchanged. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] INCX */ +/* > \verbatim */ +/* > INCX is INTEGER */ +/* > The increment between successive values of IPIV. If INCX */ +/* > is negative, the pivots are applied in reverse order. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup doubleOTHERauxiliary */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > Modified by */ +/* > R. C. Whaley, Computer Science Dept., Univ. of Tenn., Knoxville, USA */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int dlaswp_(integer *n, doublereal *a, integer *lda, integer + *k1, integer *k2, integer *ipiv, integer *incx) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + integer i__, j, k, i1, i2, n32, ip, ix, ix0, inc; + doublereal temp; + + +/* -- LAPACK auxiliary routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Interchange row I with row IPIV(K1+(I-K1)*abs(INCX)) for each of rows */ +/* K1 through K2. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + + /* Function Body */ + if (*incx > 0) { + ix0 = *k1; + i1 = *k1; + i2 = *k2; + inc = 1; + } else if (*incx < 0) { + ix0 = *k1 + (*k1 - *k2) * *incx; + i1 = *k2; + i2 = *k1; + inc = -1; + } else { + return 0; + } + + n32 = *n / 32 << 5; + if (n32 != 0) { + i__1 = n32; + for (j = 1; j <= i__1; j += 32) { + ix = ix0; + i__2 = i2; + i__3 = inc; + for (i__ = i1; i__3 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__3) + { + ip = ipiv[ix]; + if (ip != i__) { + i__4 = j + 31; + for (k = j; k <= i__4; ++k) { + temp = a[i__ + k * a_dim1]; + a[i__ + k * a_dim1] = a[ip + k * a_dim1]; + a[ip + k * a_dim1] = temp; +/* L10: */ + } + } + ix += *incx; +/* L20: */ + } +/* L30: */ + } + } + if (n32 != *n) { + ++n32; + ix = ix0; + i__1 = i2; + i__3 = inc; + for (i__ = i1; i__3 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__3) { + ip = ipiv[ix]; + if (ip != i__) { + i__2 = *n; + for (k = n32; k <= i__2; ++k) { + temp = a[i__ + k * a_dim1]; + a[i__ + k * a_dim1] = a[ip + k * a_dim1]; + a[ip + k * a_dim1] = temp; +/* L40: */ + } + } + ix += *incx; +/* L50: */ + } + } + + return 0; + +/* End of DLASWP */ + +} /* dlaswp_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dlatrd.cpp b/lib/linalg/dlatrd.cpp new file mode 100644 index 0000000000..b291e1adbf --- /dev/null +++ b/lib/linalg/dlatrd.cpp @@ -0,0 +1,445 @@ +/* fortran/dlatrd.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static doublereal c_b5 = -1.; +static doublereal c_b6 = 1.; +static integer c__1 = 1; +static doublereal c_b16 = 0.; + +/* > \brief \b DLATRD reduces the first nb rows and columns of a symmetric/Hermitian matrix A to real tridiago +nal form by an orthogonal similarity transformation. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DLATRD + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DLATRD( UPLO, N, NB, A, LDA, E, TAU, W, LDW ) */ + +/* .. Scalar Arguments .. */ +/* CHARACTER UPLO */ +/* INTEGER LDA, LDW, N, NB */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION A( LDA, * ), E( * ), TAU( * ), W( LDW, * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DLATRD reduces NB rows and columns of a real symmetric matrix A to */ +/* > symmetric tridiagonal form by an orthogonal similarity */ +/* > transformation Q**T * A * Q, and returns the matrices V and W which are */ +/* > needed to apply the transformation to the unreduced part of A. */ +/* > */ +/* > If UPLO = 'U', DLATRD reduces the last NB rows and columns of a */ +/* > matrix, of which the upper triangle is supplied; */ +/* > if UPLO = 'L', DLATRD reduces the first NB rows and columns of a */ +/* > matrix, of which the lower triangle is supplied. */ +/* > */ +/* > This is an auxiliary routine called by DSYTRD. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > Specifies whether the upper or lower triangular part of the */ +/* > symmetric matrix A is stored: */ +/* > = 'U': Upper triangular */ +/* > = 'L': Lower triangular */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NB */ +/* > \verbatim */ +/* > NB is INTEGER */ +/* > The number of rows and columns to be reduced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > On entry, the symmetric matrix A. If UPLO = 'U', the leading */ +/* > n-by-n upper triangular part of A contains the upper */ +/* > triangular part of the matrix A, and the strictly lower */ +/* > triangular part of A is not referenced. If UPLO = 'L', the */ +/* > leading n-by-n lower triangular part of A contains the lower */ +/* > triangular part of the matrix A, and the strictly upper */ +/* > triangular part of A is not referenced. */ +/* > On exit: */ +/* > if UPLO = 'U', the last NB columns have been reduced to */ +/* > tridiagonal form, with the diagonal elements overwriting */ +/* > the diagonal elements of A; the elements above the diagonal */ +/* > with the array TAU, represent the orthogonal matrix Q as a */ +/* > product of elementary reflectors; */ +/* > if UPLO = 'L', the first NB columns have been reduced to */ +/* > tridiagonal form, with the diagonal elements overwriting */ +/* > the diagonal elements of A; the elements below the diagonal */ +/* > with the array TAU, represent the orthogonal matrix Q as a */ +/* > product of elementary reflectors. */ +/* > See Further Details. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= (1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] E */ +/* > \verbatim */ +/* > E is DOUBLE PRECISION array, dimension (N-1) */ +/* > If UPLO = 'U', E(n-nb:n-1) contains the superdiagonal */ +/* > elements of the last NB columns of the reduced matrix; */ +/* > if UPLO = 'L', E(1:nb) contains the subdiagonal elements of */ +/* > the first NB columns of the reduced matrix. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] TAU */ +/* > \verbatim */ +/* > TAU is DOUBLE PRECISION array, dimension (N-1) */ +/* > The scalar factors of the elementary reflectors, stored in */ +/* > TAU(n-nb:n-1) if UPLO = 'U', and in TAU(1:nb) if UPLO = 'L'. */ +/* > See Further Details. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] W */ +/* > \verbatim */ +/* > W is DOUBLE PRECISION array, dimension (LDW,NB) */ +/* > The n-by-nb matrix W required to update the unreduced part */ +/* > of A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDW */ +/* > \verbatim */ +/* > LDW is INTEGER */ +/* > The leading dimension of the array W. LDW >= max(1,N). */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup doubleOTHERauxiliary */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > If UPLO = 'U', the matrix Q is represented as a product of elementary */ +/* > reflectors */ +/* > */ +/* > Q = H(n) H(n-1) . . . H(n-nb+1). */ +/* > */ +/* > Each H(i) has the form */ +/* > */ +/* > H(i) = I - tau * v * v**T */ +/* > */ +/* > where tau is a real scalar, and v is a real vector with */ +/* > v(i:n) = 0 and v(i-1) = 1; v(1:i-1) is stored on exit in A(1:i-1,i), */ +/* > and tau in TAU(i-1). */ +/* > */ +/* > If UPLO = 'L', the matrix Q is represented as a product of elementary */ +/* > reflectors */ +/* > */ +/* > Q = H(1) H(2) . . . H(nb). */ +/* > */ +/* > Each H(i) has the form */ +/* > */ +/* > H(i) = I - tau * v * v**T */ +/* > */ +/* > where tau is a real scalar, and v is a real vector with */ +/* > v(1:i) = 0 and v(i+1) = 1; v(i+1:n) is stored on exit in A(i+1:n,i), */ +/* > and tau in TAU(i). */ +/* > */ +/* > The elements of the vectors v together form the n-by-nb matrix V */ +/* > which is needed, with W, to apply the transformation to the unreduced */ +/* > part of the matrix, using a symmetric rank-2k update of the form: */ +/* > A := A - V*W**T - W*V**T. */ +/* > */ +/* > The contents of A on exit are illustrated by the following examples */ +/* > with n = 5 and nb = 2: */ +/* > */ +/* > if UPLO = 'U': if UPLO = 'L': */ +/* > */ +/* > ( a a a v4 v5 ) ( d ) */ +/* > ( a a v4 v5 ) ( 1 d ) */ +/* > ( a 1 v5 ) ( v1 1 a ) */ +/* > ( d 1 ) ( v1 v2 a a ) */ +/* > ( d ) ( v1 v2 a a a ) */ +/* > */ +/* > where d denotes a diagonal element of the reduced matrix, a denotes */ +/* > an element of the original matrix that is unchanged, and vi denotes */ +/* > an element of the vector defining H(i). */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int dlatrd_(char *uplo, integer *n, integer *nb, doublereal * + a, integer *lda, doublereal *e, doublereal *tau, doublereal *w, + integer *ldw, ftnlen uplo_len) +{ + /* System generated locals */ + integer a_dim1, a_offset, w_dim1, w_offset, i__1, i__2, i__3; + + /* Local variables */ + integer i__, iw; + extern doublereal ddot_(integer *, doublereal *, integer *, doublereal *, + integer *); + doublereal alpha; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *); + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern /* Subroutine */ int dgemv_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *, ftnlen), daxpy_(integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *), + dsymv_(char *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + ftnlen), dlarfg_(integer *, doublereal *, doublereal *, integer *, + doublereal *); + + +/* -- LAPACK auxiliary routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Quick return if possible */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --e; + --tau; + w_dim1 = *ldw; + w_offset = 1 + w_dim1; + w -= w_offset; + + /* Function Body */ + if (*n <= 0) { + return 0; + } + + if (lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1)) { + +/* Reduce last NB columns of upper triangle */ + + i__1 = *n - *nb + 1; + for (i__ = *n; i__ >= i__1; --i__) { + iw = i__ - *n + *nb; + if (i__ < *n) { + +/* Update A(1:i,i) */ + + i__2 = *n - i__; + dgemv_((char *)"No transpose", &i__, &i__2, &c_b5, &a[(i__ + 1) * + a_dim1 + 1], lda, &w[i__ + (iw + 1) * w_dim1], ldw, & + c_b6, &a[i__ * a_dim1 + 1], &c__1, (ftnlen)12); + i__2 = *n - i__; + dgemv_((char *)"No transpose", &i__, &i__2, &c_b5, &w[(iw + 1) * + w_dim1 + 1], ldw, &a[i__ + (i__ + 1) * a_dim1], lda, & + c_b6, &a[i__ * a_dim1 + 1], &c__1, (ftnlen)12); + } + if (i__ > 1) { + +/* Generate elementary reflector H(i) to annihilate */ +/* A(1:i-2,i) */ + + i__2 = i__ - 1; + dlarfg_(&i__2, &a[i__ - 1 + i__ * a_dim1], &a[i__ * a_dim1 + + 1], &c__1, &tau[i__ - 1]); + e[i__ - 1] = a[i__ - 1 + i__ * a_dim1]; + a[i__ - 1 + i__ * a_dim1] = 1.; + +/* Compute W(1:i-1,i) */ + + i__2 = i__ - 1; + dsymv_((char *)"Upper", &i__2, &c_b6, &a[a_offset], lda, &a[i__ * + a_dim1 + 1], &c__1, &c_b16, &w[iw * w_dim1 + 1], & + c__1, (ftnlen)5); + if (i__ < *n) { + i__2 = i__ - 1; + i__3 = *n - i__; + dgemv_((char *)"Transpose", &i__2, &i__3, &c_b6, &w[(iw + 1) * + w_dim1 + 1], ldw, &a[i__ * a_dim1 + 1], &c__1, & + c_b16, &w[i__ + 1 + iw * w_dim1], &c__1, (ftnlen) + 9); + i__2 = i__ - 1; + i__3 = *n - i__; + dgemv_((char *)"No transpose", &i__2, &i__3, &c_b5, &a[(i__ + 1) * + a_dim1 + 1], lda, &w[i__ + 1 + iw * w_dim1], & + c__1, &c_b6, &w[iw * w_dim1 + 1], &c__1, (ftnlen) + 12); + i__2 = i__ - 1; + i__3 = *n - i__; + dgemv_((char *)"Transpose", &i__2, &i__3, &c_b6, &a[(i__ + 1) * + a_dim1 + 1], lda, &a[i__ * a_dim1 + 1], &c__1, & + c_b16, &w[i__ + 1 + iw * w_dim1], &c__1, (ftnlen) + 9); + i__2 = i__ - 1; + i__3 = *n - i__; + dgemv_((char *)"No transpose", &i__2, &i__3, &c_b5, &w[(iw + 1) * + w_dim1 + 1], ldw, &w[i__ + 1 + iw * w_dim1], & + c__1, &c_b6, &w[iw * w_dim1 + 1], &c__1, (ftnlen) + 12); + } + i__2 = i__ - 1; + dscal_(&i__2, &tau[i__ - 1], &w[iw * w_dim1 + 1], &c__1); + i__2 = i__ - 1; + alpha = tau[i__ - 1] * -.5 * ddot_(&i__2, &w[iw * w_dim1 + 1], + &c__1, &a[i__ * a_dim1 + 1], &c__1); + i__2 = i__ - 1; + daxpy_(&i__2, &alpha, &a[i__ * a_dim1 + 1], &c__1, &w[iw * + w_dim1 + 1], &c__1); + } + +/* L10: */ + } + } else { + +/* Reduce first NB columns of lower triangle */ + + i__1 = *nb; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Update A(i:n,i) */ + + i__2 = *n - i__ + 1; + i__3 = i__ - 1; + dgemv_((char *)"No transpose", &i__2, &i__3, &c_b5, &a[i__ + a_dim1], lda, + &w[i__ + w_dim1], ldw, &c_b6, &a[i__ + i__ * a_dim1], & + c__1, (ftnlen)12); + i__2 = *n - i__ + 1; + i__3 = i__ - 1; + dgemv_((char *)"No transpose", &i__2, &i__3, &c_b5, &w[i__ + w_dim1], ldw, + &a[i__ + a_dim1], lda, &c_b6, &a[i__ + i__ * a_dim1], & + c__1, (ftnlen)12); + if (i__ < *n) { + +/* Generate elementary reflector H(i) to annihilate */ +/* A(i+2:n,i) */ + + i__2 = *n - i__; +/* Computing MIN */ + i__3 = i__ + 2; + dlarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3,*n) + + i__ * a_dim1], &c__1, &tau[i__]); + e[i__] = a[i__ + 1 + i__ * a_dim1]; + a[i__ + 1 + i__ * a_dim1] = 1.; + +/* Compute W(i+1:n,i) */ + + i__2 = *n - i__; + dsymv_((char *)"Lower", &i__2, &c_b6, &a[i__ + 1 + (i__ + 1) * a_dim1] + , lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b16, &w[ + i__ + 1 + i__ * w_dim1], &c__1, (ftnlen)5); + i__2 = *n - i__; + i__3 = i__ - 1; + dgemv_((char *)"Transpose", &i__2, &i__3, &c_b6, &w[i__ + 1 + w_dim1], + ldw, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b16, &w[ + i__ * w_dim1 + 1], &c__1, (ftnlen)9); + i__2 = *n - i__; + i__3 = i__ - 1; + dgemv_((char *)"No transpose", &i__2, &i__3, &c_b5, &a[i__ + 1 + + a_dim1], lda, &w[i__ * w_dim1 + 1], &c__1, &c_b6, &w[ + i__ + 1 + i__ * w_dim1], &c__1, (ftnlen)12); + i__2 = *n - i__; + i__3 = i__ - 1; + dgemv_((char *)"Transpose", &i__2, &i__3, &c_b6, &a[i__ + 1 + a_dim1], + lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b16, &w[ + i__ * w_dim1 + 1], &c__1, (ftnlen)9); + i__2 = *n - i__; + i__3 = i__ - 1; + dgemv_((char *)"No transpose", &i__2, &i__3, &c_b5, &w[i__ + 1 + + w_dim1], ldw, &w[i__ * w_dim1 + 1], &c__1, &c_b6, &w[ + i__ + 1 + i__ * w_dim1], &c__1, (ftnlen)12); + i__2 = *n - i__; + dscal_(&i__2, &tau[i__], &w[i__ + 1 + i__ * w_dim1], &c__1); + i__2 = *n - i__; + alpha = tau[i__] * -.5 * ddot_(&i__2, &w[i__ + 1 + i__ * + w_dim1], &c__1, &a[i__ + 1 + i__ * a_dim1], &c__1); + i__2 = *n - i__; + daxpy_(&i__2, &alpha, &a[i__ + 1 + i__ * a_dim1], &c__1, &w[ + i__ + 1 + i__ * w_dim1], &c__1); + } + +/* L20: */ + } + } + + return 0; + +/* End of DLATRD */ + +} /* dlatrd_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dlatrs.cpp b/lib/linalg/dlatrs.cpp new file mode 100644 index 0000000000..0b68eac917 --- /dev/null +++ b/lib/linalg/dlatrs.cpp @@ -0,0 +1,984 @@ +/* fortran/dlatrs.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static doublereal c_b46 = .5; + +/* > \brief \b DLATRS solves a triangular system of equations with the scale factor set to prevent overflow. +*/ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DLATRS + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DLATRS( UPLO, TRANS, DIAG, NORMIN, N, A, LDA, X, SCALE, */ +/* CNORM, INFO ) */ + +/* .. Scalar Arguments .. */ +/* CHARACTER DIAG, NORMIN, TRANS, UPLO */ +/* INTEGER INFO, LDA, N */ +/* DOUBLE PRECISION SCALE */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION A( LDA, * ), CNORM( * ), X( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DLATRS solves one of the triangular systems */ +/* > */ +/* > A *x = s*b or A**T *x = s*b */ +/* > */ +/* > with scaling to prevent overflow. Here A is an upper or lower */ +/* > triangular matrix, A**T denotes the transpose of A, x and b are */ +/* > n-element vectors, and s is a scaling factor, usually less than */ +/* > or equal to 1, chosen so that the components of x will be less than */ +/* > the overflow threshold. If the unscaled problem will not cause */ +/* > overflow, the Level 2 BLAS routine DTRSV is called. If the matrix A */ +/* > is singular (A(j,j) = 0 for some j), then s is set to 0 and a */ +/* > non-trivial solution to A*x = 0 is returned. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > Specifies whether the matrix A is upper or lower triangular. */ +/* > = 'U': Upper triangular */ +/* > = 'L': Lower triangular */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TRANS */ +/* > \verbatim */ +/* > TRANS is CHARACTER*1 */ +/* > Specifies the operation applied to A. */ +/* > = 'N': Solve A * x = s*b (No transpose) */ +/* > = 'T': Solve A**T* x = s*b (Transpose) */ +/* > = 'C': Solve A**T* x = s*b (Conjugate transpose = Transpose) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DIAG */ +/* > \verbatim */ +/* > DIAG is CHARACTER*1 */ +/* > Specifies whether or not the matrix A is unit triangular. */ +/* > = 'N': Non-unit triangular */ +/* > = 'U': Unit triangular */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NORMIN */ +/* > \verbatim */ +/* > NORMIN is CHARACTER*1 */ +/* > Specifies whether CNORM has been set or not. */ +/* > = 'Y': CNORM contains the column norms on entry */ +/* > = 'N': CNORM is not set on entry. On exit, the norms will */ +/* > be computed and stored in CNORM. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > The triangular matrix A. If UPLO = 'U', the leading n by n */ +/* > upper triangular part of the array A contains the upper */ +/* > triangular matrix, and the strictly lower triangular part of */ +/* > A is not referenced. If UPLO = 'L', the leading n by n lower */ +/* > triangular part of the array A contains the lower triangular */ +/* > matrix, and the strictly upper triangular part of A is not */ +/* > referenced. If DIAG = 'U', the diagonal elements of A are */ +/* > also not referenced and are assumed to be 1. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= max (1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] X */ +/* > \verbatim */ +/* > X is DOUBLE PRECISION array, dimension (N) */ +/* > On entry, the right hand side b of the triangular system. */ +/* > On exit, X is overwritten by the solution vector x. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] SCALE */ +/* > \verbatim */ +/* > SCALE is DOUBLE PRECISION */ +/* > The scaling factor s for the triangular system */ +/* > A * x = s*b or A**T* x = s*b. */ +/* > If SCALE = 0, the matrix A is singular or badly scaled, and */ +/* > the vector x is an exact or approximate solution to A*x = 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] CNORM */ +/* > \verbatim */ +/* > CNORM is DOUBLE PRECISION array, dimension (N) */ +/* > */ +/* > If NORMIN = 'Y', CNORM is an input argument and CNORM(j) */ +/* > contains the norm of the off-diagonal part of the j-th column */ +/* > of A. If TRANS = 'N', CNORM(j) must be greater than or equal */ +/* > to the infinity-norm, and if TRANS = 'T' or 'C', CNORM(j) */ +/* > must be greater than or equal to the 1-norm. */ +/* > */ +/* > If NORMIN = 'N', CNORM is an output argument and CNORM(j) */ +/* > returns the 1-norm of the offdiagonal part of the j-th column */ +/* > of A. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -k, the k-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup doubleOTHERauxiliary */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > A rough bound on x is computed; if that is less than overflow, DTRSV */ +/* > is called, otherwise, specific code is used which checks for possible */ +/* > overflow or divide-by-zero at every operation. */ +/* > */ +/* > A columnwise scheme is used for solving A*x = b. The basic algorithm */ +/* > if A is lower triangular is */ +/* > */ +/* > x[1:n] := b[1:n] */ +/* > for j = 1, ..., n */ +/* > x(j) := x(j) / A(j,j) */ +/* > x[j+1:n] := x[j+1:n] - x(j) * A[j+1:n,j] */ +/* > end */ +/* > */ +/* > Define bounds on the components of x after j iterations of the loop: */ +/* > M(j) = bound on x[1:j] */ +/* > G(j) = bound on x[j+1:n] */ +/* > Initially, let M(0) = 0 and G(0) = max{x(i), i=1,...,n}. */ +/* > */ +/* > Then for iteration j+1 we have */ +/* > M(j+1) <= G(j) / | A(j+1,j+1) | */ +/* > G(j+1) <= G(j) + M(j+1) * | A[j+2:n,j+1] | */ +/* > <= G(j) ( 1 + CNORM(j+1) / | A(j+1,j+1) | ) */ +/* > */ +/* > where CNORM(j+1) is greater than or equal to the infinity-norm of */ +/* > column j+1 of A, not counting the diagonal. Hence */ +/* > */ +/* > G(j) <= G(0) product ( 1 + CNORM(i) / | A(i,i) | ) */ +/* > 1<=i<=j */ +/* > and */ +/* > */ +/* > |x(j)| <= ( G(0) / |A(j,j)| ) product ( 1 + CNORM(i) / |A(i,i)| ) */ +/* > 1<=i< j */ +/* > */ +/* > Since |x(j)| <= M(j), we use the Level 2 BLAS routine DTRSV if the */ +/* > reciprocal of the largest M(j), j=1,..,n, is larger than */ +/* > max(underflow, 1/overflow). */ +/* > */ +/* > The bound on x(j) is also used to determine when a step in the */ +/* > columnwise method can be performed without fear of overflow. If */ +/* > the computed bound is greater than a large constant, x is scaled to */ +/* > prevent overflow, but if the bound overflows, x is set to 0, x(j) to */ +/* > 1, and scale to 0, and a non-trivial solution to A*x = 0 is found. */ +/* > */ +/* > Similarly, a row-wise scheme is used to solve A**T*x = b. The basic */ +/* > algorithm for A upper triangular is */ +/* > */ +/* > for j = 1, ..., n */ +/* > x(j) := ( b(j) - A[1:j-1,j]**T * x[1:j-1] ) / A(j,j) */ +/* > end */ +/* > */ +/* > We simultaneously compute two bounds */ +/* > G(j) = bound on ( b(i) - A[1:i-1,i]**T * x[1:i-1] ), 1<=i<=j */ +/* > M(j) = bound on x(i), 1<=i<=j */ +/* > */ +/* > The initial values are G(0) = 0, M(0) = max{b(i), i=1,..,n}, and we */ +/* > add the constraint G(j) >= G(j-1) and M(j) >= M(j-1) for j >= 1. */ +/* > Then the bound on x(j) is */ +/* > */ +/* > M(j) <= M(j-1) * ( 1 + CNORM(j) ) / | A(j,j) | */ +/* > */ +/* > <= M(0) * product ( ( 1 + CNORM(i) ) / |A(i,i)| ) */ +/* > 1<=i<=j */ +/* > */ +/* > and we can safely call DTRSV if 1/M(n) and 1/G(n) are both greater */ +/* > than max(underflow, 1/overflow). */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int dlatrs_(char *uplo, char *trans, char *diag, char * + normin, integer *n, doublereal *a, integer *lda, doublereal *x, + doublereal *scale, doublereal *cnorm, integer *info, ftnlen uplo_len, + ftnlen trans_len, ftnlen diag_len, ftnlen normin_len) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + doublereal d__1, d__2, d__3; + + /* Local variables */ + integer i__, j; + doublereal xj, rec, tjj; + integer jinc; + extern doublereal ddot_(integer *, doublereal *, integer *, doublereal *, + integer *); + doublereal xbnd; + integer imax; + doublereal tmax, tjjs, xmax, grow, sumj; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *); + extern logical lsame_(char *, char *, ftnlen, ftnlen); + doublereal tscal, uscal; + extern doublereal dasum_(integer *, doublereal *, integer *); + integer jlast; + extern /* Subroutine */ int daxpy_(integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *); + logical upper; + extern /* Subroutine */ int dtrsv_(char *, char *, char *, integer *, + doublereal *, integer *, doublereal *, integer *, ftnlen, ftnlen, + ftnlen); + extern doublereal dlamch_(char *, ftnlen), dlange_(char *, integer *, + integer *, doublereal *, integer *, doublereal *, ftnlen); + extern integer idamax_(integer *, doublereal *, integer *); + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + doublereal bignum; + logical notran; + integer jfirst; + doublereal smlnum; + logical nounit; + + +/* -- LAPACK auxiliary routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --x; + --cnorm; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + notran = lsame_(trans, (char *)"N", (ftnlen)1, (ftnlen)1); + nounit = lsame_(diag, (char *)"N", (ftnlen)1, (ftnlen)1); + +/* Test the input parameters. */ + + if (! upper && ! lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (! notran && ! lsame_(trans, (char *)"T", (ftnlen)1, (ftnlen)1) && ! + lsame_(trans, (char *)"C", (ftnlen)1, (ftnlen)1)) { + *info = -2; + } else if (! nounit && ! lsame_(diag, (char *)"U", (ftnlen)1, (ftnlen)1)) { + *info = -3; + } else if (! lsame_(normin, (char *)"Y", (ftnlen)1, (ftnlen)1) && ! lsame_(normin, + (char *)"N", (ftnlen)1, (ftnlen)1)) { + *info = -4; + } else if (*n < 0) { + *info = -5; + } else if (*lda < max(1,*n)) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DLATRS", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + *scale = 1.; + if (*n == 0) { + return 0; + } + +/* Determine machine dependent parameters to control overflow. */ + + smlnum = dlamch_((char *)"Safe minimum", (ftnlen)12) / dlamch_((char *)"Precision", ( + ftnlen)9); + bignum = 1. / smlnum; + + if (lsame_(normin, (char *)"N", (ftnlen)1, (ftnlen)1)) { + +/* Compute the 1-norm of each column, not including the diagonal. */ + + if (upper) { + +/* A is upper triangular. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j - 1; + cnorm[j] = dasum_(&i__2, &a[j * a_dim1 + 1], &c__1); +/* L10: */ + } + } else { + +/* A is lower triangular. */ + + i__1 = *n - 1; + for (j = 1; j <= i__1; ++j) { + i__2 = *n - j; + cnorm[j] = dasum_(&i__2, &a[j + 1 + j * a_dim1], &c__1); +/* L20: */ + } + cnorm[*n] = 0.; + } + } + +/* Scale the column norms by TSCAL if the maximum element in CNORM is */ +/* greater than BIGNUM. */ + + imax = idamax_(n, &cnorm[1], &c__1); + tmax = cnorm[imax]; + if (tmax <= bignum) { + tscal = 1.; + } else { + +/* Avoid NaN generation if entries in CNORM exceed the */ +/* overflow threshold */ + + if (tmax <= dlamch_((char *)"Overflow", (ftnlen)8)) { +/* Case 1: All entries in CNORM are valid floating-point numbers */ + tscal = 1. / (smlnum * tmax); + dscal_(n, &tscal, &cnorm[1], &c__1); + } else { +/* Case 2: At least one column norm of A cannot be represented */ +/* as floating-point number. Find the offdiagonal entry A( I, J ) */ +/* with the largest absolute value. If this entry is not +/- Infinity, */ +/* use this value as TSCAL. */ + tmax = 0.; + if (upper) { + +/* A is upper triangular. */ + + i__1 = *n; + for (j = 2; j <= i__1; ++j) { +/* Computing MAX */ + i__2 = j - 1; + d__1 = dlange_((char *)"M", &i__2, &c__1, &a[j * a_dim1 + 1], & + c__1, &sumj, (ftnlen)1); + tmax = max(d__1,tmax); + } + } else { + +/* A is lower triangular. */ + + i__1 = *n - 1; + for (j = 1; j <= i__1; ++j) { +/* Computing MAX */ + i__2 = *n - j; + d__1 = dlange_((char *)"M", &i__2, &c__1, &a[j + 1 + j * a_dim1], + &c__1, &sumj, (ftnlen)1); + tmax = max(d__1,tmax); + } + } + + if (tmax <= dlamch_((char *)"Overflow", (ftnlen)8)) { + tscal = 1. / (smlnum * tmax); + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (cnorm[j] <= dlamch_((char *)"Overflow", (ftnlen)8)) { + cnorm[j] *= tscal; + } else { +/* Recompute the 1-norm without introducing Infinity */ +/* in the summation */ + cnorm[j] = 0.; + if (upper) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + cnorm[j] += tscal * (d__1 = a[i__ + j * + a_dim1], abs(d__1)); + } + } else { + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + cnorm[j] += tscal * (d__1 = a[i__ + j * + a_dim1], abs(d__1)); + } + } + } + } + } else { +/* At least one entry of A is not a valid floating-point entry. */ +/* Rely on TRSV to propagate Inf and NaN. */ + dtrsv_(uplo, trans, diag, n, &a[a_offset], lda, &x[1], &c__1, + (ftnlen)1, (ftnlen)1, (ftnlen)1); + return 0; + } + } + } + +/* Compute a bound on the computed solution vector to see if the */ +/* Level 2 BLAS routine DTRSV can be used. */ + + j = idamax_(n, &x[1], &c__1); + xmax = (d__1 = x[j], abs(d__1)); + xbnd = xmax; + if (notran) { + +/* Compute the growth in A * x = b. */ + + if (upper) { + jfirst = *n; + jlast = 1; + jinc = -1; + } else { + jfirst = 1; + jlast = *n; + jinc = 1; + } + + if (tscal != 1.) { + grow = 0.; + goto L50; + } + + if (nounit) { + +/* A is non-unit triangular. */ + +/* Compute GROW = 1/G(j) and XBND = 1/M(j). */ +/* Initially, G(0) = max{x(i), i=1,...,n}. */ + + grow = 1. / max(xbnd,smlnum); + xbnd = grow; + i__1 = jlast; + i__2 = jinc; + for (j = jfirst; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { + +/* Exit the loop if the growth factor is too small. */ + + if (grow <= smlnum) { + goto L50; + } + +/* M(j) = G(j-1) / abs(A(j,j)) */ + + tjj = (d__1 = a[j + j * a_dim1], abs(d__1)); +/* Computing MIN */ + d__1 = xbnd, d__2 = min(1.,tjj) * grow; + xbnd = min(d__1,d__2); + if (tjj + cnorm[j] >= smlnum) { + +/* G(j) = G(j-1)*( 1 + CNORM(j) / abs(A(j,j)) ) */ + + grow *= tjj / (tjj + cnorm[j]); + } else { + +/* G(j) could overflow, set GROW to 0. */ + + grow = 0.; + } +/* L30: */ + } + grow = xbnd; + } else { + +/* A is unit triangular. */ + +/* Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. */ + +/* Computing MIN */ + d__1 = 1., d__2 = 1. / max(xbnd,smlnum); + grow = min(d__1,d__2); + i__2 = jlast; + i__1 = jinc; + for (j = jfirst; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) { + +/* Exit the loop if the growth factor is too small. */ + + if (grow <= smlnum) { + goto L50; + } + +/* G(j) = G(j-1)*( 1 + CNORM(j) ) */ + + grow *= 1. / (cnorm[j] + 1.); +/* L40: */ + } + } +L50: + + ; + } else { + +/* Compute the growth in A**T * x = b. */ + + if (upper) { + jfirst = 1; + jlast = *n; + jinc = 1; + } else { + jfirst = *n; + jlast = 1; + jinc = -1; + } + + if (tscal != 1.) { + grow = 0.; + goto L80; + } + + if (nounit) { + +/* A is non-unit triangular. */ + +/* Compute GROW = 1/G(j) and XBND = 1/M(j). */ +/* Initially, M(0) = max{x(i), i=1,...,n}. */ + + grow = 1. / max(xbnd,smlnum); + xbnd = grow; + i__1 = jlast; + i__2 = jinc; + for (j = jfirst; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { + +/* Exit the loop if the growth factor is too small. */ + + if (grow <= smlnum) { + goto L80; + } + +/* G(j) = max( G(j-1), M(j-1)*( 1 + CNORM(j) ) ) */ + + xj = cnorm[j] + 1.; +/* Computing MIN */ + d__1 = grow, d__2 = xbnd / xj; + grow = min(d__1,d__2); + +/* M(j) = M(j-1)*( 1 + CNORM(j) ) / abs(A(j,j)) */ + + tjj = (d__1 = a[j + j * a_dim1], abs(d__1)); + if (xj > tjj) { + xbnd *= tjj / xj; + } +/* L60: */ + } + grow = min(grow,xbnd); + } else { + +/* A is unit triangular. */ + +/* Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. */ + +/* Computing MIN */ + d__1 = 1., d__2 = 1. / max(xbnd,smlnum); + grow = min(d__1,d__2); + i__2 = jlast; + i__1 = jinc; + for (j = jfirst; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) { + +/* Exit the loop if the growth factor is too small. */ + + if (grow <= smlnum) { + goto L80; + } + +/* G(j) = ( 1 + CNORM(j) )*G(j-1) */ + + xj = cnorm[j] + 1.; + grow /= xj; +/* L70: */ + } + } +L80: + ; + } + + if (grow * tscal > smlnum) { + +/* Use the Level 2 BLAS solve if the reciprocal of the bound on */ +/* elements of X is not too small. */ + + dtrsv_(uplo, trans, diag, n, &a[a_offset], lda, &x[1], &c__1, (ftnlen) + 1, (ftnlen)1, (ftnlen)1); + } else { + +/* Use a Level 1 BLAS solve, scaling intermediate results. */ + + if (xmax > bignum) { + +/* Scale X so that its components are less than or equal to */ +/* BIGNUM in absolute value. */ + + *scale = bignum / xmax; + dscal_(n, scale, &x[1], &c__1); + xmax = bignum; + } + + if (notran) { + +/* Solve A * x = b */ + + i__1 = jlast; + i__2 = jinc; + for (j = jfirst; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { + +/* Compute x(j) = b(j) / A(j,j), scaling x if necessary. */ + + xj = (d__1 = x[j], abs(d__1)); + if (nounit) { + tjjs = a[j + j * a_dim1] * tscal; + } else { + tjjs = tscal; + if (tscal == 1.) { + goto L100; + } + } + tjj = abs(tjjs); + if (tjj > smlnum) { + +/* abs(A(j,j)) > SMLNUM: */ + + if (tjj < 1.) { + if (xj > tjj * bignum) { + +/* Scale x by 1/b(j). */ + + rec = 1. / xj; + dscal_(n, &rec, &x[1], &c__1); + *scale *= rec; + xmax *= rec; + } + } + x[j] /= tjjs; + xj = (d__1 = x[j], abs(d__1)); + } else if (tjj > 0.) { + +/* 0 < abs(A(j,j)) <= SMLNUM: */ + + if (xj > tjj * bignum) { + +/* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM */ +/* to avoid overflow when dividing by A(j,j). */ + + rec = tjj * bignum / xj; + if (cnorm[j] > 1.) { + +/* Scale by 1/CNORM(j) to avoid overflow when */ +/* multiplying x(j) times column j. */ + + rec /= cnorm[j]; + } + dscal_(n, &rec, &x[1], &c__1); + *scale *= rec; + xmax *= rec; + } + x[j] /= tjjs; + xj = (d__1 = x[j], abs(d__1)); + } else { + +/* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and */ +/* scale = 0, and compute a solution to A*x = 0. */ + + i__3 = *n; + for (i__ = 1; i__ <= i__3; ++i__) { + x[i__] = 0.; +/* L90: */ + } + x[j] = 1.; + xj = 1.; + *scale = 0.; + xmax = 0.; + } +L100: + +/* Scale x if necessary to avoid overflow when adding a */ +/* multiple of column j of A. */ + + if (xj > 1.) { + rec = 1. / xj; + if (cnorm[j] > (bignum - xmax) * rec) { + +/* Scale x by 1/(2*abs(x(j))). */ + + rec *= .5; + dscal_(n, &rec, &x[1], &c__1); + *scale *= rec; + } + } else if (xj * cnorm[j] > bignum - xmax) { + +/* Scale x by 1/2. */ + + dscal_(n, &c_b46, &x[1], &c__1); + *scale *= .5; + } + + if (upper) { + if (j > 1) { + +/* Compute the update */ +/* x(1:j-1) := x(1:j-1) - x(j) * A(1:j-1,j) */ + + i__3 = j - 1; + d__1 = -x[j] * tscal; + daxpy_(&i__3, &d__1, &a[j * a_dim1 + 1], &c__1, &x[1], + &c__1); + i__3 = j - 1; + i__ = idamax_(&i__3, &x[1], &c__1); + xmax = (d__1 = x[i__], abs(d__1)); + } + } else { + if (j < *n) { + +/* Compute the update */ +/* x(j+1:n) := x(j+1:n) - x(j) * A(j+1:n,j) */ + + i__3 = *n - j; + d__1 = -x[j] * tscal; + daxpy_(&i__3, &d__1, &a[j + 1 + j * a_dim1], &c__1, & + x[j + 1], &c__1); + i__3 = *n - j; + i__ = j + idamax_(&i__3, &x[j + 1], &c__1); + xmax = (d__1 = x[i__], abs(d__1)); + } + } +/* L110: */ + } + + } else { + +/* Solve A**T * x = b */ + + i__2 = jlast; + i__1 = jinc; + for (j = jfirst; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) { + +/* Compute x(j) = b(j) - sum A(k,j)*x(k). */ +/* k<>j */ + + xj = (d__1 = x[j], abs(d__1)); + uscal = tscal; + rec = 1. / max(xmax,1.); + if (cnorm[j] > (bignum - xj) * rec) { + +/* If x(j) could overflow, scale x by 1/(2*XMAX). */ + + rec *= .5; + if (nounit) { + tjjs = a[j + j * a_dim1] * tscal; + } else { + tjjs = tscal; + } + tjj = abs(tjjs); + if (tjj > 1.) { + +/* Divide by A(j,j) when scaling x if A(j,j) > 1. */ + +/* Computing MIN */ + d__1 = 1., d__2 = rec * tjj; + rec = min(d__1,d__2); + uscal /= tjjs; + } + if (rec < 1.) { + dscal_(n, &rec, &x[1], &c__1); + *scale *= rec; + xmax *= rec; + } + } + + sumj = 0.; + if (uscal == 1.) { + +/* If the scaling needed for A in the dot product is 1, */ +/* call DDOT to perform the dot product. */ + + if (upper) { + i__3 = j - 1; + sumj = ddot_(&i__3, &a[j * a_dim1 + 1], &c__1, &x[1], + &c__1); + } else if (j < *n) { + i__3 = *n - j; + sumj = ddot_(&i__3, &a[j + 1 + j * a_dim1], &c__1, &x[ + j + 1], &c__1); + } + } else { + +/* Otherwise, use in-line code for the dot product. */ + + if (upper) { + i__3 = j - 1; + for (i__ = 1; i__ <= i__3; ++i__) { + sumj += a[i__ + j * a_dim1] * uscal * x[i__]; +/* L120: */ + } + } else if (j < *n) { + i__3 = *n; + for (i__ = j + 1; i__ <= i__3; ++i__) { + sumj += a[i__ + j * a_dim1] * uscal * x[i__]; +/* L130: */ + } + } + } + + if (uscal == tscal) { + +/* Compute x(j) := ( x(j) - sumj ) / A(j,j) if 1/A(j,j) */ +/* was not used to scale the dotproduct. */ + + x[j] -= sumj; + xj = (d__1 = x[j], abs(d__1)); + if (nounit) { + tjjs = a[j + j * a_dim1] * tscal; + } else { + tjjs = tscal; + if (tscal == 1.) { + goto L150; + } + } + +/* Compute x(j) = x(j) / A(j,j), scaling if necessary. */ + + tjj = abs(tjjs); + if (tjj > smlnum) { + +/* abs(A(j,j)) > SMLNUM: */ + + if (tjj < 1.) { + if (xj > tjj * bignum) { + +/* Scale X by 1/abs(x(j)). */ + + rec = 1. / xj; + dscal_(n, &rec, &x[1], &c__1); + *scale *= rec; + xmax *= rec; + } + } + x[j] /= tjjs; + } else if (tjj > 0.) { + +/* 0 < abs(A(j,j)) <= SMLNUM: */ + + if (xj > tjj * bignum) { + +/* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM. */ + + rec = tjj * bignum / xj; + dscal_(n, &rec, &x[1], &c__1); + *scale *= rec; + xmax *= rec; + } + x[j] /= tjjs; + } else { + +/* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and */ +/* scale = 0, and compute a solution to A**T*x = 0. */ + + i__3 = *n; + for (i__ = 1; i__ <= i__3; ++i__) { + x[i__] = 0.; +/* L140: */ + } + x[j] = 1.; + *scale = 0.; + xmax = 0.; + } +L150: + ; + } else { + +/* Compute x(j) := x(j) / A(j,j) - sumj if the dot */ +/* product has already been divided by 1/A(j,j). */ + + x[j] = x[j] / tjjs - sumj; + } +/* Computing MAX */ + d__2 = xmax, d__3 = (d__1 = x[j], abs(d__1)); + xmax = max(d__2,d__3); +/* L160: */ + } + } + *scale /= tscal; + } + +/* Scale the column norms by 1/TSCAL for return. */ + + if (tscal != 1.) { + d__1 = 1. / tscal; + dscal_(n, &d__1, &cnorm[1], &c__1); + } + + return 0; + +/* End of DLATRS */ + +} /* dlatrs_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dnrm2.cpp b/lib/linalg/dnrm2.cpp new file mode 100644 index 0000000000..6ebf2d935e --- /dev/null +++ b/lib/linalg/dnrm2.cpp @@ -0,0 +1,169 @@ +/* fortran/dnrm2.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* > \brief \b DNRM2 */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* Definition: */ +/* =========== */ + +/* DOUBLE PRECISION FUNCTION DNRM2(N,X,INCX) */ + +/* .. Scalar Arguments .. */ +/* INTEGER INCX,N */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION X(*) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DNRM2 returns the euclidean norm of a vector via the function */ +/* > name, so that */ +/* > */ +/* > DNRM2 := sqrt( x'*x ) */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > number of elements in input vector(s) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] X */ +/* > \verbatim */ +/* > X is DOUBLE PRECISION array, dimension ( 1 + ( N - 1 )*abs( INCX ) ) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] INCX */ +/* > \verbatim */ +/* > INCX is INTEGER */ +/* > storage spacing between elements of DX */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup double_blas_level1 */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > -- This version written on 25-October-1982. */ +/* > Modified on 14-October-1993 to inline the call to DLASSQ. */ +/* > Sven Hammarling, Nag Ltd. */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +doublereal dnrm2_(integer *n, doublereal *x, integer *incx) +{ + /* System generated locals */ + integer i__1, i__2; + doublereal ret_val, d__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + integer ix; + doublereal ssq, norm, scale, absxi; + + +/* -- Reference BLAS level1 routine (version 3.7.0) -- */ +/* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ + /* Parameter adjustments */ + --x; + + /* Function Body */ + if (*n < 1 || *incx < 1) { + norm = 0.; + } else if (*n == 1) { + norm = abs(x[1]); + } else { + scale = 0.; + ssq = 1.; +/* The following loop is equivalent to this call to the LAPACK */ +/* auxiliary routine: */ +/* CALL DLASSQ( N, X, INCX, SCALE, SSQ ) */ + + i__1 = (*n - 1) * *incx + 1; + i__2 = *incx; + for (ix = 1; i__2 < 0 ? ix >= i__1 : ix <= i__1; ix += i__2) { + if (x[ix] != 0.) { + absxi = (d__1 = x[ix], abs(d__1)); + if (scale < absxi) { +/* Computing 2nd power */ + d__1 = scale / absxi; + ssq = ssq * (d__1 * d__1) + 1.; + scale = absxi; + } else { +/* Computing 2nd power */ + d__1 = absxi / scale; + ssq += d__1 * d__1; + } + } +/* L10: */ + } + norm = scale * sqrt(ssq); + } + + ret_val = norm; + return ret_val; + +/* End of DNRM2. */ + +} /* dnrm2_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dorg2l.cpp b/lib/linalg/dorg2l.cpp new file mode 100644 index 0000000000..5705f3b630 --- /dev/null +++ b/lib/linalg/dorg2l.cpp @@ -0,0 +1,252 @@ +/* fortran/dorg2l.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +/* > \brief \b DORG2L generates all or part of the orthogonal matrix Q from a QL factorization determined by s +geqlf (unblocked algorithm). */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DORG2L + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DORG2L( M, N, K, A, LDA, TAU, WORK, INFO ) */ + +/* .. Scalar Arguments .. */ +/* INTEGER INFO, K, LDA, M, N */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DORG2L generates an m by n real matrix Q with orthonormal columns, */ +/* > which is defined as the last n columns of a product of k elementary */ +/* > reflectors of order m */ +/* > */ +/* > Q = H(k) . . . H(2) H(1) */ +/* > */ +/* > as returned by DGEQLF. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix Q. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix Q. M >= N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] K */ +/* > \verbatim */ +/* > K is INTEGER */ +/* > The number of elementary reflectors whose product defines the */ +/* > matrix Q. N >= K >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > On entry, the (n-k+i)-th column must contain the vector which */ +/* > defines the elementary reflector H(i), for i = 1,2,...,k, as */ +/* > returned by DGEQLF in the last k columns of its array */ +/* > argument A. */ +/* > On exit, the m by n matrix Q. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The first dimension of the array A. LDA >= max(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TAU */ +/* > \verbatim */ +/* > TAU is DOUBLE PRECISION array, dimension (K) */ +/* > TAU(i) must contain the scalar factor of the elementary */ +/* > reflector H(i), as returned by DGEQLF. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument has an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup doubleOTHERcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int dorg2l_(integer *m, integer *n, integer *k, doublereal * + a, integer *lda, doublereal *tau, doublereal *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + doublereal d__1; + + /* Local variables */ + integer i__, j, l, ii; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *), dlarf_(char *, integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + ftnlen), xerbla_(char *, integer *, ftnlen); + + +/* -- LAPACK computational routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0 || *n > *m) { + *info = -2; + } else if (*k < 0 || *k > *n) { + *info = -3; + } else if (*lda < max(1,*m)) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DORG2L", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + if (*n <= 0) { + return 0; + } + +/* Initialise columns 1:n-k to columns of the unit matrix */ + + i__1 = *n - *k; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (l = 1; l <= i__2; ++l) { + a[l + j * a_dim1] = 0.; +/* L10: */ + } + a[*m - *n + j + j * a_dim1] = 1.; +/* L20: */ + } + + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + ii = *n - *k + i__; + +/* Apply H(i) to A(1:m-k+i,1:n-k+i) from the left */ + + a[*m - *n + ii + ii * a_dim1] = 1.; + i__2 = *m - *n + ii; + i__3 = ii - 1; + dlarf_((char *)"Left", &i__2, &i__3, &a[ii * a_dim1 + 1], &c__1, &tau[i__], & + a[a_offset], lda, &work[1], (ftnlen)4); + i__2 = *m - *n + ii - 1; + d__1 = -tau[i__]; + dscal_(&i__2, &d__1, &a[ii * a_dim1 + 1], &c__1); + a[*m - *n + ii + ii * a_dim1] = 1. - tau[i__]; + +/* Set A(m-k+i+1:m,n-k+i) to zero */ + + i__2 = *m; + for (l = *m - *n + ii + 1; l <= i__2; ++l) { + a[l + ii * a_dim1] = 0.; +/* L30: */ + } +/* L40: */ + } + return 0; + +/* End of DORG2L */ + +} /* dorg2l_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dorg2r.cpp b/lib/linalg/dorg2r.cpp new file mode 100644 index 0000000000..90ebed3963 --- /dev/null +++ b/lib/linalg/dorg2r.cpp @@ -0,0 +1,255 @@ +/* fortran/dorg2r.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +/* > \brief \b DORG2R generates all or part of the orthogonal matrix Q from a QR factorization determined by s +geqrf (unblocked algorithm). */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DORG2R + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DORG2R( M, N, K, A, LDA, TAU, WORK, INFO ) */ + +/* .. Scalar Arguments .. */ +/* INTEGER INFO, K, LDA, M, N */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DORG2R generates an m by n real matrix Q with orthonormal columns, */ +/* > which is defined as the first n columns of a product of k elementary */ +/* > reflectors of order m */ +/* > */ +/* > Q = H(1) H(2) . . . H(k) */ +/* > */ +/* > as returned by DGEQRF. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix Q. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix Q. M >= N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] K */ +/* > \verbatim */ +/* > K is INTEGER */ +/* > The number of elementary reflectors whose product defines the */ +/* > matrix Q. N >= K >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > On entry, the i-th column must contain the vector which */ +/* > defines the elementary reflector H(i), for i = 1,2,...,k, as */ +/* > returned by DGEQRF in the first k columns of its array */ +/* > argument A. */ +/* > On exit, the m-by-n matrix Q. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The first dimension of the array A. LDA >= max(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TAU */ +/* > \verbatim */ +/* > TAU is DOUBLE PRECISION array, dimension (K) */ +/* > TAU(i) must contain the scalar factor of the elementary */ +/* > reflector H(i), as returned by DGEQRF. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument has an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup doubleOTHERcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int dorg2r_(integer *m, integer *n, integer *k, doublereal * + a, integer *lda, doublereal *tau, doublereal *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + doublereal d__1; + + /* Local variables */ + integer i__, j, l; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *), dlarf_(char *, integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + ftnlen), xerbla_(char *, integer *, ftnlen); + + +/* -- LAPACK computational routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0 || *n > *m) { + *info = -2; + } else if (*k < 0 || *k > *n) { + *info = -3; + } else if (*lda < max(1,*m)) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DORG2R", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + if (*n <= 0) { + return 0; + } + +/* Initialise columns k+1:n to columns of the unit matrix */ + + i__1 = *n; + for (j = *k + 1; j <= i__1; ++j) { + i__2 = *m; + for (l = 1; l <= i__2; ++l) { + a[l + j * a_dim1] = 0.; +/* L10: */ + } + a[j + j * a_dim1] = 1.; +/* L20: */ + } + + for (i__ = *k; i__ >= 1; --i__) { + +/* Apply H(i) to A(i:m,i:n) from the left */ + + if (i__ < *n) { + a[i__ + i__ * a_dim1] = 1.; + i__1 = *m - i__ + 1; + i__2 = *n - i__; + dlarf_((char *)"Left", &i__1, &i__2, &a[i__ + i__ * a_dim1], &c__1, &tau[ + i__], &a[i__ + (i__ + 1) * a_dim1], lda, &work[1], ( + ftnlen)4); + } + if (i__ < *m) { + i__1 = *m - i__; + d__1 = -tau[i__]; + dscal_(&i__1, &d__1, &a[i__ + 1 + i__ * a_dim1], &c__1); + } + a[i__ + i__ * a_dim1] = 1. - tau[i__]; + +/* Set A(1:i-1,i) to zero */ + + i__1 = i__ - 1; + for (l = 1; l <= i__1; ++l) { + a[l + i__ * a_dim1] = 0.; +/* L30: */ + } +/* L40: */ + } + return 0; + +/* End of DORG2R */ + +} /* dorg2r_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dorgbr.cpp b/lib/linalg/dorgbr.cpp new file mode 100644 index 0000000000..4991344c01 --- /dev/null +++ b/lib/linalg/dorgbr.cpp @@ -0,0 +1,404 @@ +/* fortran/dorgbr.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static integer c_n1 = -1; + +/* > \brief \b DORGBR */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DORGBR + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DORGBR( VECT, M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) */ + +/* .. Scalar Arguments .. */ +/* CHARACTER VECT */ +/* INTEGER INFO, K, LDA, LWORK, M, N */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DORGBR generates one of the real orthogonal matrices Q or P**T */ +/* > determined by DGEBRD when reducing a real matrix A to bidiagonal */ +/* > form: A = Q * B * P**T. Q and P**T are defined as products of */ +/* > elementary reflectors H(i) or G(i) respectively. */ +/* > */ +/* > If VECT = 'Q', A is assumed to have been an M-by-K matrix, and Q */ +/* > is of order M: */ +/* > if m >= k, Q = H(1) H(2) . . . H(k) and DORGBR returns the first n */ +/* > columns of Q, where m >= n >= k; */ +/* > if m < k, Q = H(1) H(2) . . . H(m-1) and DORGBR returns Q as an */ +/* > M-by-M matrix. */ +/* > */ +/* > If VECT = 'P', A is assumed to have been a K-by-N matrix, and P**T */ +/* > is of order N: */ +/* > if k < n, P**T = G(k) . . . G(2) G(1) and DORGBR returns the first m */ +/* > rows of P**T, where n >= m >= k; */ +/* > if k >= n, P**T = G(n-1) . . . G(2) G(1) and DORGBR returns P**T as */ +/* > an N-by-N matrix. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] VECT */ +/* > \verbatim */ +/* > VECT is CHARACTER*1 */ +/* > Specifies whether the matrix Q or the matrix P**T is */ +/* > required, as defined in the transformation applied by DGEBRD: */ +/* > = 'Q': generate Q; */ +/* > = 'P': generate P**T. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix Q or P**T to be returned. */ +/* > M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix Q or P**T to be returned. */ +/* > N >= 0. */ +/* > If VECT = 'Q', M >= N >= min(M,K); */ +/* > if VECT = 'P', N >= M >= min(N,K). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] K */ +/* > \verbatim */ +/* > K is INTEGER */ +/* > If VECT = 'Q', the number of columns in the original M-by-K */ +/* > matrix reduced by DGEBRD. */ +/* > If VECT = 'P', the number of rows in the original K-by-N */ +/* > matrix reduced by DGEBRD. */ +/* > K >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > On entry, the vectors which define the elementary reflectors, */ +/* > as returned by DGEBRD. */ +/* > On exit, the M-by-N matrix Q or P**T. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= max(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TAU */ +/* > \verbatim */ +/* > TAU is DOUBLE PRECISION array, dimension */ +/* > (min(M,K)) if VECT = 'Q' */ +/* > (min(N,K)) if VECT = 'P' */ +/* > TAU(i) must contain the scalar factor of the elementary */ +/* > reflector H(i) or G(i), which determines Q or P**T, as */ +/* > returned by DGEBRD in its array argument TAUQ or TAUP. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. LWORK >= max(1,min(M,N)). */ +/* > For optimum performance LWORK >= min(M,N)*NB, where NB */ +/* > is the optimal blocksize. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup doubleGBcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int dorgbr_(char *vect, integer *m, integer *n, integer *k, + doublereal *a, integer *lda, doublereal *tau, doublereal *work, + integer *lwork, integer *info, ftnlen vect_len) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + integer i__, j, mn; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + integer iinfo; + logical wantq; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen), dorglq_( + integer *, integer *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *, integer *), dorgqr_( + integer *, integer *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *, integer *); + integer lwkopt; + logical lquery; + + +/* -- LAPACK computational routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + wantq = lsame_(vect, (char *)"Q", (ftnlen)1, (ftnlen)1); + mn = min(*m,*n); + lquery = *lwork == -1; + if (! wantq && ! lsame_(vect, (char *)"P", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (*m < 0) { + *info = -2; + } else if (*n < 0 || wantq && (*n > *m || *n < min(*m,*k)) || ! wantq && ( + *m > *n || *m < min(*n,*k))) { + *info = -3; + } else if (*k < 0) { + *info = -4; + } else if (*lda < max(1,*m)) { + *info = -6; + } else if (*lwork < max(1,mn) && ! lquery) { + *info = -9; + } + + if (*info == 0) { + work[1] = 1.; + if (wantq) { + if (*m >= *k) { + dorgqr_(m, n, k, &a[a_offset], lda, &tau[1], &work[1], &c_n1, + &iinfo); + } else { + if (*m > 1) { + i__1 = *m - 1; + i__2 = *m - 1; + i__3 = *m - 1; + dorgqr_(&i__1, &i__2, &i__3, &a[a_offset], lda, &tau[1], & + work[1], &c_n1, &iinfo); + } + } + } else { + if (*k < *n) { + dorglq_(m, n, k, &a[a_offset], lda, &tau[1], &work[1], &c_n1, + &iinfo); + } else { + if (*n > 1) { + i__1 = *n - 1; + i__2 = *n - 1; + i__3 = *n - 1; + dorglq_(&i__1, &i__2, &i__3, &a[a_offset], lda, &tau[1], & + work[1], &c_n1, &iinfo); + } + } + } + lwkopt = (integer) work[1]; + lwkopt = max(lwkopt,mn); + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DORGBR", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + work[1] = (doublereal) lwkopt; + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + work[1] = 1.; + return 0; + } + + if (wantq) { + +/* Form Q, determined by a call to DGEBRD to reduce an m-by-k */ +/* matrix */ + + if (*m >= *k) { + +/* If m >= k, assume m >= n >= k */ + + dorgqr_(m, n, k, &a[a_offset], lda, &tau[1], &work[1], lwork, & + iinfo); + + } else { + +/* If m < k, assume m = n */ + +/* Shift the vectors which define the elementary reflectors one */ +/* column to the right, and set the first row and column of Q */ +/* to those of the unit matrix */ + + for (j = *m; j >= 2; --j) { + a[j * a_dim1 + 1] = 0.; + i__1 = *m; + for (i__ = j + 1; i__ <= i__1; ++i__) { + a[i__ + j * a_dim1] = a[i__ + (j - 1) * a_dim1]; +/* L10: */ + } +/* L20: */ + } + a[a_dim1 + 1] = 1.; + i__1 = *m; + for (i__ = 2; i__ <= i__1; ++i__) { + a[i__ + a_dim1] = 0.; +/* L30: */ + } + if (*m > 1) { + +/* Form Q(2:m,2:m) */ + + i__1 = *m - 1; + i__2 = *m - 1; + i__3 = *m - 1; + dorgqr_(&i__1, &i__2, &i__3, &a[(a_dim1 << 1) + 2], lda, &tau[ + 1], &work[1], lwork, &iinfo); + } + } + } else { + +/* Form P**T, determined by a call to DGEBRD to reduce a k-by-n */ +/* matrix */ + + if (*k < *n) { + +/* If k < n, assume k <= m <= n */ + + dorglq_(m, n, k, &a[a_offset], lda, &tau[1], &work[1], lwork, & + iinfo); + + } else { + +/* If k >= n, assume m = n */ + +/* Shift the vectors which define the elementary reflectors one */ +/* row downward, and set the first row and column of P**T to */ +/* those of the unit matrix */ + + a[a_dim1 + 1] = 1.; + i__1 = *n; + for (i__ = 2; i__ <= i__1; ++i__) { + a[i__ + a_dim1] = 0.; +/* L40: */ + } + i__1 = *n; + for (j = 2; j <= i__1; ++j) { + for (i__ = j - 1; i__ >= 2; --i__) { + a[i__ + j * a_dim1] = a[i__ - 1 + j * a_dim1]; +/* L50: */ + } + a[j * a_dim1 + 1] = 0.; +/* L60: */ + } + if (*n > 1) { + +/* Form P**T(2:n,2:n) */ + + i__1 = *n - 1; + i__2 = *n - 1; + i__3 = *n - 1; + dorglq_(&i__1, &i__2, &i__3, &a[(a_dim1 << 1) + 2], lda, &tau[ + 1], &work[1], lwork, &iinfo); + } + } + } + work[1] = (doublereal) lwkopt; + return 0; + +/* End of DORGBR */ + +} /* dorgbr_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dorgl2.cpp b/lib/linalg/dorgl2.cpp new file mode 100644 index 0000000000..a2cce985f3 --- /dev/null +++ b/lib/linalg/dorgl2.cpp @@ -0,0 +1,254 @@ +/* fortran/dorgl2.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* > \brief \b DORGL2 */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DORGL2 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DORGL2( M, N, K, A, LDA, TAU, WORK, INFO ) */ + +/* .. Scalar Arguments .. */ +/* INTEGER INFO, K, LDA, M, N */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DORGL2 generates an m by n real matrix Q with orthonormal rows, */ +/* > which is defined as the first m rows of a product of k elementary */ +/* > reflectors of order n */ +/* > */ +/* > Q = H(k) . . . H(2) H(1) */ +/* > */ +/* > as returned by DGELQF. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix Q. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix Q. N >= M. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] K */ +/* > \verbatim */ +/* > K is INTEGER */ +/* > The number of elementary reflectors whose product defines the */ +/* > matrix Q. M >= K >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > On entry, the i-th row must contain the vector which defines */ +/* > the elementary reflector H(i), for i = 1,2,...,k, as returned */ +/* > by DGELQF in the first k rows of its array argument A. */ +/* > On exit, the m-by-n matrix Q. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The first dimension of the array A. LDA >= max(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TAU */ +/* > \verbatim */ +/* > TAU is DOUBLE PRECISION array, dimension (K) */ +/* > TAU(i) must contain the scalar factor of the elementary */ +/* > reflector H(i), as returned by DGELQF. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (M) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument has an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup doubleOTHERcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int dorgl2_(integer *m, integer *n, integer *k, doublereal * + a, integer *lda, doublereal *tau, doublereal *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + doublereal d__1; + + /* Local variables */ + integer i__, j, l; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *), dlarf_(char *, integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + ftnlen), xerbla_(char *, integer *, ftnlen); + + +/* -- LAPACK computational routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < *m) { + *info = -2; + } else if (*k < 0 || *k > *m) { + *info = -3; + } else if (*lda < max(1,*m)) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DORGL2", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + if (*m <= 0) { + return 0; + } + + if (*k < *m) { + +/* Initialise rows k+1:m to rows of the unit matrix */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (l = *k + 1; l <= i__2; ++l) { + a[l + j * a_dim1] = 0.; +/* L10: */ + } + if (j > *k && j <= *m) { + a[j + j * a_dim1] = 1.; + } +/* L20: */ + } + } + + for (i__ = *k; i__ >= 1; --i__) { + +/* Apply H(i) to A(i:m,i:n) from the right */ + + if (i__ < *n) { + if (i__ < *m) { + a[i__ + i__ * a_dim1] = 1.; + i__1 = *m - i__; + i__2 = *n - i__ + 1; + dlarf_((char *)"Right", &i__1, &i__2, &a[i__ + i__ * a_dim1], lda, & + tau[i__], &a[i__ + 1 + i__ * a_dim1], lda, &work[1], ( + ftnlen)5); + } + i__1 = *n - i__; + d__1 = -tau[i__]; + dscal_(&i__1, &d__1, &a[i__ + (i__ + 1) * a_dim1], lda); + } + a[i__ + i__ * a_dim1] = 1. - tau[i__]; + +/* Set A(i,1:i-1) to zero */ + + i__1 = i__ - 1; + for (l = 1; l <= i__1; ++l) { + a[i__ + l * a_dim1] = 0.; +/* L30: */ + } +/* L40: */ + } + return 0; + +/* End of DORGL2 */ + +} /* dorgl2_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dorglq.cpp b/lib/linalg/dorglq.cpp new file mode 100644 index 0000000000..3dc2d1986c --- /dev/null +++ b/lib/linalg/dorglq.cpp @@ -0,0 +1,366 @@ +/* fortran/dorglq.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c_n1 = -1; +static integer c__3 = 3; +static integer c__2 = 2; + +/* > \brief \b DORGLQ */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DORGLQ + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DORGLQ( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) */ + +/* .. Scalar Arguments .. */ +/* INTEGER INFO, K, LDA, LWORK, M, N */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DORGLQ generates an M-by-N real matrix Q with orthonormal rows, */ +/* > which is defined as the first M rows of a product of K elementary */ +/* > reflectors of order N */ +/* > */ +/* > Q = H(k) . . . H(2) H(1) */ +/* > */ +/* > as returned by DGELQF. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix Q. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix Q. N >= M. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] K */ +/* > \verbatim */ +/* > K is INTEGER */ +/* > The number of elementary reflectors whose product defines the */ +/* > matrix Q. M >= K >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > On entry, the i-th row must contain the vector which defines */ +/* > the elementary reflector H(i), for i = 1,2,...,k, as returned */ +/* > by DGELQF in the first k rows of its array argument A. */ +/* > On exit, the M-by-N matrix Q. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The first dimension of the array A. LDA >= max(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TAU */ +/* > \verbatim */ +/* > TAU is DOUBLE PRECISION array, dimension (K) */ +/* > TAU(i) must contain the scalar factor of the elementary */ +/* > reflector H(i), as returned by DGELQF. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. LWORK >= max(1,M). */ +/* > For optimum performance LWORK >= M*NB, where NB is */ +/* > the optimal blocksize. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument has an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup doubleOTHERcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int dorglq_(integer *m, integer *n, integer *k, doublereal * + a, integer *lda, doublereal *tau, doublereal *work, integer *lwork, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + integer i__, j, l, ib, nb, ki, kk, nx, iws, nbmin, iinfo; + extern /* Subroutine */ int dorgl2_(integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *), + dlarfb_(char *, char *, char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, integer *, ftnlen, ftnlen, + ftnlen, ftnlen), dlarft_(char *, char *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + ftnlen, ftnlen), xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + integer ldwork, lwkopt; + logical lquery; + + +/* -- LAPACK computational routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + nb = ilaenv_(&c__1, (char *)"DORGLQ", (char *)" ", m, n, k, &c_n1, (ftnlen)6, (ftnlen)1); + lwkopt = max(1,*m) * nb; + work[1] = (doublereal) lwkopt; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < *m) { + *info = -2; + } else if (*k < 0 || *k > *m) { + *info = -3; + } else if (*lda < max(1,*m)) { + *info = -5; + } else if (*lwork < max(1,*m) && ! lquery) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DORGLQ", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m <= 0) { + work[1] = 1.; + return 0; + } + + nbmin = 2; + nx = 0; + iws = *m; + if (nb > 1 && nb < *k) { + +/* Determine when to cross over from blocked to unblocked code. */ + +/* Computing MAX */ + i__1 = 0, i__2 = ilaenv_(&c__3, (char *)"DORGLQ", (char *)" ", m, n, k, &c_n1, ( + ftnlen)6, (ftnlen)1); + nx = max(i__1,i__2); + if (nx < *k) { + +/* Determine if workspace is large enough for blocked code. */ + + ldwork = *m; + iws = ldwork * nb; + if (*lwork < iws) { + +/* Not enough workspace to use optimal NB: reduce NB and */ +/* determine the minimum value of NB. */ + + nb = *lwork / ldwork; +/* Computing MAX */ + i__1 = 2, i__2 = ilaenv_(&c__2, (char *)"DORGLQ", (char *)" ", m, n, k, &c_n1, + (ftnlen)6, (ftnlen)1); + nbmin = max(i__1,i__2); + } + } + } + + if (nb >= nbmin && nb < *k && nx < *k) { + +/* Use blocked code after the last block. */ +/* The first kk rows are handled by the block method. */ + + ki = (*k - nx - 1) / nb * nb; +/* Computing MIN */ + i__1 = *k, i__2 = ki + nb; + kk = min(i__1,i__2); + +/* Set A(kk+1:m,1:kk) to zero. */ + + i__1 = kk; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = kk + 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = 0.; +/* L10: */ + } +/* L20: */ + } + } else { + kk = 0; + } + +/* Use unblocked code for the last or only block. */ + + if (kk < *m) { + i__1 = *m - kk; + i__2 = *n - kk; + i__3 = *k - kk; + dorgl2_(&i__1, &i__2, &i__3, &a[kk + 1 + (kk + 1) * a_dim1], lda, & + tau[kk + 1], &work[1], &iinfo); + } + + if (kk > 0) { + +/* Use blocked code */ + + i__1 = -nb; + for (i__ = ki + 1; i__1 < 0 ? i__ >= 1 : i__ <= 1; i__ += i__1) { +/* Computing MIN */ + i__2 = nb, i__3 = *k - i__ + 1; + ib = min(i__2,i__3); + if (i__ + ib <= *m) { + +/* Form the triangular factor of the block reflector */ +/* H = H(i) H(i+1) . . . H(i+ib-1) */ + + i__2 = *n - i__ + 1; + dlarft_((char *)"Forward", (char *)"Rowwise", &i__2, &ib, &a[i__ + i__ * + a_dim1], lda, &tau[i__], &work[1], &ldwork, (ftnlen)7, + (ftnlen)7); + +/* Apply H**T to A(i+ib:m,i:n) from the right */ + + i__2 = *m - i__ - ib + 1; + i__3 = *n - i__ + 1; + dlarfb_((char *)"Right", (char *)"Transpose", (char *)"Forward", (char *)"Rowwise", &i__2, & + i__3, &ib, &a[i__ + i__ * a_dim1], lda, &work[1], & + ldwork, &a[i__ + ib + i__ * a_dim1], lda, &work[ib + + 1], &ldwork, (ftnlen)5, (ftnlen)9, (ftnlen)7, (ftnlen) + 7); + } + +/* Apply H**T to columns i:n of current block */ + + i__2 = *n - i__ + 1; + dorgl2_(&ib, &i__2, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], & + work[1], &iinfo); + +/* Set columns 1:i-1 of current block to zero */ + + i__2 = i__ - 1; + for (j = 1; j <= i__2; ++j) { + i__3 = i__ + ib - 1; + for (l = i__; l <= i__3; ++l) { + a[l + j * a_dim1] = 0.; +/* L30: */ + } +/* L40: */ + } +/* L50: */ + } + } + + work[1] = (doublereal) iws; + return 0; + +/* End of DORGLQ */ + +} /* dorglq_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dorgql.cpp b/lib/linalg/dorgql.cpp new file mode 100644 index 0000000000..22a9b05b4b --- /dev/null +++ b/lib/linalg/dorgql.cpp @@ -0,0 +1,376 @@ +/* fortran/dorgql.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c_n1 = -1; +static integer c__3 = 3; +static integer c__2 = 2; + +/* > \brief \b DORGQL */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DORGQL + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DORGQL( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) */ + +/* .. Scalar Arguments .. */ +/* INTEGER INFO, K, LDA, LWORK, M, N */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DORGQL generates an M-by-N real matrix Q with orthonormal columns, */ +/* > which is defined as the last N columns of a product of K elementary */ +/* > reflectors of order M */ +/* > */ +/* > Q = H(k) . . . H(2) H(1) */ +/* > */ +/* > as returned by DGEQLF. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix Q. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix Q. M >= N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] K */ +/* > \verbatim */ +/* > K is INTEGER */ +/* > The number of elementary reflectors whose product defines the */ +/* > matrix Q. N >= K >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > On entry, the (n-k+i)-th column must contain the vector which */ +/* > defines the elementary reflector H(i), for i = 1,2,...,k, as */ +/* > returned by DGEQLF in the last k columns of its array */ +/* > argument A. */ +/* > On exit, the M-by-N matrix Q. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The first dimension of the array A. LDA >= max(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TAU */ +/* > \verbatim */ +/* > TAU is DOUBLE PRECISION array, dimension (K) */ +/* > TAU(i) must contain the scalar factor of the elementary */ +/* > reflector H(i), as returned by DGEQLF. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. LWORK >= max(1,N). */ +/* > For optimum performance LWORK >= N*NB, where NB is the */ +/* > optimal blocksize. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument has an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup doubleOTHERcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int dorgql_(integer *m, integer *n, integer *k, doublereal * + a, integer *lda, doublereal *tau, doublereal *work, integer *lwork, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + integer i__, j, l, ib, nb, kk, nx, iws, nbmin, iinfo; + extern /* Subroutine */ int dorg2l_(integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *), + dlarfb_(char *, char *, char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, integer *, ftnlen, ftnlen, + ftnlen, ftnlen), dlarft_(char *, char *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + ftnlen, ftnlen), xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + integer ldwork, lwkopt; + logical lquery; + + +/* -- LAPACK computational routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0 || *n > *m) { + *info = -2; + } else if (*k < 0 || *k > *n) { + *info = -3; + } else if (*lda < max(1,*m)) { + *info = -5; + } + + if (*info == 0) { + if (*n == 0) { + lwkopt = 1; + } else { + nb = ilaenv_(&c__1, (char *)"DORGQL", (char *)" ", m, n, k, &c_n1, (ftnlen)6, ( + ftnlen)1); + lwkopt = *n * nb; + } + work[1] = (doublereal) lwkopt; + + if (*lwork < max(1,*n) && ! lquery) { + *info = -8; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DORGQL", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n <= 0) { + return 0; + } + + nbmin = 2; + nx = 0; + iws = *n; + if (nb > 1 && nb < *k) { + +/* Determine when to cross over from blocked to unblocked code. */ + +/* Computing MAX */ + i__1 = 0, i__2 = ilaenv_(&c__3, (char *)"DORGQL", (char *)" ", m, n, k, &c_n1, ( + ftnlen)6, (ftnlen)1); + nx = max(i__1,i__2); + if (nx < *k) { + +/* Determine if workspace is large enough for blocked code. */ + + ldwork = *n; + iws = ldwork * nb; + if (*lwork < iws) { + +/* Not enough workspace to use optimal NB: reduce NB and */ +/* determine the minimum value of NB. */ + + nb = *lwork / ldwork; +/* Computing MAX */ + i__1 = 2, i__2 = ilaenv_(&c__2, (char *)"DORGQL", (char *)" ", m, n, k, &c_n1, + (ftnlen)6, (ftnlen)1); + nbmin = max(i__1,i__2); + } + } + } + + if (nb >= nbmin && nb < *k && nx < *k) { + +/* Use blocked code after the first block. */ +/* The last kk columns are handled by the block method. */ + +/* Computing MIN */ + i__1 = *k, i__2 = (*k - nx + nb - 1) / nb * nb; + kk = min(i__1,i__2); + +/* Set A(m-kk+1:m,1:n-kk) to zero. */ + + i__1 = *n - kk; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = *m - kk + 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = 0.; +/* L10: */ + } +/* L20: */ + } + } else { + kk = 0; + } + +/* Use unblocked code for the first or only block. */ + + i__1 = *m - kk; + i__2 = *n - kk; + i__3 = *k - kk; + dorg2l_(&i__1, &i__2, &i__3, &a[a_offset], lda, &tau[1], &work[1], &iinfo) + ; + + if (kk > 0) { + +/* Use blocked code */ + + i__1 = *k; + i__2 = nb; + for (i__ = *k - kk + 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += + i__2) { +/* Computing MIN */ + i__3 = nb, i__4 = *k - i__ + 1; + ib = min(i__3,i__4); + if (*n - *k + i__ > 1) { + +/* Form the triangular factor of the block reflector */ +/* H = H(i+ib-1) . . . H(i+1) H(i) */ + + i__3 = *m - *k + i__ + ib - 1; + dlarft_((char *)"Backward", (char *)"Columnwise", &i__3, &ib, &a[(*n - *k + + i__) * a_dim1 + 1], lda, &tau[i__], &work[1], &ldwork, + (ftnlen)8, (ftnlen)10); + +/* Apply H to A(1:m-k+i+ib-1,1:n-k+i-1) from the left */ + + i__3 = *m - *k + i__ + ib - 1; + i__4 = *n - *k + i__ - 1; + dlarfb_((char *)"Left", (char *)"No transpose", (char *)"Backward", (char *)"Columnwise", & + i__3, &i__4, &ib, &a[(*n - *k + i__) * a_dim1 + 1], + lda, &work[1], &ldwork, &a[a_offset], lda, &work[ib + + 1], &ldwork, (ftnlen)4, (ftnlen)12, (ftnlen)8, ( + ftnlen)10); + } + +/* Apply H to rows 1:m-k+i+ib-1 of current block */ + + i__3 = *m - *k + i__ + ib - 1; + dorg2l_(&i__3, &ib, &ib, &a[(*n - *k + i__) * a_dim1 + 1], lda, & + tau[i__], &work[1], &iinfo); + +/* Set rows m-k+i+ib:m of current block to zero */ + + i__3 = *n - *k + i__ + ib - 1; + for (j = *n - *k + i__; j <= i__3; ++j) { + i__4 = *m; + for (l = *m - *k + i__ + ib; l <= i__4; ++l) { + a[l + j * a_dim1] = 0.; +/* L30: */ + } +/* L40: */ + } +/* L50: */ + } + } + + work[1] = (doublereal) iws; + return 0; + +/* End of DORGQL */ + +} /* dorgql_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dorgqr.cpp b/lib/linalg/dorgqr.cpp new file mode 100644 index 0000000000..30f175f707 --- /dev/null +++ b/lib/linalg/dorgqr.cpp @@ -0,0 +1,367 @@ +/* fortran/dorgqr.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c_n1 = -1; +static integer c__3 = 3; +static integer c__2 = 2; + +/* > \brief \b DORGQR */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DORGQR + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DORGQR( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) */ + +/* .. Scalar Arguments .. */ +/* INTEGER INFO, K, LDA, LWORK, M, N */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DORGQR generates an M-by-N real matrix Q with orthonormal columns, */ +/* > which is defined as the first N columns of a product of K elementary */ +/* > reflectors of order M */ +/* > */ +/* > Q = H(1) H(2) . . . H(k) */ +/* > */ +/* > as returned by DGEQRF. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix Q. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix Q. M >= N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] K */ +/* > \verbatim */ +/* > K is INTEGER */ +/* > The number of elementary reflectors whose product defines the */ +/* > matrix Q. N >= K >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > On entry, the i-th column must contain the vector which */ +/* > defines the elementary reflector H(i), for i = 1,2,...,k, as */ +/* > returned by DGEQRF in the first k columns of its array */ +/* > argument A. */ +/* > On exit, the M-by-N matrix Q. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The first dimension of the array A. LDA >= max(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TAU */ +/* > \verbatim */ +/* > TAU is DOUBLE PRECISION array, dimension (K) */ +/* > TAU(i) must contain the scalar factor of the elementary */ +/* > reflector H(i), as returned by DGEQRF. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. LWORK >= max(1,N). */ +/* > For optimum performance LWORK >= N*NB, where NB is the */ +/* > optimal blocksize. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument has an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup doubleOTHERcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int dorgqr_(integer *m, integer *n, integer *k, doublereal * + a, integer *lda, doublereal *tau, doublereal *work, integer *lwork, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + integer i__, j, l, ib, nb, ki, kk, nx, iws, nbmin, iinfo; + extern /* Subroutine */ int dorg2r_(integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *), + dlarfb_(char *, char *, char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, integer *, ftnlen, ftnlen, + ftnlen, ftnlen), dlarft_(char *, char *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + ftnlen, ftnlen), xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + integer ldwork, lwkopt; + logical lquery; + + +/* -- LAPACK computational routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + nb = ilaenv_(&c__1, (char *)"DORGQR", (char *)" ", m, n, k, &c_n1, (ftnlen)6, (ftnlen)1); + lwkopt = max(1,*n) * nb; + work[1] = (doublereal) lwkopt; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0 || *n > *m) { + *info = -2; + } else if (*k < 0 || *k > *n) { + *info = -3; + } else if (*lda < max(1,*m)) { + *info = -5; + } else if (*lwork < max(1,*n) && ! lquery) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DORGQR", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n <= 0) { + work[1] = 1.; + return 0; + } + + nbmin = 2; + nx = 0; + iws = *n; + if (nb > 1 && nb < *k) { + +/* Determine when to cross over from blocked to unblocked code. */ + +/* Computing MAX */ + i__1 = 0, i__2 = ilaenv_(&c__3, (char *)"DORGQR", (char *)" ", m, n, k, &c_n1, ( + ftnlen)6, (ftnlen)1); + nx = max(i__1,i__2); + if (nx < *k) { + +/* Determine if workspace is large enough for blocked code. */ + + ldwork = *n; + iws = ldwork * nb; + if (*lwork < iws) { + +/* Not enough workspace to use optimal NB: reduce NB and */ +/* determine the minimum value of NB. */ + + nb = *lwork / ldwork; +/* Computing MAX */ + i__1 = 2, i__2 = ilaenv_(&c__2, (char *)"DORGQR", (char *)" ", m, n, k, &c_n1, + (ftnlen)6, (ftnlen)1); + nbmin = max(i__1,i__2); + } + } + } + + if (nb >= nbmin && nb < *k && nx < *k) { + +/* Use blocked code after the last block. */ +/* The first kk columns are handled by the block method. */ + + ki = (*k - nx - 1) / nb * nb; +/* Computing MIN */ + i__1 = *k, i__2 = ki + nb; + kk = min(i__1,i__2); + +/* Set A(1:kk,kk+1:n) to zero. */ + + i__1 = *n; + for (j = kk + 1; j <= i__1; ++j) { + i__2 = kk; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = 0.; +/* L10: */ + } +/* L20: */ + } + } else { + kk = 0; + } + +/* Use unblocked code for the last or only block. */ + + if (kk < *n) { + i__1 = *m - kk; + i__2 = *n - kk; + i__3 = *k - kk; + dorg2r_(&i__1, &i__2, &i__3, &a[kk + 1 + (kk + 1) * a_dim1], lda, & + tau[kk + 1], &work[1], &iinfo); + } + + if (kk > 0) { + +/* Use blocked code */ + + i__1 = -nb; + for (i__ = ki + 1; i__1 < 0 ? i__ >= 1 : i__ <= 1; i__ += i__1) { +/* Computing MIN */ + i__2 = nb, i__3 = *k - i__ + 1; + ib = min(i__2,i__3); + if (i__ + ib <= *n) { + +/* Form the triangular factor of the block reflector */ +/* H = H(i) H(i+1) . . . H(i+ib-1) */ + + i__2 = *m - i__ + 1; + dlarft_((char *)"Forward", (char *)"Columnwise", &i__2, &ib, &a[i__ + i__ * + a_dim1], lda, &tau[i__], &work[1], &ldwork, (ftnlen)7, + (ftnlen)10); + +/* Apply H to A(i:m,i+ib:n) from the left */ + + i__2 = *m - i__ + 1; + i__3 = *n - i__ - ib + 1; + dlarfb_((char *)"Left", (char *)"No transpose", (char *)"Forward", (char *)"Columnwise", & + i__2, &i__3, &ib, &a[i__ + i__ * a_dim1], lda, &work[ + 1], &ldwork, &a[i__ + (i__ + ib) * a_dim1], lda, & + work[ib + 1], &ldwork, (ftnlen)4, (ftnlen)12, (ftnlen) + 7, (ftnlen)10); + } + +/* Apply H to rows i:m of current block */ + + i__2 = *m - i__ + 1; + dorg2r_(&i__2, &ib, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], & + work[1], &iinfo); + +/* Set rows 1:i-1 of current block to zero */ + + i__2 = i__ + ib - 1; + for (j = i__; j <= i__2; ++j) { + i__3 = i__ - 1; + for (l = 1; l <= i__3; ++l) { + a[l + j * a_dim1] = 0.; +/* L30: */ + } +/* L40: */ + } +/* L50: */ + } + } + + work[1] = (doublereal) iws; + return 0; + +/* End of DORGQR */ + +} /* dorgqr_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dorgtr.cpp b/lib/linalg/dorgtr.cpp new file mode 100644 index 0000000000..7686c56d19 --- /dev/null +++ b/lib/linalg/dorgtr.cpp @@ -0,0 +1,331 @@ +/* fortran/dorgtr.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c_n1 = -1; + +/* > \brief \b DORGTR */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DORGTR + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DORGTR( UPLO, N, A, LDA, TAU, WORK, LWORK, INFO ) */ + +/* .. Scalar Arguments .. */ +/* CHARACTER UPLO */ +/* INTEGER INFO, LDA, LWORK, N */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DORGTR generates a real orthogonal matrix Q which is defined as the */ +/* > product of n-1 elementary reflectors of order N, as returned by */ +/* > DSYTRD: */ +/* > */ +/* > if UPLO = 'U', Q = H(n-1) . . . H(2) H(1), */ +/* > */ +/* > if UPLO = 'L', Q = H(1) H(2) . . . H(n-1). */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': Upper triangle of A contains elementary reflectors */ +/* > from DSYTRD; */ +/* > = 'L': Lower triangle of A contains elementary reflectors */ +/* > from DSYTRD. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix Q. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > On entry, the vectors which define the elementary reflectors, */ +/* > as returned by DSYTRD. */ +/* > On exit, the N-by-N orthogonal matrix Q. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= max(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TAU */ +/* > \verbatim */ +/* > TAU is DOUBLE PRECISION array, dimension (N-1) */ +/* > TAU(i) must contain the scalar factor of the elementary */ +/* > reflector H(i), as returned by DSYTRD. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. LWORK >= max(1,N-1). */ +/* > For optimum performance LWORK >= (N-1)*NB, where NB is */ +/* > the optimal blocksize. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup doubleOTHERcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int dorgtr_(char *uplo, integer *n, doublereal *a, integer * + lda, doublereal *tau, doublereal *work, integer *lwork, integer *info, + ftnlen uplo_len) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + integer i__, j, nb; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + integer iinfo; + logical upper; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int dorgql_(integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + integer *), dorgqr_(integer *, integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, integer *); + integer lwkopt; + logical lquery; + + +/* -- LAPACK computational routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + lquery = *lwork == -1; + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + if (! upper && ! lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = 1, i__2 = *n - 1; + if (*lwork < max(i__1,i__2) && ! lquery) { + *info = -7; + } + } + + if (*info == 0) { + if (upper) { + i__1 = *n - 1; + i__2 = *n - 1; + i__3 = *n - 1; + nb = ilaenv_(&c__1, (char *)"DORGQL", (char *)" ", &i__1, &i__2, &i__3, &c_n1, ( + ftnlen)6, (ftnlen)1); + } else { + i__1 = *n - 1; + i__2 = *n - 1; + i__3 = *n - 1; + nb = ilaenv_(&c__1, (char *)"DORGQR", (char *)" ", &i__1, &i__2, &i__3, &c_n1, ( + ftnlen)6, (ftnlen)1); + } +/* Computing MAX */ + i__1 = 1, i__2 = *n - 1; + lwkopt = max(i__1,i__2) * nb; + work[1] = (doublereal) lwkopt; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DORGTR", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + work[1] = 1.; + return 0; + } + + if (upper) { + +/* Q was determined by a call to DSYTRD with UPLO = 'U' */ + +/* Shift the vectors which define the elementary reflectors one */ +/* column to the left, and set the last row and column of Q to */ +/* those of the unit matrix */ + + i__1 = *n - 1; + for (j = 1; j <= i__1; ++j) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = a[i__ + (j + 1) * a_dim1]; +/* L10: */ + } + a[*n + j * a_dim1] = 0.; +/* L20: */ + } + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + a[i__ + *n * a_dim1] = 0.; +/* L30: */ + } + a[*n + *n * a_dim1] = 1.; + +/* Generate Q(1:n-1,1:n-1) */ + + i__1 = *n - 1; + i__2 = *n - 1; + i__3 = *n - 1; + dorgql_(&i__1, &i__2, &i__3, &a[a_offset], lda, &tau[1], &work[1], + lwork, &iinfo); + + } else { + +/* Q was determined by a call to DSYTRD with UPLO = 'L'. */ + +/* Shift the vectors which define the elementary reflectors one */ +/* column to the right, and set the first row and column of Q to */ +/* those of the unit matrix */ + + for (j = *n; j >= 2; --j) { + a[j * a_dim1 + 1] = 0.; + i__1 = *n; + for (i__ = j + 1; i__ <= i__1; ++i__) { + a[i__ + j * a_dim1] = a[i__ + (j - 1) * a_dim1]; +/* L40: */ + } +/* L50: */ + } + a[a_dim1 + 1] = 1.; + i__1 = *n; + for (i__ = 2; i__ <= i__1; ++i__) { + a[i__ + a_dim1] = 0.; +/* L60: */ + } + if (*n > 1) { + +/* Generate Q(2:n,2:n) */ + + i__1 = *n - 1; + i__2 = *n - 1; + i__3 = *n - 1; + dorgqr_(&i__1, &i__2, &i__3, &a[(a_dim1 << 1) + 2], lda, &tau[1], + &work[1], lwork, &iinfo); + } + } + work[1] = (doublereal) lwkopt; + return 0; + +/* End of DORGTR */ + +} /* dorgtr_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dorm2l.cpp b/lib/linalg/dorm2l.cpp new file mode 100644 index 0000000000..6669a81614 --- /dev/null +++ b/lib/linalg/dorm2l.cpp @@ -0,0 +1,324 @@ +/* fortran/dorm2l.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +/* > \brief \b DORM2L multiplies a general matrix by the orthogonal matrix from a QL factorization determined +by sgeqlf (unblocked algorithm). */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DORM2L + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DORM2L( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, */ +/* WORK, INFO ) */ + +/* .. Scalar Arguments .. */ +/* CHARACTER SIDE, TRANS */ +/* INTEGER INFO, K, LDA, LDC, M, N */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DORM2L overwrites the general real m by n matrix C with */ +/* > */ +/* > Q * C if SIDE = 'L' and TRANS = 'N', or */ +/* > */ +/* > Q**T * C if SIDE = 'L' and TRANS = 'T', or */ +/* > */ +/* > C * Q if SIDE = 'R' and TRANS = 'N', or */ +/* > */ +/* > C * Q**T if SIDE = 'R' and TRANS = 'T', */ +/* > */ +/* > where Q is a real orthogonal matrix defined as the product of k */ +/* > elementary reflectors */ +/* > */ +/* > Q = H(k) . . . H(2) H(1) */ +/* > */ +/* > as returned by DGEQLF. Q is of order m if SIDE = 'L' and of order n */ +/* > if SIDE = 'R'. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] SIDE */ +/* > \verbatim */ +/* > SIDE is CHARACTER*1 */ +/* > = 'L': apply Q or Q**T from the Left */ +/* > = 'R': apply Q or Q**T from the Right */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TRANS */ +/* > \verbatim */ +/* > TRANS is CHARACTER*1 */ +/* > = 'N': apply Q (No transpose) */ +/* > = 'T': apply Q**T (Transpose) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix C. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix C. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] K */ +/* > \verbatim */ +/* > K is INTEGER */ +/* > The number of elementary reflectors whose product defines */ +/* > the matrix Q. */ +/* > If SIDE = 'L', M >= K >= 0; */ +/* > if SIDE = 'R', N >= K >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,K) */ +/* > The i-th column must contain the vector which defines the */ +/* > elementary reflector H(i), for i = 1,2,...,k, as returned by */ +/* > DGEQLF in the last k columns of its array argument A. */ +/* > A is modified by the routine but restored on exit. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. */ +/* > If SIDE = 'L', LDA >= max(1,M); */ +/* > if SIDE = 'R', LDA >= max(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TAU */ +/* > \verbatim */ +/* > TAU is DOUBLE PRECISION array, dimension (K) */ +/* > TAU(i) must contain the scalar factor of the elementary */ +/* > reflector H(i), as returned by DGEQLF. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] C */ +/* > \verbatim */ +/* > C is DOUBLE PRECISION array, dimension (LDC,N) */ +/* > On entry, the m by n matrix C. */ +/* > On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDC */ +/* > \verbatim */ +/* > LDC is INTEGER */ +/* > The leading dimension of the array C. LDC >= max(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension */ +/* > (N) if SIDE = 'L', */ +/* > (M) if SIDE = 'R' */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup doubleOTHERcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int dorm2l_(char *side, char *trans, integer *m, integer *n, + integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * + c__, integer *ldc, doublereal *work, integer *info, ftnlen side_len, + ftnlen trans_len) +{ + /* System generated locals */ + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2; + + /* Local variables */ + integer i__, i1, i2, i3, mi, ni, nq; + doublereal aii; + logical left; + extern /* Subroutine */ int dlarf_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *, ftnlen); + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + logical notran; + + +/* -- LAPACK computational routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = lsame_(side, (char *)"L", (ftnlen)1, (ftnlen)1); + notran = lsame_(trans, (char *)"N", (ftnlen)1, (ftnlen)1); + +/* NQ is the order of Q */ + + if (left) { + nq = *m; + } else { + nq = *n; + } + if (! left && ! lsame_(side, (char *)"R", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (! notran && ! lsame_(trans, (char *)"T", (ftnlen)1, (ftnlen)1)) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*k < 0 || *k > nq) { + *info = -5; + } else if (*lda < max(1,nq)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DORM2L", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || *k == 0) { + return 0; + } + + if (left && notran || ! left && ! notran) { + i1 = 1; + i2 = *k; + i3 = 1; + } else { + i1 = *k; + i2 = 1; + i3 = -1; + } + + if (left) { + ni = *n; + } else { + mi = *m; + } + + i__1 = i2; + i__2 = i3; + for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { + if (left) { + +/* H(i) is applied to C(1:m-k+i,1:n) */ + + mi = *m - *k + i__; + } else { + +/* H(i) is applied to C(1:m,1:n-k+i) */ + + ni = *n - *k + i__; + } + +/* Apply H(i) */ + + aii = a[nq - *k + i__ + i__ * a_dim1]; + a[nq - *k + i__ + i__ * a_dim1] = 1.; + dlarf_(side, &mi, &ni, &a[i__ * a_dim1 + 1], &c__1, &tau[i__], &c__[ + c_offset], ldc, &work[1], (ftnlen)1); + a[nq - *k + i__ + i__ * a_dim1] = aii; +/* L10: */ + } + return 0; + +/* End of DORM2L */ + +} /* dorm2l_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dorm2r.cpp b/lib/linalg/dorm2r.cpp new file mode 100644 index 0000000000..13a74e8fe1 --- /dev/null +++ b/lib/linalg/dorm2r.cpp @@ -0,0 +1,328 @@ +/* fortran/dorm2r.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +/* > \brief \b DORM2R multiplies a general matrix by the orthogonal matrix from a QR factorization determined +by sgeqrf (unblocked algorithm). */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DORM2R + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DORM2R( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, */ +/* WORK, INFO ) */ + +/* .. Scalar Arguments .. */ +/* CHARACTER SIDE, TRANS */ +/* INTEGER INFO, K, LDA, LDC, M, N */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DORM2R overwrites the general real m by n matrix C with */ +/* > */ +/* > Q * C if SIDE = 'L' and TRANS = 'N', or */ +/* > */ +/* > Q**T* C if SIDE = 'L' and TRANS = 'T', or */ +/* > */ +/* > C * Q if SIDE = 'R' and TRANS = 'N', or */ +/* > */ +/* > C * Q**T if SIDE = 'R' and TRANS = 'T', */ +/* > */ +/* > where Q is a real orthogonal matrix defined as the product of k */ +/* > elementary reflectors */ +/* > */ +/* > Q = H(1) H(2) . . . H(k) */ +/* > */ +/* > as returned by DGEQRF. Q is of order m if SIDE = 'L' and of order n */ +/* > if SIDE = 'R'. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] SIDE */ +/* > \verbatim */ +/* > SIDE is CHARACTER*1 */ +/* > = 'L': apply Q or Q**T from the Left */ +/* > = 'R': apply Q or Q**T from the Right */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TRANS */ +/* > \verbatim */ +/* > TRANS is CHARACTER*1 */ +/* > = 'N': apply Q (No transpose) */ +/* > = 'T': apply Q**T (Transpose) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix C. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix C. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] K */ +/* > \verbatim */ +/* > K is INTEGER */ +/* > The number of elementary reflectors whose product defines */ +/* > the matrix Q. */ +/* > If SIDE = 'L', M >= K >= 0; */ +/* > if SIDE = 'R', N >= K >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,K) */ +/* > The i-th column must contain the vector which defines the */ +/* > elementary reflector H(i), for i = 1,2,...,k, as returned by */ +/* > DGEQRF in the first k columns of its array argument A. */ +/* > A is modified by the routine but restored on exit. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. */ +/* > If SIDE = 'L', LDA >= max(1,M); */ +/* > if SIDE = 'R', LDA >= max(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TAU */ +/* > \verbatim */ +/* > TAU is DOUBLE PRECISION array, dimension (K) */ +/* > TAU(i) must contain the scalar factor of the elementary */ +/* > reflector H(i), as returned by DGEQRF. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] C */ +/* > \verbatim */ +/* > C is DOUBLE PRECISION array, dimension (LDC,N) */ +/* > On entry, the m by n matrix C. */ +/* > On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDC */ +/* > \verbatim */ +/* > LDC is INTEGER */ +/* > The leading dimension of the array C. LDC >= max(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension */ +/* > (N) if SIDE = 'L', */ +/* > (M) if SIDE = 'R' */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup doubleOTHERcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int dorm2r_(char *side, char *trans, integer *m, integer *n, + integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * + c__, integer *ldc, doublereal *work, integer *info, ftnlen side_len, + ftnlen trans_len) +{ + /* System generated locals */ + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2; + + /* Local variables */ + integer i__, i1, i2, i3, ic, jc, mi, ni, nq; + doublereal aii; + logical left; + extern /* Subroutine */ int dlarf_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *, ftnlen); + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + logical notran; + + +/* -- LAPACK computational routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = lsame_(side, (char *)"L", (ftnlen)1, (ftnlen)1); + notran = lsame_(trans, (char *)"N", (ftnlen)1, (ftnlen)1); + +/* NQ is the order of Q */ + + if (left) { + nq = *m; + } else { + nq = *n; + } + if (! left && ! lsame_(side, (char *)"R", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (! notran && ! lsame_(trans, (char *)"T", (ftnlen)1, (ftnlen)1)) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*k < 0 || *k > nq) { + *info = -5; + } else if (*lda < max(1,nq)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DORM2R", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || *k == 0) { + return 0; + } + + if (left && ! notran || ! left && notran) { + i1 = 1; + i2 = *k; + i3 = 1; + } else { + i1 = *k; + i2 = 1; + i3 = -1; + } + + if (left) { + ni = *n; + jc = 1; + } else { + mi = *m; + ic = 1; + } + + i__1 = i2; + i__2 = i3; + for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { + if (left) { + +/* H(i) is applied to C(i:m,1:n) */ + + mi = *m - i__ + 1; + ic = i__; + } else { + +/* H(i) is applied to C(1:m,i:n) */ + + ni = *n - i__ + 1; + jc = i__; + } + +/* Apply H(i) */ + + aii = a[i__ + i__ * a_dim1]; + a[i__ + i__ * a_dim1] = 1.; + dlarf_(side, &mi, &ni, &a[i__ + i__ * a_dim1], &c__1, &tau[i__], &c__[ + ic + jc * c_dim1], ldc, &work[1], (ftnlen)1); + a[i__ + i__ * a_dim1] = aii; +/* L10: */ + } + return 0; + +/* End of DORM2R */ + +} /* dorm2r_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dormbr.cpp b/lib/linalg/dormbr.cpp new file mode 100644 index 0000000000..e00ec3ecb0 --- /dev/null +++ b/lib/linalg/dormbr.cpp @@ -0,0 +1,464 @@ +/* fortran/dormbr.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c_n1 = -1; +static integer c__2 = 2; + +/* > \brief \b DORMBR */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DORMBR + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DORMBR( VECT, SIDE, TRANS, M, N, K, A, LDA, TAU, C, */ +/* LDC, WORK, LWORK, INFO ) */ + +/* .. Scalar Arguments .. */ +/* CHARACTER SIDE, TRANS, VECT */ +/* INTEGER INFO, K, LDA, LDC, LWORK, M, N */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > If VECT = 'Q', DORMBR overwrites the general real M-by-N matrix C */ +/* > with */ +/* > SIDE = 'L' SIDE = 'R' */ +/* > TRANS = 'N': Q * C C * Q */ +/* > TRANS = 'T': Q**T * C C * Q**T */ +/* > */ +/* > If VECT = 'P', DORMBR overwrites the general real M-by-N matrix C */ +/* > with */ +/* > SIDE = 'L' SIDE = 'R' */ +/* > TRANS = 'N': P * C C * P */ +/* > TRANS = 'T': P**T * C C * P**T */ +/* > */ +/* > Here Q and P**T are the orthogonal matrices determined by DGEBRD when */ +/* > reducing a real matrix A to bidiagonal form: A = Q * B * P**T. Q and */ +/* > P**T are defined as products of elementary reflectors H(i) and G(i) */ +/* > respectively. */ +/* > */ +/* > Let nq = m if SIDE = 'L' and nq = n if SIDE = 'R'. Thus nq is the */ +/* > order of the orthogonal matrix Q or P**T that is applied. */ +/* > */ +/* > If VECT = 'Q', A is assumed to have been an NQ-by-K matrix: */ +/* > if nq >= k, Q = H(1) H(2) . . . H(k); */ +/* > if nq < k, Q = H(1) H(2) . . . H(nq-1). */ +/* > */ +/* > If VECT = 'P', A is assumed to have been a K-by-NQ matrix: */ +/* > if k < nq, P = G(1) G(2) . . . G(k); */ +/* > if k >= nq, P = G(1) G(2) . . . G(nq-1). */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] VECT */ +/* > \verbatim */ +/* > VECT is CHARACTER*1 */ +/* > = 'Q': apply Q or Q**T; */ +/* > = 'P': apply P or P**T. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] SIDE */ +/* > \verbatim */ +/* > SIDE is CHARACTER*1 */ +/* > = 'L': apply Q, Q**T, P or P**T from the Left; */ +/* > = 'R': apply Q, Q**T, P or P**T from the Right. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TRANS */ +/* > \verbatim */ +/* > TRANS is CHARACTER*1 */ +/* > = 'N': No transpose, apply Q or P; */ +/* > = 'T': Transpose, apply Q**T or P**T. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix C. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix C. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] K */ +/* > \verbatim */ +/* > K is INTEGER */ +/* > If VECT = 'Q', the number of columns in the original */ +/* > matrix reduced by DGEBRD. */ +/* > If VECT = 'P', the number of rows in the original */ +/* > matrix reduced by DGEBRD. */ +/* > K >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension */ +/* > (LDA,min(nq,K)) if VECT = 'Q' */ +/* > (LDA,nq) if VECT = 'P' */ +/* > The vectors which define the elementary reflectors H(i) and */ +/* > G(i), whose products determine the matrices Q and P, as */ +/* > returned by DGEBRD. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. */ +/* > If VECT = 'Q', LDA >= max(1,nq); */ +/* > if VECT = 'P', LDA >= max(1,min(nq,K)). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TAU */ +/* > \verbatim */ +/* > TAU is DOUBLE PRECISION array, dimension (min(nq,K)) */ +/* > TAU(i) must contain the scalar factor of the elementary */ +/* > reflector H(i) or G(i) which determines Q or P, as returned */ +/* > by DGEBRD in the array argument TAUQ or TAUP. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] C */ +/* > \verbatim */ +/* > C is DOUBLE PRECISION array, dimension (LDC,N) */ +/* > On entry, the M-by-N matrix C. */ +/* > On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q */ +/* > or P*C or P**T*C or C*P or C*P**T. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDC */ +/* > \verbatim */ +/* > LDC is INTEGER */ +/* > The leading dimension of the array C. LDC >= max(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. */ +/* > If SIDE = 'L', LWORK >= max(1,N); */ +/* > if SIDE = 'R', LWORK >= max(1,M). */ +/* > For optimum performance LWORK >= N*NB if SIDE = 'L', and */ +/* > LWORK >= M*NB if SIDE = 'R', where NB is the optimal */ +/* > blocksize. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup doubleOTHERcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int dormbr_(char *vect, char *side, char *trans, integer *m, + integer *n, integer *k, doublereal *a, integer *lda, doublereal *tau, + doublereal *c__, integer *ldc, doublereal *work, integer *lwork, + integer *info, ftnlen vect_len, ftnlen side_len, ftnlen trans_len) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2]; + char ch__1[2]; + + /* Builtin functions */ + /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); + + /* Local variables */ + integer i1, i2, nb, mi, ni, nq, nw; + logical left; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + integer iinfo; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int dormlq_(char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *, ftnlen, ftnlen); + logical notran; + extern /* Subroutine */ int dormqr_(char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *, ftnlen, ftnlen); + logical applyq; + char transt[1]; + integer lwkopt; + logical lquery; + + +/* -- LAPACK computational routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + applyq = lsame_(vect, (char *)"Q", (ftnlen)1, (ftnlen)1); + left = lsame_(side, (char *)"L", (ftnlen)1, (ftnlen)1); + notran = lsame_(trans, (char *)"N", (ftnlen)1, (ftnlen)1); + lquery = *lwork == -1; + +/* NQ is the order of Q or P and NW is the minimum dimension of WORK */ + + if (left) { + nq = *m; + nw = max(1,*n); + } else { + nq = *n; + nw = max(1,*m); + } + if (! applyq && ! lsame_(vect, (char *)"P", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (! left && ! lsame_(side, (char *)"R", (ftnlen)1, (ftnlen)1)) { + *info = -2; + } else if (! notran && ! lsame_(trans, (char *)"T", (ftnlen)1, (ftnlen)1)) { + *info = -3; + } else if (*m < 0) { + *info = -4; + } else if (*n < 0) { + *info = -5; + } else if (*k < 0) { + *info = -6; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = 1, i__2 = min(nq,*k); + if (applyq && *lda < max(1,nq) || ! applyq && *lda < max(i__1,i__2)) { + *info = -8; + } else if (*ldc < max(1,*m)) { + *info = -11; + } else if (*lwork < nw && ! lquery) { + *info = -13; + } + } + + if (*info == 0) { + if (applyq) { + if (left) { +/* Writing concatenation */ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = *m - 1; + i__2 = *m - 1; + nb = ilaenv_(&c__1, (char *)"DORMQR", ch__1, &i__1, n, &i__2, &c_n1, ( + ftnlen)6, (ftnlen)2); + } else { +/* Writing concatenation */ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = *n - 1; + i__2 = *n - 1; + nb = ilaenv_(&c__1, (char *)"DORMQR", ch__1, m, &i__1, &i__2, &c_n1, ( + ftnlen)6, (ftnlen)2); + } + } else { + if (left) { +/* Writing concatenation */ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = *m - 1; + i__2 = *m - 1; + nb = ilaenv_(&c__1, (char *)"DORMLQ", ch__1, &i__1, n, &i__2, &c_n1, ( + ftnlen)6, (ftnlen)2); + } else { +/* Writing concatenation */ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = *n - 1; + i__2 = *n - 1; + nb = ilaenv_(&c__1, (char *)"DORMLQ", ch__1, m, &i__1, &i__2, &c_n1, ( + ftnlen)6, (ftnlen)2); + } + } + lwkopt = nw * nb; + work[1] = (doublereal) lwkopt; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DORMBR", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + work[1] = 1.; + if (*m == 0 || *n == 0) { + return 0; + } + + if (applyq) { + +/* Apply Q */ + + if (nq >= *k) { + +/* Q was determined by a call to DGEBRD with nq >= k */ + + dormqr_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ + c_offset], ldc, &work[1], lwork, &iinfo, (ftnlen)1, ( + ftnlen)1); + } else if (nq > 1) { + +/* Q was determined by a call to DGEBRD with nq < k */ + + if (left) { + mi = *m - 1; + ni = *n; + i1 = 2; + i2 = 1; + } else { + mi = *m; + ni = *n - 1; + i1 = 1; + i2 = 2; + } + i__1 = nq - 1; + dormqr_(side, trans, &mi, &ni, &i__1, &a[a_dim1 + 2], lda, &tau[1] + , &c__[i1 + i2 * c_dim1], ldc, &work[1], lwork, &iinfo, ( + ftnlen)1, (ftnlen)1); + } + } else { + +/* Apply P */ + + if (notran) { + *(unsigned char *)transt = 'T'; + } else { + *(unsigned char *)transt = 'N'; + } + if (nq > *k) { + +/* P was determined by a call to DGEBRD with nq > k */ + + dormlq_(side, transt, m, n, k, &a[a_offset], lda, &tau[1], &c__[ + c_offset], ldc, &work[1], lwork, &iinfo, (ftnlen)1, ( + ftnlen)1); + } else if (nq > 1) { + +/* P was determined by a call to DGEBRD with nq <= k */ + + if (left) { + mi = *m - 1; + ni = *n; + i1 = 2; + i2 = 1; + } else { + mi = *m; + ni = *n - 1; + i1 = 1; + i2 = 2; + } + i__1 = nq - 1; + dormlq_(side, transt, &mi, &ni, &i__1, &a[(a_dim1 << 1) + 1], lda, + &tau[1], &c__[i1 + i2 * c_dim1], ldc, &work[1], lwork, & + iinfo, (ftnlen)1, (ftnlen)1); + } + } + work[1] = (doublereal) lwkopt; + return 0; + +/* End of DORMBR */ + +} /* dormbr_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dorml2.cpp b/lib/linalg/dorml2.cpp new file mode 100644 index 0000000000..9085a5a0ce --- /dev/null +++ b/lib/linalg/dorml2.cpp @@ -0,0 +1,324 @@ +/* fortran/dorml2.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* > \brief \b DORML2 multiplies a general matrix by the orthogonal matrix from a LQ factorization determined +by sgelqf (unblocked algorithm). */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DORML2 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DORML2( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, */ +/* WORK, INFO ) */ + +/* .. Scalar Arguments .. */ +/* CHARACTER SIDE, TRANS */ +/* INTEGER INFO, K, LDA, LDC, M, N */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DORML2 overwrites the general real m by n matrix C with */ +/* > */ +/* > Q * C if SIDE = 'L' and TRANS = 'N', or */ +/* > */ +/* > Q**T* C if SIDE = 'L' and TRANS = 'T', or */ +/* > */ +/* > C * Q if SIDE = 'R' and TRANS = 'N', or */ +/* > */ +/* > C * Q**T if SIDE = 'R' and TRANS = 'T', */ +/* > */ +/* > where Q is a real orthogonal matrix defined as the product of k */ +/* > elementary reflectors */ +/* > */ +/* > Q = H(k) . . . H(2) H(1) */ +/* > */ +/* > as returned by DGELQF. Q is of order m if SIDE = 'L' and of order n */ +/* > if SIDE = 'R'. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] SIDE */ +/* > \verbatim */ +/* > SIDE is CHARACTER*1 */ +/* > = 'L': apply Q or Q**T from the Left */ +/* > = 'R': apply Q or Q**T from the Right */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TRANS */ +/* > \verbatim */ +/* > TRANS is CHARACTER*1 */ +/* > = 'N': apply Q (No transpose) */ +/* > = 'T': apply Q**T (Transpose) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix C. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix C. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] K */ +/* > \verbatim */ +/* > K is INTEGER */ +/* > The number of elementary reflectors whose product defines */ +/* > the matrix Q. */ +/* > If SIDE = 'L', M >= K >= 0; */ +/* > if SIDE = 'R', N >= K >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension */ +/* > (LDA,M) if SIDE = 'L', */ +/* > (LDA,N) if SIDE = 'R' */ +/* > The i-th row must contain the vector which defines the */ +/* > elementary reflector H(i), for i = 1,2,...,k, as returned by */ +/* > DGELQF in the first k rows of its array argument A. */ +/* > A is modified by the routine but restored on exit. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= max(1,K). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TAU */ +/* > \verbatim */ +/* > TAU is DOUBLE PRECISION array, dimension (K) */ +/* > TAU(i) must contain the scalar factor of the elementary */ +/* > reflector H(i), as returned by DGELQF. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] C */ +/* > \verbatim */ +/* > C is DOUBLE PRECISION array, dimension (LDC,N) */ +/* > On entry, the m by n matrix C. */ +/* > On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDC */ +/* > \verbatim */ +/* > LDC is INTEGER */ +/* > The leading dimension of the array C. LDC >= max(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension */ +/* > (N) if SIDE = 'L', */ +/* > (M) if SIDE = 'R' */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup doubleOTHERcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int dorml2_(char *side, char *trans, integer *m, integer *n, + integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * + c__, integer *ldc, doublereal *work, integer *info, ftnlen side_len, + ftnlen trans_len) +{ + /* System generated locals */ + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2; + + /* Local variables */ + integer i__, i1, i2, i3, ic, jc, mi, ni, nq; + doublereal aii; + logical left; + extern /* Subroutine */ int dlarf_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *, ftnlen); + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + logical notran; + + +/* -- LAPACK computational routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = lsame_(side, (char *)"L", (ftnlen)1, (ftnlen)1); + notran = lsame_(trans, (char *)"N", (ftnlen)1, (ftnlen)1); + +/* NQ is the order of Q */ + + if (left) { + nq = *m; + } else { + nq = *n; + } + if (! left && ! lsame_(side, (char *)"R", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (! notran && ! lsame_(trans, (char *)"T", (ftnlen)1, (ftnlen)1)) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*k < 0 || *k > nq) { + *info = -5; + } else if (*lda < max(1,*k)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DORML2", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || *k == 0) { + return 0; + } + + if (left && notran || ! left && ! notran) { + i1 = 1; + i2 = *k; + i3 = 1; + } else { + i1 = *k; + i2 = 1; + i3 = -1; + } + + if (left) { + ni = *n; + jc = 1; + } else { + mi = *m; + ic = 1; + } + + i__1 = i2; + i__2 = i3; + for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { + if (left) { + +/* H(i) is applied to C(i:m,1:n) */ + + mi = *m - i__ + 1; + ic = i__; + } else { + +/* H(i) is applied to C(1:m,i:n) */ + + ni = *n - i__ + 1; + jc = i__; + } + +/* Apply H(i) */ + + aii = a[i__ + i__ * a_dim1]; + a[i__ + i__ * a_dim1] = 1.; + dlarf_(side, &mi, &ni, &a[i__ + i__ * a_dim1], lda, &tau[i__], &c__[ + ic + jc * c_dim1], ldc, &work[1], (ftnlen)1); + a[i__ + i__ * a_dim1] = aii; +/* L10: */ + } + return 0; + +/* End of DORML2 */ + +} /* dorml2_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dormlq.cpp b/lib/linalg/dormlq.cpp new file mode 100644 index 0000000000..e5ed90a9fa --- /dev/null +++ b/lib/linalg/dormlq.cpp @@ -0,0 +1,424 @@ +/* fortran/dormlq.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c_n1 = -1; +static integer c__2 = 2; +static integer c__65 = 65; + +/* > \brief \b DORMLQ */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DORMLQ + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DORMLQ( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, */ +/* WORK, LWORK, INFO ) */ + +/* .. Scalar Arguments .. */ +/* CHARACTER SIDE, TRANS */ +/* INTEGER INFO, K, LDA, LDC, LWORK, M, N */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DORMLQ overwrites the general real M-by-N matrix C with */ +/* > */ +/* > SIDE = 'L' SIDE = 'R' */ +/* > TRANS = 'N': Q * C C * Q */ +/* > TRANS = 'T': Q**T * C C * Q**T */ +/* > */ +/* > where Q is a real orthogonal matrix defined as the product of k */ +/* > elementary reflectors */ +/* > */ +/* > Q = H(k) . . . H(2) H(1) */ +/* > */ +/* > as returned by DGELQF. Q is of order M if SIDE = 'L' and of order N */ +/* > if SIDE = 'R'. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] SIDE */ +/* > \verbatim */ +/* > SIDE is CHARACTER*1 */ +/* > = 'L': apply Q or Q**T from the Left; */ +/* > = 'R': apply Q or Q**T from the Right. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TRANS */ +/* > \verbatim */ +/* > TRANS is CHARACTER*1 */ +/* > = 'N': No transpose, apply Q; */ +/* > = 'T': Transpose, apply Q**T. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix C. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix C. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] K */ +/* > \verbatim */ +/* > K is INTEGER */ +/* > The number of elementary reflectors whose product defines */ +/* > the matrix Q. */ +/* > If SIDE = 'L', M >= K >= 0; */ +/* > if SIDE = 'R', N >= K >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension */ +/* > (LDA,M) if SIDE = 'L', */ +/* > (LDA,N) if SIDE = 'R' */ +/* > The i-th row must contain the vector which defines the */ +/* > elementary reflector H(i), for i = 1,2,...,k, as returned by */ +/* > DGELQF in the first k rows of its array argument A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= max(1,K). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TAU */ +/* > \verbatim */ +/* > TAU is DOUBLE PRECISION array, dimension (K) */ +/* > TAU(i) must contain the scalar factor of the elementary */ +/* > reflector H(i), as returned by DGELQF. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] C */ +/* > \verbatim */ +/* > C is DOUBLE PRECISION array, dimension (LDC,N) */ +/* > On entry, the M-by-N matrix C. */ +/* > On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDC */ +/* > \verbatim */ +/* > LDC is INTEGER */ +/* > The leading dimension of the array C. LDC >= max(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. */ +/* > If SIDE = 'L', LWORK >= max(1,N); */ +/* > if SIDE = 'R', LWORK >= max(1,M). */ +/* > For good performance, LWORK should generally be larger. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup doubleOTHERcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int dormlq_(char *side, char *trans, integer *m, integer *n, + integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * + c__, integer *ldc, doublereal *work, integer *lwork, integer *info, + ftnlen side_len, ftnlen trans_len) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2], i__4, + i__5; + char ch__1[2]; + + /* Builtin functions */ + /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); + + /* Local variables */ + integer i__, i1, i2, i3, ib, ic, jc, nb, mi, ni, nq, nw, iwt; + logical left; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + integer nbmin, iinfo; + extern /* Subroutine */ int dorml2_(char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, ftnlen, ftnlen), dlarfb_(char + *, char *, char *, char *, integer *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *, doublereal *, integer *, ftnlen, ftnlen, ftnlen, + ftnlen), dlarft_(char *, char *, integer *, integer *, doublereal + *, integer *, doublereal *, doublereal *, integer *, ftnlen, + ftnlen), xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + logical notran; + integer ldwork; + char transt[1]; + integer lwkopt; + logical lquery; + + +/* -- LAPACK computational routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = lsame_(side, (char *)"L", (ftnlen)1, (ftnlen)1); + notran = lsame_(trans, (char *)"N", (ftnlen)1, (ftnlen)1); + lquery = *lwork == -1; + +/* NQ is the order of Q and NW is the minimum dimension of WORK */ + + if (left) { + nq = *m; + nw = max(1,*n); + } else { + nq = *n; + nw = max(1,*m); + } + if (! left && ! lsame_(side, (char *)"R", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (! notran && ! lsame_(trans, (char *)"T", (ftnlen)1, (ftnlen)1)) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*k < 0 || *k > nq) { + *info = -5; + } else if (*lda < max(1,*k)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } else if (*lwork < nw && ! lquery) { + *info = -12; + } + + if (*info == 0) { + +/* Compute the workspace requirements */ + +/* Computing MIN */ +/* Writing concatenation */ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = 64, i__2 = ilaenv_(&c__1, (char *)"DORMLQ", ch__1, m, n, k, &c_n1, ( + ftnlen)6, (ftnlen)2); + nb = min(i__1,i__2); + lwkopt = nw * nb + 4160; + work[1] = (doublereal) lwkopt; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DORMLQ", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || *k == 0) { + work[1] = 1.; + return 0; + } + + nbmin = 2; + ldwork = nw; + if (nb > 1 && nb < *k) { + if (*lwork < lwkopt) { + nb = (*lwork - 4160) / ldwork; +/* Computing MAX */ +/* Writing concatenation */ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = 2, i__2 = ilaenv_(&c__2, (char *)"DORMLQ", ch__1, m, n, k, &c_n1, ( + ftnlen)6, (ftnlen)2); + nbmin = max(i__1,i__2); + } + } + + if (nb < nbmin || nb >= *k) { + +/* Use unblocked code */ + + dorml2_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ + c_offset], ldc, &work[1], &iinfo, (ftnlen)1, (ftnlen)1); + } else { + +/* Use blocked code */ + + iwt = nw * nb + 1; + if (left && notran || ! left && ! notran) { + i1 = 1; + i2 = *k; + i3 = nb; + } else { + i1 = (*k - 1) / nb * nb + 1; + i2 = 1; + i3 = -nb; + } + + if (left) { + ni = *n; + jc = 1; + } else { + mi = *m; + ic = 1; + } + + if (notran) { + *(unsigned char *)transt = 'T'; + } else { + *(unsigned char *)transt = 'N'; + } + + i__1 = i2; + i__2 = i3; + for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__4 = nb, i__5 = *k - i__ + 1; + ib = min(i__4,i__5); + +/* Form the triangular factor of the block reflector */ +/* H = H(i) H(i+1) . . . H(i+ib-1) */ + + i__4 = nq - i__ + 1; + dlarft_((char *)"Forward", (char *)"Rowwise", &i__4, &ib, &a[i__ + i__ * a_dim1], + lda, &tau[i__], &work[iwt], &c__65, (ftnlen)7, (ftnlen)7); + if (left) { + +/* H or H**T is applied to C(i:m,1:n) */ + + mi = *m - i__ + 1; + ic = i__; + } else { + +/* H or H**T is applied to C(1:m,i:n) */ + + ni = *n - i__ + 1; + jc = i__; + } + +/* Apply H or H**T */ + + dlarfb_(side, transt, (char *)"Forward", (char *)"Rowwise", &mi, &ni, &ib, &a[i__ + + i__ * a_dim1], lda, &work[iwt], &c__65, &c__[ic + jc * + c_dim1], ldc, &work[1], &ldwork, (ftnlen)1, (ftnlen)1, ( + ftnlen)7, (ftnlen)7); +/* L10: */ + } + } + work[1] = (doublereal) lwkopt; + return 0; + +/* End of DORMLQ */ + +} /* dormlq_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dormql.cpp b/lib/linalg/dormql.cpp new file mode 100644 index 0000000000..735f9e3d2b --- /dev/null +++ b/lib/linalg/dormql.cpp @@ -0,0 +1,416 @@ +/* fortran/dormql.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c_n1 = -1; +static integer c__2 = 2; +static integer c__65 = 65; + +/* > \brief \b DORMQL */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DORMQL + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DORMQL( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, */ +/* WORK, LWORK, INFO ) */ + +/* .. Scalar Arguments .. */ +/* CHARACTER SIDE, TRANS */ +/* INTEGER INFO, K, LDA, LDC, LWORK, M, N */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DORMQL overwrites the general real M-by-N matrix C with */ +/* > */ +/* > SIDE = 'L' SIDE = 'R' */ +/* > TRANS = 'N': Q * C C * Q */ +/* > TRANS = 'T': Q**T * C C * Q**T */ +/* > */ +/* > where Q is a real orthogonal matrix defined as the product of k */ +/* > elementary reflectors */ +/* > */ +/* > Q = H(k) . . . H(2) H(1) */ +/* > */ +/* > as returned by DGEQLF. Q is of order M if SIDE = 'L' and of order N */ +/* > if SIDE = 'R'. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] SIDE */ +/* > \verbatim */ +/* > SIDE is CHARACTER*1 */ +/* > = 'L': apply Q or Q**T from the Left; */ +/* > = 'R': apply Q or Q**T from the Right. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TRANS */ +/* > \verbatim */ +/* > TRANS is CHARACTER*1 */ +/* > = 'N': No transpose, apply Q; */ +/* > = 'T': Transpose, apply Q**T. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix C. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix C. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] K */ +/* > \verbatim */ +/* > K is INTEGER */ +/* > The number of elementary reflectors whose product defines */ +/* > the matrix Q. */ +/* > If SIDE = 'L', M >= K >= 0; */ +/* > if SIDE = 'R', N >= K >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,K) */ +/* > The i-th column must contain the vector which defines the */ +/* > elementary reflector H(i), for i = 1,2,...,k, as returned by */ +/* > DGEQLF in the last k columns of its array argument A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. */ +/* > If SIDE = 'L', LDA >= max(1,M); */ +/* > if SIDE = 'R', LDA >= max(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TAU */ +/* > \verbatim */ +/* > TAU is DOUBLE PRECISION array, dimension (K) */ +/* > TAU(i) must contain the scalar factor of the elementary */ +/* > reflector H(i), as returned by DGEQLF. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] C */ +/* > \verbatim */ +/* > C is DOUBLE PRECISION array, dimension (LDC,N) */ +/* > On entry, the M-by-N matrix C. */ +/* > On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDC */ +/* > \verbatim */ +/* > LDC is INTEGER */ +/* > The leading dimension of the array C. LDC >= max(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. */ +/* > If SIDE = 'L', LWORK >= max(1,N); */ +/* > if SIDE = 'R', LWORK >= max(1,M). */ +/* > For good performance, LWORK should generally be larger. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup doubleOTHERcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int dormql_(char *side, char *trans, integer *m, integer *n, + integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * + c__, integer *ldc, doublereal *work, integer *lwork, integer *info, + ftnlen side_len, ftnlen trans_len) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2], i__4, + i__5; + char ch__1[2]; + + /* Builtin functions */ + /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); + + /* Local variables */ + integer i__, i1, i2, i3, ib, nb, mi, ni, nq, nw, iwt; + logical left; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + integer nbmin, iinfo; + extern /* Subroutine */ int dorm2l_(char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, ftnlen, ftnlen), dlarfb_(char + *, char *, char *, char *, integer *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *, doublereal *, integer *, ftnlen, ftnlen, ftnlen, + ftnlen), dlarft_(char *, char *, integer *, integer *, doublereal + *, integer *, doublereal *, doublereal *, integer *, ftnlen, + ftnlen), xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + logical notran; + integer ldwork, lwkopt; + logical lquery; + + +/* -- LAPACK computational routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = lsame_(side, (char *)"L", (ftnlen)1, (ftnlen)1); + notran = lsame_(trans, (char *)"N", (ftnlen)1, (ftnlen)1); + lquery = *lwork == -1; + +/* NQ is the order of Q and NW is the minimum dimension of WORK */ + + if (left) { + nq = *m; + nw = max(1,*n); + } else { + nq = *n; + nw = max(1,*m); + } + if (! left && ! lsame_(side, (char *)"R", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (! notran && ! lsame_(trans, (char *)"T", (ftnlen)1, (ftnlen)1)) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*k < 0 || *k > nq) { + *info = -5; + } else if (*lda < max(1,nq)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } else if (*lwork < nw && ! lquery) { + *info = -12; + } + + if (*info == 0) { + +/* Compute the workspace requirements */ + + if (*m == 0 || *n == 0) { + lwkopt = 1; + } else { +/* Computing MIN */ +/* Writing concatenation */ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = 64, i__2 = ilaenv_(&c__1, (char *)"DORMQL", ch__1, m, n, k, &c_n1, + (ftnlen)6, (ftnlen)2); + nb = min(i__1,i__2); + lwkopt = nw * nb + 4160; + } + work[1] = (doublereal) lwkopt; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DORMQL", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + return 0; + } + + nbmin = 2; + ldwork = nw; + if (nb > 1 && nb < *k) { + if (*lwork < lwkopt) { + nb = (*lwork - 4160) / ldwork; +/* Computing MAX */ +/* Writing concatenation */ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = 2, i__2 = ilaenv_(&c__2, (char *)"DORMQL", ch__1, m, n, k, &c_n1, ( + ftnlen)6, (ftnlen)2); + nbmin = max(i__1,i__2); + } + } + + if (nb < nbmin || nb >= *k) { + +/* Use unblocked code */ + + dorm2l_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ + c_offset], ldc, &work[1], &iinfo, (ftnlen)1, (ftnlen)1); + } else { + +/* Use blocked code */ + + iwt = nw * nb + 1; + if (left && notran || ! left && ! notran) { + i1 = 1; + i2 = *k; + i3 = nb; + } else { + i1 = (*k - 1) / nb * nb + 1; + i2 = 1; + i3 = -nb; + } + + if (left) { + ni = *n; + } else { + mi = *m; + } + + i__1 = i2; + i__2 = i3; + for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__4 = nb, i__5 = *k - i__ + 1; + ib = min(i__4,i__5); + +/* Form the triangular factor of the block reflector */ +/* H = H(i+ib-1) . . . H(i+1) H(i) */ + + i__4 = nq - *k + i__ + ib - 1; + dlarft_((char *)"Backward", (char *)"Columnwise", &i__4, &ib, &a[i__ * a_dim1 + 1] + , lda, &tau[i__], &work[iwt], &c__65, (ftnlen)8, (ftnlen) + 10); + if (left) { + +/* H or H**T is applied to C(1:m-k+i+ib-1,1:n) */ + + mi = *m - *k + i__ + ib - 1; + } else { + +/* H or H**T is applied to C(1:m,1:n-k+i+ib-1) */ + + ni = *n - *k + i__ + ib - 1; + } + +/* Apply H or H**T */ + + dlarfb_(side, trans, (char *)"Backward", (char *)"Columnwise", &mi, &ni, &ib, &a[ + i__ * a_dim1 + 1], lda, &work[iwt], &c__65, &c__[c_offset] + , ldc, &work[1], &ldwork, (ftnlen)1, (ftnlen)1, (ftnlen)8, + (ftnlen)10); +/* L10: */ + } + } + work[1] = (doublereal) lwkopt; + return 0; + +/* End of DORMQL */ + +} /* dormql_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dormqr.cpp b/lib/linalg/dormqr.cpp new file mode 100644 index 0000000000..7fce43b587 --- /dev/null +++ b/lib/linalg/dormqr.cpp @@ -0,0 +1,417 @@ +/* fortran/dormqr.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c_n1 = -1; +static integer c__2 = 2; +static integer c__65 = 65; + +/* > \brief \b DORMQR */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DORMQR + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DORMQR( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, */ +/* WORK, LWORK, INFO ) */ + +/* .. Scalar Arguments .. */ +/* CHARACTER SIDE, TRANS */ +/* INTEGER INFO, K, LDA, LDC, LWORK, M, N */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DORMQR overwrites the general real M-by-N matrix C with */ +/* > */ +/* > SIDE = 'L' SIDE = 'R' */ +/* > TRANS = 'N': Q * C C * Q */ +/* > TRANS = 'T': Q**T * C C * Q**T */ +/* > */ +/* > where Q is a real orthogonal matrix defined as the product of k */ +/* > elementary reflectors */ +/* > */ +/* > Q = H(1) H(2) . . . H(k) */ +/* > */ +/* > as returned by DGEQRF. Q is of order M if SIDE = 'L' and of order N */ +/* > if SIDE = 'R'. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] SIDE */ +/* > \verbatim */ +/* > SIDE is CHARACTER*1 */ +/* > = 'L': apply Q or Q**T from the Left; */ +/* > = 'R': apply Q or Q**T from the Right. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TRANS */ +/* > \verbatim */ +/* > TRANS is CHARACTER*1 */ +/* > = 'N': No transpose, apply Q; */ +/* > = 'T': Transpose, apply Q**T. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix C. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix C. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] K */ +/* > \verbatim */ +/* > K is INTEGER */ +/* > The number of elementary reflectors whose product defines */ +/* > the matrix Q. */ +/* > If SIDE = 'L', M >= K >= 0; */ +/* > if SIDE = 'R', N >= K >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,K) */ +/* > The i-th column must contain the vector which defines the */ +/* > elementary reflector H(i), for i = 1,2,...,k, as returned by */ +/* > DGEQRF in the first k columns of its array argument A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. */ +/* > If SIDE = 'L', LDA >= max(1,M); */ +/* > if SIDE = 'R', LDA >= max(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TAU */ +/* > \verbatim */ +/* > TAU is DOUBLE PRECISION array, dimension (K) */ +/* > TAU(i) must contain the scalar factor of the elementary */ +/* > reflector H(i), as returned by DGEQRF. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] C */ +/* > \verbatim */ +/* > C is DOUBLE PRECISION array, dimension (LDC,N) */ +/* > On entry, the M-by-N matrix C. */ +/* > On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDC */ +/* > \verbatim */ +/* > LDC is INTEGER */ +/* > The leading dimension of the array C. LDC >= max(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. */ +/* > If SIDE = 'L', LWORK >= max(1,N); */ +/* > if SIDE = 'R', LWORK >= max(1,M). */ +/* > For good performance, LWORK should generally be larger. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup doubleOTHERcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int dormqr_(char *side, char *trans, integer *m, integer *n, + integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * + c__, integer *ldc, doublereal *work, integer *lwork, integer *info, + ftnlen side_len, ftnlen trans_len) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2], i__4, + i__5; + char ch__1[2]; + + /* Builtin functions */ + /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); + + /* Local variables */ + integer i__, i1, i2, i3, ib, ic, jc, nb, mi, ni, nq, nw, iwt; + logical left; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + integer nbmin, iinfo; + extern /* Subroutine */ int dorm2r_(char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, ftnlen, ftnlen), dlarfb_(char + *, char *, char *, char *, integer *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *, doublereal *, integer *, ftnlen, ftnlen, ftnlen, + ftnlen), dlarft_(char *, char *, integer *, integer *, doublereal + *, integer *, doublereal *, doublereal *, integer *, ftnlen, + ftnlen), xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + logical notran; + integer ldwork, lwkopt; + logical lquery; + + +/* -- LAPACK computational routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = lsame_(side, (char *)"L", (ftnlen)1, (ftnlen)1); + notran = lsame_(trans, (char *)"N", (ftnlen)1, (ftnlen)1); + lquery = *lwork == -1; + +/* NQ is the order of Q and NW is the minimum dimension of WORK */ + + if (left) { + nq = *m; + nw = max(1,*n); + } else { + nq = *n; + nw = max(1,*m); + } + if (! left && ! lsame_(side, (char *)"R", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (! notran && ! lsame_(trans, (char *)"T", (ftnlen)1, (ftnlen)1)) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*k < 0 || *k > nq) { + *info = -5; + } else if (*lda < max(1,nq)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } else if (*lwork < nw && ! lquery) { + *info = -12; + } + + if (*info == 0) { + +/* Compute the workspace requirements */ + +/* Computing MIN */ +/* Writing concatenation */ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = 64, i__2 = ilaenv_(&c__1, (char *)"DORMQR", ch__1, m, n, k, &c_n1, ( + ftnlen)6, (ftnlen)2); + nb = min(i__1,i__2); + lwkopt = nw * nb + 4160; + work[1] = (doublereal) lwkopt; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DORMQR", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || *k == 0) { + work[1] = 1.; + return 0; + } + + nbmin = 2; + ldwork = nw; + if (nb > 1 && nb < *k) { + if (*lwork < lwkopt) { + nb = (*lwork - 4160) / ldwork; +/* Computing MAX */ +/* Writing concatenation */ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = 2, i__2 = ilaenv_(&c__2, (char *)"DORMQR", ch__1, m, n, k, &c_n1, ( + ftnlen)6, (ftnlen)2); + nbmin = max(i__1,i__2); + } + } + + if (nb < nbmin || nb >= *k) { + +/* Use unblocked code */ + + dorm2r_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ + c_offset], ldc, &work[1], &iinfo, (ftnlen)1, (ftnlen)1); + } else { + +/* Use blocked code */ + + iwt = nw * nb + 1; + if (left && ! notran || ! left && notran) { + i1 = 1; + i2 = *k; + i3 = nb; + } else { + i1 = (*k - 1) / nb * nb + 1; + i2 = 1; + i3 = -nb; + } + + if (left) { + ni = *n; + jc = 1; + } else { + mi = *m; + ic = 1; + } + + i__1 = i2; + i__2 = i3; + for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__4 = nb, i__5 = *k - i__ + 1; + ib = min(i__4,i__5); + +/* Form the triangular factor of the block reflector */ +/* H = H(i) H(i+1) . . . H(i+ib-1) */ + + i__4 = nq - i__ + 1; + dlarft_((char *)"Forward", (char *)"Columnwise", &i__4, &ib, &a[i__ + i__ * + a_dim1], lda, &tau[i__], &work[iwt], &c__65, (ftnlen)7, ( + ftnlen)10); + if (left) { + +/* H or H**T is applied to C(i:m,1:n) */ + + mi = *m - i__ + 1; + ic = i__; + } else { + +/* H or H**T is applied to C(1:m,i:n) */ + + ni = *n - i__ + 1; + jc = i__; + } + +/* Apply H or H**T */ + + dlarfb_(side, trans, (char *)"Forward", (char *)"Columnwise", &mi, &ni, &ib, &a[ + i__ + i__ * a_dim1], lda, &work[iwt], &c__65, &c__[ic + + jc * c_dim1], ldc, &work[1], &ldwork, (ftnlen)1, (ftnlen) + 1, (ftnlen)7, (ftnlen)10); +/* L10: */ + } + } + work[1] = (doublereal) lwkopt; + return 0; + +/* End of DORMQR */ + +} /* dormqr_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dormtr.cpp b/lib/linalg/dormtr.cpp new file mode 100644 index 0000000000..d917e95b06 --- /dev/null +++ b/lib/linalg/dormtr.cpp @@ -0,0 +1,396 @@ +/* fortran/dormtr.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c_n1 = -1; +static integer c__2 = 2; + +/* > \brief \b DORMTR */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DORMTR + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DORMTR( SIDE, UPLO, TRANS, M, N, A, LDA, TAU, C, LDC, */ +/* WORK, LWORK, INFO ) */ + +/* .. Scalar Arguments .. */ +/* CHARACTER SIDE, TRANS, UPLO */ +/* INTEGER INFO, LDA, LDC, LWORK, M, N */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DORMTR overwrites the general real M-by-N matrix C with */ +/* > */ +/* > SIDE = 'L' SIDE = 'R' */ +/* > TRANS = 'N': Q * C C * Q */ +/* > TRANS = 'T': Q**T * C C * Q**T */ +/* > */ +/* > where Q is a real orthogonal matrix of order nq, with nq = m if */ +/* > SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of */ +/* > nq-1 elementary reflectors, as returned by DSYTRD: */ +/* > */ +/* > if UPLO = 'U', Q = H(nq-1) . . . H(2) H(1); */ +/* > */ +/* > if UPLO = 'L', Q = H(1) H(2) . . . H(nq-1). */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] SIDE */ +/* > \verbatim */ +/* > SIDE is CHARACTER*1 */ +/* > = 'L': apply Q or Q**T from the Left; */ +/* > = 'R': apply Q or Q**T from the Right. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': Upper triangle of A contains elementary reflectors */ +/* > from DSYTRD; */ +/* > = 'L': Lower triangle of A contains elementary reflectors */ +/* > from DSYTRD. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TRANS */ +/* > \verbatim */ +/* > TRANS is CHARACTER*1 */ +/* > = 'N': No transpose, apply Q; */ +/* > = 'T': Transpose, apply Q**T. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix C. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix C. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension */ +/* > (LDA,M) if SIDE = 'L' */ +/* > (LDA,N) if SIDE = 'R' */ +/* > The vectors which define the elementary reflectors, as */ +/* > returned by DSYTRD. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. */ +/* > LDA >= max(1,M) if SIDE = 'L'; LDA >= max(1,N) if SIDE = 'R'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TAU */ +/* > \verbatim */ +/* > TAU is DOUBLE PRECISION array, dimension */ +/* > (M-1) if SIDE = 'L' */ +/* > (N-1) if SIDE = 'R' */ +/* > TAU(i) must contain the scalar factor of the elementary */ +/* > reflector H(i), as returned by DSYTRD. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] C */ +/* > \verbatim */ +/* > C is DOUBLE PRECISION array, dimension (LDC,N) */ +/* > On entry, the M-by-N matrix C. */ +/* > On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDC */ +/* > \verbatim */ +/* > LDC is INTEGER */ +/* > The leading dimension of the array C. LDC >= max(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. */ +/* > If SIDE = 'L', LWORK >= max(1,N); */ +/* > if SIDE = 'R', LWORK >= max(1,M). */ +/* > For optimum performance LWORK >= N*NB if SIDE = 'L', and */ +/* > LWORK >= M*NB if SIDE = 'R', where NB is the optimal */ +/* > blocksize. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup doubleOTHERcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int dormtr_(char *side, char *uplo, char *trans, integer *m, + integer *n, doublereal *a, integer *lda, doublereal *tau, doublereal * + c__, integer *ldc, doublereal *work, integer *lwork, integer *info, + ftnlen side_len, ftnlen uplo_len, ftnlen trans_len) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, c_dim1, c_offset, i__1[2], i__2, i__3; + char ch__1[2]; + + /* Builtin functions */ + /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); + + /* Local variables */ + integer i1, i2, nb, mi, ni, nq, nw; + logical left; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + integer iinfo; + logical upper; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int dormql_(char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *, ftnlen, ftnlen), + dormqr_(char *, char *, integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *, integer *, ftnlen, ftnlen); + integer lwkopt; + logical lquery; + + +/* -- LAPACK computational routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = lsame_(side, (char *)"L", (ftnlen)1, (ftnlen)1); + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + lquery = *lwork == -1; + +/* NQ is the order of Q and NW is the minimum dimension of WORK */ + + if (left) { + nq = *m; + nw = max(1,*n); + } else { + nq = *n; + nw = max(1,*m); + } + if (! left && ! lsame_(side, (char *)"R", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (! upper && ! lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + *info = -2; + } else if (! lsame_(trans, (char *)"N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, + (char *)"T", (ftnlen)1, (ftnlen)1)) { + *info = -3; + } else if (*m < 0) { + *info = -4; + } else if (*n < 0) { + *info = -5; + } else if (*lda < max(1,nq)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } else if (*lwork < nw && ! lquery) { + *info = -12; + } + + if (*info == 0) { + if (upper) { + if (left) { +/* Writing concatenation */ + i__1[0] = 1, a__1[0] = side; + i__1[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + i__2 = *m - 1; + i__3 = *m - 1; + nb = ilaenv_(&c__1, (char *)"DORMQL", ch__1, &i__2, n, &i__3, &c_n1, ( + ftnlen)6, (ftnlen)2); + } else { +/* Writing concatenation */ + i__1[0] = 1, a__1[0] = side; + i__1[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + i__2 = *n - 1; + i__3 = *n - 1; + nb = ilaenv_(&c__1, (char *)"DORMQL", ch__1, m, &i__2, &i__3, &c_n1, ( + ftnlen)6, (ftnlen)2); + } + } else { + if (left) { +/* Writing concatenation */ + i__1[0] = 1, a__1[0] = side; + i__1[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + i__2 = *m - 1; + i__3 = *m - 1; + nb = ilaenv_(&c__1, (char *)"DORMQR", ch__1, &i__2, n, &i__3, &c_n1, ( + ftnlen)6, (ftnlen)2); + } else { +/* Writing concatenation */ + i__1[0] = 1, a__1[0] = side; + i__1[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + i__2 = *n - 1; + i__3 = *n - 1; + nb = ilaenv_(&c__1, (char *)"DORMQR", ch__1, m, &i__2, &i__3, &c_n1, ( + ftnlen)6, (ftnlen)2); + } + } + lwkopt = nw * nb; + work[1] = (doublereal) lwkopt; + } + + if (*info != 0) { + i__2 = -(*info); + xerbla_((char *)"DORMTR", &i__2, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || nq == 1) { + work[1] = 1.; + return 0; + } + + if (left) { + mi = *m - 1; + ni = *n; + } else { + mi = *m; + ni = *n - 1; + } + + if (upper) { + +/* Q was determined by a call to DSYTRD with UPLO = 'U' */ + + i__2 = nq - 1; + dormql_(side, trans, &mi, &ni, &i__2, &a[(a_dim1 << 1) + 1], lda, & + tau[1], &c__[c_offset], ldc, &work[1], lwork, &iinfo, (ftnlen) + 1, (ftnlen)1); + } else { + +/* Q was determined by a call to DSYTRD with UPLO = 'L' */ + + if (left) { + i1 = 2; + i2 = 1; + } else { + i1 = 1; + i2 = 2; + } + i__2 = nq - 1; + dormqr_(side, trans, &mi, &ni, &i__2, &a[a_dim1 + 2], lda, &tau[1], & + c__[i1 + i2 * c_dim1], ldc, &work[1], lwork, &iinfo, (ftnlen) + 1, (ftnlen)1); + } + work[1] = (doublereal) lwkopt; + return 0; + +/* End of DORMTR */ + +} /* dormtr_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dposv.cpp b/lib/linalg/dposv.cpp new file mode 100644 index 0000000000..6981ea843d --- /dev/null +++ b/lib/linalg/dposv.cpp @@ -0,0 +1,232 @@ +/* fortran/dposv.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* > \brief DPOSV computes the solution to system of linear equations A * X = B for PO matrices */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DPOSV + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DPOSV( UPLO, N, NRHS, A, LDA, B, LDB, INFO ) */ + +/* .. Scalar Arguments .. */ +/* CHARACTER UPLO */ +/* INTEGER INFO, LDA, LDB, N, NRHS */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION A( LDA, * ), B( LDB, * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DPOSV computes the solution to a real system of linear equations */ +/* > A * X = B, */ +/* > where A is an N-by-N symmetric positive definite matrix and X and B */ +/* > are N-by-NRHS matrices. */ +/* > */ +/* > The Cholesky decomposition is used to factor A as */ +/* > A = U**T* U, if UPLO = 'U', or */ +/* > A = L * L**T, if UPLO = 'L', */ +/* > where U is an upper triangular matrix and L is a lower triangular */ +/* > matrix. The factored form of A is then used to solve the system of */ +/* > equations A * X = B. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': Upper triangle of A is stored; */ +/* > = 'L': Lower triangle of A is stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of linear equations, i.e., the order of the */ +/* > matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of right hand sides, i.e., the number of columns */ +/* > of the matrix B. NRHS >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > On entry, the symmetric matrix A. If UPLO = 'U', the leading */ +/* > N-by-N upper triangular part of A contains the upper */ +/* > triangular part of the matrix A, and the strictly lower */ +/* > triangular part of A is not referenced. If UPLO = 'L', the */ +/* > leading N-by-N lower triangular part of A contains the lower */ +/* > triangular part of the matrix A, and the strictly upper */ +/* > triangular part of A is not referenced. */ +/* > */ +/* > On exit, if INFO = 0, the factor U or L from the Cholesky */ +/* > factorization A = U**T*U or A = L*L**T. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= max(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* > On entry, the N-by-NRHS right hand side matrix B. */ +/* > On exit, if INFO = 0, the N-by-NRHS solution matrix X. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= max(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i, the leading minor of order i of A is not */ +/* > positive definite, so the factorization could not be */ +/* > completed, and the solution has not been computed. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup doublePOsolve */ + +/* ===================================================================== */ +/* Subroutine */ int dposv_(char *uplo, integer *n, integer *nrhs, doublereal + *a, integer *lda, doublereal *b, integer *ldb, integer *info, ftnlen + uplo_len) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1; + + /* Local variables */ + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen), dpotrf_( + char *, integer *, doublereal *, integer *, integer *, ftnlen), + dpotrs_(char *, integer *, integer *, doublereal *, integer *, + doublereal *, integer *, integer *, ftnlen); + + +/* -- LAPACK driver routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + *info = 0; + if (! lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, (char *)"L", ( + ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } else if (*ldb < max(1,*n)) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DPOSV ", &i__1, (ftnlen)6); + return 0; + } + +/* Compute the Cholesky factorization A = U**T*U or A = L*L**T. */ + + dpotrf_(uplo, n, &a[a_offset], lda, info, (ftnlen)1); + if (*info == 0) { + +/* Solve the system A*X = B, overwriting B with X. */ + + dpotrs_(uplo, n, nrhs, &a[a_offset], lda, &b[b_offset], ldb, info, ( + ftnlen)1); + + } + return 0; + +/* End of DPOSV */ + +} /* dposv_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dpotf2.cpp b/lib/linalg/dpotf2.cpp new file mode 100644 index 0000000000..e32a52c97c --- /dev/null +++ b/lib/linalg/dpotf2.cpp @@ -0,0 +1,294 @@ +/* fortran/dpotf2.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static doublereal c_b10 = -1.; +static doublereal c_b12 = 1.; + +/* > \brief \b DPOTF2 computes the Cholesky factorization of a symmetric/Hermitian positive definite matrix (u +nblocked algorithm). */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DPOTF2 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DPOTF2( UPLO, N, A, LDA, INFO ) */ + +/* .. Scalar Arguments .. */ +/* CHARACTER UPLO */ +/* INTEGER INFO, LDA, N */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION A( LDA, * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DPOTF2 computes the Cholesky factorization of a real symmetric */ +/* > positive definite matrix A. */ +/* > */ +/* > The factorization has the form */ +/* > A = U**T * U , if UPLO = 'U', or */ +/* > A = L * L**T, if UPLO = 'L', */ +/* > where U is an upper triangular matrix and L is lower triangular. */ +/* > */ +/* > This is the unblocked version of the algorithm, calling Level 2 BLAS. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > Specifies whether the upper or lower triangular part of the */ +/* > symmetric matrix A is stored. */ +/* > = 'U': Upper triangular */ +/* > = 'L': Lower triangular */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > On entry, the symmetric matrix A. If UPLO = 'U', the leading */ +/* > n by n upper triangular part of A contains the upper */ +/* > triangular part of the matrix A, and the strictly lower */ +/* > triangular part of A is not referenced. If UPLO = 'L', the */ +/* > leading n by n lower triangular part of A contains the lower */ +/* > triangular part of the matrix A, and the strictly upper */ +/* > triangular part of A is not referenced. */ +/* > */ +/* > On exit, if INFO = 0, the factor U or L from the Cholesky */ +/* > factorization A = U**T *U or A = L*L**T. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= max(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -k, the k-th argument had an illegal value */ +/* > > 0: if INFO = k, the leading minor of order k is not */ +/* > positive definite, and the factorization could not be */ +/* > completed. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup doublePOcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int dpotf2_(char *uplo, integer *n, doublereal *a, integer * + lda, integer *info, ftnlen uplo_len) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + doublereal d__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + integer j; + doublereal ajj; + extern doublereal ddot_(integer *, doublereal *, integer *, doublereal *, + integer *); + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *); + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern /* Subroutine */ int dgemv_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *, ftnlen); + logical upper; + extern logical disnan_(doublereal *); + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + + +/* -- LAPACK computational routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + if (! upper && ! lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DPOTF2", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + if (upper) { + +/* Compute the Cholesky factorization A = U**T *U. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + +/* Compute U(J,J) and test for non-positive-definiteness. */ + + i__2 = j - 1; + ajj = a[j + j * a_dim1] - ddot_(&i__2, &a[j * a_dim1 + 1], &c__1, + &a[j * a_dim1 + 1], &c__1); + if (ajj <= 0. || disnan_(&ajj)) { + a[j + j * a_dim1] = ajj; + goto L30; + } + ajj = sqrt(ajj); + a[j + j * a_dim1] = ajj; + +/* Compute elements J+1:N of row J. */ + + if (j < *n) { + i__2 = j - 1; + i__3 = *n - j; + dgemv_((char *)"Transpose", &i__2, &i__3, &c_b10, &a[(j + 1) * a_dim1 + + 1], lda, &a[j * a_dim1 + 1], &c__1, &c_b12, &a[j + ( + j + 1) * a_dim1], lda, (ftnlen)9); + i__2 = *n - j; + d__1 = 1. / ajj; + dscal_(&i__2, &d__1, &a[j + (j + 1) * a_dim1], lda); + } +/* L10: */ + } + } else { + +/* Compute the Cholesky factorization A = L*L**T. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + +/* Compute L(J,J) and test for non-positive-definiteness. */ + + i__2 = j - 1; + ajj = a[j + j * a_dim1] - ddot_(&i__2, &a[j + a_dim1], lda, &a[j + + a_dim1], lda); + if (ajj <= 0. || disnan_(&ajj)) { + a[j + j * a_dim1] = ajj; + goto L30; + } + ajj = sqrt(ajj); + a[j + j * a_dim1] = ajj; + +/* Compute elements J+1:N of column J. */ + + if (j < *n) { + i__2 = *n - j; + i__3 = j - 1; + dgemv_((char *)"No transpose", &i__2, &i__3, &c_b10, &a[j + 1 + + a_dim1], lda, &a[j + a_dim1], lda, &c_b12, &a[j + 1 + + j * a_dim1], &c__1, (ftnlen)12); + i__2 = *n - j; + d__1 = 1. / ajj; + dscal_(&i__2, &d__1, &a[j + 1 + j * a_dim1], &c__1); + } +/* L20: */ + } + } + goto L40; + +L30: + *info = j; + +L40: + return 0; + +/* End of DPOTF2 */ + +} /* dpotf2_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dpotrf.cpp b/lib/linalg/dpotrf.cpp new file mode 100644 index 0000000000..cd343efd77 --- /dev/null +++ b/lib/linalg/dpotrf.cpp @@ -0,0 +1,322 @@ +/* fortran/dpotrf.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c_n1 = -1; +static doublereal c_b13 = -1.; +static doublereal c_b14 = 1.; + +/* > \brief \b DPOTRF */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DPOTRF + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DPOTRF( UPLO, N, A, LDA, INFO ) */ + +/* .. Scalar Arguments .. */ +/* CHARACTER UPLO */ +/* INTEGER INFO, LDA, N */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION A( LDA, * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DPOTRF computes the Cholesky factorization of a real symmetric */ +/* > positive definite matrix A. */ +/* > */ +/* > The factorization has the form */ +/* > A = U**T * U, if UPLO = 'U', or */ +/* > A = L * L**T, if UPLO = 'L', */ +/* > where U is an upper triangular matrix and L is lower triangular. */ +/* > */ +/* > This is the block version of the algorithm, calling Level 3 BLAS. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': Upper triangle of A is stored; */ +/* > = 'L': Lower triangle of A is stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > On entry, the symmetric matrix A. If UPLO = 'U', the leading */ +/* > N-by-N upper triangular part of A contains the upper */ +/* > triangular part of the matrix A, and the strictly lower */ +/* > triangular part of A is not referenced. If UPLO = 'L', the */ +/* > leading N-by-N lower triangular part of A contains the lower */ +/* > triangular part of the matrix A, and the strictly upper */ +/* > triangular part of A is not referenced. */ +/* > */ +/* > On exit, if INFO = 0, the factor U or L from the Cholesky */ +/* > factorization A = U**T*U or A = L*L**T. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= max(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i, the leading minor of order i is not */ +/* > positive definite, and the factorization could not be */ +/* > completed. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup doublePOcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int dpotrf_(char *uplo, integer *n, doublereal *a, integer * + lda, integer *info, ftnlen uplo_len) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + integer j, jb, nb; + extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, ftnlen, ftnlen); + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern /* Subroutine */ int dtrsm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *, ftnlen, ftnlen, ftnlen, ftnlen); + logical upper; + extern /* Subroutine */ int dsyrk_(char *, char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, doublereal *, + integer *, ftnlen, ftnlen), xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int dpotrf2_(char *, integer *, doublereal *, + integer *, integer *, ftnlen); + + +/* -- LAPACK computational routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + if (! upper && ! lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DPOTRF", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Determine the block size for this environment. */ + + nb = ilaenv_(&c__1, (char *)"DPOTRF", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + if (nb <= 1 || nb >= *n) { + +/* Use unblocked code. */ + + dpotrf2_(uplo, n, &a[a_offset], lda, info, (ftnlen)1); + } else { + +/* Use blocked code. */ + + if (upper) { + +/* Compute the Cholesky factorization A = U**T*U. */ + + i__1 = *n; + i__2 = nb; + for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { + +/* Update and factorize the current diagonal block and test */ +/* for non-positive-definiteness. */ + +/* Computing MIN */ + i__3 = nb, i__4 = *n - j + 1; + jb = min(i__3,i__4); + i__3 = j - 1; + dsyrk_((char *)"Upper", (char *)"Transpose", &jb, &i__3, &c_b13, &a[j * + a_dim1 + 1], lda, &c_b14, &a[j + j * a_dim1], lda, ( + ftnlen)5, (ftnlen)9); + dpotrf2_((char *)"Upper", &jb, &a[j + j * a_dim1], lda, info, (ftnlen) + 5); + if (*info != 0) { + goto L30; + } + if (j + jb <= *n) { + +/* Compute the current block row. */ + + i__3 = *n - j - jb + 1; + i__4 = j - 1; + dgemm_((char *)"Transpose", (char *)"No transpose", &jb, &i__3, &i__4, & + c_b13, &a[j * a_dim1 + 1], lda, &a[(j + jb) * + a_dim1 + 1], lda, &c_b14, &a[j + (j + jb) * + a_dim1], lda, (ftnlen)9, (ftnlen)12); + i__3 = *n - j - jb + 1; + dtrsm_((char *)"Left", (char *)"Upper", (char *)"Transpose", (char *)"Non-unit", &jb, & + i__3, &c_b14, &a[j + j * a_dim1], lda, &a[j + (j + + jb) * a_dim1], lda, (ftnlen)4, (ftnlen)5, ( + ftnlen)9, (ftnlen)8); + } +/* L10: */ + } + + } else { + +/* Compute the Cholesky factorization A = L*L**T. */ + + i__2 = *n; + i__1 = nb; + for (j = 1; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) { + +/* Update and factorize the current diagonal block and test */ +/* for non-positive-definiteness. */ + +/* Computing MIN */ + i__3 = nb, i__4 = *n - j + 1; + jb = min(i__3,i__4); + i__3 = j - 1; + dsyrk_((char *)"Lower", (char *)"No transpose", &jb, &i__3, &c_b13, &a[j + + a_dim1], lda, &c_b14, &a[j + j * a_dim1], lda, ( + ftnlen)5, (ftnlen)12); + dpotrf2_((char *)"Lower", &jb, &a[j + j * a_dim1], lda, info, (ftnlen) + 5); + if (*info != 0) { + goto L30; + } + if (j + jb <= *n) { + +/* Compute the current block column. */ + + i__3 = *n - j - jb + 1; + i__4 = j - 1; + dgemm_((char *)"No transpose", (char *)"Transpose", &i__3, &jb, &i__4, & + c_b13, &a[j + jb + a_dim1], lda, &a[j + a_dim1], + lda, &c_b14, &a[j + jb + j * a_dim1], lda, ( + ftnlen)12, (ftnlen)9); + i__3 = *n - j - jb + 1; + dtrsm_((char *)"Right", (char *)"Lower", (char *)"Transpose", (char *)"Non-unit", &i__3, & + jb, &c_b14, &a[j + j * a_dim1], lda, &a[j + jb + + j * a_dim1], lda, (ftnlen)5, (ftnlen)5, (ftnlen)9, + (ftnlen)8); + } +/* L20: */ + } + } + } + goto L40; + +L30: + *info = *info + j - 1; + +L40: + return 0; + +/* End of DPOTRF */ + +} /* dpotrf_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dpotrf2.cpp b/lib/linalg/dpotrf2.cpp new file mode 100644 index 0000000000..9b882bb9e0 --- /dev/null +++ b/lib/linalg/dpotrf2.cpp @@ -0,0 +1,285 @@ +/* static/dpotrf2.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static doublereal c_b9 = 1.; +static doublereal c_b11 = -1.; + +/* > \brief \b DPOTRF2 */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* Definition: */ +/* =========== */ + +/* RECURSIVE SUBROUTINE DPOTRF2( UPLO, N, A, LDA, INFO ) */ + +/* .. Scalar Arguments .. */ +/* CHARACTER UPLO */ +/* INTEGER INFO, LDA, N */ +/* .. */ +/* .. Array Arguments .. */ +/* REAL A( LDA, * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DPOTRF2 computes the Cholesky factorization of a real symmetric */ +/* > positive definite matrix A using the recursive algorithm. */ +/* > */ +/* > The factorization has the form */ +/* > A = U**T * U, if UPLO = 'U', or */ +/* > A = L * L**T, if UPLO = 'L', */ +/* > where U is an upper triangular matrix and L is lower triangular. */ +/* > */ +/* > This is the recursive version of the algorithm. It divides */ +/* > the matrix into four submatrices: */ +/* > */ +/* > [ A11 | A12 ] where A11 is n1 by n1 and A22 is n2 by n2 */ +/* > A = [ -----|----- ] with n1 = n/2 */ +/* > [ A21 | A22 ] n2 = n-n1 */ +/* > */ +/* > The subroutine calls itself to factor A11. Update and scale A21 */ +/* > or A12, update A22 then calls itself to factor A22. */ +/* > */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': Upper triangle of A is stored; */ +/* > = 'L': Lower triangle of A is stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > On entry, the symmetric matrix A. If UPLO = 'U', the leading */ +/* > N-by-N upper triangular part of A contains the upper */ +/* > triangular part of the matrix A, and the strictly lower */ +/* > triangular part of A is not referenced. If UPLO = 'L', the */ +/* > leading N-by-N lower triangular part of A contains the lower */ +/* > triangular part of the matrix A, and the strictly upper */ +/* > triangular part of A is not referenced. */ +/* > */ +/* > On exit, if INFO = 0, the factor U or L from the Cholesky */ +/* > factorization A = U**T*U or A = L*L**T. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= max(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i, the leading minor of order i is not */ +/* > positive definite, and the factorization could not be */ +/* > completed. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup doublePOcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int dpotrf2_(char *uplo, integer *n, doublereal *a, integer * + lda, integer *info, ftnlen uplo_len) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + integer n1, n2; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + integer iinfo; + extern /* Subroutine */ int dtrsm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *, ftnlen, ftnlen, ftnlen, ftnlen); + logical upper; + extern /* Subroutine */ int dsyrk_(char *, char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, doublereal *, + integer *, ftnlen, ftnlen); + extern logical disnan_(doublereal *); + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + + +/* -- LAPACK computational routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + if (! upper && ! lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DPOTRF2", &i__1, (ftnlen)7); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* N=1 case */ + + if (*n == 1) { + +/* Test for non-positive-definiteness */ + + if (a[a_dim1 + 1] <= 0. || disnan_(&a[a_dim1 + 1])) { + *info = 1; + return 0; + } + +/* Factor */ + + a[a_dim1 + 1] = sqrt(a[a_dim1 + 1]); + +/* Use recursive code */ + + } else { + n1 = *n / 2; + n2 = *n - n1; + +/* Factor A11 */ + + dpotrf2_(uplo, &n1, &a[a_dim1 + 1], lda, &iinfo, (ftnlen)1); + if (iinfo != 0) { + *info = iinfo; + return 0; + } + +/* Compute the Cholesky factorization A = U**T*U */ + + if (upper) { + +/* Update and scale A12 */ + + dtrsm_((char *)"L", (char *)"U", (char *)"T", (char *)"N", &n1, &n2, &c_b9, &a[a_dim1 + 1], lda, & + a[(n1 + 1) * a_dim1 + 1], lda, (ftnlen)1, (ftnlen)1, ( + ftnlen)1, (ftnlen)1); + +/* Update and factor A22 */ + + dsyrk_(uplo, (char *)"T", &n2, &n1, &c_b11, &a[(n1 + 1) * a_dim1 + 1], + lda, &c_b9, &a[n1 + 1 + (n1 + 1) * a_dim1], lda, (ftnlen) + 1, (ftnlen)1); + dpotrf2_(uplo, &n2, &a[n1 + 1 + (n1 + 1) * a_dim1], lda, &iinfo, ( + ftnlen)1); + if (iinfo != 0) { + *info = iinfo + n1; + return 0; + } + +/* Compute the Cholesky factorization A = L*L**T */ + + } else { + +/* Update and scale A21 */ + + dtrsm_((char *)"R", (char *)"L", (char *)"T", (char *)"N", &n2, &n1, &c_b9, &a[a_dim1 + 1], lda, & + a[n1 + 1 + a_dim1], lda, (ftnlen)1, (ftnlen)1, (ftnlen)1, + (ftnlen)1); + +/* Update and factor A22 */ + + dsyrk_(uplo, (char *)"N", &n2, &n1, &c_b11, &a[n1 + 1 + a_dim1], lda, & + c_b9, &a[n1 + 1 + (n1 + 1) * a_dim1], lda, (ftnlen)1, ( + ftnlen)1); + dpotrf2_(uplo, &n2, &a[n1 + 1 + (n1 + 1) * a_dim1], lda, &iinfo, ( + ftnlen)1); + if (iinfo != 0) { + *info = iinfo + n1; + return 0; + } + } + } + return 0; + +/* End of DPOTRF2 */ + +} /* dpotrf2_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dpotrs.cpp b/lib/linalg/dpotrs.cpp new file mode 100644 index 0000000000..169f851130 --- /dev/null +++ b/lib/linalg/dpotrs.cpp @@ -0,0 +1,248 @@ +/* fortran/dpotrs.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static doublereal c_b9 = 1.; + +/* > \brief \b DPOTRS */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DPOTRS + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DPOTRS( UPLO, N, NRHS, A, LDA, B, LDB, INFO ) */ + +/* .. Scalar Arguments .. */ +/* CHARACTER UPLO */ +/* INTEGER INFO, LDA, LDB, N, NRHS */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION A( LDA, * ), B( LDB, * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DPOTRS solves a system of linear equations A*X = B with a symmetric */ +/* > positive definite matrix A using the Cholesky factorization */ +/* > A = U**T*U or A = L*L**T computed by DPOTRF. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': Upper triangle of A is stored; */ +/* > = 'L': Lower triangle of A is stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NRHS */ +/* > \verbatim */ +/* > NRHS is INTEGER */ +/* > The number of right hand sides, i.e., the number of columns */ +/* > of the matrix B. NRHS >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > The triangular factor U or L from the Cholesky factorization */ +/* > A = U**T*U or A = L*L**T, as computed by DPOTRF. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= max(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is DOUBLE PRECISION array, dimension (LDB,NRHS) */ +/* > On entry, the right hand side matrix B. */ +/* > On exit, the solution matrix X. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= max(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup doublePOcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int dpotrs_(char *uplo, integer *n, integer *nrhs, + doublereal *a, integer *lda, doublereal *b, integer *ldb, integer * + info, ftnlen uplo_len) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1; + + /* Local variables */ + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern /* Subroutine */ int dtrsm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *, ftnlen, ftnlen, ftnlen, ftnlen); + logical upper; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + + +/* -- LAPACK computational routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + if (! upper && ! lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } else if (*ldb < max(1,*n)) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DPOTRS", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0 || *nrhs == 0) { + return 0; + } + + if (upper) { + +/* Solve A*X = B where A = U**T *U. */ + +/* Solve U**T *X = B, overwriting B with X. */ + + dtrsm_((char *)"Left", (char *)"Upper", (char *)"Transpose", (char *)"Non-unit", n, nrhs, &c_b9, &a[ + a_offset], lda, &b[b_offset], ldb, (ftnlen)4, (ftnlen)5, ( + ftnlen)9, (ftnlen)8); + +/* Solve U*X = B, overwriting B with X. */ + + dtrsm_((char *)"Left", (char *)"Upper", (char *)"No transpose", (char *)"Non-unit", n, nrhs, &c_b9, & + a[a_offset], lda, &b[b_offset], ldb, (ftnlen)4, (ftnlen)5, ( + ftnlen)12, (ftnlen)8); + } else { + +/* Solve A*X = B where A = L*L**T. */ + +/* Solve L*X = B, overwriting B with X. */ + + dtrsm_((char *)"Left", (char *)"Lower", (char *)"No transpose", (char *)"Non-unit", n, nrhs, &c_b9, & + a[a_offset], lda, &b[b_offset], ldb, (ftnlen)4, (ftnlen)5, ( + ftnlen)12, (ftnlen)8); + +/* Solve L**T *X = B, overwriting B with X. */ + + dtrsm_((char *)"Left", (char *)"Lower", (char *)"Transpose", (char *)"Non-unit", n, nrhs, &c_b9, &a[ + a_offset], lda, &b[b_offset], ldb, (ftnlen)4, (ftnlen)5, ( + ftnlen)9, (ftnlen)8); + } + + return 0; + +/* End of DPOTRS */ + +} /* dpotrs_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/drot.cpp b/lib/linalg/drot.cpp new file mode 100644 index 0000000000..efd339cdf0 --- /dev/null +++ b/lib/linalg/drot.cpp @@ -0,0 +1,180 @@ +/* fortran/drot.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* > \brief \b DROT */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DROT(N,DX,INCX,DY,INCY,C,S) */ + +/* .. Scalar Arguments .. */ +/* DOUBLE PRECISION C,S */ +/* INTEGER INCX,INCY,N */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION DX(*),DY(*) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DROT applies a plane rotation. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > number of elements in input vector(s) */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] DX */ +/* > \verbatim */ +/* > DX is DOUBLE PRECISION array, dimension ( 1 + ( N - 1 )*abs( INCX ) ) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] INCX */ +/* > \verbatim */ +/* > INCX is INTEGER */ +/* > storage spacing between elements of DX */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] DY */ +/* > \verbatim */ +/* > DY is DOUBLE PRECISION array, dimension ( 1 + ( N - 1 )*abs( INCY ) ) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] INCY */ +/* > \verbatim */ +/* > INCY is INTEGER */ +/* > storage spacing between elements of DY */ +/* > \endverbatim */ +/* > */ +/* > \param[in] C */ +/* > \verbatim */ +/* > C is DOUBLE PRECISION */ +/* > \endverbatim */ +/* > */ +/* > \param[in] S */ +/* > \verbatim */ +/* > S is DOUBLE PRECISION */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup double_blas_level1 */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > jack dongarra, linpack, 3/11/78. */ +/* > modified 12/3/93, array(1) declarations changed to array(*) */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int drot_(integer *n, doublereal *dx, integer *incx, + doublereal *dy, integer *incy, doublereal *c__, doublereal *s) +{ + /* System generated locals */ + integer i__1; + + /* Local variables */ + integer i__, ix, iy; + doublereal dtemp; + + +/* -- Reference BLAS level1 routine -- */ +/* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ + /* Parameter adjustments */ + --dy; + --dx; + + /* Function Body */ + if (*n <= 0) { + return 0; + } + if (*incx == 1 && *incy == 1) { + +/* code for both increments equal to 1 */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + dtemp = *c__ * dx[i__] + *s * dy[i__]; + dy[i__] = *c__ * dy[i__] - *s * dx[i__]; + dx[i__] = dtemp; + } + } else { + +/* code for unequal increments or equal increments not equal */ +/* to 1 */ + + ix = 1; + iy = 1; + if (*incx < 0) { + ix = (-(*n) + 1) * *incx + 1; + } + if (*incy < 0) { + iy = (-(*n) + 1) * *incy + 1; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + dtemp = *c__ * dx[ix] + *s * dy[iy]; + dy[iy] = *c__ * dy[iy] - *s * dx[ix]; + dx[ix] = dtemp; + ix += *incx; + iy += *incy; + } + } + return 0; + +/* End of DROT */ + +} /* drot_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/drscl.cpp b/lib/linalg/drscl.cpp new file mode 100644 index 0000000000..041c7fbba3 --- /dev/null +++ b/lib/linalg/drscl.cpp @@ -0,0 +1,200 @@ +/* fortran/drscl.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* > \brief \b DRSCL multiplies a vector by the reciprocal of a real scalar. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DRSCL + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DRSCL( N, SA, SX, INCX ) */ + +/* .. Scalar Arguments .. */ +/* INTEGER INCX, N */ +/* DOUBLE PRECISION SA */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION SX( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DRSCL multiplies an n-element real vector x by the real scalar 1/a. */ +/* > This is done without overflow or underflow as long as */ +/* > the final result x/a does not overflow or underflow. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of components of the vector x. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] SA */ +/* > \verbatim */ +/* > SA is DOUBLE PRECISION */ +/* > The scalar a which is used to divide each component of x. */ +/* > SA must be >= 0, or the subroutine will divide by zero. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] SX */ +/* > \verbatim */ +/* > SX is DOUBLE PRECISION array, dimension */ +/* > (1+(N-1)*abs(INCX)) */ +/* > The n-element vector x. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] INCX */ +/* > \verbatim */ +/* > INCX is INTEGER */ +/* > The increment between successive values of the vector SX. */ +/* > > 0: SX(1) = X(1) and SX(1+(i-1)*INCX) = x(i), 1< i<= n */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup doubleOTHERauxiliary */ + +/* ===================================================================== */ +/* Subroutine */ int drscl_(integer *n, doublereal *sa, doublereal *sx, + integer *incx) +{ + doublereal mul, cden; + logical done; + doublereal cnum, cden1, cnum1; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *), dlabad_(doublereal *, doublereal *); + extern doublereal dlamch_(char *, ftnlen); + doublereal bignum, smlnum; + + +/* -- LAPACK auxiliary routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Quick return if possible */ + + /* Parameter adjustments */ + --sx; + + /* Function Body */ + if (*n <= 0) { + return 0; + } + +/* Get machine parameters */ + + smlnum = dlamch_((char *)"S", (ftnlen)1); + bignum = 1. / smlnum; + dlabad_(&smlnum, &bignum); + +/* Initialize the denominator to SA and the numerator to 1. */ + + cden = *sa; + cnum = 1.; + +L10: + cden1 = cden * smlnum; + cnum1 = cnum / bignum; + if (abs(cden1) > abs(cnum) && cnum != 0.) { + +/* Pre-multiply X by SMLNUM if CDEN is large compared to CNUM. */ + + mul = smlnum; + done = FALSE_; + cden = cden1; + } else if (abs(cnum1) > abs(cden)) { + +/* Pre-multiply X by BIGNUM if CDEN is small compared to CNUM. */ + + mul = bignum; + done = FALSE_; + cnum = cnum1; + } else { + +/* Multiply X by CNUM / CDEN and return. */ + + mul = cnum / cden; + done = TRUE_; + } + +/* Scale the vector X by MUL */ + + dscal_(n, &mul, &sx[1], incx); + + if (! done) { + goto L10; + } + + return 0; + +/* End of DRSCL */ + +} /* drscl_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dscal.cpp b/lib/linalg/dscal.cpp new file mode 100644 index 0000000000..d1fc1d43ee --- /dev/null +++ b/lib/linalg/dscal.cpp @@ -0,0 +1,173 @@ +/* fortran/dscal.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* > \brief \b DSCAL */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DSCAL(N,DA,DX,INCX) */ + +/* .. Scalar Arguments .. */ +/* DOUBLE PRECISION DA */ +/* INTEGER INCX,N */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION DX(*) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DSCAL scales a vector by a constant. */ +/* > uses unrolled loops for increment equal to 1. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > number of elements in input vector(s) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DA */ +/* > \verbatim */ +/* > DA is DOUBLE PRECISION */ +/* > On entry, DA specifies the scalar alpha. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] DX */ +/* > \verbatim */ +/* > DX is DOUBLE PRECISION array, dimension ( 1 + ( N - 1 )*abs( INCX ) ) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] INCX */ +/* > \verbatim */ +/* > INCX is INTEGER */ +/* > storage spacing between elements of DX */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup double_blas_level1 */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > jack dongarra, linpack, 3/11/78. */ +/* > modified 3/93 to return if incx .le. 0. */ +/* > modified 12/3/93, array(1) declarations changed to array(*) */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int dscal_(integer *n, doublereal *da, doublereal *dx, + integer *incx) +{ + /* System generated locals */ + integer i__1, i__2; + + /* Local variables */ + integer i__, m, mp1, nincx; + + +/* -- Reference BLAS level1 routine -- */ +/* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. Parameters .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ + /* Parameter adjustments */ + --dx; + + /* Function Body */ + if (*n <= 0 || *incx <= 0 || *da == 1.) { + return 0; + } + if (*incx == 1) { + +/* code for increment equal to 1 */ + + +/* clean-up loop */ + + m = *n % 5; + if (m != 0) { + i__1 = m; + for (i__ = 1; i__ <= i__1; ++i__) { + dx[i__] = *da * dx[i__]; + } + if (*n < 5) { + return 0; + } + } + mp1 = m + 1; + i__1 = *n; + for (i__ = mp1; i__ <= i__1; i__ += 5) { + dx[i__] = *da * dx[i__]; + dx[i__ + 1] = *da * dx[i__ + 1]; + dx[i__ + 2] = *da * dx[i__ + 2]; + dx[i__ + 3] = *da * dx[i__ + 3]; + dx[i__ + 4] = *da * dx[i__ + 4]; + } + } else { + +/* code for increment not equal to 1 */ + + nincx = *n * *incx; + i__1 = nincx; + i__2 = *incx; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { + dx[i__] = *da * dx[i__]; + } + } + return 0; + +/* End of DSCAL */ + +} /* dscal_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dstedc.cpp b/lib/linalg/dstedc.cpp new file mode 100644 index 0000000000..e71b373994 --- /dev/null +++ b/lib/linalg/dstedc.cpp @@ -0,0 +1,576 @@ +/* fortran/dstedc.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static integer c__9 = 9; +static integer c__0 = 0; +static integer c__2 = 2; +static doublereal c_b17 = 0.; +static doublereal c_b18 = 1.; +static integer c__1 = 1; + +/* > \brief \b DSTEDC */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DSTEDC + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DSTEDC( COMPZ, N, D, E, Z, LDZ, WORK, LWORK, IWORK, */ +/* LIWORK, INFO ) */ + +/* .. Scalar Arguments .. */ +/* CHARACTER COMPZ */ +/* INTEGER INFO, LDZ, LIWORK, LWORK, N */ +/* .. */ +/* .. Array Arguments .. */ +/* INTEGER IWORK( * ) */ +/* DOUBLE PRECISION D( * ), E( * ), WORK( * ), Z( LDZ, * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DSTEDC computes all eigenvalues and, optionally, eigenvectors of a */ +/* > symmetric tridiagonal matrix using the divide and conquer method. */ +/* > The eigenvectors of a full or band real symmetric matrix can also be */ +/* > found if DSYTRD or DSPTRD or DSBTRD has been used to reduce this */ +/* > matrix to tridiagonal form. */ +/* > */ +/* > This code makes very mild assumptions about floating point */ +/* > arithmetic. It will work on machines with a guard digit in */ +/* > add/subtract, or on those binary machines without guard digits */ +/* > which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. */ +/* > It could conceivably fail on hexadecimal or decimal machines */ +/* > without guard digits, but we know of none. See DLAED3 for details. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] COMPZ */ +/* > \verbatim */ +/* > COMPZ is CHARACTER*1 */ +/* > = 'N': Compute eigenvalues only. */ +/* > = 'I': Compute eigenvectors of tridiagonal matrix also. */ +/* > = 'V': Compute eigenvectors of original dense symmetric */ +/* > matrix also. On entry, Z contains the orthogonal */ +/* > matrix used to reduce the original matrix to */ +/* > tridiagonal form. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The dimension of the symmetric tridiagonal matrix. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] D */ +/* > \verbatim */ +/* > D is DOUBLE PRECISION array, dimension (N) */ +/* > On entry, the diagonal elements of the tridiagonal matrix. */ +/* > On exit, if INFO = 0, the eigenvalues in ascending order. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] E */ +/* > \verbatim */ +/* > E is DOUBLE PRECISION array, dimension (N-1) */ +/* > On entry, the subdiagonal elements of the tridiagonal matrix. */ +/* > On exit, E has been destroyed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] Z */ +/* > \verbatim */ +/* > Z is DOUBLE PRECISION array, dimension (LDZ,N) */ +/* > On entry, if COMPZ = 'V', then Z contains the orthogonal */ +/* > matrix used in the reduction to tridiagonal form. */ +/* > On exit, if INFO = 0, then if COMPZ = 'V', Z contains the */ +/* > orthonormal eigenvectors of the original symmetric matrix, */ +/* > and if COMPZ = 'I', Z contains the orthonormal eigenvectors */ +/* > of the symmetric tridiagonal matrix. */ +/* > If COMPZ = 'N', then Z is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDZ */ +/* > \verbatim */ +/* > LDZ is INTEGER */ +/* > The leading dimension of the array Z. LDZ >= 1. */ +/* > If eigenvectors are desired, then LDZ >= max(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. */ +/* > If COMPZ = 'N' or N <= 1 then LWORK must be at least 1. */ +/* > If COMPZ = 'V' and N > 1 then LWORK must be at least */ +/* > ( 1 + 3*N + 2*N*lg N + 4*N**2 ), */ +/* > where lg( N ) = smallest integer k such */ +/* > that 2**k >= N. */ +/* > If COMPZ = 'I' and N > 1 then LWORK must be at least */ +/* > ( 1 + 4*N + N**2 ). */ +/* > Note that for COMPZ = 'I' or 'V', then if N is less than or */ +/* > equal to the minimum divide size, usually 25, then LWORK need */ +/* > only be max(1,2*(N-1)). */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (MAX(1,LIWORK)) */ +/* > On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LIWORK */ +/* > \verbatim */ +/* > LIWORK is INTEGER */ +/* > The dimension of the array IWORK. */ +/* > If COMPZ = 'N' or N <= 1 then LIWORK must be at least 1. */ +/* > If COMPZ = 'V' and N > 1 then LIWORK must be at least */ +/* > ( 6 + 6*N + 5*N*lg N ). */ +/* > If COMPZ = 'I' and N > 1 then LIWORK must be at least */ +/* > ( 3 + 5*N ). */ +/* > Note that for COMPZ = 'I' or 'V', then if N is less than or */ +/* > equal to the minimum divide size, usually 25, then LIWORK */ +/* > need only be 1. */ +/* > */ +/* > If LIWORK = -1, then a workspace query is assumed; the */ +/* > routine only calculates the optimal size of the IWORK array, */ +/* > returns this value as the first entry of the IWORK array, and */ +/* > no error message related to LIWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit. */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > > 0: The algorithm failed to compute an eigenvalue while */ +/* > working on the submatrix lying in rows and columns */ +/* > INFO/(N+1) through mod(INFO,N+1). */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup auxOTHERcomputational */ + +/* > \par Contributors: */ +/* ================== */ +/* > */ +/* > Jeff Rutter, Computer Science Division, University of California */ +/* > at Berkeley, USA \n */ +/* > Modified by Francoise Tisseur, University of Tennessee */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int dstedc_(char *compz, integer *n, doublereal *d__, + doublereal *e, doublereal *z__, integer *ldz, doublereal *work, + integer *lwork, integer *iwork, integer *liwork, integer *info, + ftnlen compz_len) +{ + /* System generated locals */ + integer z_dim1, z_offset, i__1, i__2; + doublereal d__1, d__2; + + /* Builtin functions */ + double log(doublereal); + integer pow_ii(integer *, integer *); + double sqrt(doublereal); + + /* Local variables */ + integer i__, j, k, m; + doublereal p; + integer ii, lgn; + doublereal eps, tiny; + extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, ftnlen, ftnlen); + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern /* Subroutine */ int dswap_(integer *, doublereal *, integer *, + doublereal *, integer *); + integer lwmin; + extern /* Subroutine */ int dlaed0_(integer *, integer *, integer *, + doublereal *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, integer *, integer *); + integer start; + extern doublereal dlamch_(char *, ftnlen); + extern /* Subroutine */ int dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *, ftnlen), dlacpy_(char *, integer *, integer + *, doublereal *, integer *, doublereal *, integer *, ftnlen), + dlaset_(char *, integer *, integer *, doublereal *, doublereal *, + doublereal *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + integer finish; + extern doublereal dlanst_(char *, integer *, doublereal *, doublereal *, + ftnlen); + extern /* Subroutine */ int dsterf_(integer *, doublereal *, doublereal *, + integer *), dlasrt_(char *, integer *, doublereal *, integer *, + ftnlen); + integer liwmin, icompz; + extern /* Subroutine */ int dsteqr_(char *, integer *, doublereal *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + ftnlen); + doublereal orgnrm; + logical lquery; + integer smlsiz, storez, strtrw; + + +/* -- LAPACK computational routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + --d__; + --e; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --work; + --iwork; + + /* Function Body */ + *info = 0; + lquery = *lwork == -1 || *liwork == -1; + + if (lsame_(compz, (char *)"N", (ftnlen)1, (ftnlen)1)) { + icompz = 0; + } else if (lsame_(compz, (char *)"V", (ftnlen)1, (ftnlen)1)) { + icompz = 1; + } else if (lsame_(compz, (char *)"I", (ftnlen)1, (ftnlen)1)) { + icompz = 2; + } else { + icompz = -1; + } + if (icompz < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*ldz < 1 || icompz > 0 && *ldz < max(1,*n)) { + *info = -6; + } + + if (*info == 0) { + +/* Compute the workspace requirements */ + + smlsiz = ilaenv_(&c__9, (char *)"DSTEDC", (char *)" ", &c__0, &c__0, &c__0, &c__0, ( + ftnlen)6, (ftnlen)1); + if (*n <= 1 || icompz == 0) { + liwmin = 1; + lwmin = 1; + } else if (*n <= smlsiz) { + liwmin = 1; + lwmin = *n - 1 << 1; + } else { + lgn = (integer) (log((doublereal) (*n)) / log(2.)); + if (pow_ii(&c__2, &lgn) < *n) { + ++lgn; + } + if (pow_ii(&c__2, &lgn) < *n) { + ++lgn; + } + if (icompz == 1) { +/* Computing 2nd power */ + i__1 = *n; + lwmin = *n * 3 + 1 + (*n << 1) * lgn + (i__1 * i__1 << 2); + liwmin = *n * 6 + 6 + *n * 5 * lgn; + } else if (icompz == 2) { +/* Computing 2nd power */ + i__1 = *n; + lwmin = (*n << 2) + 1 + i__1 * i__1; + liwmin = *n * 5 + 3; + } + } + work[1] = (doublereal) lwmin; + iwork[1] = liwmin; + + if (*lwork < lwmin && ! lquery) { + *info = -8; + } else if (*liwork < liwmin && ! lquery) { + *info = -10; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DSTEDC", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + if (*n == 1) { + if (icompz != 0) { + z__[z_dim1 + 1] = 1.; + } + return 0; + } + +/* If the following conditional clause is removed, then the routine */ +/* will use the Divide and Conquer routine to compute only the */ +/* eigenvalues, which requires (3N + 3N**2) real workspace and */ +/* (2 + 5N + 2N lg(N)) integer workspace. */ +/* Since on many architectures DSTERF is much faster than any other */ +/* algorithm for finding eigenvalues only, it is used here */ +/* as the default. If the conditional clause is removed, then */ +/* information on the size of workspace needs to be changed. */ + +/* If COMPZ = 'N', use DSTERF to compute the eigenvalues. */ + + if (icompz == 0) { + dsterf_(n, &d__[1], &e[1], info); + goto L50; + } + +/* If N is smaller than the minimum divide size (SMLSIZ+1), then */ +/* solve the problem with another solver. */ + + if (*n <= smlsiz) { + + dsteqr_(compz, n, &d__[1], &e[1], &z__[z_offset], ldz, &work[1], info, + (ftnlen)1); + + } else { + +/* If COMPZ = 'V', the Z matrix must be stored elsewhere for later */ +/* use. */ + + if (icompz == 1) { + storez = *n * *n + 1; + } else { + storez = 1; + } + + if (icompz == 2) { + dlaset_((char *)"Full", n, n, &c_b17, &c_b18, &z__[z_offset], ldz, ( + ftnlen)4); + } + +/* Scale. */ + + orgnrm = dlanst_((char *)"M", n, &d__[1], &e[1], (ftnlen)1); + if (orgnrm == 0.) { + goto L50; + } + + eps = dlamch_((char *)"Epsilon", (ftnlen)7); + + start = 1; + +/* while ( START <= N ) */ + +L10: + if (start <= *n) { + +/* Let FINISH be the position of the next subdiagonal entry */ +/* such that E( FINISH ) <= TINY or FINISH = N if no such */ +/* subdiagonal exists. The matrix identified by the elements */ +/* between START and FINISH constitutes an independent */ +/* sub-problem. */ + + finish = start; +L20: + if (finish < *n) { + tiny = eps * sqrt((d__1 = d__[finish], abs(d__1))) * sqrt(( + d__2 = d__[finish + 1], abs(d__2))); + if ((d__1 = e[finish], abs(d__1)) > tiny) { + ++finish; + goto L20; + } + } + +/* (Sub) Problem determined. Compute its size and solve it. */ + + m = finish - start + 1; + if (m == 1) { + start = finish + 1; + goto L10; + } + if (m > smlsiz) { + +/* Scale. */ + + orgnrm = dlanst_((char *)"M", &m, &d__[start], &e[start], (ftnlen)1); + dlascl_((char *)"G", &c__0, &c__0, &orgnrm, &c_b18, &m, &c__1, &d__[ + start], &m, info, (ftnlen)1); + i__1 = m - 1; + i__2 = m - 1; + dlascl_((char *)"G", &c__0, &c__0, &orgnrm, &c_b18, &i__1, &c__1, &e[ + start], &i__2, info, (ftnlen)1); + + if (icompz == 1) { + strtrw = 1; + } else { + strtrw = start; + } + dlaed0_(&icompz, n, &m, &d__[start], &e[start], &z__[strtrw + + start * z_dim1], ldz, &work[1], n, &work[storez], & + iwork[1], info); + if (*info != 0) { + *info = (*info / (m + 1) + start - 1) * (*n + 1) + *info % + (m + 1) + start - 1; + goto L50; + } + +/* Scale back. */ + + dlascl_((char *)"G", &c__0, &c__0, &c_b18, &orgnrm, &m, &c__1, &d__[ + start], &m, info, (ftnlen)1); + + } else { + if (icompz == 1) { + +/* Since QR won't update a Z matrix which is larger than */ +/* the length of D, we must solve the sub-problem in a */ +/* workspace and then multiply back into Z. */ + + dsteqr_((char *)"I", &m, &d__[start], &e[start], &work[1], &m, & + work[m * m + 1], info, (ftnlen)1); + dlacpy_((char *)"A", n, &m, &z__[start * z_dim1 + 1], ldz, &work[ + storez], n, (ftnlen)1); + dgemm_((char *)"N", (char *)"N", n, &m, &m, &c_b18, &work[storez], n, & + work[1], &m, &c_b17, &z__[start * z_dim1 + 1], + ldz, (ftnlen)1, (ftnlen)1); + } else if (icompz == 2) { + dsteqr_((char *)"I", &m, &d__[start], &e[start], &z__[start + + start * z_dim1], ldz, &work[1], info, (ftnlen)1); + } else { + dsterf_(&m, &d__[start], &e[start], info); + } + if (*info != 0) { + *info = start * (*n + 1) + finish; + goto L50; + } + } + + start = finish + 1; + goto L10; + } + +/* endwhile */ + + if (icompz == 0) { + +/* Use Quick Sort */ + + dlasrt_((char *)"I", n, &d__[1], info, (ftnlen)1); + + } else { + +/* Use Selection Sort to minimize swaps of eigenvectors */ + + i__1 = *n; + for (ii = 2; ii <= i__1; ++ii) { + i__ = ii - 1; + k = i__; + p = d__[i__]; + i__2 = *n; + for (j = ii; j <= i__2; ++j) { + if (d__[j] < p) { + k = j; + p = d__[j]; + } +/* L30: */ + } + if (k != i__) { + d__[k] = d__[i__]; + d__[i__] = p; + dswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[k * z_dim1 + + 1], &c__1); + } +/* L40: */ + } + } + } + +L50: + work[1] = (doublereal) lwmin; + iwork[1] = liwmin; + + return 0; + +/* End of DSTEDC */ + +} /* dstedc_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dsteqr.cpp b/lib/linalg/dsteqr.cpp new file mode 100644 index 0000000000..65209d15d3 --- /dev/null +++ b/lib/linalg/dsteqr.cpp @@ -0,0 +1,704 @@ +/* fortran/dsteqr.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static doublereal c_b9 = 0.; +static doublereal c_b10 = 1.; +static integer c__0 = 0; +static integer c__1 = 1; +static integer c__2 = 2; + +/* > \brief \b DSTEQR */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DSTEQR + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DSTEQR( COMPZ, N, D, E, Z, LDZ, WORK, INFO ) */ + +/* .. Scalar Arguments .. */ +/* CHARACTER COMPZ */ +/* INTEGER INFO, LDZ, N */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION D( * ), E( * ), WORK( * ), Z( LDZ, * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DSTEQR computes all eigenvalues and, optionally, eigenvectors of a */ +/* > symmetric tridiagonal matrix using the implicit QL or QR method. */ +/* > The eigenvectors of a full or band symmetric matrix can also be found */ +/* > if DSYTRD or DSPTRD or DSBTRD has been used to reduce this matrix to */ +/* > tridiagonal form. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] COMPZ */ +/* > \verbatim */ +/* > COMPZ is CHARACTER*1 */ +/* > = 'N': Compute eigenvalues only. */ +/* > = 'V': Compute eigenvalues and eigenvectors of the original */ +/* > symmetric matrix. On entry, Z must contain the */ +/* > orthogonal matrix used to reduce the original matrix */ +/* > to tridiagonal form. */ +/* > = 'I': Compute eigenvalues and eigenvectors of the */ +/* > tridiagonal matrix. Z is initialized to the identity */ +/* > matrix. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] D */ +/* > \verbatim */ +/* > D is DOUBLE PRECISION array, dimension (N) */ +/* > On entry, the diagonal elements of the tridiagonal matrix. */ +/* > On exit, if INFO = 0, the eigenvalues in ascending order. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] E */ +/* > \verbatim */ +/* > E is DOUBLE PRECISION array, dimension (N-1) */ +/* > On entry, the (n-1) subdiagonal elements of the tridiagonal */ +/* > matrix. */ +/* > On exit, E has been destroyed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] Z */ +/* > \verbatim */ +/* > Z is DOUBLE PRECISION array, dimension (LDZ, N) */ +/* > On entry, if COMPZ = 'V', then Z contains the orthogonal */ +/* > matrix used in the reduction to tridiagonal form. */ +/* > On exit, if INFO = 0, then if COMPZ = 'V', Z contains the */ +/* > orthonormal eigenvectors of the original symmetric matrix, */ +/* > and if COMPZ = 'I', Z contains the orthonormal eigenvectors */ +/* > of the symmetric tridiagonal matrix. */ +/* > If COMPZ = 'N', then Z is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDZ */ +/* > \verbatim */ +/* > LDZ is INTEGER */ +/* > The leading dimension of the array Z. LDZ >= 1, and if */ +/* > eigenvectors are desired, then LDZ >= max(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (max(1,2*N-2)) */ +/* > If COMPZ = 'N', then WORK is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: the algorithm has failed to find all the eigenvalues in */ +/* > a total of 30*N iterations; if INFO = i, then i */ +/* > elements of E have not converged to zero; on exit, D */ +/* > and E contain the elements of a symmetric tridiagonal */ +/* > matrix which is orthogonally similar to the original */ +/* > matrix. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup auxOTHERcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int dsteqr_(char *compz, integer *n, doublereal *d__, + doublereal *e, doublereal *z__, integer *ldz, doublereal *work, + integer *info, ftnlen compz_len) +{ + /* System generated locals */ + integer z_dim1, z_offset, i__1, i__2; + doublereal d__1, d__2; + + /* Builtin functions */ + double sqrt(doublereal), d_sign(doublereal *, doublereal *); + + /* Local variables */ + doublereal b, c__, f, g; + integer i__, j, k, l, m; + doublereal p, r__, s; + integer l1, ii, mm, lm1, mm1, nm1; + doublereal rt1, rt2, eps; + integer lsv; + doublereal tst, eps2; + integer lend, jtot; + extern /* Subroutine */ int dlae2_(doublereal *, doublereal *, doublereal + *, doublereal *, doublereal *); + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern /* Subroutine */ int dlasr_(char *, char *, char *, integer *, + integer *, doublereal *, doublereal *, doublereal *, integer *, + ftnlen, ftnlen, ftnlen); + doublereal anorm; + extern /* Subroutine */ int dswap_(integer *, doublereal *, integer *, + doublereal *, integer *), dlaev2_(doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *); + integer lendm1, lendp1; + extern doublereal dlapy2_(doublereal *, doublereal *), dlamch_(char *, + ftnlen); + integer iscale; + extern /* Subroutine */ int dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *, ftnlen), dlaset_(char *, integer *, integer + *, doublereal *, doublereal *, doublereal *, integer *, ftnlen); + doublereal safmin; + extern /* Subroutine */ int dlartg_(doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *); + doublereal safmax; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + extern doublereal dlanst_(char *, integer *, doublereal *, doublereal *, + ftnlen); + extern /* Subroutine */ int dlasrt_(char *, integer *, doublereal *, + integer *, ftnlen); + integer lendsv; + doublereal ssfmin; + integer nmaxit, icompz; + doublereal ssfmax; + + +/* -- LAPACK computational routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + --d__; + --e; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --work; + + /* Function Body */ + *info = 0; + + if (lsame_(compz, (char *)"N", (ftnlen)1, (ftnlen)1)) { + icompz = 0; + } else if (lsame_(compz, (char *)"V", (ftnlen)1, (ftnlen)1)) { + icompz = 1; + } else if (lsame_(compz, (char *)"I", (ftnlen)1, (ftnlen)1)) { + icompz = 2; + } else { + icompz = -1; + } + if (icompz < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*ldz < 1 || icompz > 0 && *ldz < max(1,*n)) { + *info = -6; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DSTEQR", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + if (*n == 1) { + if (icompz == 2) { + z__[z_dim1 + 1] = 1.; + } + return 0; + } + +/* Determine the unit roundoff and over/underflow thresholds. */ + + eps = dlamch_((char *)"E", (ftnlen)1); +/* Computing 2nd power */ + d__1 = eps; + eps2 = d__1 * d__1; + safmin = dlamch_((char *)"S", (ftnlen)1); + safmax = 1. / safmin; + ssfmax = sqrt(safmax) / 3.; + ssfmin = sqrt(safmin) / eps2; + +/* Compute the eigenvalues and eigenvectors of the tridiagonal */ +/* matrix. */ + + if (icompz == 2) { + dlaset_((char *)"Full", n, n, &c_b9, &c_b10, &z__[z_offset], ldz, (ftnlen)4); + } + + nmaxit = *n * 30; + jtot = 0; + +/* Determine where the matrix splits and choose QL or QR iteration */ +/* for each block, according to whether top or bottom diagonal */ +/* element is smaller. */ + + l1 = 1; + nm1 = *n - 1; + +L10: + if (l1 > *n) { + goto L160; + } + if (l1 > 1) { + e[l1 - 1] = 0.; + } + if (l1 <= nm1) { + i__1 = nm1; + for (m = l1; m <= i__1; ++m) { + tst = (d__1 = e[m], abs(d__1)); + if (tst == 0.) { + goto L30; + } + if (tst <= sqrt((d__1 = d__[m], abs(d__1))) * sqrt((d__2 = d__[m + + 1], abs(d__2))) * eps) { + e[m] = 0.; + goto L30; + } +/* L20: */ + } + } + m = *n; + +L30: + l = l1; + lsv = l; + lend = m; + lendsv = lend; + l1 = m + 1; + if (lend == l) { + goto L10; + } + +/* Scale submatrix in rows and columns L to LEND */ + + i__1 = lend - l + 1; + anorm = dlanst_((char *)"M", &i__1, &d__[l], &e[l], (ftnlen)1); + iscale = 0; + if (anorm == 0.) { + goto L10; + } + if (anorm > ssfmax) { + iscale = 1; + i__1 = lend - l + 1; + dlascl_((char *)"G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &d__[l], n, + info, (ftnlen)1); + i__1 = lend - l; + dlascl_((char *)"G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &e[l], n, + info, (ftnlen)1); + } else if (anorm < ssfmin) { + iscale = 2; + i__1 = lend - l + 1; + dlascl_((char *)"G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &d__[l], n, + info, (ftnlen)1); + i__1 = lend - l; + dlascl_((char *)"G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &e[l], n, + info, (ftnlen)1); + } + +/* Choose between QL and QR iteration */ + + if ((d__1 = d__[lend], abs(d__1)) < (d__2 = d__[l], abs(d__2))) { + lend = lsv; + l = lendsv; + } + + if (lend > l) { + +/* QL Iteration */ + +/* Look for small subdiagonal element. */ + +L40: + if (l != lend) { + lendm1 = lend - 1; + i__1 = lendm1; + for (m = l; m <= i__1; ++m) { +/* Computing 2nd power */ + d__2 = (d__1 = e[m], abs(d__1)); + tst = d__2 * d__2; + if (tst <= eps2 * (d__1 = d__[m], abs(d__1)) * (d__2 = d__[m + + 1], abs(d__2)) + safmin) { + goto L60; + } +/* L50: */ + } + } + + m = lend; + +L60: + if (m < lend) { + e[m] = 0.; + } + p = d__[l]; + if (m == l) { + goto L80; + } + +/* If remaining matrix is 2-by-2, use DLAE2 or SLAEV2 */ +/* to compute its eigensystem. */ + + if (m == l + 1) { + if (icompz > 0) { + dlaev2_(&d__[l], &e[l], &d__[l + 1], &rt1, &rt2, &c__, &s); + work[l] = c__; + work[*n - 1 + l] = s; + dlasr_((char *)"R", (char *)"V", (char *)"B", n, &c__2, &work[l], &work[*n - 1 + l], & + z__[l * z_dim1 + 1], ldz, (ftnlen)1, (ftnlen)1, ( + ftnlen)1); + } else { + dlae2_(&d__[l], &e[l], &d__[l + 1], &rt1, &rt2); + } + d__[l] = rt1; + d__[l + 1] = rt2; + e[l] = 0.; + l += 2; + if (l <= lend) { + goto L40; + } + goto L140; + } + + if (jtot == nmaxit) { + goto L140; + } + ++jtot; + +/* Form shift. */ + + g = (d__[l + 1] - p) / (e[l] * 2.); + r__ = dlapy2_(&g, &c_b10); + g = d__[m] - p + e[l] / (g + d_sign(&r__, &g)); + + s = 1.; + c__ = 1.; + p = 0.; + +/* Inner loop */ + + mm1 = m - 1; + i__1 = l; + for (i__ = mm1; i__ >= i__1; --i__) { + f = s * e[i__]; + b = c__ * e[i__]; + dlartg_(&g, &f, &c__, &s, &r__); + if (i__ != m - 1) { + e[i__ + 1] = r__; + } + g = d__[i__ + 1] - p; + r__ = (d__[i__] - g) * s + c__ * 2. * b; + p = s * r__; + d__[i__ + 1] = g + p; + g = c__ * r__ - b; + +/* If eigenvectors are desired, then save rotations. */ + + if (icompz > 0) { + work[i__] = c__; + work[*n - 1 + i__] = -s; + } + +/* L70: */ + } + +/* If eigenvectors are desired, then apply saved rotations. */ + + if (icompz > 0) { + mm = m - l + 1; + dlasr_((char *)"R", (char *)"V", (char *)"B", n, &mm, &work[l], &work[*n - 1 + l], &z__[l + * z_dim1 + 1], ldz, (ftnlen)1, (ftnlen)1, (ftnlen)1); + } + + d__[l] -= p; + e[l] = g; + goto L40; + +/* Eigenvalue found. */ + +L80: + d__[l] = p; + + ++l; + if (l <= lend) { + goto L40; + } + goto L140; + + } else { + +/* QR Iteration */ + +/* Look for small superdiagonal element. */ + +L90: + if (l != lend) { + lendp1 = lend + 1; + i__1 = lendp1; + for (m = l; m >= i__1; --m) { +/* Computing 2nd power */ + d__2 = (d__1 = e[m - 1], abs(d__1)); + tst = d__2 * d__2; + if (tst <= eps2 * (d__1 = d__[m], abs(d__1)) * (d__2 = d__[m + - 1], abs(d__2)) + safmin) { + goto L110; + } +/* L100: */ + } + } + + m = lend; + +L110: + if (m > lend) { + e[m - 1] = 0.; + } + p = d__[l]; + if (m == l) { + goto L130; + } + +/* If remaining matrix is 2-by-2, use DLAE2 or SLAEV2 */ +/* to compute its eigensystem. */ + + if (m == l - 1) { + if (icompz > 0) { + dlaev2_(&d__[l - 1], &e[l - 1], &d__[l], &rt1, &rt2, &c__, &s) + ; + work[m] = c__; + work[*n - 1 + m] = s; + dlasr_((char *)"R", (char *)"V", (char *)"F", n, &c__2, &work[m], &work[*n - 1 + m], & + z__[(l - 1) * z_dim1 + 1], ldz, (ftnlen)1, (ftnlen)1, + (ftnlen)1); + } else { + dlae2_(&d__[l - 1], &e[l - 1], &d__[l], &rt1, &rt2); + } + d__[l - 1] = rt1; + d__[l] = rt2; + e[l - 1] = 0.; + l += -2; + if (l >= lend) { + goto L90; + } + goto L140; + } + + if (jtot == nmaxit) { + goto L140; + } + ++jtot; + +/* Form shift. */ + + g = (d__[l - 1] - p) / (e[l - 1] * 2.); + r__ = dlapy2_(&g, &c_b10); + g = d__[m] - p + e[l - 1] / (g + d_sign(&r__, &g)); + + s = 1.; + c__ = 1.; + p = 0.; + +/* Inner loop */ + + lm1 = l - 1; + i__1 = lm1; + for (i__ = m; i__ <= i__1; ++i__) { + f = s * e[i__]; + b = c__ * e[i__]; + dlartg_(&g, &f, &c__, &s, &r__); + if (i__ != m) { + e[i__ - 1] = r__; + } + g = d__[i__] - p; + r__ = (d__[i__ + 1] - g) * s + c__ * 2. * b; + p = s * r__; + d__[i__] = g + p; + g = c__ * r__ - b; + +/* If eigenvectors are desired, then save rotations. */ + + if (icompz > 0) { + work[i__] = c__; + work[*n - 1 + i__] = s; + } + +/* L120: */ + } + +/* If eigenvectors are desired, then apply saved rotations. */ + + if (icompz > 0) { + mm = l - m + 1; + dlasr_((char *)"R", (char *)"V", (char *)"F", n, &mm, &work[m], &work[*n - 1 + m], &z__[m + * z_dim1 + 1], ldz, (ftnlen)1, (ftnlen)1, (ftnlen)1); + } + + d__[l] -= p; + e[lm1] = g; + goto L90; + +/* Eigenvalue found. */ + +L130: + d__[l] = p; + + --l; + if (l >= lend) { + goto L90; + } + goto L140; + + } + +/* Undo scaling if necessary */ + +L140: + if (iscale == 1) { + i__1 = lendsv - lsv + 1; + dlascl_((char *)"G", &c__0, &c__0, &ssfmax, &anorm, &i__1, &c__1, &d__[lsv], + n, info, (ftnlen)1); + i__1 = lendsv - lsv; + dlascl_((char *)"G", &c__0, &c__0, &ssfmax, &anorm, &i__1, &c__1, &e[lsv], n, + info, (ftnlen)1); + } else if (iscale == 2) { + i__1 = lendsv - lsv + 1; + dlascl_((char *)"G", &c__0, &c__0, &ssfmin, &anorm, &i__1, &c__1, &d__[lsv], + n, info, (ftnlen)1); + i__1 = lendsv - lsv; + dlascl_((char *)"G", &c__0, &c__0, &ssfmin, &anorm, &i__1, &c__1, &e[lsv], n, + info, (ftnlen)1); + } + +/* Check for no convergence to an eigenvalue after a total */ +/* of N*MAXIT iterations. */ + + if (jtot < nmaxit) { + goto L10; + } + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + if (e[i__] != 0.) { + ++(*info); + } +/* L150: */ + } + goto L190; + +/* Order eigenvalues and eigenvectors. */ + +L160: + if (icompz == 0) { + +/* Use Quick Sort */ + + dlasrt_((char *)"I", n, &d__[1], info, (ftnlen)1); + + } else { + +/* Use Selection Sort to minimize swaps of eigenvectors */ + + i__1 = *n; + for (ii = 2; ii <= i__1; ++ii) { + i__ = ii - 1; + k = i__; + p = d__[i__]; + i__2 = *n; + for (j = ii; j <= i__2; ++j) { + if (d__[j] < p) { + k = j; + p = d__[j]; + } +/* L170: */ + } + if (k != i__) { + d__[k] = d__[i__]; + d__[i__] = p; + dswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[k * z_dim1 + 1], + &c__1); + } +/* L180: */ + } + } + +L190: + return 0; + +/* End of DSTEQR */ + +} /* dsteqr_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dsterf.cpp b/lib/linalg/dsterf.cpp new file mode 100644 index 0000000000..03172b87f3 --- /dev/null +++ b/lib/linalg/dsterf.cpp @@ -0,0 +1,534 @@ +/* fortran/dsterf.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static integer c__0 = 0; +static integer c__1 = 1; +static doublereal c_b33 = 1.; + +/* > \brief \b DSTERF */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DSTERF + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DSTERF( N, D, E, INFO ) */ + +/* .. Scalar Arguments .. */ +/* INTEGER INFO, N */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION D( * ), E( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DSTERF computes all eigenvalues of a symmetric tridiagonal matrix */ +/* > using the Pal-Walker-Kahan variant of the QL or QR algorithm. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] D */ +/* > \verbatim */ +/* > D is DOUBLE PRECISION array, dimension (N) */ +/* > On entry, the n diagonal elements of the tridiagonal matrix. */ +/* > On exit, if INFO = 0, the eigenvalues in ascending order. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] E */ +/* > \verbatim */ +/* > E is DOUBLE PRECISION array, dimension (N-1) */ +/* > On entry, the (n-1) subdiagonal elements of the tridiagonal */ +/* > matrix. */ +/* > On exit, E has been destroyed. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: the algorithm failed to find all of the eigenvalues in */ +/* > a total of 30*N iterations; if INFO = i, then i */ +/* > elements of E have not converged to zero. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup auxOTHERcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int dsterf_(integer *n, doublereal *d__, doublereal *e, + integer *info) +{ + /* System generated locals */ + integer i__1; + doublereal d__1, d__2, d__3; + + /* Builtin functions */ + double sqrt(doublereal), d_sign(doublereal *, doublereal *); + + /* Local variables */ + doublereal c__; + integer i__, l, m; + doublereal p, r__, s; + integer l1; + doublereal bb, rt1, rt2, eps, rte; + integer lsv; + doublereal eps2, oldc; + integer lend; + doublereal rmax; + integer jtot; + extern /* Subroutine */ int dlae2_(doublereal *, doublereal *, doublereal + *, doublereal *, doublereal *); + doublereal gamma, alpha, sigma, anorm; + extern doublereal dlapy2_(doublereal *, doublereal *), dlamch_(char *, + ftnlen); + integer iscale; + extern /* Subroutine */ int dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *, ftnlen); + doublereal oldgam, safmin; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + doublereal safmax; + extern doublereal dlanst_(char *, integer *, doublereal *, doublereal *, + ftnlen); + extern /* Subroutine */ int dlasrt_(char *, integer *, doublereal *, + integer *, ftnlen); + integer lendsv; + doublereal ssfmin; + integer nmaxit; + doublereal ssfmax; + + +/* -- LAPACK computational routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + --e; + --d__; + + /* Function Body */ + *info = 0; + +/* Quick return if possible */ + + if (*n < 0) { + *info = -1; + i__1 = -(*info); + xerbla_((char *)"DSTERF", &i__1, (ftnlen)6); + return 0; + } + if (*n <= 1) { + return 0; + } + +/* Determine the unit roundoff for this environment. */ + + eps = dlamch_((char *)"E", (ftnlen)1); +/* Computing 2nd power */ + d__1 = eps; + eps2 = d__1 * d__1; + safmin = dlamch_((char *)"S", (ftnlen)1); + safmax = 1. / safmin; + ssfmax = sqrt(safmax) / 3.; + ssfmin = sqrt(safmin) / eps2; + rmax = dlamch_((char *)"O", (ftnlen)1); + +/* Compute the eigenvalues of the tridiagonal matrix. */ + + nmaxit = *n * 30; + sigma = 0.; + jtot = 0; + +/* Determine where the matrix splits and choose QL or QR iteration */ +/* for each block, according to whether top or bottom diagonal */ +/* element is smaller. */ + + l1 = 1; + +L10: + if (l1 > *n) { + goto L170; + } + if (l1 > 1) { + e[l1 - 1] = 0.; + } + i__1 = *n - 1; + for (m = l1; m <= i__1; ++m) { + if ((d__3 = e[m], abs(d__3)) <= sqrt((d__1 = d__[m], abs(d__1))) * + sqrt((d__2 = d__[m + 1], abs(d__2))) * eps) { + e[m] = 0.; + goto L30; + } +/* L20: */ + } + m = *n; + +L30: + l = l1; + lsv = l; + lend = m; + lendsv = lend; + l1 = m + 1; + if (lend == l) { + goto L10; + } + +/* Scale submatrix in rows and columns L to LEND */ + + i__1 = lend - l + 1; + anorm = dlanst_((char *)"M", &i__1, &d__[l], &e[l], (ftnlen)1); + iscale = 0; + if (anorm == 0.) { + goto L10; + } + if (anorm > ssfmax) { + iscale = 1; + i__1 = lend - l + 1; + dlascl_((char *)"G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &d__[l], n, + info, (ftnlen)1); + i__1 = lend - l; + dlascl_((char *)"G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &e[l], n, + info, (ftnlen)1); + } else if (anorm < ssfmin) { + iscale = 2; + i__1 = lend - l + 1; + dlascl_((char *)"G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &d__[l], n, + info, (ftnlen)1); + i__1 = lend - l; + dlascl_((char *)"G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &e[l], n, + info, (ftnlen)1); + } + + i__1 = lend - 1; + for (i__ = l; i__ <= i__1; ++i__) { +/* Computing 2nd power */ + d__1 = e[i__]; + e[i__] = d__1 * d__1; +/* L40: */ + } + +/* Choose between QL and QR iteration */ + + if ((d__1 = d__[lend], abs(d__1)) < (d__2 = d__[l], abs(d__2))) { + lend = lsv; + l = lendsv; + } + + if (lend >= l) { + +/* QL Iteration */ + +/* Look for small subdiagonal element. */ + +L50: + if (l != lend) { + i__1 = lend - 1; + for (m = l; m <= i__1; ++m) { + if ((d__2 = e[m], abs(d__2)) <= eps2 * (d__1 = d__[m] * d__[m + + 1], abs(d__1))) { + goto L70; + } +/* L60: */ + } + } + m = lend; + +L70: + if (m < lend) { + e[m] = 0.; + } + p = d__[l]; + if (m == l) { + goto L90; + } + +/* If remaining matrix is 2 by 2, use DLAE2 to compute its */ +/* eigenvalues. */ + + if (m == l + 1) { + rte = sqrt(e[l]); + dlae2_(&d__[l], &rte, &d__[l + 1], &rt1, &rt2); + d__[l] = rt1; + d__[l + 1] = rt2; + e[l] = 0.; + l += 2; + if (l <= lend) { + goto L50; + } + goto L150; + } + + if (jtot == nmaxit) { + goto L150; + } + ++jtot; + +/* Form shift. */ + + rte = sqrt(e[l]); + sigma = (d__[l + 1] - p) / (rte * 2.); + r__ = dlapy2_(&sigma, &c_b33); + sigma = p - rte / (sigma + d_sign(&r__, &sigma)); + + c__ = 1.; + s = 0.; + gamma = d__[m] - sigma; + p = gamma * gamma; + +/* Inner loop */ + + i__1 = l; + for (i__ = m - 1; i__ >= i__1; --i__) { + bb = e[i__]; + r__ = p + bb; + if (i__ != m - 1) { + e[i__ + 1] = s * r__; + } + oldc = c__; + c__ = p / r__; + s = bb / r__; + oldgam = gamma; + alpha = d__[i__]; + gamma = c__ * (alpha - sigma) - s * oldgam; + d__[i__ + 1] = oldgam + (alpha - gamma); + if (c__ != 0.) { + p = gamma * gamma / c__; + } else { + p = oldc * bb; + } +/* L80: */ + } + + e[l] = s * p; + d__[l] = sigma + gamma; + goto L50; + +/* Eigenvalue found. */ + +L90: + d__[l] = p; + + ++l; + if (l <= lend) { + goto L50; + } + goto L150; + + } else { + +/* QR Iteration */ + +/* Look for small superdiagonal element. */ + +L100: + i__1 = lend + 1; + for (m = l; m >= i__1; --m) { + if ((d__2 = e[m - 1], abs(d__2)) <= eps2 * (d__1 = d__[m] * d__[m + - 1], abs(d__1))) { + goto L120; + } +/* L110: */ + } + m = lend; + +L120: + if (m > lend) { + e[m - 1] = 0.; + } + p = d__[l]; + if (m == l) { + goto L140; + } + +/* If remaining matrix is 2 by 2, use DLAE2 to compute its */ +/* eigenvalues. */ + + if (m == l - 1) { + rte = sqrt(e[l - 1]); + dlae2_(&d__[l], &rte, &d__[l - 1], &rt1, &rt2); + d__[l] = rt1; + d__[l - 1] = rt2; + e[l - 1] = 0.; + l += -2; + if (l >= lend) { + goto L100; + } + goto L150; + } + + if (jtot == nmaxit) { + goto L150; + } + ++jtot; + +/* Form shift. */ + + rte = sqrt(e[l - 1]); + sigma = (d__[l - 1] - p) / (rte * 2.); + r__ = dlapy2_(&sigma, &c_b33); + sigma = p - rte / (sigma + d_sign(&r__, &sigma)); + + c__ = 1.; + s = 0.; + gamma = d__[m] - sigma; + p = gamma * gamma; + +/* Inner loop */ + + i__1 = l - 1; + for (i__ = m; i__ <= i__1; ++i__) { + bb = e[i__]; + r__ = p + bb; + if (i__ != m) { + e[i__ - 1] = s * r__; + } + oldc = c__; + c__ = p / r__; + s = bb / r__; + oldgam = gamma; + alpha = d__[i__ + 1]; + gamma = c__ * (alpha - sigma) - s * oldgam; + d__[i__] = oldgam + (alpha - gamma); + if (c__ != 0.) { + p = gamma * gamma / c__; + } else { + p = oldc * bb; + } +/* L130: */ + } + + e[l - 1] = s * p; + d__[l] = sigma + gamma; + goto L100; + +/* Eigenvalue found. */ + +L140: + d__[l] = p; + + --l; + if (l >= lend) { + goto L100; + } + goto L150; + + } + +/* Undo scaling if necessary */ + +L150: + if (iscale == 1) { + i__1 = lendsv - lsv + 1; + dlascl_((char *)"G", &c__0, &c__0, &ssfmax, &anorm, &i__1, &c__1, &d__[lsv], + n, info, (ftnlen)1); + } + if (iscale == 2) { + i__1 = lendsv - lsv + 1; + dlascl_((char *)"G", &c__0, &c__0, &ssfmin, &anorm, &i__1, &c__1, &d__[lsv], + n, info, (ftnlen)1); + } + +/* Check for no convergence to an eigenvalue after a total */ +/* of N*MAXIT iterations. */ + + if (jtot < nmaxit) { + goto L10; + } + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + if (e[i__] != 0.) { + ++(*info); + } +/* L160: */ + } + goto L180; + +/* Sort eigenvalues in increasing order. */ + +L170: + dlasrt_((char *)"I", n, &d__[1], info, (ftnlen)1); + +L180: + return 0; + +/* End of DSTERF */ + +} /* dsterf_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dswap.cpp b/lib/linalg/dswap.cpp new file mode 100644 index 0000000000..a1eca4fc06 --- /dev/null +++ b/lib/linalg/dswap.cpp @@ -0,0 +1,194 @@ +/* fortran/dswap.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* > \brief \b DSWAP */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DSWAP(N,DX,INCX,DY,INCY) */ + +/* .. Scalar Arguments .. */ +/* INTEGER INCX,INCY,N */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION DX(*),DY(*) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DSWAP interchanges two vectors. */ +/* > uses unrolled loops for increments equal to 1. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > number of elements in input vector(s) */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] DX */ +/* > \verbatim */ +/* > DX is DOUBLE PRECISION array, dimension ( 1 + ( N - 1 )*abs( INCX ) ) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] INCX */ +/* > \verbatim */ +/* > INCX is INTEGER */ +/* > storage spacing between elements of DX */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] DY */ +/* > \verbatim */ +/* > DY is DOUBLE PRECISION array, dimension ( 1 + ( N - 1 )*abs( INCY ) ) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] INCY */ +/* > \verbatim */ +/* > INCY is INTEGER */ +/* > storage spacing between elements of DY */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup double_blas_level1 */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > jack dongarra, linpack, 3/11/78. */ +/* > modified 12/3/93, array(1) declarations changed to array(*) */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int dswap_(integer *n, doublereal *dx, integer *incx, + doublereal *dy, integer *incy) +{ + /* System generated locals */ + integer i__1; + + /* Local variables */ + integer i__, m, ix, iy, mp1; + doublereal dtemp; + + +/* -- Reference BLAS level1 routine -- */ +/* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ + /* Parameter adjustments */ + --dy; + --dx; + + /* Function Body */ + if (*n <= 0) { + return 0; + } + if (*incx == 1 && *incy == 1) { + +/* code for both increments equal to 1 */ + + +/* clean-up loop */ + + m = *n % 3; + if (m != 0) { + i__1 = m; + for (i__ = 1; i__ <= i__1; ++i__) { + dtemp = dx[i__]; + dx[i__] = dy[i__]; + dy[i__] = dtemp; + } + if (*n < 3) { + return 0; + } + } + mp1 = m + 1; + i__1 = *n; + for (i__ = mp1; i__ <= i__1; i__ += 3) { + dtemp = dx[i__]; + dx[i__] = dy[i__]; + dy[i__] = dtemp; + dtemp = dx[i__ + 1]; + dx[i__ + 1] = dy[i__ + 1]; + dy[i__ + 1] = dtemp; + dtemp = dx[i__ + 2]; + dx[i__ + 2] = dy[i__ + 2]; + dy[i__ + 2] = dtemp; + } + } else { + +/* code for unequal increments or equal increments not equal */ +/* to 1 */ + + ix = 1; + iy = 1; + if (*incx < 0) { + ix = (-(*n) + 1) * *incx + 1; + } + if (*incy < 0) { + iy = (-(*n) + 1) * *incy + 1; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + dtemp = dx[ix]; + dx[ix] = dy[iy]; + dy[iy] = dtemp; + ix += *incx; + iy += *incy; + } + } + return 0; + +/* End of DSWAP */ + +} /* dswap_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dsyev.cpp b/lib/linalg/dsyev.cpp new file mode 100644 index 0000000000..603b633195 --- /dev/null +++ b/lib/linalg/dsyev.cpp @@ -0,0 +1,369 @@ +/* fortran/dsyev.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c_n1 = -1; +static integer c__0 = 0; +static doublereal c_b17 = 1.; + +/* > \brief DSYEV computes the eigenvalues and, optionally, the left and/or right eigenvectors for SY matr +ices */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DSYEV + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DSYEV( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, INFO ) */ + +/* .. Scalar Arguments .. */ +/* CHARACTER JOBZ, UPLO */ +/* INTEGER INFO, LDA, LWORK, N */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION A( LDA, * ), W( * ), WORK( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DSYEV computes all eigenvalues and, optionally, eigenvectors of a */ +/* > real symmetric matrix A. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOBZ */ +/* > \verbatim */ +/* > JOBZ is CHARACTER*1 */ +/* > = 'N': Compute eigenvalues only; */ +/* > = 'V': Compute eigenvalues and eigenvectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': Upper triangle of A is stored; */ +/* > = 'L': Lower triangle of A is stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA, N) */ +/* > On entry, the symmetric matrix A. If UPLO = 'U', the */ +/* > leading N-by-N upper triangular part of A contains the */ +/* > upper triangular part of the matrix A. If UPLO = 'L', */ +/* > the leading N-by-N lower triangular part of A contains */ +/* > the lower triangular part of the matrix A. */ +/* > On exit, if JOBZ = 'V', then if INFO = 0, A contains the */ +/* > orthonormal eigenvectors of the matrix A. */ +/* > If JOBZ = 'N', then on exit the lower triangle (if UPLO='L') */ +/* > or the upper triangle (if UPLO='U') of A, including the */ +/* > diagonal, is destroyed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= max(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] W */ +/* > \verbatim */ +/* > W is DOUBLE PRECISION array, dimension (N) */ +/* > If INFO = 0, the eigenvalues in ascending order. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The length of the array WORK. LWORK >= max(1,3*N-1). */ +/* > For optimal efficiency, LWORK >= (NB+2)*N, */ +/* > where NB is the blocksize for DSYTRD returned by ILAENV. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i, the algorithm failed to converge; i */ +/* > off-diagonal elements of an intermediate tridiagonal */ +/* > form did not converge to zero. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup doubleSYeigen */ + +/* ===================================================================== */ +/* Subroutine */ int dsyev_(char *jobz, char *uplo, integer *n, doublereal *a, + integer *lda, doublereal *w, doublereal *work, integer *lwork, + integer *info, ftnlen jobz_len, ftnlen uplo_len) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + doublereal d__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + integer nb; + doublereal eps; + integer inde; + doublereal anrm; + integer imax; + doublereal rmin, rmax; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *); + doublereal sigma; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + integer iinfo; + logical lower, wantz; + extern doublereal dlamch_(char *, ftnlen); + integer iscale; + extern /* Subroutine */ int dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *, ftnlen); + doublereal safmin; + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + doublereal bignum; + integer indtau; + extern /* Subroutine */ int dsterf_(integer *, doublereal *, doublereal *, + integer *); + extern doublereal dlansy_(char *, char *, integer *, doublereal *, + integer *, doublereal *, ftnlen, ftnlen); + integer indwrk; + extern /* Subroutine */ int dorgtr_(char *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, integer *, + ftnlen), dsteqr_(char *, integer *, doublereal *, doublereal *, + doublereal *, integer *, doublereal *, integer *, ftnlen), + dsytrd_(char *, integer *, doublereal *, integer *, doublereal *, + doublereal *, doublereal *, doublereal *, integer *, integer *, + ftnlen); + integer llwork; + doublereal smlnum; + integer lwkopt; + logical lquery; + + +/* -- LAPACK driver routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --w; + --work; + + /* Function Body */ + wantz = lsame_(jobz, (char *)"V", (ftnlen)1, (ftnlen)1); + lower = lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1); + lquery = *lwork == -1; + + *info = 0; + if (! (wantz || lsame_(jobz, (char *)"N", (ftnlen)1, (ftnlen)1))) { + *info = -1; + } else if (! (lower || lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1))) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } + + if (*info == 0) { + nb = ilaenv_(&c__1, (char *)"DSYTRD", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, + (ftnlen)1); +/* Computing MAX */ + i__1 = 1, i__2 = (nb + 2) * *n; + lwkopt = max(i__1,i__2); + work[1] = (doublereal) lwkopt; + +/* Computing MAX */ + i__1 = 1, i__2 = *n * 3 - 1; + if (*lwork < max(i__1,i__2) && ! lquery) { + *info = -8; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DSYEV ", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + if (*n == 1) { + w[1] = a[a_dim1 + 1]; + work[1] = 2.; + if (wantz) { + a[a_dim1 + 1] = 1.; + } + return 0; + } + +/* Get machine constants. */ + + safmin = dlamch_((char *)"Safe minimum", (ftnlen)12); + eps = dlamch_((char *)"Precision", (ftnlen)9); + smlnum = safmin / eps; + bignum = 1. / smlnum; + rmin = sqrt(smlnum); + rmax = sqrt(bignum); + +/* Scale matrix to allowable range, if necessary. */ + + anrm = dlansy_((char *)"M", uplo, n, &a[a_offset], lda, &work[1], (ftnlen)1, ( + ftnlen)1); + iscale = 0; + if (anrm > 0. && anrm < rmin) { + iscale = 1; + sigma = rmin / anrm; + } else if (anrm > rmax) { + iscale = 1; + sigma = rmax / anrm; + } + if (iscale == 1) { + dlascl_(uplo, &c__0, &c__0, &c_b17, &sigma, n, n, &a[a_offset], lda, + info, (ftnlen)1); + } + +/* Call DSYTRD to reduce symmetric matrix to tridiagonal form. */ + + inde = 1; + indtau = inde + *n; + indwrk = indtau + *n; + llwork = *lwork - indwrk + 1; + dsytrd_(uplo, n, &a[a_offset], lda, &w[1], &work[inde], &work[indtau], & + work[indwrk], &llwork, &iinfo, (ftnlen)1); + +/* For eigenvalues only, call DSTERF. For eigenvectors, first call */ +/* DORGTR to generate the orthogonal matrix, then call DSTEQR. */ + + if (! wantz) { + dsterf_(n, &w[1], &work[inde], info); + } else { + dorgtr_(uplo, n, &a[a_offset], lda, &work[indtau], &work[indwrk], & + llwork, &iinfo, (ftnlen)1); + dsteqr_(jobz, n, &w[1], &work[inde], &a[a_offset], lda, &work[indtau], + info, (ftnlen)1); + } + +/* If matrix was scaled, then rescale eigenvalues appropriately. */ + + if (iscale == 1) { + if (*info == 0) { + imax = *n; + } else { + imax = *info - 1; + } + d__1 = 1. / sigma; + dscal_(&imax, &d__1, &w[1], &c__1); + } + +/* Set WORK(1) to optimal workspace size. */ + + work[1] = (doublereal) lwkopt; + + return 0; + +/* End of DSYEV */ + +} /* dsyev_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dsyevd.cpp b/lib/linalg/dsyevd.cpp new file mode 100644 index 0000000000..a0e0084884 --- /dev/null +++ b/lib/linalg/dsyevd.cpp @@ -0,0 +1,440 @@ +/* fortran/dsyevd.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c_n1 = -1; +static integer c__0 = 0; +static doublereal c_b17 = 1.; + +/* > \brief DSYEVD computes the eigenvalues and, optionally, the left and/or right eigenvectors for SY mat +rices */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DSYEVD + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DSYEVD( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, IWORK, */ +/* LIWORK, INFO ) */ + +/* .. Scalar Arguments .. */ +/* CHARACTER JOBZ, UPLO */ +/* INTEGER INFO, LDA, LIWORK, LWORK, N */ +/* .. */ +/* .. Array Arguments .. */ +/* INTEGER IWORK( * ) */ +/* DOUBLE PRECISION A( LDA, * ), W( * ), WORK( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DSYEVD computes all eigenvalues and, optionally, eigenvectors of a */ +/* > real symmetric matrix A. If eigenvectors are desired, it uses a */ +/* > divide and conquer algorithm. */ +/* > */ +/* > The divide and conquer algorithm makes very mild assumptions about */ +/* > floating point arithmetic. It will work on machines with a guard */ +/* > digit in add/subtract, or on those binary machines without guard */ +/* > digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or */ +/* > Cray-2. It could conceivably fail on hexadecimal or decimal machines */ +/* > without guard digits, but we know of none. */ +/* > */ +/* > Because of large use of BLAS of level 3, DSYEVD needs N**2 more */ +/* > workspace than DSYEVX. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOBZ */ +/* > \verbatim */ +/* > JOBZ is CHARACTER*1 */ +/* > = 'N': Compute eigenvalues only; */ +/* > = 'V': Compute eigenvalues and eigenvectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': Upper triangle of A is stored; */ +/* > = 'L': Lower triangle of A is stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA, N) */ +/* > On entry, the symmetric matrix A. If UPLO = 'U', the */ +/* > leading N-by-N upper triangular part of A contains the */ +/* > upper triangular part of the matrix A. If UPLO = 'L', */ +/* > the leading N-by-N lower triangular part of A contains */ +/* > the lower triangular part of the matrix A. */ +/* > On exit, if JOBZ = 'V', then if INFO = 0, A contains the */ +/* > orthonormal eigenvectors of the matrix A. */ +/* > If JOBZ = 'N', then on exit the lower triangle (if UPLO='L') */ +/* > or the upper triangle (if UPLO='U') of A, including the */ +/* > diagonal, is destroyed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= max(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] W */ +/* > \verbatim */ +/* > W is DOUBLE PRECISION array, dimension (N) */ +/* > If INFO = 0, the eigenvalues in ascending order. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, */ +/* > dimension (LWORK) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. */ +/* > If N <= 1, LWORK must be at least 1. */ +/* > If JOBZ = 'N' and N > 1, LWORK must be at least 2*N+1. */ +/* > If JOBZ = 'V' and N > 1, LWORK must be at least */ +/* > 1 + 6*N + 2*N**2. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal sizes of the WORK and IWORK */ +/* > arrays, returns these values as the first entries of the WORK */ +/* > and IWORK arrays, and no error message related to LWORK or */ +/* > LIWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (MAX(1,LIWORK)) */ +/* > On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LIWORK */ +/* > \verbatim */ +/* > LIWORK is INTEGER */ +/* > The dimension of the array IWORK. */ +/* > If N <= 1, LIWORK must be at least 1. */ +/* > If JOBZ = 'N' and N > 1, LIWORK must be at least 1. */ +/* > If JOBZ = 'V' and N > 1, LIWORK must be at least 3 + 5*N. */ +/* > */ +/* > If LIWORK = -1, then a workspace query is assumed; the */ +/* > routine only calculates the optimal sizes of the WORK and */ +/* > IWORK arrays, returns these values as the first entries of */ +/* > the WORK and IWORK arrays, and no error message related to */ +/* > LWORK or LIWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i and JOBZ = 'N', then the algorithm failed */ +/* > to converge; i off-diagonal elements of an intermediate */ +/* > tridiagonal form did not converge to zero; */ +/* > if INFO = i and JOBZ = 'V', then the algorithm failed */ +/* > to compute an eigenvalue while working on the submatrix */ +/* > lying in rows and columns INFO/(N+1) through */ +/* > mod(INFO,N+1). */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup doubleSYeigen */ + +/* > \par Contributors: */ +/* ================== */ +/* > */ +/* > Jeff Rutter, Computer Science Division, University of California */ +/* > at Berkeley, USA \n */ +/* > Modified by Francoise Tisseur, University of Tennessee \n */ +/* > Modified description of INFO. Sven, 16 Feb 05. \n */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int dsyevd_(char *jobz, char *uplo, integer *n, doublereal * + a, integer *lda, doublereal *w, doublereal *work, integer *lwork, + integer *iwork, integer *liwork, integer *info, ftnlen jobz_len, + ftnlen uplo_len) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + doublereal d__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + doublereal eps; + integer inde; + doublereal anrm, rmin, rmax; + integer lopt; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *); + doublereal sigma; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + integer iinfo, lwmin, liopt; + logical lower, wantz; + integer indwk2, llwrk2; + extern doublereal dlamch_(char *, ftnlen); + integer iscale; + extern /* Subroutine */ int dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *, ftnlen), dstedc_(char *, integer *, + doublereal *, doublereal *, doublereal *, integer *, doublereal *, + integer *, integer *, integer *, integer *, ftnlen), dlacpy_( + char *, integer *, integer *, doublereal *, integer *, doublereal + *, integer *, ftnlen); + doublereal safmin; + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + doublereal bignum; + integer indtau; + extern /* Subroutine */ int dsterf_(integer *, doublereal *, doublereal *, + integer *); + extern doublereal dlansy_(char *, char *, integer *, doublereal *, + integer *, doublereal *, ftnlen, ftnlen); + integer indwrk, liwmin; + extern /* Subroutine */ int dormtr_(char *, char *, char *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *, ftnlen, ftnlen, + ftnlen), dsytrd_(char *, integer *, doublereal *, integer *, + doublereal *, doublereal *, doublereal *, doublereal *, integer *, + integer *, ftnlen); + integer llwork; + doublereal smlnum; + logical lquery; + + +/* -- LAPACK driver routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ + +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --w; + --work; + --iwork; + + /* Function Body */ + wantz = lsame_(jobz, (char *)"V", (ftnlen)1, (ftnlen)1); + lower = lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1); + lquery = *lwork == -1 || *liwork == -1; + + *info = 0; + if (! (wantz || lsame_(jobz, (char *)"N", (ftnlen)1, (ftnlen)1))) { + *info = -1; + } else if (! (lower || lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1))) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } + + if (*info == 0) { + if (*n <= 1) { + liwmin = 1; + lwmin = 1; + lopt = lwmin; + liopt = liwmin; + } else { + if (wantz) { + liwmin = *n * 5 + 3; +/* Computing 2nd power */ + i__1 = *n; + lwmin = *n * 6 + 1 + (i__1 * i__1 << 1); + } else { + liwmin = 1; + lwmin = (*n << 1) + 1; + } +/* Computing MAX */ + i__1 = lwmin, i__2 = (*n << 1) + *n * ilaenv_(&c__1, (char *)"DSYTRD", + uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + lopt = max(i__1,i__2); + liopt = liwmin; + } + work[1] = (doublereal) lopt; + iwork[1] = liopt; + + if (*lwork < lwmin && ! lquery) { + *info = -8; + } else if (*liwork < liwmin && ! lquery) { + *info = -10; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DSYEVD", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + if (*n == 1) { + w[1] = a[a_dim1 + 1]; + if (wantz) { + a[a_dim1 + 1] = 1.; + } + return 0; + } + +/* Get machine constants. */ + + safmin = dlamch_((char *)"Safe minimum", (ftnlen)12); + eps = dlamch_((char *)"Precision", (ftnlen)9); + smlnum = safmin / eps; + bignum = 1. / smlnum; + rmin = sqrt(smlnum); + rmax = sqrt(bignum); + +/* Scale matrix to allowable range, if necessary. */ + + anrm = dlansy_((char *)"M", uplo, n, &a[a_offset], lda, &work[1], (ftnlen)1, ( + ftnlen)1); + iscale = 0; + if (anrm > 0. && anrm < rmin) { + iscale = 1; + sigma = rmin / anrm; + } else if (anrm > rmax) { + iscale = 1; + sigma = rmax / anrm; + } + if (iscale == 1) { + dlascl_(uplo, &c__0, &c__0, &c_b17, &sigma, n, n, &a[a_offset], lda, + info, (ftnlen)1); + } + +/* Call DSYTRD to reduce symmetric matrix to tridiagonal form. */ + + inde = 1; + indtau = inde + *n; + indwrk = indtau + *n; + llwork = *lwork - indwrk + 1; + indwk2 = indwrk + *n * *n; + llwrk2 = *lwork - indwk2 + 1; + + dsytrd_(uplo, n, &a[a_offset], lda, &w[1], &work[inde], &work[indtau], & + work[indwrk], &llwork, &iinfo, (ftnlen)1); + +/* For eigenvalues only, call DSTERF. For eigenvectors, first call */ +/* DSTEDC to generate the eigenvector matrix, WORK(INDWRK), of the */ +/* tridiagonal matrix, then call DORMTR to multiply it by the */ +/* Householder transformations stored in A. */ + + if (! wantz) { + dsterf_(n, &w[1], &work[inde], info); + } else { + dstedc_((char *)"I", n, &w[1], &work[inde], &work[indwrk], n, &work[indwk2], & + llwrk2, &iwork[1], liwork, info, (ftnlen)1); + dormtr_((char *)"L", uplo, (char *)"N", n, n, &a[a_offset], lda, &work[indtau], &work[ + indwrk], n, &work[indwk2], &llwrk2, &iinfo, (ftnlen)1, ( + ftnlen)1, (ftnlen)1); + dlacpy_((char *)"A", n, n, &work[indwrk], n, &a[a_offset], lda, (ftnlen)1); + } + +/* If matrix was scaled, then rescale eigenvalues appropriately. */ + + if (iscale == 1) { + d__1 = 1. / sigma; + dscal_(n, &d__1, &w[1], &c__1); + } + + work[1] = (doublereal) lopt; + iwork[1] = liopt; + + return 0; + +/* End of DSYEVD */ + +} /* dsyevd_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dsygs2.cpp b/lib/linalg/dsygs2.cpp new file mode 100644 index 0000000000..8b0089be28 --- /dev/null +++ b/lib/linalg/dsygs2.cpp @@ -0,0 +1,381 @@ +/* fortran/dsygs2.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static doublereal c_b6 = -1.; +static integer c__1 = 1; +static doublereal c_b27 = 1.; + +/* > \brief \b DSYGS2 reduces a symmetric definite generalized eigenproblem to standard form, using the factor +ization results obtained from spotrf (unblocked algorithm). */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DSYGS2 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DSYGS2( ITYPE, UPLO, N, A, LDA, B, LDB, INFO ) */ + +/* .. Scalar Arguments .. */ +/* CHARACTER UPLO */ +/* INTEGER INFO, ITYPE, LDA, LDB, N */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION A( LDA, * ), B( LDB, * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DSYGS2 reduces a real symmetric-definite generalized eigenproblem */ +/* > to standard form. */ +/* > */ +/* > If ITYPE = 1, the problem is A*x = lambda*B*x, */ +/* > and A is overwritten by inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T) */ +/* > */ +/* > If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or */ +/* > B*A*x = lambda*x, and A is overwritten by U*A*U**T or L**T *A*L. */ +/* > */ +/* > B must have been previously factorized as U**T *U or L*L**T by DPOTRF. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] ITYPE */ +/* > \verbatim */ +/* > ITYPE is INTEGER */ +/* > = 1: compute inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T); */ +/* > = 2 or 3: compute U*A*U**T or L**T *A*L. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > Specifies whether the upper or lower triangular part of the */ +/* > symmetric matrix A is stored, and how B has been factorized. */ +/* > = 'U': Upper triangular */ +/* > = 'L': Lower triangular */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrices A and B. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > On entry, the symmetric matrix A. If UPLO = 'U', the leading */ +/* > n by n upper triangular part of A contains the upper */ +/* > triangular part of the matrix A, and the strictly lower */ +/* > triangular part of A is not referenced. If UPLO = 'L', the */ +/* > leading n by n lower triangular part of A contains the lower */ +/* > triangular part of the matrix A, and the strictly upper */ +/* > triangular part of A is not referenced. */ +/* > */ +/* > On exit, if INFO = 0, the transformed matrix, stored in the */ +/* > same format as A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= max(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] B */ +/* > \verbatim */ +/* > B is DOUBLE PRECISION array, dimension (LDB,N) */ +/* > The triangular factor from the Cholesky factorization of B, */ +/* > as returned by DPOTRF. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= max(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit. */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup doubleSYcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int dsygs2_(integer *itype, char *uplo, integer *n, + doublereal *a, integer *lda, doublereal *b, integer *ldb, integer * + info, ftnlen uplo_len) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2; + doublereal d__1; + + /* Local variables */ + integer k; + doublereal ct, akk, bkk; + extern /* Subroutine */ int dsyr2_(char *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *, ftnlen), dscal_(integer *, doublereal *, doublereal *, + integer *); + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern /* Subroutine */ int daxpy_(integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *); + logical upper; + extern /* Subroutine */ int dtrmv_(char *, char *, char *, integer *, + doublereal *, integer *, doublereal *, integer *, ftnlen, ftnlen, + ftnlen), dtrsv_(char *, char *, char *, integer *, doublereal *, + integer *, doublereal *, integer *, ftnlen, ftnlen, ftnlen), + xerbla_(char *, integer *, ftnlen); + + +/* -- LAPACK computational routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + if (*itype < 1 || *itype > 3) { + *info = -1; + } else if (! upper && ! lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } else if (*ldb < max(1,*n)) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DSYGS2", &i__1, (ftnlen)6); + return 0; + } + + if (*itype == 1) { + if (upper) { + +/* Compute inv(U**T)*A*inv(U) */ + + i__1 = *n; + for (k = 1; k <= i__1; ++k) { + +/* Update the upper triangle of A(k:n,k:n) */ + + akk = a[k + k * a_dim1]; + bkk = b[k + k * b_dim1]; +/* Computing 2nd power */ + d__1 = bkk; + akk /= d__1 * d__1; + a[k + k * a_dim1] = akk; + if (k < *n) { + i__2 = *n - k; + d__1 = 1. / bkk; + dscal_(&i__2, &d__1, &a[k + (k + 1) * a_dim1], lda); + ct = akk * -.5; + i__2 = *n - k; + daxpy_(&i__2, &ct, &b[k + (k + 1) * b_dim1], ldb, &a[k + ( + k + 1) * a_dim1], lda); + i__2 = *n - k; + dsyr2_(uplo, &i__2, &c_b6, &a[k + (k + 1) * a_dim1], lda, + &b[k + (k + 1) * b_dim1], ldb, &a[k + 1 + (k + 1) + * a_dim1], lda, (ftnlen)1); + i__2 = *n - k; + daxpy_(&i__2, &ct, &b[k + (k + 1) * b_dim1], ldb, &a[k + ( + k + 1) * a_dim1], lda); + i__2 = *n - k; + dtrsv_(uplo, (char *)"Transpose", (char *)"Non-unit", &i__2, &b[k + 1 + ( + k + 1) * b_dim1], ldb, &a[k + (k + 1) * a_dim1], + lda, (ftnlen)1, (ftnlen)9, (ftnlen)8); + } +/* L10: */ + } + } else { + +/* Compute inv(L)*A*inv(L**T) */ + + i__1 = *n; + for (k = 1; k <= i__1; ++k) { + +/* Update the lower triangle of A(k:n,k:n) */ + + akk = a[k + k * a_dim1]; + bkk = b[k + k * b_dim1]; +/* Computing 2nd power */ + d__1 = bkk; + akk /= d__1 * d__1; + a[k + k * a_dim1] = akk; + if (k < *n) { + i__2 = *n - k; + d__1 = 1. / bkk; + dscal_(&i__2, &d__1, &a[k + 1 + k * a_dim1], &c__1); + ct = akk * -.5; + i__2 = *n - k; + daxpy_(&i__2, &ct, &b[k + 1 + k * b_dim1], &c__1, &a[k + + 1 + k * a_dim1], &c__1); + i__2 = *n - k; + dsyr2_(uplo, &i__2, &c_b6, &a[k + 1 + k * a_dim1], &c__1, + &b[k + 1 + k * b_dim1], &c__1, &a[k + 1 + (k + 1) + * a_dim1], lda, (ftnlen)1); + i__2 = *n - k; + daxpy_(&i__2, &ct, &b[k + 1 + k * b_dim1], &c__1, &a[k + + 1 + k * a_dim1], &c__1); + i__2 = *n - k; + dtrsv_(uplo, (char *)"No transpose", (char *)"Non-unit", &i__2, &b[k + 1 + + (k + 1) * b_dim1], ldb, &a[k + 1 + k * a_dim1], + &c__1, (ftnlen)1, (ftnlen)12, (ftnlen)8); + } +/* L20: */ + } + } + } else { + if (upper) { + +/* Compute U*A*U**T */ + + i__1 = *n; + for (k = 1; k <= i__1; ++k) { + +/* Update the upper triangle of A(1:k,1:k) */ + + akk = a[k + k * a_dim1]; + bkk = b[k + k * b_dim1]; + i__2 = k - 1; + dtrmv_(uplo, (char *)"No transpose", (char *)"Non-unit", &i__2, &b[b_offset], + ldb, &a[k * a_dim1 + 1], &c__1, (ftnlen)1, (ftnlen)12, + (ftnlen)8); + ct = akk * .5; + i__2 = k - 1; + daxpy_(&i__2, &ct, &b[k * b_dim1 + 1], &c__1, &a[k * a_dim1 + + 1], &c__1); + i__2 = k - 1; + dsyr2_(uplo, &i__2, &c_b27, &a[k * a_dim1 + 1], &c__1, &b[k * + b_dim1 + 1], &c__1, &a[a_offset], lda, (ftnlen)1); + i__2 = k - 1; + daxpy_(&i__2, &ct, &b[k * b_dim1 + 1], &c__1, &a[k * a_dim1 + + 1], &c__1); + i__2 = k - 1; + dscal_(&i__2, &bkk, &a[k * a_dim1 + 1], &c__1); +/* Computing 2nd power */ + d__1 = bkk; + a[k + k * a_dim1] = akk * (d__1 * d__1); +/* L30: */ + } + } else { + +/* Compute L**T *A*L */ + + i__1 = *n; + for (k = 1; k <= i__1; ++k) { + +/* Update the lower triangle of A(1:k,1:k) */ + + akk = a[k + k * a_dim1]; + bkk = b[k + k * b_dim1]; + i__2 = k - 1; + dtrmv_(uplo, (char *)"Transpose", (char *)"Non-unit", &i__2, &b[b_offset], + ldb, &a[k + a_dim1], lda, (ftnlen)1, (ftnlen)9, ( + ftnlen)8); + ct = akk * .5; + i__2 = k - 1; + daxpy_(&i__2, &ct, &b[k + b_dim1], ldb, &a[k + a_dim1], lda); + i__2 = k - 1; + dsyr2_(uplo, &i__2, &c_b27, &a[k + a_dim1], lda, &b[k + + b_dim1], ldb, &a[a_offset], lda, (ftnlen)1); + i__2 = k - 1; + daxpy_(&i__2, &ct, &b[k + b_dim1], ldb, &a[k + a_dim1], lda); + i__2 = k - 1; + dscal_(&i__2, &bkk, &a[k + a_dim1], lda); +/* Computing 2nd power */ + d__1 = bkk; + a[k + k * a_dim1] = akk * (d__1 * d__1); +/* L40: */ + } + } + } + return 0; + +/* End of DSYGS2 */ + +} /* dsygs2_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dsygst.cpp b/lib/linalg/dsygst.cpp new file mode 100644 index 0000000000..2f5189a6f1 --- /dev/null +++ b/lib/linalg/dsygst.cpp @@ -0,0 +1,436 @@ +/* fortran/dsygst.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c_n1 = -1; +static doublereal c_b14 = 1.; +static doublereal c_b16 = -.5; +static doublereal c_b19 = -1.; +static doublereal c_b52 = .5; + +/* > \brief \b DSYGST */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DSYGST + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DSYGST( ITYPE, UPLO, N, A, LDA, B, LDB, INFO ) */ + +/* .. Scalar Arguments .. */ +/* CHARACTER UPLO */ +/* INTEGER INFO, ITYPE, LDA, LDB, N */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION A( LDA, * ), B( LDB, * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DSYGST reduces a real symmetric-definite generalized eigenproblem */ +/* > to standard form. */ +/* > */ +/* > If ITYPE = 1, the problem is A*x = lambda*B*x, */ +/* > and A is overwritten by inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T) */ +/* > */ +/* > If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or */ +/* > B*A*x = lambda*x, and A is overwritten by U*A*U**T or L**T*A*L. */ +/* > */ +/* > B must have been previously factorized as U**T*U or L*L**T by DPOTRF. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] ITYPE */ +/* > \verbatim */ +/* > ITYPE is INTEGER */ +/* > = 1: compute inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T); */ +/* > = 2 or 3: compute U*A*U**T or L**T*A*L. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': Upper triangle of A is stored and B is factored as */ +/* > U**T*U; */ +/* > = 'L': Lower triangle of A is stored and B is factored as */ +/* > L*L**T. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrices A and B. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > On entry, the symmetric matrix A. If UPLO = 'U', the leading */ +/* > N-by-N upper triangular part of A contains the upper */ +/* > triangular part of the matrix A, and the strictly lower */ +/* > triangular part of A is not referenced. If UPLO = 'L', the */ +/* > leading N-by-N lower triangular part of A contains the lower */ +/* > triangular part of the matrix A, and the strictly upper */ +/* > triangular part of A is not referenced. */ +/* > */ +/* > On exit, if INFO = 0, the transformed matrix, stored in the */ +/* > same format as A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= max(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] B */ +/* > \verbatim */ +/* > B is DOUBLE PRECISION array, dimension (LDB,N) */ +/* > The triangular factor from the Cholesky factorization of B, */ +/* > as returned by DPOTRF. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= max(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup doubleSYcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int dsygst_(integer *itype, char *uplo, integer *n, + doublereal *a, integer *lda, doublereal *b, integer *ldb, integer * + info, ftnlen uplo_len) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3; + + /* Local variables */ + integer k, kb, nb; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern /* Subroutine */ int dtrmm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *, ftnlen, ftnlen, ftnlen, ftnlen), dsymm_( + char *, char *, integer *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, ftnlen, ftnlen); + logical upper; + extern /* Subroutine */ int dtrsm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *, ftnlen, ftnlen, ftnlen, ftnlen), dsygs2_( + integer *, char *, integer *, doublereal *, integer *, doublereal + *, integer *, integer *, ftnlen), dsyr2k_(char *, char *, integer + *, integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, ftnlen, ftnlen) + , xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + + +/* -- LAPACK computational routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + if (*itype < 1 || *itype > 3) { + *info = -1; + } else if (! upper && ! lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } else if (*ldb < max(1,*n)) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DSYGST", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Determine the block size for this environment. */ + + nb = ilaenv_(&c__1, (char *)"DSYGST", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + + if (nb <= 1 || nb >= *n) { + +/* Use unblocked code */ + + dsygs2_(itype, uplo, n, &a[a_offset], lda, &b[b_offset], ldb, info, ( + ftnlen)1); + } else { + +/* Use blocked code */ + + if (*itype == 1) { + if (upper) { + +/* Compute inv(U**T)*A*inv(U) */ + + i__1 = *n; + i__2 = nb; + for (k = 1; i__2 < 0 ? k >= i__1 : k <= i__1; k += i__2) { +/* Computing MIN */ + i__3 = *n - k + 1; + kb = min(i__3,nb); + +/* Update the upper triangle of A(k:n,k:n) */ + + dsygs2_(itype, uplo, &kb, &a[k + k * a_dim1], lda, &b[k + + k * b_dim1], ldb, info, (ftnlen)1); + if (k + kb <= *n) { + i__3 = *n - k - kb + 1; + dtrsm_((char *)"Left", uplo, (char *)"Transpose", (char *)"Non-unit", &kb, & + i__3, &c_b14, &b[k + k * b_dim1], ldb, &a[k + + (k + kb) * a_dim1], lda, (ftnlen)4, (ftnlen)1, + (ftnlen)9, (ftnlen)8); + i__3 = *n - k - kb + 1; + dsymm_((char *)"Left", uplo, &kb, &i__3, &c_b16, &a[k + k * + a_dim1], lda, &b[k + (k + kb) * b_dim1], ldb, + &c_b14, &a[k + (k + kb) * a_dim1], lda, ( + ftnlen)4, (ftnlen)1); + i__3 = *n - k - kb + 1; + dsyr2k_(uplo, (char *)"Transpose", &i__3, &kb, &c_b19, &a[k + + (k + kb) * a_dim1], lda, &b[k + (k + kb) * + b_dim1], ldb, &c_b14, &a[k + kb + (k + kb) * + a_dim1], lda, (ftnlen)1, (ftnlen)9); + i__3 = *n - k - kb + 1; + dsymm_((char *)"Left", uplo, &kb, &i__3, &c_b16, &a[k + k * + a_dim1], lda, &b[k + (k + kb) * b_dim1], ldb, + &c_b14, &a[k + (k + kb) * a_dim1], lda, ( + ftnlen)4, (ftnlen)1); + i__3 = *n - k - kb + 1; + dtrsm_((char *)"Right", uplo, (char *)"No transpose", (char *)"Non-unit", &kb, + &i__3, &c_b14, &b[k + kb + (k + kb) * b_dim1] + , ldb, &a[k + (k + kb) * a_dim1], lda, ( + ftnlen)5, (ftnlen)1, (ftnlen)12, (ftnlen)8); + } +/* L10: */ + } + } else { + +/* Compute inv(L)*A*inv(L**T) */ + + i__2 = *n; + i__1 = nb; + for (k = 1; i__1 < 0 ? k >= i__2 : k <= i__2; k += i__1) { +/* Computing MIN */ + i__3 = *n - k + 1; + kb = min(i__3,nb); + +/* Update the lower triangle of A(k:n,k:n) */ + + dsygs2_(itype, uplo, &kb, &a[k + k * a_dim1], lda, &b[k + + k * b_dim1], ldb, info, (ftnlen)1); + if (k + kb <= *n) { + i__3 = *n - k - kb + 1; + dtrsm_((char *)"Right", uplo, (char *)"Transpose", (char *)"Non-unit", &i__3, + &kb, &c_b14, &b[k + k * b_dim1], ldb, &a[k + + kb + k * a_dim1], lda, (ftnlen)5, (ftnlen)1, ( + ftnlen)9, (ftnlen)8); + i__3 = *n - k - kb + 1; + dsymm_((char *)"Right", uplo, &i__3, &kb, &c_b16, &a[k + k * + a_dim1], lda, &b[k + kb + k * b_dim1], ldb, & + c_b14, &a[k + kb + k * a_dim1], lda, (ftnlen) + 5, (ftnlen)1); + i__3 = *n - k - kb + 1; + dsyr2k_(uplo, (char *)"No transpose", &i__3, &kb, &c_b19, &a[ + k + kb + k * a_dim1], lda, &b[k + kb + k * + b_dim1], ldb, &c_b14, &a[k + kb + (k + kb) * + a_dim1], lda, (ftnlen)1, (ftnlen)12); + i__3 = *n - k - kb + 1; + dsymm_((char *)"Right", uplo, &i__3, &kb, &c_b16, &a[k + k * + a_dim1], lda, &b[k + kb + k * b_dim1], ldb, & + c_b14, &a[k + kb + k * a_dim1], lda, (ftnlen) + 5, (ftnlen)1); + i__3 = *n - k - kb + 1; + dtrsm_((char *)"Left", uplo, (char *)"No transpose", (char *)"Non-unit", & + i__3, &kb, &c_b14, &b[k + kb + (k + kb) * + b_dim1], ldb, &a[k + kb + k * a_dim1], lda, ( + ftnlen)4, (ftnlen)1, (ftnlen)12, (ftnlen)8); + } +/* L20: */ + } + } + } else { + if (upper) { + +/* Compute U*A*U**T */ + + i__1 = *n; + i__2 = nb; + for (k = 1; i__2 < 0 ? k >= i__1 : k <= i__1; k += i__2) { +/* Computing MIN */ + i__3 = *n - k + 1; + kb = min(i__3,nb); + +/* Update the upper triangle of A(1:k+kb-1,1:k+kb-1) */ + + i__3 = k - 1; + dtrmm_((char *)"Left", uplo, (char *)"No transpose", (char *)"Non-unit", &i__3, & + kb, &c_b14, &b[b_offset], ldb, &a[k * a_dim1 + 1], + lda, (ftnlen)4, (ftnlen)1, (ftnlen)12, (ftnlen)8) + ; + i__3 = k - 1; + dsymm_((char *)"Right", uplo, &i__3, &kb, &c_b52, &a[k + k * + a_dim1], lda, &b[k * b_dim1 + 1], ldb, &c_b14, &a[ + k * a_dim1 + 1], lda, (ftnlen)5, (ftnlen)1); + i__3 = k - 1; + dsyr2k_(uplo, (char *)"No transpose", &i__3, &kb, &c_b14, &a[k * + a_dim1 + 1], lda, &b[k * b_dim1 + 1], ldb, &c_b14, + &a[a_offset], lda, (ftnlen)1, (ftnlen)12); + i__3 = k - 1; + dsymm_((char *)"Right", uplo, &i__3, &kb, &c_b52, &a[k + k * + a_dim1], lda, &b[k * b_dim1 + 1], ldb, &c_b14, &a[ + k * a_dim1 + 1], lda, (ftnlen)5, (ftnlen)1); + i__3 = k - 1; + dtrmm_((char *)"Right", uplo, (char *)"Transpose", (char *)"Non-unit", &i__3, &kb, + &c_b14, &b[k + k * b_dim1], ldb, &a[k * a_dim1 + + 1], lda, (ftnlen)5, (ftnlen)1, (ftnlen)9, (ftnlen) + 8); + dsygs2_(itype, uplo, &kb, &a[k + k * a_dim1], lda, &b[k + + k * b_dim1], ldb, info, (ftnlen)1); +/* L30: */ + } + } else { + +/* Compute L**T*A*L */ + + i__2 = *n; + i__1 = nb; + for (k = 1; i__1 < 0 ? k >= i__2 : k <= i__2; k += i__1) { +/* Computing MIN */ + i__3 = *n - k + 1; + kb = min(i__3,nb); + +/* Update the lower triangle of A(1:k+kb-1,1:k+kb-1) */ + + i__3 = k - 1; + dtrmm_((char *)"Right", uplo, (char *)"No transpose", (char *)"Non-unit", &kb, & + i__3, &c_b14, &b[b_offset], ldb, &a[k + a_dim1], + lda, (ftnlen)5, (ftnlen)1, (ftnlen)12, (ftnlen)8); + i__3 = k - 1; + dsymm_((char *)"Left", uplo, &kb, &i__3, &c_b52, &a[k + k * + a_dim1], lda, &b[k + b_dim1], ldb, &c_b14, &a[k + + a_dim1], lda, (ftnlen)4, (ftnlen)1); + i__3 = k - 1; + dsyr2k_(uplo, (char *)"Transpose", &i__3, &kb, &c_b14, &a[k + + a_dim1], lda, &b[k + b_dim1], ldb, &c_b14, &a[ + a_offset], lda, (ftnlen)1, (ftnlen)9); + i__3 = k - 1; + dsymm_((char *)"Left", uplo, &kb, &i__3, &c_b52, &a[k + k * + a_dim1], lda, &b[k + b_dim1], ldb, &c_b14, &a[k + + a_dim1], lda, (ftnlen)4, (ftnlen)1); + i__3 = k - 1; + dtrmm_((char *)"Left", uplo, (char *)"Transpose", (char *)"Non-unit", &kb, &i__3, + &c_b14, &b[k + k * b_dim1], ldb, &a[k + a_dim1], + lda, (ftnlen)4, (ftnlen)1, (ftnlen)9, (ftnlen)8); + dsygs2_(itype, uplo, &kb, &a[k + k * a_dim1], lda, &b[k + + k * b_dim1], ldb, info, (ftnlen)1); +/* L40: */ + } + } + } + } + return 0; + +/* End of DSYGST */ + +} /* dsygst_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dsygv.cpp b/lib/linalg/dsygv.cpp new file mode 100644 index 0000000000..e7078e8006 --- /dev/null +++ b/lib/linalg/dsygv.cpp @@ -0,0 +1,384 @@ +/* fortran/dsygv.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c_n1 = -1; +static doublereal c_b16 = 1.; + +/* > \brief \b DSYGV */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DSYGV + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DSYGV( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, WORK, */ +/* LWORK, INFO ) */ + +/* .. Scalar Arguments .. */ +/* CHARACTER JOBZ, UPLO */ +/* INTEGER INFO, ITYPE, LDA, LDB, LWORK, N */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION A( LDA, * ), B( LDB, * ), W( * ), WORK( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DSYGV computes all the eigenvalues, and optionally, the eigenvectors */ +/* > of a real generalized symmetric-definite eigenproblem, of the form */ +/* > A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. */ +/* > Here A and B are assumed to be symmetric and B is also */ +/* > positive definite. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] ITYPE */ +/* > \verbatim */ +/* > ITYPE is INTEGER */ +/* > Specifies the problem type to be solved: */ +/* > = 1: A*x = (lambda)*B*x */ +/* > = 2: A*B*x = (lambda)*x */ +/* > = 3: B*A*x = (lambda)*x */ +/* > \endverbatim */ +/* > */ +/* > \param[in] JOBZ */ +/* > \verbatim */ +/* > JOBZ is CHARACTER*1 */ +/* > = 'N': Compute eigenvalues only; */ +/* > = 'V': Compute eigenvalues and eigenvectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': Upper triangles of A and B are stored; */ +/* > = 'L': Lower triangles of A and B are stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrices A and B. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA, N) */ +/* > On entry, the symmetric matrix A. If UPLO = 'U', the */ +/* > leading N-by-N upper triangular part of A contains the */ +/* > upper triangular part of the matrix A. If UPLO = 'L', */ +/* > the leading N-by-N lower triangular part of A contains */ +/* > the lower triangular part of the matrix A. */ +/* > */ +/* > On exit, if JOBZ = 'V', then if INFO = 0, A contains the */ +/* > matrix Z of eigenvectors. The eigenvectors are normalized */ +/* > as follows: */ +/* > if ITYPE = 1 or 2, Z**T*B*Z = I; */ +/* > if ITYPE = 3, Z**T*inv(B)*Z = I. */ +/* > If JOBZ = 'N', then on exit the upper triangle (if UPLO='U') */ +/* > or the lower triangle (if UPLO='L') of A, including the */ +/* > diagonal, is destroyed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= max(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is DOUBLE PRECISION array, dimension (LDB, N) */ +/* > On entry, the symmetric positive definite matrix B. */ +/* > If UPLO = 'U', the leading N-by-N upper triangular part of B */ +/* > contains the upper triangular part of the matrix B. */ +/* > If UPLO = 'L', the leading N-by-N lower triangular part of B */ +/* > contains the lower triangular part of the matrix B. */ +/* > */ +/* > On exit, if INFO <= N, the part of B containing the matrix is */ +/* > overwritten by the triangular factor U or L from the Cholesky */ +/* > factorization B = U**T*U or B = L*L**T. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= max(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] W */ +/* > \verbatim */ +/* > W is DOUBLE PRECISION array, dimension (N) */ +/* > If INFO = 0, the eigenvalues in ascending order. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The length of the array WORK. LWORK >= max(1,3*N-1). */ +/* > For optimal efficiency, LWORK >= (NB+2)*N, */ +/* > where NB is the blocksize for DSYTRD returned by ILAENV. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: DPOTRF or DSYEV returned an error code: */ +/* > <= N: if INFO = i, DSYEV failed to converge; */ +/* > i off-diagonal elements of an intermediate */ +/* > tridiagonal form did not converge to zero; */ +/* > > N: if INFO = N + i, for 1 <= i <= N, then the leading */ +/* > minor of order i of B is not positive definite. */ +/* > The factorization of B could not be completed and */ +/* > no eigenvalues or eigenvectors were computed. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup doubleSYeigen */ + +/* ===================================================================== */ +/* Subroutine */ int dsygv_(integer *itype, char *jobz, char *uplo, integer * + n, doublereal *a, integer *lda, doublereal *b, integer *ldb, + doublereal *w, doublereal *work, integer *lwork, integer *info, + ftnlen jobz_len, ftnlen uplo_len) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2; + + /* Local variables */ + integer nb, neig; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern /* Subroutine */ int dtrmm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *, ftnlen, ftnlen, ftnlen, ftnlen); + char trans[1]; + extern /* Subroutine */ int dtrsm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *, ftnlen, ftnlen, ftnlen, ftnlen); + logical upper; + extern /* Subroutine */ int dsyev_(char *, char *, integer *, doublereal * + , integer *, doublereal *, doublereal *, integer *, integer *, + ftnlen, ftnlen); + logical wantz; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int dpotrf_(char *, integer *, doublereal *, + integer *, integer *, ftnlen); + integer lwkmin; + extern /* Subroutine */ int dsygst_(integer *, char *, integer *, + doublereal *, integer *, doublereal *, integer *, integer *, + ftnlen); + integer lwkopt; + logical lquery; + + +/* -- LAPACK driver routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + --w; + --work; + + /* Function Body */ + wantz = lsame_(jobz, (char *)"V", (ftnlen)1, (ftnlen)1); + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + lquery = *lwork == -1; + + *info = 0; + if (*itype < 1 || *itype > 3) { + *info = -1; + } else if (! (wantz || lsame_(jobz, (char *)"N", (ftnlen)1, (ftnlen)1))) { + *info = -2; + } else if (! (upper || lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1))) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*lda < max(1,*n)) { + *info = -6; + } else if (*ldb < max(1,*n)) { + *info = -8; + } + + if (*info == 0) { +/* Computing MAX */ + i__1 = 1, i__2 = *n * 3 - 1; + lwkmin = max(i__1,i__2); + nb = ilaenv_(&c__1, (char *)"DSYTRD", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, + (ftnlen)1); +/* Computing MAX */ + i__1 = lwkmin, i__2 = (nb + 2) * *n; + lwkopt = max(i__1,i__2); + work[1] = (doublereal) lwkopt; + + if (*lwork < lwkmin && ! lquery) { + *info = -11; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DSYGV ", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Form a Cholesky factorization of B. */ + + dpotrf_(uplo, n, &b[b_offset], ldb, info, (ftnlen)1); + if (*info != 0) { + *info = *n + *info; + return 0; + } + +/* Transform problem to standard eigenvalue problem and solve. */ + + dsygst_(itype, uplo, n, &a[a_offset], lda, &b[b_offset], ldb, info, ( + ftnlen)1); + dsyev_(jobz, uplo, n, &a[a_offset], lda, &w[1], &work[1], lwork, info, ( + ftnlen)1, (ftnlen)1); + + if (wantz) { + +/* Backtransform eigenvectors to the original problem. */ + + neig = *n; + if (*info > 0) { + neig = *info - 1; + } + if (*itype == 1 || *itype == 2) { + +/* For A*x=(lambda)*B*x and A*B*x=(lambda)*x; */ +/* backtransform eigenvectors: x = inv(L)**T*y or inv(U)*y */ + + if (upper) { + *(unsigned char *)trans = 'N'; + } else { + *(unsigned char *)trans = 'T'; + } + + dtrsm_((char *)"Left", uplo, trans, (char *)"Non-unit", n, &neig, &c_b16, &b[ + b_offset], ldb, &a[a_offset], lda, (ftnlen)4, (ftnlen)1, ( + ftnlen)1, (ftnlen)8); + + } else if (*itype == 3) { + +/* For B*A*x=(lambda)*x; */ +/* backtransform eigenvectors: x = L*y or U**T*y */ + + if (upper) { + *(unsigned char *)trans = 'T'; + } else { + *(unsigned char *)trans = 'N'; + } + + dtrmm_((char *)"Left", uplo, trans, (char *)"Non-unit", n, &neig, &c_b16, &b[ + b_offset], ldb, &a[a_offset], lda, (ftnlen)4, (ftnlen)1, ( + ftnlen)1, (ftnlen)8); + } + } + + work[1] = (doublereal) lwkopt; + return 0; + +/* End of DSYGV */ + +} /* dsygv_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dsygvd.cpp b/lib/linalg/dsygvd.cpp new file mode 100644 index 0000000000..80513dfdcc --- /dev/null +++ b/lib/linalg/dsygvd.cpp @@ -0,0 +1,445 @@ +/* fortran/dsygvd.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static doublereal c_b11 = 1.; + +/* > \brief \b DSYGVD */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DSYGVD + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DSYGVD( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, WORK, */ +/* LWORK, IWORK, LIWORK, INFO ) */ + +/* .. Scalar Arguments .. */ +/* CHARACTER JOBZ, UPLO */ +/* INTEGER INFO, ITYPE, LDA, LDB, LIWORK, LWORK, N */ +/* .. */ +/* .. Array Arguments .. */ +/* INTEGER IWORK( * ) */ +/* DOUBLE PRECISION A( LDA, * ), B( LDB, * ), W( * ), WORK( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DSYGVD computes all the eigenvalues, and optionally, the eigenvectors */ +/* > of a real generalized symmetric-definite eigenproblem, of the form */ +/* > A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. Here A and */ +/* > B are assumed to be symmetric and B is also positive definite. */ +/* > If eigenvectors are desired, it uses a divide and conquer algorithm. */ +/* > */ +/* > The divide and conquer algorithm makes very mild assumptions about */ +/* > floating point arithmetic. It will work on machines with a guard */ +/* > digit in add/subtract, or on those binary machines without guard */ +/* > digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or */ +/* > Cray-2. It could conceivably fail on hexadecimal or decimal machines */ +/* > without guard digits, but we know of none. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] ITYPE */ +/* > \verbatim */ +/* > ITYPE is INTEGER */ +/* > Specifies the problem type to be solved: */ +/* > = 1: A*x = (lambda)*B*x */ +/* > = 2: A*B*x = (lambda)*x */ +/* > = 3: B*A*x = (lambda)*x */ +/* > \endverbatim */ +/* > */ +/* > \param[in] JOBZ */ +/* > \verbatim */ +/* > JOBZ is CHARACTER*1 */ +/* > = 'N': Compute eigenvalues only; */ +/* > = 'V': Compute eigenvalues and eigenvectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': Upper triangles of A and B are stored; */ +/* > = 'L': Lower triangles of A and B are stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrices A and B. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA, N) */ +/* > On entry, the symmetric matrix A. If UPLO = 'U', the */ +/* > leading N-by-N upper triangular part of A contains the */ +/* > upper triangular part of the matrix A. If UPLO = 'L', */ +/* > the leading N-by-N lower triangular part of A contains */ +/* > the lower triangular part of the matrix A. */ +/* > */ +/* > On exit, if JOBZ = 'V', then if INFO = 0, A contains the */ +/* > matrix Z of eigenvectors. The eigenvectors are normalized */ +/* > as follows: */ +/* > if ITYPE = 1 or 2, Z**T*B*Z = I; */ +/* > if ITYPE = 3, Z**T*inv(B)*Z = I. */ +/* > If JOBZ = 'N', then on exit the upper triangle (if UPLO='U') */ +/* > or the lower triangle (if UPLO='L') of A, including the */ +/* > diagonal, is destroyed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= max(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is DOUBLE PRECISION array, dimension (LDB, N) */ +/* > On entry, the symmetric matrix B. If UPLO = 'U', the */ +/* > leading N-by-N upper triangular part of B contains the */ +/* > upper triangular part of the matrix B. If UPLO = 'L', */ +/* > the leading N-by-N lower triangular part of B contains */ +/* > the lower triangular part of the matrix B. */ +/* > */ +/* > On exit, if INFO <= N, the part of B containing the matrix is */ +/* > overwritten by the triangular factor U or L from the Cholesky */ +/* > factorization B = U**T*U or B = L*L**T. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= max(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] W */ +/* > \verbatim */ +/* > W is DOUBLE PRECISION array, dimension (N) */ +/* > If INFO = 0, the eigenvalues in ascending order. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. */ +/* > If N <= 1, LWORK >= 1. */ +/* > If JOBZ = 'N' and N > 1, LWORK >= 2*N+1. */ +/* > If JOBZ = 'V' and N > 1, LWORK >= 1 + 6*N + 2*N**2. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal sizes of the WORK and IWORK */ +/* > arrays, returns these values as the first entries of the WORK */ +/* > and IWORK arrays, and no error message related to LWORK or */ +/* > LIWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (MAX(1,LIWORK)) */ +/* > On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LIWORK */ +/* > \verbatim */ +/* > LIWORK is INTEGER */ +/* > The dimension of the array IWORK. */ +/* > If N <= 1, LIWORK >= 1. */ +/* > If JOBZ = 'N' and N > 1, LIWORK >= 1. */ +/* > If JOBZ = 'V' and N > 1, LIWORK >= 3 + 5*N. */ +/* > */ +/* > If LIWORK = -1, then a workspace query is assumed; the */ +/* > routine only calculates the optimal sizes of the WORK and */ +/* > IWORK arrays, returns these values as the first entries of */ +/* > the WORK and IWORK arrays, and no error message related to */ +/* > LWORK or LIWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: DPOTRF or DSYEVD returned an error code: */ +/* > <= N: if INFO = i and JOBZ = 'N', then the algorithm */ +/* > failed to converge; i off-diagonal elements of an */ +/* > intermediate tridiagonal form did not converge to */ +/* > zero; */ +/* > if INFO = i and JOBZ = 'V', then the algorithm */ +/* > failed to compute an eigenvalue while working on */ +/* > the submatrix lying in rows and columns INFO/(N+1) */ +/* > through mod(INFO,N+1); */ +/* > > N: if INFO = N + i, for 1 <= i <= N, then the leading */ +/* > minor of order i of B is not positive definite. */ +/* > The factorization of B could not be completed and */ +/* > no eigenvalues or eigenvectors were computed. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup doubleSYeigen */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > Modified so that no backsubstitution is performed if DSYEVD fails to */ +/* > converge (NEIG in old code could be greater than N causing out of */ +/* > bounds reference to A - reported by Ralf Meyer). Also corrected the */ +/* > description of INFO and the test on ITYPE. Sven, 16 Feb 05. */ +/* > \endverbatim */ + +/* > \par Contributors: */ +/* ================== */ +/* > */ +/* > Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int dsygvd_(integer *itype, char *jobz, char *uplo, integer * + n, doublereal *a, integer *lda, doublereal *b, integer *ldb, + doublereal *w, doublereal *work, integer *lwork, integer *iwork, + integer *liwork, integer *info, ftnlen jobz_len, ftnlen uplo_len) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1; + doublereal d__1, d__2; + + /* Local variables */ + integer lopt; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern /* Subroutine */ int dtrmm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *, ftnlen, ftnlen, ftnlen, ftnlen); + integer lwmin; + char trans[1]; + integer liopt; + extern /* Subroutine */ int dtrsm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *, ftnlen, ftnlen, ftnlen, ftnlen); + logical upper, wantz; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen), dpotrf_( + char *, integer *, doublereal *, integer *, integer *, ftnlen); + integer liwmin; + extern /* Subroutine */ int dsyevd_(char *, char *, integer *, doublereal + *, integer *, doublereal *, doublereal *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen), dsygst_(integer *, char *, + integer *, doublereal *, integer *, doublereal *, integer *, + integer *, ftnlen); + logical lquery; + + +/* -- LAPACK driver routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + --w; + --work; + --iwork; + + /* Function Body */ + wantz = lsame_(jobz, (char *)"V", (ftnlen)1, (ftnlen)1); + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + lquery = *lwork == -1 || *liwork == -1; + + *info = 0; + if (*n <= 1) { + liwmin = 1; + lwmin = 1; + } else if (wantz) { + liwmin = *n * 5 + 3; +/* Computing 2nd power */ + i__1 = *n; + lwmin = *n * 6 + 1 + (i__1 * i__1 << 1); + } else { + liwmin = 1; + lwmin = (*n << 1) + 1; + } + lopt = lwmin; + liopt = liwmin; + if (*itype < 1 || *itype > 3) { + *info = -1; + } else if (! (wantz || lsame_(jobz, (char *)"N", (ftnlen)1, (ftnlen)1))) { + *info = -2; + } else if (! (upper || lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1))) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*lda < max(1,*n)) { + *info = -6; + } else if (*ldb < max(1,*n)) { + *info = -8; + } + + if (*info == 0) { + work[1] = (doublereal) lopt; + iwork[1] = liopt; + + if (*lwork < lwmin && ! lquery) { + *info = -11; + } else if (*liwork < liwmin && ! lquery) { + *info = -13; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DSYGVD", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Form a Cholesky factorization of B. */ + + dpotrf_(uplo, n, &b[b_offset], ldb, info, (ftnlen)1); + if (*info != 0) { + *info = *n + *info; + return 0; + } + +/* Transform problem to standard eigenvalue problem and solve. */ + + dsygst_(itype, uplo, n, &a[a_offset], lda, &b[b_offset], ldb, info, ( + ftnlen)1); + dsyevd_(jobz, uplo, n, &a[a_offset], lda, &w[1], &work[1], lwork, &iwork[ + 1], liwork, info, (ftnlen)1, (ftnlen)1); +/* Computing MAX */ + d__1 = (doublereal) lopt; + lopt = (integer) max(d__1,work[1]); +/* Computing MAX */ + d__1 = (doublereal) liopt, d__2 = (doublereal) iwork[1]; + liopt = (integer) max(d__1,d__2); + + if (wantz && *info == 0) { + +/* Backtransform eigenvectors to the original problem. */ + + if (*itype == 1 || *itype == 2) { + +/* For A*x=(lambda)*B*x and A*B*x=(lambda)*x; */ +/* backtransform eigenvectors: x = inv(L)**T*y or inv(U)*y */ + + if (upper) { + *(unsigned char *)trans = 'N'; + } else { + *(unsigned char *)trans = 'T'; + } + + dtrsm_((char *)"Left", uplo, trans, (char *)"Non-unit", n, n, &c_b11, &b[b_offset] + , ldb, &a[a_offset], lda, (ftnlen)4, (ftnlen)1, (ftnlen)1, + (ftnlen)8); + + } else if (*itype == 3) { + +/* For B*A*x=(lambda)*x; */ +/* backtransform eigenvectors: x = L*y or U**T*y */ + + if (upper) { + *(unsigned char *)trans = 'T'; + } else { + *(unsigned char *)trans = 'N'; + } + + dtrmm_((char *)"Left", uplo, trans, (char *)"Non-unit", n, n, &c_b11, &b[b_offset] + , ldb, &a[a_offset], lda, (ftnlen)4, (ftnlen)1, (ftnlen)1, + (ftnlen)8); + } + } + + work[1] = (doublereal) lopt; + iwork[1] = liopt; + + return 0; + +/* End of DSYGVD */ + +} /* dsygvd_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dsymm.cpp b/lib/linalg/dsymm.cpp new file mode 100644 index 0000000000..223551570d --- /dev/null +++ b/lib/linalg/dsymm.cpp @@ -0,0 +1,440 @@ +/* fortran/dsymm.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* > \brief \b DSYMM */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DSYMM(SIDE,UPLO,M,N,ALPHA,A,LDA,B,LDB,BETA,C,LDC) */ + +/* .. Scalar Arguments .. */ +/* DOUBLE PRECISION ALPHA,BETA */ +/* INTEGER LDA,LDB,LDC,M,N */ +/* CHARACTER SIDE,UPLO */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION A(LDA,*),B(LDB,*),C(LDC,*) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DSYMM performs one of the matrix-matrix operations */ +/* > */ +/* > C := alpha*A*B + beta*C, */ +/* > */ +/* > or */ +/* > */ +/* > C := alpha*B*A + beta*C, */ +/* > */ +/* > where alpha and beta are scalars, A is a symmetric matrix and B and */ +/* > C are m by n matrices. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] SIDE */ +/* > \verbatim */ +/* > SIDE is CHARACTER*1 */ +/* > On entry, SIDE specifies whether the symmetric matrix A */ +/* > appears on the left or right in the operation as follows: */ +/* > */ +/* > SIDE = 'L' or 'l' C := alpha*A*B + beta*C, */ +/* > */ +/* > SIDE = 'R' or 'r' C := alpha*B*A + beta*C, */ +/* > \endverbatim */ +/* > */ +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > On entry, UPLO specifies whether the upper or lower */ +/* > triangular part of the symmetric matrix A is to be */ +/* > referenced as follows: */ +/* > */ +/* > UPLO = 'U' or 'u' Only the upper triangular part of the */ +/* > symmetric matrix is to be referenced. */ +/* > */ +/* > UPLO = 'L' or 'l' Only the lower triangular part of the */ +/* > symmetric matrix is to be referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > On entry, M specifies the number of rows of the matrix C. */ +/* > M must be at least zero. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > On entry, N specifies the number of columns of the matrix C. */ +/* > N must be at least zero. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] ALPHA */ +/* > \verbatim */ +/* > ALPHA is DOUBLE PRECISION. */ +/* > On entry, ALPHA specifies the scalar alpha. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension ( LDA, ka ), where ka is */ +/* > m when SIDE = 'L' or 'l' and is n otherwise. */ +/* > Before entry with SIDE = 'L' or 'l', the m by m part of */ +/* > the array A must contain the symmetric matrix, such that */ +/* > when UPLO = 'U' or 'u', the leading m by m upper triangular */ +/* > part of the array A must contain the upper triangular part */ +/* > of the symmetric matrix and the strictly lower triangular */ +/* > part of A is not referenced, and when UPLO = 'L' or 'l', */ +/* > the leading m by m lower triangular part of the array A */ +/* > must contain the lower triangular part of the symmetric */ +/* > matrix and the strictly upper triangular part of A is not */ +/* > referenced. */ +/* > Before entry with SIDE = 'R' or 'r', the n by n part of */ +/* > the array A must contain the symmetric matrix, such that */ +/* > when UPLO = 'U' or 'u', the leading n by n upper triangular */ +/* > part of the array A must contain the upper triangular part */ +/* > of the symmetric matrix and the strictly lower triangular */ +/* > part of A is not referenced, and when UPLO = 'L' or 'l', */ +/* > the leading n by n lower triangular part of the array A */ +/* > must contain the lower triangular part of the symmetric */ +/* > matrix and the strictly upper triangular part of A is not */ +/* > referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > On entry, LDA specifies the first dimension of A as declared */ +/* > in the calling (sub) program. When SIDE = 'L' or 'l' then */ +/* > LDA must be at least max( 1, m ), otherwise LDA must be at */ +/* > least max( 1, n ). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] B */ +/* > \verbatim */ +/* > B is DOUBLE PRECISION array, dimension ( LDB, N ) */ +/* > Before entry, the leading m by n part of the array B must */ +/* > contain the matrix B. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > On entry, LDB specifies the first dimension of B as declared */ +/* > in the calling (sub) program. LDB must be at least */ +/* > max( 1, m ). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] BETA */ +/* > \verbatim */ +/* > BETA is DOUBLE PRECISION. */ +/* > On entry, BETA specifies the scalar beta. When BETA is */ +/* > supplied as zero then C need not be set on input. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] C */ +/* > \verbatim */ +/* > C is DOUBLE PRECISION array, dimension ( LDC, N ) */ +/* > Before entry, the leading m by n part of the array C must */ +/* > contain the matrix C, except when beta is zero, in which */ +/* > case C need not be set on entry. */ +/* > On exit, the array C is overwritten by the m by n updated */ +/* > matrix. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDC */ +/* > \verbatim */ +/* > LDC is INTEGER */ +/* > On entry, LDC specifies the first dimension of C as declared */ +/* > in the calling (sub) program. LDC must be at least */ +/* > max( 1, m ). */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup double_blas_level3 */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > Level 3 Blas routine. */ +/* > */ +/* > -- Written on 8-February-1989. */ +/* > Jack Dongarra, Argonne National Laboratory. */ +/* > Iain Duff, AERE Harwell. */ +/* > Jeremy Du Croz, Numerical Algorithms Group Ltd. */ +/* > Sven Hammarling, Numerical Algorithms Group Ltd. */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int dsymm_(char *side, char *uplo, integer *m, integer *n, + doublereal *alpha, doublereal *a, integer *lda, doublereal *b, + integer *ldb, doublereal *beta, doublereal *c__, integer *ldc, ftnlen + side_len, ftnlen uplo_len) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, + i__3; + + /* Local variables */ + integer i__, j, k, info; + doublereal temp1, temp2; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + integer nrowa; + logical upper; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + + +/* -- Reference BLAS level3 routine -- */ +/* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Parameters .. */ +/* .. */ + +/* Set NROWA as the number of rows of A. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + + /* Function Body */ + if (lsame_(side, (char *)"L", (ftnlen)1, (ftnlen)1)) { + nrowa = *m; + } else { + nrowa = *n; + } + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + +/* Test the input parameters. */ + + info = 0; + if (! lsame_(side, (char *)"L", (ftnlen)1, (ftnlen)1) && ! lsame_(side, (char *)"R", ( + ftnlen)1, (ftnlen)1)) { + info = 1; + } else if (! upper && ! lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + info = 2; + } else if (*m < 0) { + info = 3; + } else if (*n < 0) { + info = 4; + } else if (*lda < max(1,nrowa)) { + info = 7; + } else if (*ldb < max(1,*m)) { + info = 9; + } else if (*ldc < max(1,*m)) { + info = 12; + } + if (info != 0) { + xerbla_((char *)"DSYMM ", &info, (ftnlen)6); + return 0; + } + +/* Quick return if possible. */ + + if (*m == 0 || *n == 0 || *alpha == 0. && *beta == 1.) { + return 0; + } + +/* And when alpha.eq.zero. */ + + if (*alpha == 0.) { + if (*beta == 0.) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.; +/* L10: */ + } +/* L20: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L30: */ + } +/* L40: */ + } + } + return 0; + } + +/* Start the operations. */ + + if (lsame_(side, (char *)"L", (ftnlen)1, (ftnlen)1)) { + +/* Form C := alpha*A*B + beta*C. */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp1 = *alpha * b[i__ + j * b_dim1]; + temp2 = 0.; + i__3 = i__ - 1; + for (k = 1; k <= i__3; ++k) { + c__[k + j * c_dim1] += temp1 * a[k + i__ * a_dim1]; + temp2 += b[k + j * b_dim1] * a[k + i__ * a_dim1]; +/* L50: */ + } + if (*beta == 0.) { + c__[i__ + j * c_dim1] = temp1 * a[i__ + i__ * a_dim1] + + *alpha * temp2; + } else { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1] + + temp1 * a[i__ + i__ * a_dim1] + *alpha * + temp2; + } +/* L60: */ + } +/* L70: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + for (i__ = *m; i__ >= 1; --i__) { + temp1 = *alpha * b[i__ + j * b_dim1]; + temp2 = 0.; + i__2 = *m; + for (k = i__ + 1; k <= i__2; ++k) { + c__[k + j * c_dim1] += temp1 * a[k + i__ * a_dim1]; + temp2 += b[k + j * b_dim1] * a[k + i__ * a_dim1]; +/* L80: */ + } + if (*beta == 0.) { + c__[i__ + j * c_dim1] = temp1 * a[i__ + i__ * a_dim1] + + *alpha * temp2; + } else { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1] + + temp1 * a[i__ + i__ * a_dim1] + *alpha * + temp2; + } +/* L90: */ + } +/* L100: */ + } + } + } else { + +/* Form C := alpha*B*A + beta*C. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp1 = *alpha * a[j + j * a_dim1]; + if (*beta == 0.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = temp1 * b[i__ + j * b_dim1]; +/* L110: */ + } + } else { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1] + + temp1 * b[i__ + j * b_dim1]; +/* L120: */ + } + } + i__2 = j - 1; + for (k = 1; k <= i__2; ++k) { + if (upper) { + temp1 = *alpha * a[k + j * a_dim1]; + } else { + temp1 = *alpha * a[j + k * a_dim1]; + } + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + c__[i__ + j * c_dim1] += temp1 * b[i__ + k * b_dim1]; +/* L130: */ + } +/* L140: */ + } + i__2 = *n; + for (k = j + 1; k <= i__2; ++k) { + if (upper) { + temp1 = *alpha * a[j + k * a_dim1]; + } else { + temp1 = *alpha * a[k + j * a_dim1]; + } + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + c__[i__ + j * c_dim1] += temp1 * b[i__ + k * b_dim1]; +/* L150: */ + } +/* L160: */ + } +/* L170: */ + } + } + + return 0; + +/* End of DSYMM */ + +} /* dsymm_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dsymv.cpp b/lib/linalg/dsymv.cpp new file mode 100644 index 0000000000..4dc77ad743 --- /dev/null +++ b/lib/linalg/dsymv.cpp @@ -0,0 +1,388 @@ +/* fortran/dsymv.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* > \brief \b DSYMV */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DSYMV(UPLO,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY) */ + +/* .. Scalar Arguments .. */ +/* DOUBLE PRECISION ALPHA,BETA */ +/* INTEGER INCX,INCY,LDA,N */ +/* CHARACTER UPLO */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION A(LDA,*),X(*),Y(*) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DSYMV performs the matrix-vector operation */ +/* > */ +/* > y := alpha*A*x + beta*y, */ +/* > */ +/* > where alpha and beta are scalars, x and y are n element vectors and */ +/* > A is an n by n symmetric matrix. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > On entry, UPLO specifies whether the upper or lower */ +/* > triangular part of the array A is to be referenced as */ +/* > follows: */ +/* > */ +/* > UPLO = 'U' or 'u' Only the upper triangular part of A */ +/* > is to be referenced. */ +/* > */ +/* > UPLO = 'L' or 'l' Only the lower triangular part of A */ +/* > is to be referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > On entry, N specifies the order of the matrix A. */ +/* > N must be at least zero. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] ALPHA */ +/* > \verbatim */ +/* > ALPHA is DOUBLE PRECISION. */ +/* > On entry, ALPHA specifies the scalar alpha. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension ( LDA, N ) */ +/* > Before entry with UPLO = 'U' or 'u', the leading n by n */ +/* > upper triangular part of the array A must contain the upper */ +/* > triangular part of the symmetric matrix and the strictly */ +/* > lower triangular part of A is not referenced. */ +/* > Before entry with UPLO = 'L' or 'l', the leading n by n */ +/* > lower triangular part of the array A must contain the lower */ +/* > triangular part of the symmetric matrix and the strictly */ +/* > upper triangular part of A is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > On entry, LDA specifies the first dimension of A as declared */ +/* > in the calling (sub) program. LDA must be at least */ +/* > max( 1, n ). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] X */ +/* > \verbatim */ +/* > X is DOUBLE PRECISION array, dimension at least */ +/* > ( 1 + ( n - 1 )*abs( INCX ) ). */ +/* > Before entry, the incremented array X must contain the n */ +/* > element vector x. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] INCX */ +/* > \verbatim */ +/* > INCX is INTEGER */ +/* > On entry, INCX specifies the increment for the elements of */ +/* > X. INCX must not be zero. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] BETA */ +/* > \verbatim */ +/* > BETA is DOUBLE PRECISION. */ +/* > On entry, BETA specifies the scalar beta. When BETA is */ +/* > supplied as zero then Y need not be set on input. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] Y */ +/* > \verbatim */ +/* > Y is DOUBLE PRECISION array, dimension at least */ +/* > ( 1 + ( n - 1 )*abs( INCY ) ). */ +/* > Before entry, the incremented array Y must contain the n */ +/* > element vector y. On exit, Y is overwritten by the updated */ +/* > vector y. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] INCY */ +/* > \verbatim */ +/* > INCY is INTEGER */ +/* > On entry, INCY specifies the increment for the elements of */ +/* > Y. INCY must not be zero. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup double_blas_level2 */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > Level 2 Blas routine. */ +/* > The vector and matrix arguments are not referenced when N = 0, or M = 0 */ +/* > */ +/* > -- Written on 22-October-1986. */ +/* > Jack Dongarra, Argonne National Lab. */ +/* > Jeremy Du Croz, Nag Central Office. */ +/* > Sven Hammarling, Nag Central Office. */ +/* > Richard Hanson, Sandia National Labs. */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int dsymv_(char *uplo, integer *n, doublereal *alpha, + doublereal *a, integer *lda, doublereal *x, integer *incx, doublereal + *beta, doublereal *y, integer *incy, ftnlen uplo_len) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + integer i__, j, ix, iy, jx, jy, kx, ky, info; + doublereal temp1, temp2; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + + +/* -- Reference BLAS level2 routine -- */ +/* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --x; + --y; + + /* Function Body */ + info = 0; + if (! lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, (char *)"L", ( + ftnlen)1, (ftnlen)1)) { + info = 1; + } else if (*n < 0) { + info = 2; + } else if (*lda < max(1,*n)) { + info = 5; + } else if (*incx == 0) { + info = 7; + } else if (*incy == 0) { + info = 10; + } + if (info != 0) { + xerbla_((char *)"DSYMV ", &info, (ftnlen)6); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0 || *alpha == 0. && *beta == 1.) { + return 0; + } + +/* Set up the start points in X and Y. */ + + if (*incx > 0) { + kx = 1; + } else { + kx = 1 - (*n - 1) * *incx; + } + if (*incy > 0) { + ky = 1; + } else { + ky = 1 - (*n - 1) * *incy; + } + +/* Start the operations. In this version the elements of A are */ +/* accessed sequentially with one pass through the triangular part */ +/* of A. */ + +/* First form y := beta*y. */ + + if (*beta != 1.) { + if (*incy == 1) { + if (*beta == 0.) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + y[i__] = 0.; +/* L10: */ + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + y[i__] = *beta * y[i__]; +/* L20: */ + } + } + } else { + iy = ky; + if (*beta == 0.) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + y[iy] = 0.; + iy += *incy; +/* L30: */ + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + y[iy] = *beta * y[iy]; + iy += *incy; +/* L40: */ + } + } + } + } + if (*alpha == 0.) { + return 0; + } + if (lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1)) { + +/* Form y when A is stored in upper triangle. */ + + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp1 = *alpha * x[j]; + temp2 = 0.; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + y[i__] += temp1 * a[i__ + j * a_dim1]; + temp2 += a[i__ + j * a_dim1] * x[i__]; +/* L50: */ + } + y[j] = y[j] + temp1 * a[j + j * a_dim1] + *alpha * temp2; +/* L60: */ + } + } else { + jx = kx; + jy = ky; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp1 = *alpha * x[jx]; + temp2 = 0.; + ix = kx; + iy = ky; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + y[iy] += temp1 * a[i__ + j * a_dim1]; + temp2 += a[i__ + j * a_dim1] * x[ix]; + ix += *incx; + iy += *incy; +/* L70: */ + } + y[jy] = y[jy] + temp1 * a[j + j * a_dim1] + *alpha * temp2; + jx += *incx; + jy += *incy; +/* L80: */ + } + } + } else { + +/* Form y when A is stored in lower triangle. */ + + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp1 = *alpha * x[j]; + temp2 = 0.; + y[j] += temp1 * a[j + j * a_dim1]; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + y[i__] += temp1 * a[i__ + j * a_dim1]; + temp2 += a[i__ + j * a_dim1] * x[i__]; +/* L90: */ + } + y[j] += *alpha * temp2; +/* L100: */ + } + } else { + jx = kx; + jy = ky; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp1 = *alpha * x[jx]; + temp2 = 0.; + y[jy] += temp1 * a[j + j * a_dim1]; + ix = jx; + iy = jy; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + ix += *incx; + iy += *incy; + y[iy] += temp1 * a[i__ + j * a_dim1]; + temp2 += a[i__ + j * a_dim1] * x[ix]; +/* L110: */ + } + y[jy] += *alpha * temp2; + jx += *incx; + jy += *incy; +/* L120: */ + } + } + } + + return 0; + +/* End of DSYMV */ + +} /* dsymv_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dsyr2.cpp b/lib/linalg/dsyr2.cpp new file mode 100644 index 0000000000..f017821e1d --- /dev/null +++ b/lib/linalg/dsyr2.cpp @@ -0,0 +1,347 @@ +/* fortran/dsyr2.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* > \brief \b DSYR2 */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DSYR2(UPLO,N,ALPHA,X,INCX,Y,INCY,A,LDA) */ + +/* .. Scalar Arguments .. */ +/* DOUBLE PRECISION ALPHA */ +/* INTEGER INCX,INCY,LDA,N */ +/* CHARACTER UPLO */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION A(LDA,*),X(*),Y(*) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DSYR2 performs the symmetric rank 2 operation */ +/* > */ +/* > A := alpha*x*y**T + alpha*y*x**T + A, */ +/* > */ +/* > where alpha is a scalar, x and y are n element vectors and A is an n */ +/* > by n symmetric matrix. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > On entry, UPLO specifies whether the upper or lower */ +/* > triangular part of the array A is to be referenced as */ +/* > follows: */ +/* > */ +/* > UPLO = 'U' or 'u' Only the upper triangular part of A */ +/* > is to be referenced. */ +/* > */ +/* > UPLO = 'L' or 'l' Only the lower triangular part of A */ +/* > is to be referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > On entry, N specifies the order of the matrix A. */ +/* > N must be at least zero. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] ALPHA */ +/* > \verbatim */ +/* > ALPHA is DOUBLE PRECISION. */ +/* > On entry, ALPHA specifies the scalar alpha. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] X */ +/* > \verbatim */ +/* > X is DOUBLE PRECISION array, dimension at least */ +/* > ( 1 + ( n - 1 )*abs( INCX ) ). */ +/* > Before entry, the incremented array X must contain the n */ +/* > element vector x. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] INCX */ +/* > \verbatim */ +/* > INCX is INTEGER */ +/* > On entry, INCX specifies the increment for the elements of */ +/* > X. INCX must not be zero. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] Y */ +/* > \verbatim */ +/* > Y is DOUBLE PRECISION array, dimension at least */ +/* > ( 1 + ( n - 1 )*abs( INCY ) ). */ +/* > Before entry, the incremented array Y must contain the n */ +/* > element vector y. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] INCY */ +/* > \verbatim */ +/* > INCY is INTEGER */ +/* > On entry, INCY specifies the increment for the elements of */ +/* > Y. INCY must not be zero. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension ( LDA, N ) */ +/* > Before entry with UPLO = 'U' or 'u', the leading n by n */ +/* > upper triangular part of the array A must contain the upper */ +/* > triangular part of the symmetric matrix and the strictly */ +/* > lower triangular part of A is not referenced. On exit, the */ +/* > upper triangular part of the array A is overwritten by the */ +/* > upper triangular part of the updated matrix. */ +/* > Before entry with UPLO = 'L' or 'l', the leading n by n */ +/* > lower triangular part of the array A must contain the lower */ +/* > triangular part of the symmetric matrix and the strictly */ +/* > upper triangular part of A is not referenced. On exit, the */ +/* > lower triangular part of the array A is overwritten by the */ +/* > lower triangular part of the updated matrix. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > On entry, LDA specifies the first dimension of A as declared */ +/* > in the calling (sub) program. LDA must be at least */ +/* > max( 1, n ). */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup double_blas_level2 */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > Level 2 Blas routine. */ +/* > */ +/* > -- Written on 22-October-1986. */ +/* > Jack Dongarra, Argonne National Lab. */ +/* > Jeremy Du Croz, Nag Central Office. */ +/* > Sven Hammarling, Nag Central Office. */ +/* > Richard Hanson, Sandia National Labs. */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int dsyr2_(char *uplo, integer *n, doublereal *alpha, + doublereal *x, integer *incx, doublereal *y, integer *incy, + doublereal *a, integer *lda, ftnlen uplo_len) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + integer i__, j, ix, iy, jx, jy, kx, ky, info; + doublereal temp1, temp2; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + + +/* -- Reference BLAS level2 routine -- */ +/* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + --x; + --y; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + info = 0; + if (! lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, (char *)"L", ( + ftnlen)1, (ftnlen)1)) { + info = 1; + } else if (*n < 0) { + info = 2; + } else if (*incx == 0) { + info = 5; + } else if (*incy == 0) { + info = 7; + } else if (*lda < max(1,*n)) { + info = 9; + } + if (info != 0) { + xerbla_((char *)"DSYR2 ", &info, (ftnlen)6); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0 || *alpha == 0.) { + return 0; + } + +/* Set up the start points in X and Y if the increments are not both */ +/* unity. */ + + if (*incx != 1 || *incy != 1) { + if (*incx > 0) { + kx = 1; + } else { + kx = 1 - (*n - 1) * *incx; + } + if (*incy > 0) { + ky = 1; + } else { + ky = 1 - (*n - 1) * *incy; + } + jx = kx; + jy = ky; + } + +/* Start the operations. In this version the elements of A are */ +/* accessed sequentially with one pass through the triangular part */ +/* of A. */ + + if (lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1)) { + +/* Form A when A is stored in the upper triangle. */ + + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[j] != 0. || y[j] != 0.) { + temp1 = *alpha * y[j]; + temp2 = *alpha * x[j]; + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = a[i__ + j * a_dim1] + x[i__] * + temp1 + y[i__] * temp2; +/* L10: */ + } + } +/* L20: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[jx] != 0. || y[jy] != 0.) { + temp1 = *alpha * y[jy]; + temp2 = *alpha * x[jx]; + ix = kx; + iy = ky; + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = a[i__ + j * a_dim1] + x[ix] * + temp1 + y[iy] * temp2; + ix += *incx; + iy += *incy; +/* L30: */ + } + } + jx += *incx; + jy += *incy; +/* L40: */ + } + } + } else { + +/* Form A when A is stored in the lower triangle. */ + + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[j] != 0. || y[j] != 0.) { + temp1 = *alpha * y[j]; + temp2 = *alpha * x[j]; + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = a[i__ + j * a_dim1] + x[i__] * + temp1 + y[i__] * temp2; +/* L50: */ + } + } +/* L60: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[jx] != 0. || y[jy] != 0.) { + temp1 = *alpha * y[jy]; + temp2 = *alpha * x[jx]; + ix = jx; + iy = jy; + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = a[i__ + j * a_dim1] + x[ix] * + temp1 + y[iy] * temp2; + ix += *incx; + iy += *incy; +/* L70: */ + } + } + jx += *incx; + jy += *incy; +/* L80: */ + } + } + } + + return 0; + +/* End of DSYR2 */ + +} /* dsyr2_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dsyr2k.cpp b/lib/linalg/dsyr2k.cpp new file mode 100644 index 0000000000..33d32b3440 --- /dev/null +++ b/lib/linalg/dsyr2k.cpp @@ -0,0 +1,485 @@ +/* fortran/dsyr2k.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* > \brief \b DSYR2K */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DSYR2K(UPLO,TRANS,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC) */ + +/* .. Scalar Arguments .. */ +/* DOUBLE PRECISION ALPHA,BETA */ +/* INTEGER K,LDA,LDB,LDC,N */ +/* CHARACTER TRANS,UPLO */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION A(LDA,*),B(LDB,*),C(LDC,*) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DSYR2K performs one of the symmetric rank 2k operations */ +/* > */ +/* > C := alpha*A*B**T + alpha*B*A**T + beta*C, */ +/* > */ +/* > or */ +/* > */ +/* > C := alpha*A**T*B + alpha*B**T*A + beta*C, */ +/* > */ +/* > where alpha and beta are scalars, C is an n by n symmetric matrix */ +/* > and A and B are n by k matrices in the first case and k by n */ +/* > matrices in the second case. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > On entry, UPLO specifies whether the upper or lower */ +/* > triangular part of the array C is to be referenced as */ +/* > follows: */ +/* > */ +/* > UPLO = 'U' or 'u' Only the upper triangular part of C */ +/* > is to be referenced. */ +/* > */ +/* > UPLO = 'L' or 'l' Only the lower triangular part of C */ +/* > is to be referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TRANS */ +/* > \verbatim */ +/* > TRANS is CHARACTER*1 */ +/* > On entry, TRANS specifies the operation to be performed as */ +/* > follows: */ +/* > */ +/* > TRANS = 'N' or 'n' C := alpha*A*B**T + alpha*B*A**T + */ +/* > beta*C. */ +/* > */ +/* > TRANS = 'T' or 't' C := alpha*A**T*B + alpha*B**T*A + */ +/* > beta*C. */ +/* > */ +/* > TRANS = 'C' or 'c' C := alpha*A**T*B + alpha*B**T*A + */ +/* > beta*C. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > On entry, N specifies the order of the matrix C. N must be */ +/* > at least zero. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] K */ +/* > \verbatim */ +/* > K is INTEGER */ +/* > On entry with TRANS = 'N' or 'n', K specifies the number */ +/* > of columns of the matrices A and B, and on entry with */ +/* > TRANS = 'T' or 't' or 'C' or 'c', K specifies the number */ +/* > of rows of the matrices A and B. K must be at least zero. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] ALPHA */ +/* > \verbatim */ +/* > ALPHA is DOUBLE PRECISION. */ +/* > On entry, ALPHA specifies the scalar alpha. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension ( LDA, ka ), where ka is */ +/* > k when TRANS = 'N' or 'n', and is n otherwise. */ +/* > Before entry with TRANS = 'N' or 'n', the leading n by k */ +/* > part of the array A must contain the matrix A, otherwise */ +/* > the leading k by n part of the array A must contain the */ +/* > matrix A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > On entry, LDA specifies the first dimension of A as declared */ +/* > in the calling (sub) program. When TRANS = 'N' or 'n' */ +/* > then LDA must be at least max( 1, n ), otherwise LDA must */ +/* > be at least max( 1, k ). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] B */ +/* > \verbatim */ +/* > B is DOUBLE PRECISION array, dimension ( LDB, kb ), where kb is */ +/* > k when TRANS = 'N' or 'n', and is n otherwise. */ +/* > Before entry with TRANS = 'N' or 'n', the leading n by k */ +/* > part of the array B must contain the matrix B, otherwise */ +/* > the leading k by n part of the array B must contain the */ +/* > matrix B. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > On entry, LDB specifies the first dimension of B as declared */ +/* > in the calling (sub) program. When TRANS = 'N' or 'n' */ +/* > then LDB must be at least max( 1, n ), otherwise LDB must */ +/* > be at least max( 1, k ). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] BETA */ +/* > \verbatim */ +/* > BETA is DOUBLE PRECISION. */ +/* > On entry, BETA specifies the scalar beta. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] C */ +/* > \verbatim */ +/* > C is DOUBLE PRECISION array, dimension ( LDC, N ) */ +/* > Before entry with UPLO = 'U' or 'u', the leading n by n */ +/* > upper triangular part of the array C must contain the upper */ +/* > triangular part of the symmetric matrix and the strictly */ +/* > lower triangular part of C is not referenced. On exit, the */ +/* > upper triangular part of the array C is overwritten by the */ +/* > upper triangular part of the updated matrix. */ +/* > Before entry with UPLO = 'L' or 'l', the leading n by n */ +/* > lower triangular part of the array C must contain the lower */ +/* > triangular part of the symmetric matrix and the strictly */ +/* > upper triangular part of C is not referenced. On exit, the */ +/* > lower triangular part of the array C is overwritten by the */ +/* > lower triangular part of the updated matrix. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDC */ +/* > \verbatim */ +/* > LDC is INTEGER */ +/* > On entry, LDC specifies the first dimension of C as declared */ +/* > in the calling (sub) program. LDC must be at least */ +/* > max( 1, n ). */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup double_blas_level3 */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > Level 3 Blas routine. */ +/* > */ +/* > */ +/* > -- Written on 8-February-1989. */ +/* > Jack Dongarra, Argonne National Laboratory. */ +/* > Iain Duff, AERE Harwell. */ +/* > Jeremy Du Croz, Numerical Algorithms Group Ltd. */ +/* > Sven Hammarling, Numerical Algorithms Group Ltd. */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int dsyr2k_(char *uplo, char *trans, integer *n, integer *k, + doublereal *alpha, doublereal *a, integer *lda, doublereal *b, + integer *ldb, doublereal *beta, doublereal *c__, integer *ldc, ftnlen + uplo_len, ftnlen trans_len) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, + i__3; + + /* Local variables */ + integer i__, j, l, info; + doublereal temp1, temp2; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + integer nrowa; + logical upper; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + + +/* -- Reference BLAS level3 routine -- */ +/* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Parameters .. */ +/* .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + + /* Function Body */ + if (lsame_(trans, (char *)"N", (ftnlen)1, (ftnlen)1)) { + nrowa = *n; + } else { + nrowa = *k; + } + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + + info = 0; + if (! upper && ! lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + info = 1; + } else if (! lsame_(trans, (char *)"N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, + (char *)"T", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, (char *)"C", (ftnlen)1, ( + ftnlen)1)) { + info = 2; + } else if (*n < 0) { + info = 3; + } else if (*k < 0) { + info = 4; + } else if (*lda < max(1,nrowa)) { + info = 7; + } else if (*ldb < max(1,nrowa)) { + info = 9; + } else if (*ldc < max(1,*n)) { + info = 12; + } + if (info != 0) { + xerbla_((char *)"DSYR2K", &info, (ftnlen)6); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0 || (*alpha == 0. || *k == 0) && *beta == 1.) { + return 0; + } + +/* And when alpha.eq.zero. */ + + if (*alpha == 0.) { + if (upper) { + if (*beta == 0.) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.; +/* L10: */ + } +/* L20: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L30: */ + } +/* L40: */ + } + } + } else { + if (*beta == 0.) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.; +/* L50: */ + } +/* L60: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L70: */ + } +/* L80: */ + } + } + } + return 0; + } + +/* Start the operations. */ + + if (lsame_(trans, (char *)"N", (ftnlen)1, (ftnlen)1)) { + +/* Form C := alpha*A*B**T + alpha*B*A**T + C. */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*beta == 0.) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.; +/* L90: */ + } + } else if (*beta != 1.) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L100: */ + } + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + if (a[j + l * a_dim1] != 0. || b[j + l * b_dim1] != 0.) { + temp1 = *alpha * b[j + l * b_dim1]; + temp2 = *alpha * a[j + l * a_dim1]; + i__3 = j; + for (i__ = 1; i__ <= i__3; ++i__) { + c__[i__ + j * c_dim1] = c__[i__ + j * c_dim1] + a[ + i__ + l * a_dim1] * temp1 + b[i__ + l * + b_dim1] * temp2; +/* L110: */ + } + } +/* L120: */ + } +/* L130: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*beta == 0.) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.; +/* L140: */ + } + } else if (*beta != 1.) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L150: */ + } + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + if (a[j + l * a_dim1] != 0. || b[j + l * b_dim1] != 0.) { + temp1 = *alpha * b[j + l * b_dim1]; + temp2 = *alpha * a[j + l * a_dim1]; + i__3 = *n; + for (i__ = j; i__ <= i__3; ++i__) { + c__[i__ + j * c_dim1] = c__[i__ + j * c_dim1] + a[ + i__ + l * a_dim1] * temp1 + b[i__ + l * + b_dim1] * temp2; +/* L160: */ + } + } +/* L170: */ + } +/* L180: */ + } + } + } else { + +/* Form C := alpha*A**T*B + alpha*B**T*A + C. */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + temp1 = 0.; + temp2 = 0.; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + temp1 += a[l + i__ * a_dim1] * b[l + j * b_dim1]; + temp2 += b[l + i__ * b_dim1] * a[l + j * a_dim1]; +/* L190: */ + } + if (*beta == 0.) { + c__[i__ + j * c_dim1] = *alpha * temp1 + *alpha * + temp2; + } else { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1] + + *alpha * temp1 + *alpha * temp2; + } +/* L200: */ + } +/* L210: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + temp1 = 0.; + temp2 = 0.; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + temp1 += a[l + i__ * a_dim1] * b[l + j * b_dim1]; + temp2 += b[l + i__ * b_dim1] * a[l + j * a_dim1]; +/* L220: */ + } + if (*beta == 0.) { + c__[i__ + j * c_dim1] = *alpha * temp1 + *alpha * + temp2; + } else { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1] + + *alpha * temp1 + *alpha * temp2; + } +/* L230: */ + } +/* L240: */ + } + } + } + + return 0; + +/* End of DSYR2K */ + +} /* dsyr2k_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dsyrk.cpp b/lib/linalg/dsyrk.cpp new file mode 100644 index 0000000000..28dc601121 --- /dev/null +++ b/lib/linalg/dsyrk.cpp @@ -0,0 +1,445 @@ +/* fortran/dsyrk.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* > \brief \b DSYRK */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DSYRK(UPLO,TRANS,N,K,ALPHA,A,LDA,BETA,C,LDC) */ + +/* .. Scalar Arguments .. */ +/* DOUBLE PRECISION ALPHA,BETA */ +/* INTEGER K,LDA,LDC,N */ +/* CHARACTER TRANS,UPLO */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION A(LDA,*),C(LDC,*) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DSYRK performs one of the symmetric rank k operations */ +/* > */ +/* > C := alpha*A*A**T + beta*C, */ +/* > */ +/* > or */ +/* > */ +/* > C := alpha*A**T*A + beta*C, */ +/* > */ +/* > where alpha and beta are scalars, C is an n by n symmetric matrix */ +/* > and A is an n by k matrix in the first case and a k by n matrix */ +/* > in the second case. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > On entry, UPLO specifies whether the upper or lower */ +/* > triangular part of the array C is to be referenced as */ +/* > follows: */ +/* > */ +/* > UPLO = 'U' or 'u' Only the upper triangular part of C */ +/* > is to be referenced. */ +/* > */ +/* > UPLO = 'L' or 'l' Only the lower triangular part of C */ +/* > is to be referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TRANS */ +/* > \verbatim */ +/* > TRANS is CHARACTER*1 */ +/* > On entry, TRANS specifies the operation to be performed as */ +/* > follows: */ +/* > */ +/* > TRANS = 'N' or 'n' C := alpha*A*A**T + beta*C. */ +/* > */ +/* > TRANS = 'T' or 't' C := alpha*A**T*A + beta*C. */ +/* > */ +/* > TRANS = 'C' or 'c' C := alpha*A**T*A + beta*C. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > On entry, N specifies the order of the matrix C. N must be */ +/* > at least zero. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] K */ +/* > \verbatim */ +/* > K is INTEGER */ +/* > On entry with TRANS = 'N' or 'n', K specifies the number */ +/* > of columns of the matrix A, and on entry with */ +/* > TRANS = 'T' or 't' or 'C' or 'c', K specifies the number */ +/* > of rows of the matrix A. K must be at least zero. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] ALPHA */ +/* > \verbatim */ +/* > ALPHA is DOUBLE PRECISION. */ +/* > On entry, ALPHA specifies the scalar alpha. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension ( LDA, ka ), where ka is */ +/* > k when TRANS = 'N' or 'n', and is n otherwise. */ +/* > Before entry with TRANS = 'N' or 'n', the leading n by k */ +/* > part of the array A must contain the matrix A, otherwise */ +/* > the leading k by n part of the array A must contain the */ +/* > matrix A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > On entry, LDA specifies the first dimension of A as declared */ +/* > in the calling (sub) program. When TRANS = 'N' or 'n' */ +/* > then LDA must be at least max( 1, n ), otherwise LDA must */ +/* > be at least max( 1, k ). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] BETA */ +/* > \verbatim */ +/* > BETA is DOUBLE PRECISION. */ +/* > On entry, BETA specifies the scalar beta. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] C */ +/* > \verbatim */ +/* > C is DOUBLE PRECISION array, dimension ( LDC, N ) */ +/* > Before entry with UPLO = 'U' or 'u', the leading n by n */ +/* > upper triangular part of the array C must contain the upper */ +/* > triangular part of the symmetric matrix and the strictly */ +/* > lower triangular part of C is not referenced. On exit, the */ +/* > upper triangular part of the array C is overwritten by the */ +/* > upper triangular part of the updated matrix. */ +/* > Before entry with UPLO = 'L' or 'l', the leading n by n */ +/* > lower triangular part of the array C must contain the lower */ +/* > triangular part of the symmetric matrix and the strictly */ +/* > upper triangular part of C is not referenced. On exit, the */ +/* > lower triangular part of the array C is overwritten by the */ +/* > lower triangular part of the updated matrix. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDC */ +/* > \verbatim */ +/* > LDC is INTEGER */ +/* > On entry, LDC specifies the first dimension of C as declared */ +/* > in the calling (sub) program. LDC must be at least */ +/* > max( 1, n ). */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup double_blas_level3 */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > Level 3 Blas routine. */ +/* > */ +/* > -- Written on 8-February-1989. */ +/* > Jack Dongarra, Argonne National Laboratory. */ +/* > Iain Duff, AERE Harwell. */ +/* > Jeremy Du Croz, Numerical Algorithms Group Ltd. */ +/* > Sven Hammarling, Numerical Algorithms Group Ltd. */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int dsyrk_(char *uplo, char *trans, integer *n, integer *k, + doublereal *alpha, doublereal *a, integer *lda, doublereal *beta, + doublereal *c__, integer *ldc, ftnlen uplo_len, ftnlen trans_len) +{ + /* System generated locals */ + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3; + + /* Local variables */ + integer i__, j, l, info; + doublereal temp; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + integer nrowa; + logical upper; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + + +/* -- Reference BLAS level3 routine -- */ +/* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Parameters .. */ +/* .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + + /* Function Body */ + if (lsame_(trans, (char *)"N", (ftnlen)1, (ftnlen)1)) { + nrowa = *n; + } else { + nrowa = *k; + } + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + + info = 0; + if (! upper && ! lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + info = 1; + } else if (! lsame_(trans, (char *)"N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, + (char *)"T", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, (char *)"C", (ftnlen)1, ( + ftnlen)1)) { + info = 2; + } else if (*n < 0) { + info = 3; + } else if (*k < 0) { + info = 4; + } else if (*lda < max(1,nrowa)) { + info = 7; + } else if (*ldc < max(1,*n)) { + info = 10; + } + if (info != 0) { + xerbla_((char *)"DSYRK ", &info, (ftnlen)6); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0 || (*alpha == 0. || *k == 0) && *beta == 1.) { + return 0; + } + +/* And when alpha.eq.zero. */ + + if (*alpha == 0.) { + if (upper) { + if (*beta == 0.) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.; +/* L10: */ + } +/* L20: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L30: */ + } +/* L40: */ + } + } + } else { + if (*beta == 0.) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.; +/* L50: */ + } +/* L60: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L70: */ + } +/* L80: */ + } + } + } + return 0; + } + +/* Start the operations. */ + + if (lsame_(trans, (char *)"N", (ftnlen)1, (ftnlen)1)) { + +/* Form C := alpha*A*A**T + beta*C. */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*beta == 0.) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.; +/* L90: */ + } + } else if (*beta != 1.) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L100: */ + } + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + if (a[j + l * a_dim1] != 0.) { + temp = *alpha * a[j + l * a_dim1]; + i__3 = j; + for (i__ = 1; i__ <= i__3; ++i__) { + c__[i__ + j * c_dim1] += temp * a[i__ + l * + a_dim1]; +/* L110: */ + } + } +/* L120: */ + } +/* L130: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*beta == 0.) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.; +/* L140: */ + } + } else if (*beta != 1.) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L150: */ + } + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + if (a[j + l * a_dim1] != 0.) { + temp = *alpha * a[j + l * a_dim1]; + i__3 = *n; + for (i__ = j; i__ <= i__3; ++i__) { + c__[i__ + j * c_dim1] += temp * a[i__ + l * + a_dim1]; +/* L160: */ + } + } +/* L170: */ + } +/* L180: */ + } + } + } else { + +/* Form C := alpha*A**T*A + beta*C. */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = 0.; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + temp += a[l + i__ * a_dim1] * a[l + j * a_dim1]; +/* L190: */ + } + if (*beta == 0.) { + c__[i__ + j * c_dim1] = *alpha * temp; + } else { + c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[ + i__ + j * c_dim1]; + } +/* L200: */ + } +/* L210: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + temp = 0.; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + temp += a[l + i__ * a_dim1] * a[l + j * a_dim1]; +/* L220: */ + } + if (*beta == 0.) { + c__[i__ + j * c_dim1] = *alpha * temp; + } else { + c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[ + i__ + j * c_dim1]; + } +/* L230: */ + } +/* L240: */ + } + } + } + + return 0; + +/* End of DSYRK */ + +} /* dsyrk_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dsytd2.cpp b/lib/linalg/dsytd2.cpp new file mode 100644 index 0000000000..4035fd0d58 --- /dev/null +++ b/lib/linalg/dsytd2.cpp @@ -0,0 +1,389 @@ +/* fortran/dsytd2.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static doublereal c_b8 = 0.; +static doublereal c_b14 = -1.; + +/* > \brief \b DSYTD2 reduces a symmetric matrix to real symmetric tridiagonal form by an orthogonal similarit +y transformation (unblocked algorithm). */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DSYTD2 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DSYTD2( UPLO, N, A, LDA, D, E, TAU, INFO ) */ + +/* .. Scalar Arguments .. */ +/* CHARACTER UPLO */ +/* INTEGER INFO, LDA, N */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION A( LDA, * ), D( * ), E( * ), TAU( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DSYTD2 reduces a real symmetric matrix A to symmetric tridiagonal */ +/* > form T by an orthogonal similarity transformation: Q**T * A * Q = T. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > Specifies whether the upper or lower triangular part of the */ +/* > symmetric matrix A is stored: */ +/* > = 'U': Upper triangular */ +/* > = 'L': Lower triangular */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > On entry, the symmetric matrix A. If UPLO = 'U', the leading */ +/* > n-by-n upper triangular part of A contains the upper */ +/* > triangular part of the matrix A, and the strictly lower */ +/* > triangular part of A is not referenced. If UPLO = 'L', the */ +/* > leading n-by-n lower triangular part of A contains the lower */ +/* > triangular part of the matrix A, and the strictly upper */ +/* > triangular part of A is not referenced. */ +/* > On exit, if UPLO = 'U', the diagonal and first superdiagonal */ +/* > of A are overwritten by the corresponding elements of the */ +/* > tridiagonal matrix T, and the elements above the first */ +/* > superdiagonal, with the array TAU, represent the orthogonal */ +/* > matrix Q as a product of elementary reflectors; if UPLO */ +/* > = 'L', the diagonal and first subdiagonal of A are over- */ +/* > written by the corresponding elements of the tridiagonal */ +/* > matrix T, and the elements below the first subdiagonal, with */ +/* > the array TAU, represent the orthogonal matrix Q as a product */ +/* > of elementary reflectors. See Further Details. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= max(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] D */ +/* > \verbatim */ +/* > D is DOUBLE PRECISION array, dimension (N) */ +/* > The diagonal elements of the tridiagonal matrix T: */ +/* > D(i) = A(i,i). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] E */ +/* > \verbatim */ +/* > E is DOUBLE PRECISION array, dimension (N-1) */ +/* > The off-diagonal elements of the tridiagonal matrix T: */ +/* > E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] TAU */ +/* > \verbatim */ +/* > TAU is DOUBLE PRECISION array, dimension (N-1) */ +/* > The scalar factors of the elementary reflectors (see Further */ +/* > Details). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup doubleSYcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > If UPLO = 'U', the matrix Q is represented as a product of elementary */ +/* > reflectors */ +/* > */ +/* > Q = H(n-1) . . . H(2) H(1). */ +/* > */ +/* > Each H(i) has the form */ +/* > */ +/* > H(i) = I - tau * v * v**T */ +/* > */ +/* > where tau is a real scalar, and v is a real vector with */ +/* > v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in */ +/* > A(1:i-1,i+1), and tau in TAU(i). */ +/* > */ +/* > If UPLO = 'L', the matrix Q is represented as a product of elementary */ +/* > reflectors */ +/* > */ +/* > Q = H(1) H(2) . . . H(n-1). */ +/* > */ +/* > Each H(i) has the form */ +/* > */ +/* > H(i) = I - tau * v * v**T */ +/* > */ +/* > where tau is a real scalar, and v is a real vector with */ +/* > v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i), */ +/* > and tau in TAU(i). */ +/* > */ +/* > The contents of A on exit are illustrated by the following examples */ +/* > with n = 5: */ +/* > */ +/* > if UPLO = 'U': if UPLO = 'L': */ +/* > */ +/* > ( d e v2 v3 v4 ) ( d ) */ +/* > ( d e v3 v4 ) ( e d ) */ +/* > ( d e v4 ) ( v1 e d ) */ +/* > ( d e ) ( v1 v2 e d ) */ +/* > ( d ) ( v1 v2 v3 e d ) */ +/* > */ +/* > where d and e denote diagonal and off-diagonal elements of T, and vi */ +/* > denotes an element of the vector defining H(i). */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int dsytd2_(char *uplo, integer *n, doublereal *a, integer * + lda, doublereal *d__, doublereal *e, doublereal *tau, integer *info, + ftnlen uplo_len) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + integer i__; + extern doublereal ddot_(integer *, doublereal *, integer *, doublereal *, + integer *); + doublereal taui; + extern /* Subroutine */ int dsyr2_(char *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *, ftnlen); + doublereal alpha; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern /* Subroutine */ int daxpy_(integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *); + logical upper; + extern /* Subroutine */ int dsymv_(char *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, ftnlen), dlarfg_(integer *, doublereal *, + doublereal *, integer *, doublereal *), xerbla_(char *, integer * + , ftnlen); + + +/* -- LAPACK computational routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --d__; + --e; + --tau; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + if (! upper && ! lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DSYTD2", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + if (*n <= 0) { + return 0; + } + + if (upper) { + +/* Reduce the upper triangle of A */ + + for (i__ = *n - 1; i__ >= 1; --i__) { + +/* Generate elementary reflector H(i) = I - tau * v * v**T */ +/* to annihilate A(1:i-1,i+1) */ + + dlarfg_(&i__, &a[i__ + (i__ + 1) * a_dim1], &a[(i__ + 1) * a_dim1 + + 1], &c__1, &taui); + e[i__] = a[i__ + (i__ + 1) * a_dim1]; + + if (taui != 0.) { + +/* Apply H(i) from both sides to A(1:i,1:i) */ + + a[i__ + (i__ + 1) * a_dim1] = 1.; + +/* Compute x := tau * A * v storing x in TAU(1:i) */ + + dsymv_(uplo, &i__, &taui, &a[a_offset], lda, &a[(i__ + 1) * + a_dim1 + 1], &c__1, &c_b8, &tau[1], &c__1, (ftnlen)1); + +/* Compute w := x - 1/2 * tau * (x**T * v) * v */ + + alpha = taui * -.5 * ddot_(&i__, &tau[1], &c__1, &a[(i__ + 1) + * a_dim1 + 1], &c__1); + daxpy_(&i__, &alpha, &a[(i__ + 1) * a_dim1 + 1], &c__1, &tau[ + 1], &c__1); + +/* Apply the transformation as a rank-2 update: */ +/* A := A - v * w**T - w * v**T */ + + dsyr2_(uplo, &i__, &c_b14, &a[(i__ + 1) * a_dim1 + 1], &c__1, + &tau[1], &c__1, &a[a_offset], lda, (ftnlen)1); + + a[i__ + (i__ + 1) * a_dim1] = e[i__]; + } + d__[i__ + 1] = a[i__ + 1 + (i__ + 1) * a_dim1]; + tau[i__] = taui; +/* L10: */ + } + d__[1] = a[a_dim1 + 1]; + } else { + +/* Reduce the lower triangle of A */ + + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Generate elementary reflector H(i) = I - tau * v * v**T */ +/* to annihilate A(i+2:n,i) */ + + i__2 = *n - i__; +/* Computing MIN */ + i__3 = i__ + 2; + dlarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3,*n) + i__ * + a_dim1], &c__1, &taui); + e[i__] = a[i__ + 1 + i__ * a_dim1]; + + if (taui != 0.) { + +/* Apply H(i) from both sides to A(i+1:n,i+1:n) */ + + a[i__ + 1 + i__ * a_dim1] = 1.; + +/* Compute x := tau * A * v storing y in TAU(i:n-1) */ + + i__2 = *n - i__; + dsymv_(uplo, &i__2, &taui, &a[i__ + 1 + (i__ + 1) * a_dim1], + lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b8, &tau[ + i__], &c__1, (ftnlen)1); + +/* Compute w := x - 1/2 * tau * (x**T * v) * v */ + + i__2 = *n - i__; + alpha = taui * -.5 * ddot_(&i__2, &tau[i__], &c__1, &a[i__ + + 1 + i__ * a_dim1], &c__1); + i__2 = *n - i__; + daxpy_(&i__2, &alpha, &a[i__ + 1 + i__ * a_dim1], &c__1, &tau[ + i__], &c__1); + +/* Apply the transformation as a rank-2 update: */ +/* A := A - v * w**T - w * v**T */ + + i__2 = *n - i__; + dsyr2_(uplo, &i__2, &c_b14, &a[i__ + 1 + i__ * a_dim1], &c__1, + &tau[i__], &c__1, &a[i__ + 1 + (i__ + 1) * a_dim1], + lda, (ftnlen)1); + + a[i__ + 1 + i__ * a_dim1] = e[i__]; + } + d__[i__] = a[i__ + i__ * a_dim1]; + tau[i__] = taui; +/* L20: */ + } + d__[*n] = a[*n + *n * a_dim1]; + } + + return 0; + +/* End of DSYTD2 */ + +} /* dsytd2_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dsytrd.cpp b/lib/linalg/dsytrd.cpp new file mode 100644 index 0000000000..190dd8b1ac --- /dev/null +++ b/lib/linalg/dsytrd.cpp @@ -0,0 +1,454 @@ +/* fortran/dsytrd.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c_n1 = -1; +static integer c__3 = 3; +static integer c__2 = 2; +static doublereal c_b22 = -1.; +static doublereal c_b23 = 1.; + +/* > \brief \b DSYTRD */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DSYTRD + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DSYTRD( UPLO, N, A, LDA, D, E, TAU, WORK, LWORK, INFO ) */ + +/* .. Scalar Arguments .. */ +/* CHARACTER UPLO */ +/* INTEGER INFO, LDA, LWORK, N */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION A( LDA, * ), D( * ), E( * ), TAU( * ), */ +/* $ WORK( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DSYTRD reduces a real symmetric matrix A to real symmetric */ +/* > tridiagonal form T by an orthogonal similarity transformation: */ +/* > Q**T * A * Q = T. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': Upper triangle of A is stored; */ +/* > = 'L': Lower triangle of A is stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > On entry, the symmetric matrix A. If UPLO = 'U', the leading */ +/* > N-by-N upper triangular part of A contains the upper */ +/* > triangular part of the matrix A, and the strictly lower */ +/* > triangular part of A is not referenced. If UPLO = 'L', the */ +/* > leading N-by-N lower triangular part of A contains the lower */ +/* > triangular part of the matrix A, and the strictly upper */ +/* > triangular part of A is not referenced. */ +/* > On exit, if UPLO = 'U', the diagonal and first superdiagonal */ +/* > of A are overwritten by the corresponding elements of the */ +/* > tridiagonal matrix T, and the elements above the first */ +/* > superdiagonal, with the array TAU, represent the orthogonal */ +/* > matrix Q as a product of elementary reflectors; if UPLO */ +/* > = 'L', the diagonal and first subdiagonal of A are over- */ +/* > written by the corresponding elements of the tridiagonal */ +/* > matrix T, and the elements below the first subdiagonal, with */ +/* > the array TAU, represent the orthogonal matrix Q as a product */ +/* > of elementary reflectors. See Further Details. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= max(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] D */ +/* > \verbatim */ +/* > D is DOUBLE PRECISION array, dimension (N) */ +/* > The diagonal elements of the tridiagonal matrix T: */ +/* > D(i) = A(i,i). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] E */ +/* > \verbatim */ +/* > E is DOUBLE PRECISION array, dimension (N-1) */ +/* > The off-diagonal elements of the tridiagonal matrix T: */ +/* > E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] TAU */ +/* > \verbatim */ +/* > TAU is DOUBLE PRECISION array, dimension (N-1) */ +/* > The scalar factors of the elementary reflectors (see Further */ +/* > Details). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. LWORK >= 1. */ +/* > For optimum performance LWORK >= N*NB, where NB is the */ +/* > optimal blocksize. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup doubleSYcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > If UPLO = 'U', the matrix Q is represented as a product of elementary */ +/* > reflectors */ +/* > */ +/* > Q = H(n-1) . . . H(2) H(1). */ +/* > */ +/* > Each H(i) has the form */ +/* > */ +/* > H(i) = I - tau * v * v**T */ +/* > */ +/* > where tau is a real scalar, and v is a real vector with */ +/* > v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in */ +/* > A(1:i-1,i+1), and tau in TAU(i). */ +/* > */ +/* > If UPLO = 'L', the matrix Q is represented as a product of elementary */ +/* > reflectors */ +/* > */ +/* > Q = H(1) H(2) . . . H(n-1). */ +/* > */ +/* > Each H(i) has the form */ +/* > */ +/* > H(i) = I - tau * v * v**T */ +/* > */ +/* > where tau is a real scalar, and v is a real vector with */ +/* > v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i), */ +/* > and tau in TAU(i). */ +/* > */ +/* > The contents of A on exit are illustrated by the following examples */ +/* > with n = 5: */ +/* > */ +/* > if UPLO = 'U': if UPLO = 'L': */ +/* > */ +/* > ( d e v2 v3 v4 ) ( d ) */ +/* > ( d e v3 v4 ) ( e d ) */ +/* > ( d e v4 ) ( v1 e d ) */ +/* > ( d e ) ( v1 v2 e d ) */ +/* > ( d ) ( v1 v2 v3 e d ) */ +/* > */ +/* > where d and e denote diagonal and off-diagonal elements of T, and vi */ +/* > denotes an element of the vector defining H(i). */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int dsytrd_(char *uplo, integer *n, doublereal *a, integer * + lda, doublereal *d__, doublereal *e, doublereal *tau, doublereal * + work, integer *lwork, integer *info, ftnlen uplo_len) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + integer i__, j, nb, kk, nx, iws; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + integer nbmin, iinfo; + logical upper; + extern /* Subroutine */ int dsytd2_(char *, integer *, doublereal *, + integer *, doublereal *, doublereal *, doublereal *, integer *, + ftnlen), dsyr2k_(char *, char *, integer *, integer *, doublereal + *, doublereal *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, ftnlen, ftnlen), dlatrd_(char *, + integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, doublereal *, integer *, ftnlen), xerbla_(char *, + integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + integer ldwork, lwkopt; + logical lquery; + + +/* -- LAPACK computational routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --d__; + --e; + --tau; + --work; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + lquery = *lwork == -1; + if (! upper && ! lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } else if (*lwork < 1 && ! lquery) { + *info = -9; + } + + if (*info == 0) { + +/* Determine the block size. */ + + nb = ilaenv_(&c__1, (char *)"DSYTRD", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, + (ftnlen)1); + lwkopt = *n * nb; + work[1] = (doublereal) lwkopt; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DSYTRD", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + work[1] = 1.; + return 0; + } + + nx = *n; + iws = 1; + if (nb > 1 && nb < *n) { + +/* Determine when to cross over from blocked to unblocked code */ +/* (last block is always handled by unblocked code). */ + +/* Computing MAX */ + i__1 = nb, i__2 = ilaenv_(&c__3, (char *)"DSYTRD", uplo, n, &c_n1, &c_n1, & + c_n1, (ftnlen)6, (ftnlen)1); + nx = max(i__1,i__2); + if (nx < *n) { + +/* Determine if workspace is large enough for blocked code. */ + + ldwork = *n; + iws = ldwork * nb; + if (*lwork < iws) { + +/* Not enough workspace to use optimal NB: determine the */ +/* minimum value of NB, and reduce NB or force use of */ +/* unblocked code by setting NX = N. */ + +/* Computing MAX */ + i__1 = *lwork / ldwork; + nb = max(i__1,1); + nbmin = ilaenv_(&c__2, (char *)"DSYTRD", uplo, n, &c_n1, &c_n1, &c_n1, + (ftnlen)6, (ftnlen)1); + if (nb < nbmin) { + nx = *n; + } + } + } else { + nx = *n; + } + } else { + nb = 1; + } + + if (upper) { + +/* Reduce the upper triangle of A. */ +/* Columns 1:kk are handled by the unblocked method. */ + + kk = *n - (*n - nx + nb - 1) / nb * nb; + i__1 = kk + 1; + i__2 = -nb; + for (i__ = *n - nb + 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += + i__2) { + +/* Reduce columns i:i+nb-1 to tridiagonal form and form the */ +/* matrix W which is needed to update the unreduced part of */ +/* the matrix */ + + i__3 = i__ + nb - 1; + dlatrd_(uplo, &i__3, &nb, &a[a_offset], lda, &e[1], &tau[1], & + work[1], &ldwork, (ftnlen)1); + +/* Update the unreduced submatrix A(1:i-1,1:i-1), using an */ +/* update of the form: A := A - V*W**T - W*V**T */ + + i__3 = i__ - 1; + dsyr2k_(uplo, (char *)"No transpose", &i__3, &nb, &c_b22, &a[i__ * a_dim1 + + 1], lda, &work[1], &ldwork, &c_b23, &a[a_offset], lda, ( + ftnlen)1, (ftnlen)12); + +/* Copy superdiagonal elements back into A, and diagonal */ +/* elements into D */ + + i__3 = i__ + nb - 1; + for (j = i__; j <= i__3; ++j) { + a[j - 1 + j * a_dim1] = e[j - 1]; + d__[j] = a[j + j * a_dim1]; +/* L10: */ + } +/* L20: */ + } + +/* Use unblocked code to reduce the last or only block */ + + dsytd2_(uplo, &kk, &a[a_offset], lda, &d__[1], &e[1], &tau[1], &iinfo, + (ftnlen)1); + } else { + +/* Reduce the lower triangle of A */ + + i__2 = *n - nx; + i__1 = nb; + for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) { + +/* Reduce columns i:i+nb-1 to tridiagonal form and form the */ +/* matrix W which is needed to update the unreduced part of */ +/* the matrix */ + + i__3 = *n - i__ + 1; + dlatrd_(uplo, &i__3, &nb, &a[i__ + i__ * a_dim1], lda, &e[i__], & + tau[i__], &work[1], &ldwork, (ftnlen)1); + +/* Update the unreduced submatrix A(i+ib:n,i+ib:n), using */ +/* an update of the form: A := A - V*W**T - W*V**T */ + + i__3 = *n - i__ - nb + 1; + dsyr2k_(uplo, (char *)"No transpose", &i__3, &nb, &c_b22, &a[i__ + nb + + i__ * a_dim1], lda, &work[nb + 1], &ldwork, &c_b23, &a[ + i__ + nb + (i__ + nb) * a_dim1], lda, (ftnlen)1, (ftnlen) + 12); + +/* Copy subdiagonal elements back into A, and diagonal */ +/* elements into D */ + + i__3 = i__ + nb - 1; + for (j = i__; j <= i__3; ++j) { + a[j + 1 + j * a_dim1] = e[j]; + d__[j] = a[j + j * a_dim1]; +/* L30: */ + } +/* L40: */ + } + +/* Use unblocked code to reduce the last or only block */ + + i__1 = *n - i__ + 1; + dsytd2_(uplo, &i__1, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[i__], + &tau[i__], &iinfo, (ftnlen)1); + } + + work[1] = (doublereal) lwkopt; + return 0; + +/* End of DSYTRD */ + +} /* dsytrd_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dtrmm.cpp b/lib/linalg/dtrmm.cpp new file mode 100644 index 0000000000..ffb6a1ad6b --- /dev/null +++ b/lib/linalg/dtrmm.cpp @@ -0,0 +1,527 @@ +/* fortran/dtrmm.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* > \brief \b DTRMM */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DTRMM(SIDE,UPLO,TRANSA,DIAG,M,N,ALPHA,A,LDA,B,LDB) */ + +/* .. Scalar Arguments .. */ +/* DOUBLE PRECISION ALPHA */ +/* INTEGER LDA,LDB,M,N */ +/* CHARACTER DIAG,SIDE,TRANSA,UPLO */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION A(LDA,*),B(LDB,*) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DTRMM performs one of the matrix-matrix operations */ +/* > */ +/* > B := alpha*op( A )*B, or B := alpha*B*op( A ), */ +/* > */ +/* > where alpha is a scalar, B is an m by n matrix, A is a unit, or */ +/* > non-unit, upper or lower triangular matrix and op( A ) is one of */ +/* > */ +/* > op( A ) = A or op( A ) = A**T. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] SIDE */ +/* > \verbatim */ +/* > SIDE is CHARACTER*1 */ +/* > On entry, SIDE specifies whether op( A ) multiplies B from */ +/* > the left or right as follows: */ +/* > */ +/* > SIDE = 'L' or 'l' B := alpha*op( A )*B. */ +/* > */ +/* > SIDE = 'R' or 'r' B := alpha*B*op( A ). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > On entry, UPLO specifies whether the matrix A is an upper or */ +/* > lower triangular matrix as follows: */ +/* > */ +/* > UPLO = 'U' or 'u' A is an upper triangular matrix. */ +/* > */ +/* > UPLO = 'L' or 'l' A is a lower triangular matrix. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TRANSA */ +/* > \verbatim */ +/* > TRANSA is CHARACTER*1 */ +/* > On entry, TRANSA specifies the form of op( A ) to be used in */ +/* > the matrix multiplication as follows: */ +/* > */ +/* > TRANSA = 'N' or 'n' op( A ) = A. */ +/* > */ +/* > TRANSA = 'T' or 't' op( A ) = A**T. */ +/* > */ +/* > TRANSA = 'C' or 'c' op( A ) = A**T. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DIAG */ +/* > \verbatim */ +/* > DIAG is CHARACTER*1 */ +/* > On entry, DIAG specifies whether or not A is unit triangular */ +/* > as follows: */ +/* > */ +/* > DIAG = 'U' or 'u' A is assumed to be unit triangular. */ +/* > */ +/* > DIAG = 'N' or 'n' A is not assumed to be unit */ +/* > triangular. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > On entry, M specifies the number of rows of B. M must be at */ +/* > least zero. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > On entry, N specifies the number of columns of B. N must be */ +/* > at least zero. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] ALPHA */ +/* > \verbatim */ +/* > ALPHA is DOUBLE PRECISION. */ +/* > On entry, ALPHA specifies the scalar alpha. When alpha is */ +/* > zero then A is not referenced and B need not be set before */ +/* > entry. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension ( LDA, k ), where k is m */ +/* > when SIDE = 'L' or 'l' and is n when SIDE = 'R' or 'r'. */ +/* > Before entry with UPLO = 'U' or 'u', the leading k by k */ +/* > upper triangular part of the array A must contain the upper */ +/* > triangular matrix and the strictly lower triangular part of */ +/* > A is not referenced. */ +/* > Before entry with UPLO = 'L' or 'l', the leading k by k */ +/* > lower triangular part of the array A must contain the lower */ +/* > triangular matrix and the strictly upper triangular part of */ +/* > A is not referenced. */ +/* > Note that when DIAG = 'U' or 'u', the diagonal elements of */ +/* > A are not referenced either, but are assumed to be unity. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > On entry, LDA specifies the first dimension of A as declared */ +/* > in the calling (sub) program. When SIDE = 'L' or 'l' then */ +/* > LDA must be at least max( 1, m ), when SIDE = 'R' or 'r' */ +/* > then LDA must be at least max( 1, n ). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is DOUBLE PRECISION array, dimension ( LDB, N ) */ +/* > Before entry, the leading m by n part of the array B must */ +/* > contain the matrix B, and on exit is overwritten by the */ +/* > transformed matrix. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > On entry, LDB specifies the first dimension of B as declared */ +/* > in the calling (sub) program. LDB must be at least */ +/* > max( 1, m ). */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup double_blas_level3 */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > Level 3 Blas routine. */ +/* > */ +/* > -- Written on 8-February-1989. */ +/* > Jack Dongarra, Argonne National Laboratory. */ +/* > Iain Duff, AERE Harwell. */ +/* > Jeremy Du Croz, Numerical Algorithms Group Ltd. */ +/* > Sven Hammarling, Numerical Algorithms Group Ltd. */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int dtrmm_(char *side, char *uplo, char *transa, char *diag, + integer *m, integer *n, doublereal *alpha, doublereal *a, integer * + lda, doublereal *b, integer *ldb, ftnlen side_len, ftnlen uplo_len, + ftnlen transa_len, ftnlen diag_len) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3; + + /* Local variables */ + integer i__, j, k, info; + doublereal temp; + logical lside; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + integer nrowa; + logical upper; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + logical nounit; + + +/* -- Reference BLAS level3 routine -- */ +/* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Parameters .. */ +/* .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + lside = lsame_(side, (char *)"L", (ftnlen)1, (ftnlen)1); + if (lside) { + nrowa = *m; + } else { + nrowa = *n; + } + nounit = lsame_(diag, (char *)"N", (ftnlen)1, (ftnlen)1); + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + + info = 0; + if (! lside && ! lsame_(side, (char *)"R", (ftnlen)1, (ftnlen)1)) { + info = 1; + } else if (! upper && ! lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + info = 2; + } else if (! lsame_(transa, (char *)"N", (ftnlen)1, (ftnlen)1) && ! lsame_(transa, + (char *)"T", (ftnlen)1, (ftnlen)1) && ! lsame_(transa, (char *)"C", (ftnlen)1, ( + ftnlen)1)) { + info = 3; + } else if (! lsame_(diag, (char *)"U", (ftnlen)1, (ftnlen)1) && ! lsame_(diag, + (char *)"N", (ftnlen)1, (ftnlen)1)) { + info = 4; + } else if (*m < 0) { + info = 5; + } else if (*n < 0) { + info = 6; + } else if (*lda < max(1,nrowa)) { + info = 9; + } else if (*ldb < max(1,*m)) { + info = 11; + } + if (info != 0) { + xerbla_((char *)"DTRMM ", &info, (ftnlen)6); + return 0; + } + +/* Quick return if possible. */ + + if (*m == 0 || *n == 0) { + return 0; + } + +/* And when alpha.eq.zero. */ + + if (*alpha == 0.) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = 0.; +/* L10: */ + } +/* L20: */ + } + return 0; + } + +/* Start the operations. */ + + if (lside) { + if (lsame_(transa, (char *)"N", (ftnlen)1, (ftnlen)1)) { + +/* Form B := alpha*A*B. */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (k = 1; k <= i__2; ++k) { + if (b[k + j * b_dim1] != 0.) { + temp = *alpha * b[k + j * b_dim1]; + i__3 = k - 1; + for (i__ = 1; i__ <= i__3; ++i__) { + b[i__ + j * b_dim1] += temp * a[i__ + k * + a_dim1]; +/* L30: */ + } + if (nounit) { + temp *= a[k + k * a_dim1]; + } + b[k + j * b_dim1] = temp; + } +/* L40: */ + } +/* L50: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + for (k = *m; k >= 1; --k) { + if (b[k + j * b_dim1] != 0.) { + temp = *alpha * b[k + j * b_dim1]; + b[k + j * b_dim1] = temp; + if (nounit) { + b[k + j * b_dim1] *= a[k + k * a_dim1]; + } + i__2 = *m; + for (i__ = k + 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] += temp * a[i__ + k * + a_dim1]; +/* L60: */ + } + } +/* L70: */ + } +/* L80: */ + } + } + } else { + +/* Form B := alpha*A**T*B. */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + for (i__ = *m; i__ >= 1; --i__) { + temp = b[i__ + j * b_dim1]; + if (nounit) { + temp *= a[i__ + i__ * a_dim1]; + } + i__2 = i__ - 1; + for (k = 1; k <= i__2; ++k) { + temp += a[k + i__ * a_dim1] * b[k + j * b_dim1]; +/* L90: */ + } + b[i__ + j * b_dim1] = *alpha * temp; +/* L100: */ + } +/* L110: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = b[i__ + j * b_dim1]; + if (nounit) { + temp *= a[i__ + i__ * a_dim1]; + } + i__3 = *m; + for (k = i__ + 1; k <= i__3; ++k) { + temp += a[k + i__ * a_dim1] * b[k + j * b_dim1]; +/* L120: */ + } + b[i__ + j * b_dim1] = *alpha * temp; +/* L130: */ + } +/* L140: */ + } + } + } + } else { + if (lsame_(transa, (char *)"N", (ftnlen)1, (ftnlen)1)) { + +/* Form B := alpha*B*A. */ + + if (upper) { + for (j = *n; j >= 1; --j) { + temp = *alpha; + if (nounit) { + temp *= a[j + j * a_dim1]; + } + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1]; +/* L150: */ + } + i__1 = j - 1; + for (k = 1; k <= i__1; ++k) { + if (a[k + j * a_dim1] != 0.) { + temp = *alpha * a[k + j * a_dim1]; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] += temp * b[i__ + k * + b_dim1]; +/* L160: */ + } + } +/* L170: */ + } +/* L180: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp = *alpha; + if (nounit) { + temp *= a[j + j * a_dim1]; + } + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1]; +/* L190: */ + } + i__2 = *n; + for (k = j + 1; k <= i__2; ++k) { + if (a[k + j * a_dim1] != 0.) { + temp = *alpha * a[k + j * a_dim1]; + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + b[i__ + j * b_dim1] += temp * b[i__ + k * + b_dim1]; +/* L200: */ + } + } +/* L210: */ + } +/* L220: */ + } + } + } else { + +/* Form B := alpha*B*A**T. */ + + if (upper) { + i__1 = *n; + for (k = 1; k <= i__1; ++k) { + i__2 = k - 1; + for (j = 1; j <= i__2; ++j) { + if (a[j + k * a_dim1] != 0.) { + temp = *alpha * a[j + k * a_dim1]; + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + b[i__ + j * b_dim1] += temp * b[i__ + k * + b_dim1]; +/* L230: */ + } + } +/* L240: */ + } + temp = *alpha; + if (nounit) { + temp *= a[k + k * a_dim1]; + } + if (temp != 1.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1]; +/* L250: */ + } + } +/* L260: */ + } + } else { + for (k = *n; k >= 1; --k) { + i__1 = *n; + for (j = k + 1; j <= i__1; ++j) { + if (a[j + k * a_dim1] != 0.) { + temp = *alpha * a[j + k * a_dim1]; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] += temp * b[i__ + k * + b_dim1]; +/* L270: */ + } + } +/* L280: */ + } + temp = *alpha; + if (nounit) { + temp *= a[k + k * a_dim1]; + } + if (temp != 1.) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1]; +/* L290: */ + } + } +/* L300: */ + } + } + } + } + + return 0; + +/* End of DTRMM */ + +} /* dtrmm_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dtrmv.cpp b/lib/linalg/dtrmv.cpp new file mode 100644 index 0000000000..77dbf2f66f --- /dev/null +++ b/lib/linalg/dtrmv.cpp @@ -0,0 +1,415 @@ +/* fortran/dtrmv.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* > \brief \b DTRMV */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DTRMV(UPLO,TRANS,DIAG,N,A,LDA,X,INCX) */ + +/* .. Scalar Arguments .. */ +/* INTEGER INCX,LDA,N */ +/* CHARACTER DIAG,TRANS,UPLO */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION A(LDA,*),X(*) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DTRMV performs one of the matrix-vector operations */ +/* > */ +/* > x := A*x, or x := A**T*x, */ +/* > */ +/* > where x is an n element vector and A is an n by n unit, or non-unit, */ +/* > upper or lower triangular matrix. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > On entry, UPLO specifies whether the matrix is an upper or */ +/* > lower triangular matrix as follows: */ +/* > */ +/* > UPLO = 'U' or 'u' A is an upper triangular matrix. */ +/* > */ +/* > UPLO = 'L' or 'l' A is a lower triangular matrix. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TRANS */ +/* > \verbatim */ +/* > TRANS is CHARACTER*1 */ +/* > On entry, TRANS specifies the operation to be performed as */ +/* > follows: */ +/* > */ +/* > TRANS = 'N' or 'n' x := A*x. */ +/* > */ +/* > TRANS = 'T' or 't' x := A**T*x. */ +/* > */ +/* > TRANS = 'C' or 'c' x := A**T*x. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DIAG */ +/* > \verbatim */ +/* > DIAG is CHARACTER*1 */ +/* > On entry, DIAG specifies whether or not A is unit */ +/* > triangular as follows: */ +/* > */ +/* > DIAG = 'U' or 'u' A is assumed to be unit triangular. */ +/* > */ +/* > DIAG = 'N' or 'n' A is not assumed to be unit */ +/* > triangular. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > On entry, N specifies the order of the matrix A. */ +/* > N must be at least zero. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension ( LDA, N ) */ +/* > Before entry with UPLO = 'U' or 'u', the leading n by n */ +/* > upper triangular part of the array A must contain the upper */ +/* > triangular matrix and the strictly lower triangular part of */ +/* > A is not referenced. */ +/* > Before entry with UPLO = 'L' or 'l', the leading n by n */ +/* > lower triangular part of the array A must contain the lower */ +/* > triangular matrix and the strictly upper triangular part of */ +/* > A is not referenced. */ +/* > Note that when DIAG = 'U' or 'u', the diagonal elements of */ +/* > A are not referenced either, but are assumed to be unity. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > On entry, LDA specifies the first dimension of A as declared */ +/* > in the calling (sub) program. LDA must be at least */ +/* > max( 1, n ). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] X */ +/* > \verbatim */ +/* > X is DOUBLE PRECISION array, dimension at least */ +/* > ( 1 + ( n - 1 )*abs( INCX ) ). */ +/* > Before entry, the incremented array X must contain the n */ +/* > element vector x. On exit, X is overwritten with the */ +/* > transformed vector x. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] INCX */ +/* > \verbatim */ +/* > INCX is INTEGER */ +/* > On entry, INCX specifies the increment for the elements of */ +/* > X. INCX must not be zero. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup double_blas_level2 */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > Level 2 Blas routine. */ +/* > The vector and matrix arguments are not referenced when N = 0, or M = 0 */ +/* > */ +/* > -- Written on 22-October-1986. */ +/* > Jack Dongarra, Argonne National Lab. */ +/* > Jeremy Du Croz, Nag Central Office. */ +/* > Sven Hammarling, Nag Central Office. */ +/* > Richard Hanson, Sandia National Labs. */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int dtrmv_(char *uplo, char *trans, char *diag, integer *n, + doublereal *a, integer *lda, doublereal *x, integer *incx, ftnlen + uplo_len, ftnlen trans_len, ftnlen diag_len) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + integer i__, j, ix, jx, kx, info; + doublereal temp; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + logical nounit; + + +/* -- Reference BLAS level2 routine -- */ +/* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --x; + + /* Function Body */ + info = 0; + if (! lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, (char *)"L", ( + ftnlen)1, (ftnlen)1)) { + info = 1; + } else if (! lsame_(trans, (char *)"N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, + (char *)"T", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, (char *)"C", (ftnlen)1, ( + ftnlen)1)) { + info = 2; + } else if (! lsame_(diag, (char *)"U", (ftnlen)1, (ftnlen)1) && ! lsame_(diag, + (char *)"N", (ftnlen)1, (ftnlen)1)) { + info = 3; + } else if (*n < 0) { + info = 4; + } else if (*lda < max(1,*n)) { + info = 6; + } else if (*incx == 0) { + info = 8; + } + if (info != 0) { + xerbla_((char *)"DTRMV ", &info, (ftnlen)6); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0) { + return 0; + } + + nounit = lsame_(diag, (char *)"N", (ftnlen)1, (ftnlen)1); + +/* Set up the start point in X if the increment is not unity. This */ +/* will be ( N - 1 )*INCX too small for descending loops. */ + + if (*incx <= 0) { + kx = 1 - (*n - 1) * *incx; + } else if (*incx != 1) { + kx = 1; + } + +/* Start the operations. In this version the elements of A are */ +/* accessed sequentially with one pass through A. */ + + if (lsame_(trans, (char *)"N", (ftnlen)1, (ftnlen)1)) { + +/* Form x := A*x. */ + + if (lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1)) { + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[j] != 0.) { + temp = x[j]; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + x[i__] += temp * a[i__ + j * a_dim1]; +/* L10: */ + } + if (nounit) { + x[j] *= a[j + j * a_dim1]; + } + } +/* L20: */ + } + } else { + jx = kx; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[jx] != 0.) { + temp = x[jx]; + ix = kx; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + x[ix] += temp * a[i__ + j * a_dim1]; + ix += *incx; +/* L30: */ + } + if (nounit) { + x[jx] *= a[j + j * a_dim1]; + } + } + jx += *incx; +/* L40: */ + } + } + } else { + if (*incx == 1) { + for (j = *n; j >= 1; --j) { + if (x[j] != 0.) { + temp = x[j]; + i__1 = j + 1; + for (i__ = *n; i__ >= i__1; --i__) { + x[i__] += temp * a[i__ + j * a_dim1]; +/* L50: */ + } + if (nounit) { + x[j] *= a[j + j * a_dim1]; + } + } +/* L60: */ + } + } else { + kx += (*n - 1) * *incx; + jx = kx; + for (j = *n; j >= 1; --j) { + if (x[jx] != 0.) { + temp = x[jx]; + ix = kx; + i__1 = j + 1; + for (i__ = *n; i__ >= i__1; --i__) { + x[ix] += temp * a[i__ + j * a_dim1]; + ix -= *incx; +/* L70: */ + } + if (nounit) { + x[jx] *= a[j + j * a_dim1]; + } + } + jx -= *incx; +/* L80: */ + } + } + } + } else { + +/* Form x := A**T*x. */ + + if (lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1)) { + if (*incx == 1) { + for (j = *n; j >= 1; --j) { + temp = x[j]; + if (nounit) { + temp *= a[j + j * a_dim1]; + } + for (i__ = j - 1; i__ >= 1; --i__) { + temp += a[i__ + j * a_dim1] * x[i__]; +/* L90: */ + } + x[j] = temp; +/* L100: */ + } + } else { + jx = kx + (*n - 1) * *incx; + for (j = *n; j >= 1; --j) { + temp = x[jx]; + ix = jx; + if (nounit) { + temp *= a[j + j * a_dim1]; + } + for (i__ = j - 1; i__ >= 1; --i__) { + ix -= *incx; + temp += a[i__ + j * a_dim1] * x[ix]; +/* L110: */ + } + x[jx] = temp; + jx -= *incx; +/* L120: */ + } + } + } else { + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp = x[j]; + if (nounit) { + temp *= a[j + j * a_dim1]; + } + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + temp += a[i__ + j * a_dim1] * x[i__]; +/* L130: */ + } + x[j] = temp; +/* L140: */ + } + } else { + jx = kx; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp = x[jx]; + ix = jx; + if (nounit) { + temp *= a[j + j * a_dim1]; + } + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + ix += *incx; + temp += a[i__ + j * a_dim1] * x[ix]; +/* L150: */ + } + x[jx] = temp; + jx += *incx; +/* L160: */ + } + } + } + } + + return 0; + +/* End of DTRMV */ + +} /* dtrmv_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dtrsm.cpp b/lib/linalg/dtrsm.cpp new file mode 100644 index 0000000000..488da4ecdc --- /dev/null +++ b/lib/linalg/dtrsm.cpp @@ -0,0 +1,565 @@ +/* fortran/dtrsm.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* > \brief \b DTRSM */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DTRSM(SIDE,UPLO,TRANSA,DIAG,M,N,ALPHA,A,LDA,B,LDB) */ + +/* .. Scalar Arguments .. */ +/* DOUBLE PRECISION ALPHA */ +/* INTEGER LDA,LDB,M,N */ +/* CHARACTER DIAG,SIDE,TRANSA,UPLO */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION A(LDA,*),B(LDB,*) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DTRSM solves one of the matrix equations */ +/* > */ +/* > op( A )*X = alpha*B, or X*op( A ) = alpha*B, */ +/* > */ +/* > where alpha is a scalar, X and B are m by n matrices, A is a unit, or */ +/* > non-unit, upper or lower triangular matrix and op( A ) is one of */ +/* > */ +/* > op( A ) = A or op( A ) = A**T. */ +/* > */ +/* > The matrix X is overwritten on B. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] SIDE */ +/* > \verbatim */ +/* > SIDE is CHARACTER*1 */ +/* > On entry, SIDE specifies whether op( A ) appears on the left */ +/* > or right of X as follows: */ +/* > */ +/* > SIDE = 'L' or 'l' op( A )*X = alpha*B. */ +/* > */ +/* > SIDE = 'R' or 'r' X*op( A ) = alpha*B. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > On entry, UPLO specifies whether the matrix A is an upper or */ +/* > lower triangular matrix as follows: */ +/* > */ +/* > UPLO = 'U' or 'u' A is an upper triangular matrix. */ +/* > */ +/* > UPLO = 'L' or 'l' A is a lower triangular matrix. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TRANSA */ +/* > \verbatim */ +/* > TRANSA is CHARACTER*1 */ +/* > On entry, TRANSA specifies the form of op( A ) to be used in */ +/* > the matrix multiplication as follows: */ +/* > */ +/* > TRANSA = 'N' or 'n' op( A ) = A. */ +/* > */ +/* > TRANSA = 'T' or 't' op( A ) = A**T. */ +/* > */ +/* > TRANSA = 'C' or 'c' op( A ) = A**T. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DIAG */ +/* > \verbatim */ +/* > DIAG is CHARACTER*1 */ +/* > On entry, DIAG specifies whether or not A is unit triangular */ +/* > as follows: */ +/* > */ +/* > DIAG = 'U' or 'u' A is assumed to be unit triangular. */ +/* > */ +/* > DIAG = 'N' or 'n' A is not assumed to be unit */ +/* > triangular. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > On entry, M specifies the number of rows of B. M must be at */ +/* > least zero. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > On entry, N specifies the number of columns of B. N must be */ +/* > at least zero. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] ALPHA */ +/* > \verbatim */ +/* > ALPHA is DOUBLE PRECISION. */ +/* > On entry, ALPHA specifies the scalar alpha. When alpha is */ +/* > zero then A is not referenced and B need not be set before */ +/* > entry. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension ( LDA, k ), */ +/* > where k is m when SIDE = 'L' or 'l' */ +/* > and k is n when SIDE = 'R' or 'r'. */ +/* > Before entry with UPLO = 'U' or 'u', the leading k by k */ +/* > upper triangular part of the array A must contain the upper */ +/* > triangular matrix and the strictly lower triangular part of */ +/* > A is not referenced. */ +/* > Before entry with UPLO = 'L' or 'l', the leading k by k */ +/* > lower triangular part of the array A must contain the lower */ +/* > triangular matrix and the strictly upper triangular part of */ +/* > A is not referenced. */ +/* > Note that when DIAG = 'U' or 'u', the diagonal elements of */ +/* > A are not referenced either, but are assumed to be unity. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > On entry, LDA specifies the first dimension of A as declared */ +/* > in the calling (sub) program. When SIDE = 'L' or 'l' then */ +/* > LDA must be at least max( 1, m ), when SIDE = 'R' or 'r' */ +/* > then LDA must be at least max( 1, n ). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is DOUBLE PRECISION array, dimension ( LDB, N ) */ +/* > Before entry, the leading m by n part of the array B must */ +/* > contain the right-hand side matrix B, and on exit is */ +/* > overwritten by the solution matrix X. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > On entry, LDB specifies the first dimension of B as declared */ +/* > in the calling (sub) program. LDB must be at least */ +/* > max( 1, m ). */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup double_blas_level3 */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > Level 3 Blas routine. */ +/* > */ +/* > */ +/* > -- Written on 8-February-1989. */ +/* > Jack Dongarra, Argonne National Laboratory. */ +/* > Iain Duff, AERE Harwell. */ +/* > Jeremy Du Croz, Numerical Algorithms Group Ltd. */ +/* > Sven Hammarling, Numerical Algorithms Group Ltd. */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int dtrsm_(char *side, char *uplo, char *transa, char *diag, + integer *m, integer *n, doublereal *alpha, doublereal *a, integer * + lda, doublereal *b, integer *ldb, ftnlen side_len, ftnlen uplo_len, + ftnlen transa_len, ftnlen diag_len) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3; + + /* Local variables */ + integer i__, j, k, info; + doublereal temp; + logical lside; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + integer nrowa; + logical upper; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + logical nounit; + + +/* -- Reference BLAS level3 routine -- */ +/* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Parameters .. */ +/* .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + lside = lsame_(side, (char *)"L", (ftnlen)1, (ftnlen)1); + if (lside) { + nrowa = *m; + } else { + nrowa = *n; + } + nounit = lsame_(diag, (char *)"N", (ftnlen)1, (ftnlen)1); + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + + info = 0; + if (! lside && ! lsame_(side, (char *)"R", (ftnlen)1, (ftnlen)1)) { + info = 1; + } else if (! upper && ! lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + info = 2; + } else if (! lsame_(transa, (char *)"N", (ftnlen)1, (ftnlen)1) && ! lsame_(transa, + (char *)"T", (ftnlen)1, (ftnlen)1) && ! lsame_(transa, (char *)"C", (ftnlen)1, ( + ftnlen)1)) { + info = 3; + } else if (! lsame_(diag, (char *)"U", (ftnlen)1, (ftnlen)1) && ! lsame_(diag, + (char *)"N", (ftnlen)1, (ftnlen)1)) { + info = 4; + } else if (*m < 0) { + info = 5; + } else if (*n < 0) { + info = 6; + } else if (*lda < max(1,nrowa)) { + info = 9; + } else if (*ldb < max(1,*m)) { + info = 11; + } + if (info != 0) { + xerbla_((char *)"DTRSM ", &info, (ftnlen)6); + return 0; + } + +/* Quick return if possible. */ + + if (*m == 0 || *n == 0) { + return 0; + } + +/* And when alpha.eq.zero. */ + + if (*alpha == 0.) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = 0.; +/* L10: */ + } +/* L20: */ + } + return 0; + } + +/* Start the operations. */ + + if (lside) { + if (lsame_(transa, (char *)"N", (ftnlen)1, (ftnlen)1)) { + +/* Form B := alpha*inv( A )*B. */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*alpha != 1.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1] + ; +/* L30: */ + } + } + for (k = *m; k >= 1; --k) { + if (b[k + j * b_dim1] != 0.) { + if (nounit) { + b[k + j * b_dim1] /= a[k + k * a_dim1]; + } + i__2 = k - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] -= b[k + j * b_dim1] * a[ + i__ + k * a_dim1]; +/* L40: */ + } + } +/* L50: */ + } +/* L60: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*alpha != 1.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1] + ; +/* L70: */ + } + } + i__2 = *m; + for (k = 1; k <= i__2; ++k) { + if (b[k + j * b_dim1] != 0.) { + if (nounit) { + b[k + j * b_dim1] /= a[k + k * a_dim1]; + } + i__3 = *m; + for (i__ = k + 1; i__ <= i__3; ++i__) { + b[i__ + j * b_dim1] -= b[k + j * b_dim1] * a[ + i__ + k * a_dim1]; +/* L80: */ + } + } +/* L90: */ + } +/* L100: */ + } + } + } else { + +/* Form B := alpha*inv( A**T )*B. */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = *alpha * b[i__ + j * b_dim1]; + i__3 = i__ - 1; + for (k = 1; k <= i__3; ++k) { + temp -= a[k + i__ * a_dim1] * b[k + j * b_dim1]; +/* L110: */ + } + if (nounit) { + temp /= a[i__ + i__ * a_dim1]; + } + b[i__ + j * b_dim1] = temp; +/* L120: */ + } +/* L130: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + for (i__ = *m; i__ >= 1; --i__) { + temp = *alpha * b[i__ + j * b_dim1]; + i__2 = *m; + for (k = i__ + 1; k <= i__2; ++k) { + temp -= a[k + i__ * a_dim1] * b[k + j * b_dim1]; +/* L140: */ + } + if (nounit) { + temp /= a[i__ + i__ * a_dim1]; + } + b[i__ + j * b_dim1] = temp; +/* L150: */ + } +/* L160: */ + } + } + } + } else { + if (lsame_(transa, (char *)"N", (ftnlen)1, (ftnlen)1)) { + +/* Form B := alpha*B*inv( A ). */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*alpha != 1.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1] + ; +/* L170: */ + } + } + i__2 = j - 1; + for (k = 1; k <= i__2; ++k) { + if (a[k + j * a_dim1] != 0.) { + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + b[i__ + j * b_dim1] -= a[k + j * a_dim1] * b[ + i__ + k * b_dim1]; +/* L180: */ + } + } +/* L190: */ + } + if (nounit) { + temp = 1. / a[j + j * a_dim1]; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1]; +/* L200: */ + } + } +/* L210: */ + } + } else { + for (j = *n; j >= 1; --j) { + if (*alpha != 1.) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1] + ; +/* L220: */ + } + } + i__1 = *n; + for (k = j + 1; k <= i__1; ++k) { + if (a[k + j * a_dim1] != 0.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] -= a[k + j * a_dim1] * b[ + i__ + k * b_dim1]; +/* L230: */ + } + } +/* L240: */ + } + if (nounit) { + temp = 1. / a[j + j * a_dim1]; + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1]; +/* L250: */ + } + } +/* L260: */ + } + } + } else { + +/* Form B := alpha*B*inv( A**T ). */ + + if (upper) { + for (k = *n; k >= 1; --k) { + if (nounit) { + temp = 1. / a[k + k * a_dim1]; + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1]; +/* L270: */ + } + } + i__1 = k - 1; + for (j = 1; j <= i__1; ++j) { + if (a[j + k * a_dim1] != 0.) { + temp = a[j + k * a_dim1]; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] -= temp * b[i__ + k * + b_dim1]; +/* L280: */ + } + } +/* L290: */ + } + if (*alpha != 1.) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + b[i__ + k * b_dim1] = *alpha * b[i__ + k * b_dim1] + ; +/* L300: */ + } + } +/* L310: */ + } + } else { + i__1 = *n; + for (k = 1; k <= i__1; ++k) { + if (nounit) { + temp = 1. / a[k + k * a_dim1]; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1]; +/* L320: */ + } + } + i__2 = *n; + for (j = k + 1; j <= i__2; ++j) { + if (a[j + k * a_dim1] != 0.) { + temp = a[j + k * a_dim1]; + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + b[i__ + j * b_dim1] -= temp * b[i__ + k * + b_dim1]; +/* L330: */ + } + } +/* L340: */ + } + if (*alpha != 1.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + k * b_dim1] = *alpha * b[i__ + k * b_dim1] + ; +/* L350: */ + } + } +/* L360: */ + } + } + } + } + + return 0; + +/* End of DTRSM */ + +} /* dtrsm_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dtrsv.cpp b/lib/linalg/dtrsv.cpp new file mode 100644 index 0000000000..58b85f9206 --- /dev/null +++ b/lib/linalg/dtrsv.cpp @@ -0,0 +1,411 @@ +/* fortran/dtrsv.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* > \brief \b DTRSV */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DTRSV(UPLO,TRANS,DIAG,N,A,LDA,X,INCX) */ + +/* .. Scalar Arguments .. */ +/* INTEGER INCX,LDA,N */ +/* CHARACTER DIAG,TRANS,UPLO */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION A(LDA,*),X(*) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DTRSV solves one of the systems of equations */ +/* > */ +/* > A*x = b, or A**T*x = b, */ +/* > */ +/* > where b and x are n element vectors and A is an n by n unit, or */ +/* > non-unit, upper or lower triangular matrix. */ +/* > */ +/* > No test for singularity or near-singularity is included in this */ +/* > routine. Such tests must be performed before calling this routine. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > On entry, UPLO specifies whether the matrix is an upper or */ +/* > lower triangular matrix as follows: */ +/* > */ +/* > UPLO = 'U' or 'u' A is an upper triangular matrix. */ +/* > */ +/* > UPLO = 'L' or 'l' A is a lower triangular matrix. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TRANS */ +/* > \verbatim */ +/* > TRANS is CHARACTER*1 */ +/* > On entry, TRANS specifies the equations to be solved as */ +/* > follows: */ +/* > */ +/* > TRANS = 'N' or 'n' A*x = b. */ +/* > */ +/* > TRANS = 'T' or 't' A**T*x = b. */ +/* > */ +/* > TRANS = 'C' or 'c' A**T*x = b. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DIAG */ +/* > \verbatim */ +/* > DIAG is CHARACTER*1 */ +/* > On entry, DIAG specifies whether or not A is unit */ +/* > triangular as follows: */ +/* > */ +/* > DIAG = 'U' or 'u' A is assumed to be unit triangular. */ +/* > */ +/* > DIAG = 'N' or 'n' A is not assumed to be unit */ +/* > triangular. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > On entry, N specifies the order of the matrix A. */ +/* > N must be at least zero. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension ( LDA, N ) */ +/* > Before entry with UPLO = 'U' or 'u', the leading n by n */ +/* > upper triangular part of the array A must contain the upper */ +/* > triangular matrix and the strictly lower triangular part of */ +/* > A is not referenced. */ +/* > Before entry with UPLO = 'L' or 'l', the leading n by n */ +/* > lower triangular part of the array A must contain the lower */ +/* > triangular matrix and the strictly upper triangular part of */ +/* > A is not referenced. */ +/* > Note that when DIAG = 'U' or 'u', the diagonal elements of */ +/* > A are not referenced either, but are assumed to be unity. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > On entry, LDA specifies the first dimension of A as declared */ +/* > in the calling (sub) program. LDA must be at least */ +/* > max( 1, n ). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] X */ +/* > \verbatim */ +/* > X is DOUBLE PRECISION array, dimension at least */ +/* > ( 1 + ( n - 1 )*abs( INCX ) ). */ +/* > Before entry, the incremented array X must contain the n */ +/* > element right-hand side vector b. On exit, X is overwritten */ +/* > with the solution vector x. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] INCX */ +/* > \verbatim */ +/* > INCX is INTEGER */ +/* > On entry, INCX specifies the increment for the elements of */ +/* > X. INCX must not be zero. */ +/* > */ +/* > Level 2 Blas routine. */ +/* > */ +/* > -- Written on 22-October-1986. */ +/* > Jack Dongarra, Argonne National Lab. */ +/* > Jeremy Du Croz, Nag Central Office. */ +/* > Sven Hammarling, Nag Central Office. */ +/* > Richard Hanson, Sandia National Labs. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup double_blas_level1 */ + +/* ===================================================================== */ +/* Subroutine */ int dtrsv_(char *uplo, char *trans, char *diag, integer *n, + doublereal *a, integer *lda, doublereal *x, integer *incx, ftnlen + uplo_len, ftnlen trans_len, ftnlen diag_len) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + integer i__, j, ix, jx, kx, info; + doublereal temp; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + logical nounit; + + +/* -- Reference BLAS level1 routine -- */ +/* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --x; + + /* Function Body */ + info = 0; + if (! lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, (char *)"L", ( + ftnlen)1, (ftnlen)1)) { + info = 1; + } else if (! lsame_(trans, (char *)"N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, + (char *)"T", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, (char *)"C", (ftnlen)1, ( + ftnlen)1)) { + info = 2; + } else if (! lsame_(diag, (char *)"U", (ftnlen)1, (ftnlen)1) && ! lsame_(diag, + (char *)"N", (ftnlen)1, (ftnlen)1)) { + info = 3; + } else if (*n < 0) { + info = 4; + } else if (*lda < max(1,*n)) { + info = 6; + } else if (*incx == 0) { + info = 8; + } + if (info != 0) { + xerbla_((char *)"DTRSV ", &info, (ftnlen)6); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0) { + return 0; + } + + nounit = lsame_(diag, (char *)"N", (ftnlen)1, (ftnlen)1); + +/* Set up the start point in X if the increment is not unity. This */ +/* will be ( N - 1 )*INCX too small for descending loops. */ + + if (*incx <= 0) { + kx = 1 - (*n - 1) * *incx; + } else if (*incx != 1) { + kx = 1; + } + +/* Start the operations. In this version the elements of A are */ +/* accessed sequentially with one pass through A. */ + + if (lsame_(trans, (char *)"N", (ftnlen)1, (ftnlen)1)) { + +/* Form x := inv( A )*x. */ + + if (lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1)) { + if (*incx == 1) { + for (j = *n; j >= 1; --j) { + if (x[j] != 0.) { + if (nounit) { + x[j] /= a[j + j * a_dim1]; + } + temp = x[j]; + for (i__ = j - 1; i__ >= 1; --i__) { + x[i__] -= temp * a[i__ + j * a_dim1]; +/* L10: */ + } + } +/* L20: */ + } + } else { + jx = kx + (*n - 1) * *incx; + for (j = *n; j >= 1; --j) { + if (x[jx] != 0.) { + if (nounit) { + x[jx] /= a[j + j * a_dim1]; + } + temp = x[jx]; + ix = jx; + for (i__ = j - 1; i__ >= 1; --i__) { + ix -= *incx; + x[ix] -= temp * a[i__ + j * a_dim1]; +/* L30: */ + } + } + jx -= *incx; +/* L40: */ + } + } + } else { + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[j] != 0.) { + if (nounit) { + x[j] /= a[j + j * a_dim1]; + } + temp = x[j]; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + x[i__] -= temp * a[i__ + j * a_dim1]; +/* L50: */ + } + } +/* L60: */ + } + } else { + jx = kx; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[jx] != 0.) { + if (nounit) { + x[jx] /= a[j + j * a_dim1]; + } + temp = x[jx]; + ix = jx; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + ix += *incx; + x[ix] -= temp * a[i__ + j * a_dim1]; +/* L70: */ + } + } + jx += *incx; +/* L80: */ + } + } + } + } else { + +/* Form x := inv( A**T )*x. */ + + if (lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1)) { + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp = x[j]; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + temp -= a[i__ + j * a_dim1] * x[i__]; +/* L90: */ + } + if (nounit) { + temp /= a[j + j * a_dim1]; + } + x[j] = temp; +/* L100: */ + } + } else { + jx = kx; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp = x[jx]; + ix = kx; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + temp -= a[i__ + j * a_dim1] * x[ix]; + ix += *incx; +/* L110: */ + } + if (nounit) { + temp /= a[j + j * a_dim1]; + } + x[jx] = temp; + jx += *incx; +/* L120: */ + } + } + } else { + if (*incx == 1) { + for (j = *n; j >= 1; --j) { + temp = x[j]; + i__1 = j + 1; + for (i__ = *n; i__ >= i__1; --i__) { + temp -= a[i__ + j * a_dim1] * x[i__]; +/* L130: */ + } + if (nounit) { + temp /= a[j + j * a_dim1]; + } + x[j] = temp; +/* L140: */ + } + } else { + kx += (*n - 1) * *incx; + jx = kx; + for (j = *n; j >= 1; --j) { + temp = x[jx]; + ix = kx; + i__1 = j + 1; + for (i__ = *n; i__ >= i__1; --i__) { + temp -= a[i__ + j * a_dim1] * x[ix]; + ix -= *incx; +/* L150: */ + } + if (nounit) { + temp /= a[j + j * a_dim1]; + } + x[jx] = temp; + jx -= *incx; +/* L160: */ + } + } + } + } + + return 0; + +/* End of DTRSV */ + +} /* dtrsv_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dtrti2.cpp b/lib/linalg/dtrti2.cpp new file mode 100644 index 0000000000..f159e31b0d --- /dev/null +++ b/lib/linalg/dtrti2.cpp @@ -0,0 +1,258 @@ +/* fortran/dtrti2.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +/* > \brief \b DTRTI2 computes the inverse of a triangular matrix (unblocked algorithm). */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DTRTI2 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DTRTI2( UPLO, DIAG, N, A, LDA, INFO ) */ + +/* .. Scalar Arguments .. */ +/* CHARACTER DIAG, UPLO */ +/* INTEGER INFO, LDA, N */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION A( LDA, * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DTRTI2 computes the inverse of a real upper or lower triangular */ +/* > matrix. */ +/* > */ +/* > This is the Level 2 BLAS version of the algorithm. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > Specifies whether the matrix A is upper or lower triangular. */ +/* > = 'U': Upper triangular */ +/* > = 'L': Lower triangular */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DIAG */ +/* > \verbatim */ +/* > DIAG is CHARACTER*1 */ +/* > Specifies whether or not the matrix A is unit triangular. */ +/* > = 'N': Non-unit triangular */ +/* > = 'U': Unit triangular */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > On entry, the triangular matrix A. If UPLO = 'U', the */ +/* > leading n by n upper triangular part of the array A contains */ +/* > the upper triangular matrix, and the strictly lower */ +/* > triangular part of A is not referenced. If UPLO = 'L', the */ +/* > leading n by n lower triangular part of the array A contains */ +/* > the lower triangular matrix, and the strictly upper */ +/* > triangular part of A is not referenced. If DIAG = 'U', the */ +/* > diagonal elements of A are also not referenced and are */ +/* > assumed to be 1. */ +/* > */ +/* > On exit, the (triangular) inverse of the original matrix, in */ +/* > the same storage format. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= max(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -k, the k-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup doubleOTHERcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int dtrti2_(char *uplo, char *diag, integer *n, doublereal * + a, integer *lda, integer *info, ftnlen uplo_len, ftnlen diag_len) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + integer j; + doublereal ajj; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *); + extern logical lsame_(char *, char *, ftnlen, ftnlen); + logical upper; + extern /* Subroutine */ int dtrmv_(char *, char *, char *, integer *, + doublereal *, integer *, doublereal *, integer *, ftnlen, ftnlen, + ftnlen), xerbla_(char *, integer *, ftnlen); + logical nounit; + + +/* -- LAPACK computational routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + nounit = lsame_(diag, (char *)"N", (ftnlen)1, (ftnlen)1); + if (! upper && ! lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (! nounit && ! lsame_(diag, (char *)"U", (ftnlen)1, (ftnlen)1)) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DTRTI2", &i__1, (ftnlen)6); + return 0; + } + + if (upper) { + +/* Compute inverse of upper triangular matrix. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (nounit) { + a[j + j * a_dim1] = 1. / a[j + j * a_dim1]; + ajj = -a[j + j * a_dim1]; + } else { + ajj = -1.; + } + +/* Compute elements 1:j-1 of j-th column. */ + + i__2 = j - 1; + dtrmv_((char *)"Upper", (char *)"No transpose", diag, &i__2, &a[a_offset], lda, & + a[j * a_dim1 + 1], &c__1, (ftnlen)5, (ftnlen)12, (ftnlen) + 1); + i__2 = j - 1; + dscal_(&i__2, &ajj, &a[j * a_dim1 + 1], &c__1); +/* L10: */ + } + } else { + +/* Compute inverse of lower triangular matrix. */ + + for (j = *n; j >= 1; --j) { + if (nounit) { + a[j + j * a_dim1] = 1. / a[j + j * a_dim1]; + ajj = -a[j + j * a_dim1]; + } else { + ajj = -1.; + } + if (j < *n) { + +/* Compute elements j+1:n of j-th column. */ + + i__1 = *n - j; + dtrmv_((char *)"Lower", (char *)"No transpose", diag, &i__1, &a[j + 1 + (j + + 1) * a_dim1], lda, &a[j + 1 + j * a_dim1], &c__1, ( + ftnlen)5, (ftnlen)12, (ftnlen)1); + i__1 = *n - j; + dscal_(&i__1, &ajj, &a[j + 1 + j * a_dim1], &c__1); + } +/* L20: */ + } + } + + return 0; + +/* End of DTRTI2 */ + +} /* dtrti2_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dtrtri.cpp b/lib/linalg/dtrtri.cpp new file mode 100644 index 0000000000..faecb3dd69 --- /dev/null +++ b/lib/linalg/dtrtri.cpp @@ -0,0 +1,321 @@ +/* fortran/dtrtri.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c_n1 = -1; +static integer c__2 = 2; +static doublereal c_b18 = 1.; +static doublereal c_b22 = -1.; + +/* > \brief \b DTRTRI */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download DTRTRI + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE DTRTRI( UPLO, DIAG, N, A, LDA, INFO ) */ + +/* .. Scalar Arguments .. */ +/* CHARACTER DIAG, UPLO */ +/* INTEGER INFO, LDA, N */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION A( LDA, * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DTRTRI computes the inverse of a real upper or lower triangular */ +/* > matrix A. */ +/* > */ +/* > This is the Level 3 BLAS version of the algorithm. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': A is upper triangular; */ +/* > = 'L': A is lower triangular. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DIAG */ +/* > \verbatim */ +/* > DIAG is CHARACTER*1 */ +/* > = 'N': A is non-unit triangular; */ +/* > = 'U': A is unit triangular. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > On entry, the triangular matrix A. If UPLO = 'U', the */ +/* > leading N-by-N upper triangular part of the array A contains */ +/* > the upper triangular matrix, and the strictly lower */ +/* > triangular part of A is not referenced. If UPLO = 'L', the */ +/* > leading N-by-N lower triangular part of the array A contains */ +/* > the lower triangular matrix, and the strictly upper */ +/* > triangular part of A is not referenced. If DIAG = 'U', the */ +/* > diagonal elements of A are also not referenced and are */ +/* > assumed to be 1. */ +/* > On exit, the (triangular) inverse of the original matrix, in */ +/* > the same storage format. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= max(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i, A(i,i) is exactly zero. The triangular */ +/* > matrix is singular and its inverse can not be computed. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup doubleOTHERcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int dtrtri_(char *uplo, char *diag, integer *n, doublereal * + a, integer *lda, integer *info, ftnlen uplo_len, ftnlen diag_len) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, i__1, i__2[2], i__3, i__4, i__5; + char ch__1[2]; + + /* Builtin functions */ + /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); + + /* Local variables */ + integer j, jb, nb, nn; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern /* Subroutine */ int dtrmm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *, ftnlen, ftnlen, ftnlen, ftnlen), dtrsm_( + char *, char *, char *, char *, integer *, integer *, doublereal * + , doublereal *, integer *, doublereal *, integer *, ftnlen, + ftnlen, ftnlen, ftnlen); + logical upper; + extern /* Subroutine */ int dtrti2_(char *, char *, integer *, doublereal + *, integer *, integer *, ftnlen, ftnlen), xerbla_(char *, integer + *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + logical nounit; + + +/* -- LAPACK computational routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + nounit = lsame_(diag, (char *)"N", (ftnlen)1, (ftnlen)1); + if (! upper && ! lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (! nounit && ! lsame_(diag, (char *)"U", (ftnlen)1, (ftnlen)1)) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DTRTRI", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Check for singularity if non-unit. */ + + if (nounit) { + i__1 = *n; + for (*info = 1; *info <= i__1; ++(*info)) { + if (a[*info + *info * a_dim1] == 0.) { + return 0; + } +/* L10: */ + } + *info = 0; + } + +/* Determine the block size for this environment. */ + +/* Writing concatenation */ + i__2[0] = 1, a__1[0] = uplo; + i__2[1] = 1, a__1[1] = diag; + s_cat(ch__1, a__1, i__2, &c__2, (ftnlen)2); + nb = ilaenv_(&c__1, (char *)"DTRTRI", ch__1, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)2); + if (nb <= 1 || nb >= *n) { + +/* Use unblocked code */ + + dtrti2_(uplo, diag, n, &a[a_offset], lda, info, (ftnlen)1, (ftnlen)1); + } else { + +/* Use blocked code */ + + if (upper) { + +/* Compute inverse of upper triangular matrix */ + + i__1 = *n; + i__3 = nb; + for (j = 1; i__3 < 0 ? j >= i__1 : j <= i__1; j += i__3) { +/* Computing MIN */ + i__4 = nb, i__5 = *n - j + 1; + jb = min(i__4,i__5); + +/* Compute rows 1:j-1 of current block column */ + + i__4 = j - 1; + dtrmm_((char *)"Left", (char *)"Upper", (char *)"No transpose", diag, &i__4, &jb, & + c_b18, &a[a_offset], lda, &a[j * a_dim1 + 1], lda, ( + ftnlen)4, (ftnlen)5, (ftnlen)12, (ftnlen)1); + i__4 = j - 1; + dtrsm_((char *)"Right", (char *)"Upper", (char *)"No transpose", diag, &i__4, &jb, & + c_b22, &a[j + j * a_dim1], lda, &a[j * a_dim1 + 1], + lda, (ftnlen)5, (ftnlen)5, (ftnlen)12, (ftnlen)1); + +/* Compute inverse of current diagonal block */ + + dtrti2_((char *)"Upper", diag, &jb, &a[j + j * a_dim1], lda, info, ( + ftnlen)5, (ftnlen)1); +/* L20: */ + } + } else { + +/* Compute inverse of lower triangular matrix */ + + nn = (*n - 1) / nb * nb + 1; + i__3 = -nb; + for (j = nn; i__3 < 0 ? j >= 1 : j <= 1; j += i__3) { +/* Computing MIN */ + i__1 = nb, i__4 = *n - j + 1; + jb = min(i__1,i__4); + if (j + jb <= *n) { + +/* Compute rows j+jb:n of current block column */ + + i__1 = *n - j - jb + 1; + dtrmm_((char *)"Left", (char *)"Lower", (char *)"No transpose", diag, &i__1, &jb, + &c_b18, &a[j + jb + (j + jb) * a_dim1], lda, &a[j + + jb + j * a_dim1], lda, (ftnlen)4, (ftnlen)5, ( + ftnlen)12, (ftnlen)1); + i__1 = *n - j - jb + 1; + dtrsm_((char *)"Right", (char *)"Lower", (char *)"No transpose", diag, &i__1, &jb, + &c_b22, &a[j + j * a_dim1], lda, &a[j + jb + j * + a_dim1], lda, (ftnlen)5, (ftnlen)5, (ftnlen)12, ( + ftnlen)1); + } + +/* Compute inverse of current diagonal block */ + + dtrti2_((char *)"Lower", diag, &jb, &a[j + j * a_dim1], lda, info, ( + ftnlen)5, (ftnlen)1); +/* L30: */ + } + } + } + + return 0; + +/* End of DTRTRI */ + +} /* dtrtri_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dznrm2.cpp b/lib/linalg/dznrm2.cpp new file mode 100644 index 0000000000..ec71aa201b --- /dev/null +++ b/lib/linalg/dznrm2.cpp @@ -0,0 +1,183 @@ +/* fortran/dznrm2.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* > \brief \b DZNRM2 */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* Definition: */ +/* =========== */ + +/* DOUBLE PRECISION FUNCTION DZNRM2(N,X,INCX) */ + +/* .. Scalar Arguments .. */ +/* INTEGER INCX,N */ +/* .. */ +/* .. Array Arguments .. */ +/* COMPLEX*16 X(*) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > DZNRM2 returns the euclidean norm of a vector via the function */ +/* > name, so that */ +/* > */ +/* > DZNRM2 := sqrt( x**H*x ) */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > number of elements in input vector(s) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] X */ +/* > \verbatim */ +/* > X is COMPLEX*16 array, dimension (N) */ +/* > complex vector with N elements */ +/* > \endverbatim */ +/* > */ +/* > \param[in] INCX */ +/* > \verbatim */ +/* > INCX is INTEGER */ +/* > storage spacing between elements of X */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup double_blas_level1 */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > -- This version written on 25-October-1982. */ +/* > Modified on 14-October-1993 to inline the call to ZLASSQ. */ +/* > Sven Hammarling, Nag Ltd. */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +doublereal dznrm2_(integer *n, doublecomplex *x, integer *incx) +{ + /* System generated locals */ + integer i__1, i__2, i__3; + doublereal ret_val, d__1; + + /* Builtin functions */ + double d_imag(doublecomplex *), sqrt(doublereal); + + /* Local variables */ + integer ix; + doublereal ssq, temp, norm, scale; + + +/* -- Reference BLAS level1 routine (version 3.7.0) -- */ +/* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ + /* Parameter adjustments */ + --x; + + /* Function Body */ + if (*n < 1 || *incx < 1) { + norm = 0.; + } else { + scale = 0.; + ssq = 1.; +/* The following loop is equivalent to this call to the LAPACK */ +/* auxiliary routine: */ +/* CALL ZLASSQ( N, X, INCX, SCALE, SSQ ) */ + + i__1 = (*n - 1) * *incx + 1; + i__2 = *incx; + for (ix = 1; i__2 < 0 ? ix >= i__1 : ix <= i__1; ix += i__2) { + i__3 = ix; + if (x[i__3].r != 0.) { + i__3 = ix; + temp = (d__1 = x[i__3].r, abs(d__1)); + if (scale < temp) { +/* Computing 2nd power */ + d__1 = scale / temp; + ssq = ssq * (d__1 * d__1) + 1.; + scale = temp; + } else { +/* Computing 2nd power */ + d__1 = temp / scale; + ssq += d__1 * d__1; + } + } + if (d_imag(&x[ix]) != 0.) { + temp = (d__1 = d_imag(&x[ix]), abs(d__1)); + if (scale < temp) { +/* Computing 2nd power */ + d__1 = scale / temp; + ssq = ssq * (d__1 * d__1) + 1.; + scale = temp; + } else { +/* Computing 2nd power */ + d__1 = temp / scale; + ssq += d__1 * d__1; + } + } +/* L10: */ + } + norm = scale * sqrt(ssq); + } + + ret_val = norm; + return ret_val; + +/* End of DZNRM2. */ + +} /* dznrm2_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/dasum.f b/lib/linalg/fortran/dasum.f similarity index 100% rename from lib/linalg/dasum.f rename to lib/linalg/fortran/dasum.f diff --git a/lib/linalg/daxpy.f b/lib/linalg/fortran/daxpy.f similarity index 100% rename from lib/linalg/daxpy.f rename to lib/linalg/fortran/daxpy.f diff --git a/lib/linalg/dbdsqr.f b/lib/linalg/fortran/dbdsqr.f similarity index 100% rename from lib/linalg/dbdsqr.f rename to lib/linalg/fortran/dbdsqr.f diff --git a/lib/linalg/dcabs1.f b/lib/linalg/fortran/dcabs1.f similarity index 100% rename from lib/linalg/dcabs1.f rename to lib/linalg/fortran/dcabs1.f diff --git a/lib/linalg/dcopy.f b/lib/linalg/fortran/dcopy.f similarity index 100% rename from lib/linalg/dcopy.f rename to lib/linalg/fortran/dcopy.f diff --git a/lib/linalg/ddot.f b/lib/linalg/fortran/ddot.f similarity index 100% rename from lib/linalg/ddot.f rename to lib/linalg/fortran/ddot.f diff --git a/lib/linalg/dgebd2.f b/lib/linalg/fortran/dgebd2.f similarity index 100% rename from lib/linalg/dgebd2.f rename to lib/linalg/fortran/dgebd2.f diff --git a/lib/linalg/dgebrd.f b/lib/linalg/fortran/dgebrd.f similarity index 100% rename from lib/linalg/dgebrd.f rename to lib/linalg/fortran/dgebrd.f diff --git a/lib/linalg/dgecon.f b/lib/linalg/fortran/dgecon.f similarity index 100% rename from lib/linalg/dgecon.f rename to lib/linalg/fortran/dgecon.f diff --git a/lib/linalg/dgelq2.f b/lib/linalg/fortran/dgelq2.f similarity index 100% rename from lib/linalg/dgelq2.f rename to lib/linalg/fortran/dgelq2.f diff --git a/lib/linalg/dgelqf.f b/lib/linalg/fortran/dgelqf.f similarity index 100% rename from lib/linalg/dgelqf.f rename to lib/linalg/fortran/dgelqf.f diff --git a/lib/linalg/dgelsd.f b/lib/linalg/fortran/dgelsd.f similarity index 100% rename from lib/linalg/dgelsd.f rename to lib/linalg/fortran/dgelsd.f diff --git a/lib/linalg/dgelss.f b/lib/linalg/fortran/dgelss.f similarity index 100% rename from lib/linalg/dgelss.f rename to lib/linalg/fortran/dgelss.f diff --git a/lib/linalg/dgemm.f b/lib/linalg/fortran/dgemm.f similarity index 100% rename from lib/linalg/dgemm.f rename to lib/linalg/fortran/dgemm.f diff --git a/lib/linalg/dgemv.f b/lib/linalg/fortran/dgemv.f similarity index 100% rename from lib/linalg/dgemv.f rename to lib/linalg/fortran/dgemv.f diff --git a/lib/linalg/dgeqr2.f b/lib/linalg/fortran/dgeqr2.f similarity index 100% rename from lib/linalg/dgeqr2.f rename to lib/linalg/fortran/dgeqr2.f diff --git a/lib/linalg/dgeqrf.f b/lib/linalg/fortran/dgeqrf.f similarity index 100% rename from lib/linalg/dgeqrf.f rename to lib/linalg/fortran/dgeqrf.f diff --git a/lib/linalg/dger.f b/lib/linalg/fortran/dger.f similarity index 100% rename from lib/linalg/dger.f rename to lib/linalg/fortran/dger.f diff --git a/lib/linalg/dgesv.f b/lib/linalg/fortran/dgesv.f similarity index 100% rename from lib/linalg/dgesv.f rename to lib/linalg/fortran/dgesv.f diff --git a/lib/linalg/dgesvd.f b/lib/linalg/fortran/dgesvd.f similarity index 100% rename from lib/linalg/dgesvd.f rename to lib/linalg/fortran/dgesvd.f diff --git a/lib/linalg/dgetf2.f b/lib/linalg/fortran/dgetf2.f similarity index 100% rename from lib/linalg/dgetf2.f rename to lib/linalg/fortran/dgetf2.f diff --git a/lib/linalg/dgetrf.f b/lib/linalg/fortran/dgetrf.f similarity index 100% rename from lib/linalg/dgetrf.f rename to lib/linalg/fortran/dgetrf.f diff --git a/lib/linalg/dgetrf2.f b/lib/linalg/fortran/dgetrf2.f similarity index 100% rename from lib/linalg/dgetrf2.f rename to lib/linalg/fortran/dgetrf2.f diff --git a/lib/linalg/dgetri.f b/lib/linalg/fortran/dgetri.f similarity index 100% rename from lib/linalg/dgetri.f rename to lib/linalg/fortran/dgetri.f diff --git a/lib/linalg/dgetrs.f b/lib/linalg/fortran/dgetrs.f similarity index 100% rename from lib/linalg/dgetrs.f rename to lib/linalg/fortran/dgetrs.f diff --git a/lib/linalg/disnan.f b/lib/linalg/fortran/disnan.f similarity index 100% rename from lib/linalg/disnan.f rename to lib/linalg/fortran/disnan.f diff --git a/lib/linalg/dlabad.f b/lib/linalg/fortran/dlabad.f similarity index 100% rename from lib/linalg/dlabad.f rename to lib/linalg/fortran/dlabad.f diff --git a/lib/linalg/dlabrd.f b/lib/linalg/fortran/dlabrd.f similarity index 100% rename from lib/linalg/dlabrd.f rename to lib/linalg/fortran/dlabrd.f diff --git a/lib/linalg/dlacn2.f b/lib/linalg/fortran/dlacn2.f similarity index 100% rename from lib/linalg/dlacn2.f rename to lib/linalg/fortran/dlacn2.f diff --git a/lib/linalg/dlacpy.f b/lib/linalg/fortran/dlacpy.f similarity index 100% rename from lib/linalg/dlacpy.f rename to lib/linalg/fortran/dlacpy.f diff --git a/lib/linalg/dladiv.f b/lib/linalg/fortran/dladiv.f similarity index 100% rename from lib/linalg/dladiv.f rename to lib/linalg/fortran/dladiv.f diff --git a/lib/linalg/dlae2.f b/lib/linalg/fortran/dlae2.f similarity index 100% rename from lib/linalg/dlae2.f rename to lib/linalg/fortran/dlae2.f diff --git a/lib/linalg/dlaed0.f b/lib/linalg/fortran/dlaed0.f similarity index 100% rename from lib/linalg/dlaed0.f rename to lib/linalg/fortran/dlaed0.f diff --git a/lib/linalg/dlaed1.f b/lib/linalg/fortran/dlaed1.f similarity index 100% rename from lib/linalg/dlaed1.f rename to lib/linalg/fortran/dlaed1.f diff --git a/lib/linalg/dlaed2.f b/lib/linalg/fortran/dlaed2.f similarity index 100% rename from lib/linalg/dlaed2.f rename to lib/linalg/fortran/dlaed2.f diff --git a/lib/linalg/dlaed3.f b/lib/linalg/fortran/dlaed3.f similarity index 100% rename from lib/linalg/dlaed3.f rename to lib/linalg/fortran/dlaed3.f diff --git a/lib/linalg/dlaed4.f b/lib/linalg/fortran/dlaed4.f similarity index 100% rename from lib/linalg/dlaed4.f rename to lib/linalg/fortran/dlaed4.f diff --git a/lib/linalg/dlaed5.f b/lib/linalg/fortran/dlaed5.f similarity index 100% rename from lib/linalg/dlaed5.f rename to lib/linalg/fortran/dlaed5.f diff --git a/lib/linalg/dlaed6.f b/lib/linalg/fortran/dlaed6.f similarity index 100% rename from lib/linalg/dlaed6.f rename to lib/linalg/fortran/dlaed6.f diff --git a/lib/linalg/dlaed7.f b/lib/linalg/fortran/dlaed7.f similarity index 100% rename from lib/linalg/dlaed7.f rename to lib/linalg/fortran/dlaed7.f diff --git a/lib/linalg/dlaed8.f b/lib/linalg/fortran/dlaed8.f similarity index 100% rename from lib/linalg/dlaed8.f rename to lib/linalg/fortran/dlaed8.f diff --git a/lib/linalg/dlaed9.f b/lib/linalg/fortran/dlaed9.f similarity index 100% rename from lib/linalg/dlaed9.f rename to lib/linalg/fortran/dlaed9.f diff --git a/lib/linalg/dlaeda.f b/lib/linalg/fortran/dlaeda.f similarity index 100% rename from lib/linalg/dlaeda.f rename to lib/linalg/fortran/dlaeda.f diff --git a/lib/linalg/dlaev2.f b/lib/linalg/fortran/dlaev2.f similarity index 100% rename from lib/linalg/dlaev2.f rename to lib/linalg/fortran/dlaev2.f diff --git a/lib/linalg/dlaisnan.f b/lib/linalg/fortran/dlaisnan.f similarity index 100% rename from lib/linalg/dlaisnan.f rename to lib/linalg/fortran/dlaisnan.f diff --git a/lib/linalg/dlals0.f b/lib/linalg/fortran/dlals0.f similarity index 100% rename from lib/linalg/dlals0.f rename to lib/linalg/fortran/dlals0.f diff --git a/lib/linalg/dlalsa.f b/lib/linalg/fortran/dlalsa.f similarity index 100% rename from lib/linalg/dlalsa.f rename to lib/linalg/fortran/dlalsa.f diff --git a/lib/linalg/dlalsd.f b/lib/linalg/fortran/dlalsd.f similarity index 100% rename from lib/linalg/dlalsd.f rename to lib/linalg/fortran/dlalsd.f diff --git a/lib/linalg/dlamch.f b/lib/linalg/fortran/dlamch.f similarity index 100% rename from lib/linalg/dlamch.f rename to lib/linalg/fortran/dlamch.f diff --git a/lib/linalg/dlamrg.f b/lib/linalg/fortran/dlamrg.f similarity index 100% rename from lib/linalg/dlamrg.f rename to lib/linalg/fortran/dlamrg.f diff --git a/lib/linalg/dlange.f b/lib/linalg/fortran/dlange.f similarity index 100% rename from lib/linalg/dlange.f rename to lib/linalg/fortran/dlange.f diff --git a/lib/linalg/dlanst.f b/lib/linalg/fortran/dlanst.f similarity index 100% rename from lib/linalg/dlanst.f rename to lib/linalg/fortran/dlanst.f diff --git a/lib/linalg/dlansy.f b/lib/linalg/fortran/dlansy.f similarity index 100% rename from lib/linalg/dlansy.f rename to lib/linalg/fortran/dlansy.f diff --git a/lib/linalg/dlapy2.f b/lib/linalg/fortran/dlapy2.f similarity index 100% rename from lib/linalg/dlapy2.f rename to lib/linalg/fortran/dlapy2.f diff --git a/lib/linalg/dlapy3.f b/lib/linalg/fortran/dlapy3.f similarity index 100% rename from lib/linalg/dlapy3.f rename to lib/linalg/fortran/dlapy3.f diff --git a/lib/linalg/dlarf.f b/lib/linalg/fortran/dlarf.f similarity index 100% rename from lib/linalg/dlarf.f rename to lib/linalg/fortran/dlarf.f diff --git a/lib/linalg/dlarfb.f b/lib/linalg/fortran/dlarfb.f similarity index 100% rename from lib/linalg/dlarfb.f rename to lib/linalg/fortran/dlarfb.f diff --git a/lib/linalg/dlarfg.f b/lib/linalg/fortran/dlarfg.f similarity index 100% rename from lib/linalg/dlarfg.f rename to lib/linalg/fortran/dlarfg.f diff --git a/lib/linalg/dlarft.f b/lib/linalg/fortran/dlarft.f similarity index 100% rename from lib/linalg/dlarft.f rename to lib/linalg/fortran/dlarft.f diff --git a/lib/linalg/dlartg.f b/lib/linalg/fortran/dlartg.f similarity index 100% rename from lib/linalg/dlartg.f rename to lib/linalg/fortran/dlartg.f diff --git a/lib/linalg/dlas2.f b/lib/linalg/fortran/dlas2.f similarity index 100% rename from lib/linalg/dlas2.f rename to lib/linalg/fortran/dlas2.f diff --git a/lib/linalg/dlascl.f b/lib/linalg/fortran/dlascl.f similarity index 100% rename from lib/linalg/dlascl.f rename to lib/linalg/fortran/dlascl.f diff --git a/lib/linalg/dlasd4.f b/lib/linalg/fortran/dlasd4.f similarity index 100% rename from lib/linalg/dlasd4.f rename to lib/linalg/fortran/dlasd4.f diff --git a/lib/linalg/dlasd5.f b/lib/linalg/fortran/dlasd5.f similarity index 100% rename from lib/linalg/dlasd5.f rename to lib/linalg/fortran/dlasd5.f diff --git a/lib/linalg/dlasd6.f b/lib/linalg/fortran/dlasd6.f similarity index 100% rename from lib/linalg/dlasd6.f rename to lib/linalg/fortran/dlasd6.f diff --git a/lib/linalg/dlasd7.f b/lib/linalg/fortran/dlasd7.f similarity index 100% rename from lib/linalg/dlasd7.f rename to lib/linalg/fortran/dlasd7.f diff --git a/lib/linalg/dlasd8.f b/lib/linalg/fortran/dlasd8.f similarity index 100% rename from lib/linalg/dlasd8.f rename to lib/linalg/fortran/dlasd8.f diff --git a/lib/linalg/dlasda.f b/lib/linalg/fortran/dlasda.f similarity index 100% rename from lib/linalg/dlasda.f rename to lib/linalg/fortran/dlasda.f diff --git a/lib/linalg/dlasdq.f b/lib/linalg/fortran/dlasdq.f similarity index 100% rename from lib/linalg/dlasdq.f rename to lib/linalg/fortran/dlasdq.f diff --git a/lib/linalg/dlasdt.f b/lib/linalg/fortran/dlasdt.f similarity index 100% rename from lib/linalg/dlasdt.f rename to lib/linalg/fortran/dlasdt.f diff --git a/lib/linalg/dlaset.f b/lib/linalg/fortran/dlaset.f similarity index 100% rename from lib/linalg/dlaset.f rename to lib/linalg/fortran/dlaset.f diff --git a/lib/linalg/dlasq1.f b/lib/linalg/fortran/dlasq1.f similarity index 100% rename from lib/linalg/dlasq1.f rename to lib/linalg/fortran/dlasq1.f diff --git a/lib/linalg/dlasq2.f b/lib/linalg/fortran/dlasq2.f similarity index 100% rename from lib/linalg/dlasq2.f rename to lib/linalg/fortran/dlasq2.f diff --git a/lib/linalg/dlasq3.f b/lib/linalg/fortran/dlasq3.f similarity index 100% rename from lib/linalg/dlasq3.f rename to lib/linalg/fortran/dlasq3.f diff --git a/lib/linalg/dlasq4.f b/lib/linalg/fortran/dlasq4.f similarity index 100% rename from lib/linalg/dlasq4.f rename to lib/linalg/fortran/dlasq4.f diff --git a/lib/linalg/dlasq5.f b/lib/linalg/fortran/dlasq5.f similarity index 100% rename from lib/linalg/dlasq5.f rename to lib/linalg/fortran/dlasq5.f diff --git a/lib/linalg/dlasq6.f b/lib/linalg/fortran/dlasq6.f similarity index 100% rename from lib/linalg/dlasq6.f rename to lib/linalg/fortran/dlasq6.f diff --git a/lib/linalg/dlasr.f b/lib/linalg/fortran/dlasr.f similarity index 100% rename from lib/linalg/dlasr.f rename to lib/linalg/fortran/dlasr.f diff --git a/lib/linalg/dlasrt.f b/lib/linalg/fortran/dlasrt.f similarity index 100% rename from lib/linalg/dlasrt.f rename to lib/linalg/fortran/dlasrt.f diff --git a/lib/linalg/dlassq.f b/lib/linalg/fortran/dlassq.f similarity index 100% rename from lib/linalg/dlassq.f rename to lib/linalg/fortran/dlassq.f diff --git a/lib/linalg/dlasv2.f b/lib/linalg/fortran/dlasv2.f similarity index 100% rename from lib/linalg/dlasv2.f rename to lib/linalg/fortran/dlasv2.f diff --git a/lib/linalg/dlaswp.f b/lib/linalg/fortran/dlaswp.f similarity index 100% rename from lib/linalg/dlaswp.f rename to lib/linalg/fortran/dlaswp.f diff --git a/lib/linalg/dlatrd.f b/lib/linalg/fortran/dlatrd.f similarity index 100% rename from lib/linalg/dlatrd.f rename to lib/linalg/fortran/dlatrd.f diff --git a/lib/linalg/dlatrs.f b/lib/linalg/fortran/dlatrs.f similarity index 100% rename from lib/linalg/dlatrs.f rename to lib/linalg/fortran/dlatrs.f diff --git a/lib/linalg/dnrm2.f b/lib/linalg/fortran/dnrm2.f similarity index 100% rename from lib/linalg/dnrm2.f rename to lib/linalg/fortran/dnrm2.f diff --git a/lib/linalg/dorg2l.f b/lib/linalg/fortran/dorg2l.f similarity index 100% rename from lib/linalg/dorg2l.f rename to lib/linalg/fortran/dorg2l.f diff --git a/lib/linalg/dorg2r.f b/lib/linalg/fortran/dorg2r.f similarity index 100% rename from lib/linalg/dorg2r.f rename to lib/linalg/fortran/dorg2r.f diff --git a/lib/linalg/dorgbr.f b/lib/linalg/fortran/dorgbr.f similarity index 100% rename from lib/linalg/dorgbr.f rename to lib/linalg/fortran/dorgbr.f diff --git a/lib/linalg/dorgl2.f b/lib/linalg/fortran/dorgl2.f similarity index 100% rename from lib/linalg/dorgl2.f rename to lib/linalg/fortran/dorgl2.f diff --git a/lib/linalg/dorglq.f b/lib/linalg/fortran/dorglq.f similarity index 100% rename from lib/linalg/dorglq.f rename to lib/linalg/fortran/dorglq.f diff --git a/lib/linalg/dorgql.f b/lib/linalg/fortran/dorgql.f similarity index 100% rename from lib/linalg/dorgql.f rename to lib/linalg/fortran/dorgql.f diff --git a/lib/linalg/dorgqr.f b/lib/linalg/fortran/dorgqr.f similarity index 100% rename from lib/linalg/dorgqr.f rename to lib/linalg/fortran/dorgqr.f diff --git a/lib/linalg/dorgtr.f b/lib/linalg/fortran/dorgtr.f similarity index 100% rename from lib/linalg/dorgtr.f rename to lib/linalg/fortran/dorgtr.f diff --git a/lib/linalg/dorm2l.f b/lib/linalg/fortran/dorm2l.f similarity index 100% rename from lib/linalg/dorm2l.f rename to lib/linalg/fortran/dorm2l.f diff --git a/lib/linalg/dorm2r.f b/lib/linalg/fortran/dorm2r.f similarity index 100% rename from lib/linalg/dorm2r.f rename to lib/linalg/fortran/dorm2r.f diff --git a/lib/linalg/dormbr.f b/lib/linalg/fortran/dormbr.f similarity index 100% rename from lib/linalg/dormbr.f rename to lib/linalg/fortran/dormbr.f diff --git a/lib/linalg/dorml2.f b/lib/linalg/fortran/dorml2.f similarity index 100% rename from lib/linalg/dorml2.f rename to lib/linalg/fortran/dorml2.f diff --git a/lib/linalg/dormlq.f b/lib/linalg/fortran/dormlq.f similarity index 100% rename from lib/linalg/dormlq.f rename to lib/linalg/fortran/dormlq.f diff --git a/lib/linalg/dormql.f b/lib/linalg/fortran/dormql.f similarity index 100% rename from lib/linalg/dormql.f rename to lib/linalg/fortran/dormql.f diff --git a/lib/linalg/dormqr.f b/lib/linalg/fortran/dormqr.f similarity index 100% rename from lib/linalg/dormqr.f rename to lib/linalg/fortran/dormqr.f diff --git a/lib/linalg/dormtr.f b/lib/linalg/fortran/dormtr.f similarity index 100% rename from lib/linalg/dormtr.f rename to lib/linalg/fortran/dormtr.f diff --git a/lib/linalg/dposv.f b/lib/linalg/fortran/dposv.f similarity index 100% rename from lib/linalg/dposv.f rename to lib/linalg/fortran/dposv.f diff --git a/lib/linalg/dpotf2.f b/lib/linalg/fortran/dpotf2.f similarity index 100% rename from lib/linalg/dpotf2.f rename to lib/linalg/fortran/dpotf2.f diff --git a/lib/linalg/dpotrf.f b/lib/linalg/fortran/dpotrf.f similarity index 100% rename from lib/linalg/dpotrf.f rename to lib/linalg/fortran/dpotrf.f diff --git a/lib/linalg/dpotrf2.f b/lib/linalg/fortran/dpotrf2.f similarity index 100% rename from lib/linalg/dpotrf2.f rename to lib/linalg/fortran/dpotrf2.f diff --git a/lib/linalg/dpotrs.f b/lib/linalg/fortran/dpotrs.f similarity index 100% rename from lib/linalg/dpotrs.f rename to lib/linalg/fortran/dpotrs.f diff --git a/lib/linalg/drot.f b/lib/linalg/fortran/drot.f similarity index 100% rename from lib/linalg/drot.f rename to lib/linalg/fortran/drot.f diff --git a/lib/linalg/drscl.f b/lib/linalg/fortran/drscl.f similarity index 100% rename from lib/linalg/drscl.f rename to lib/linalg/fortran/drscl.f diff --git a/lib/linalg/dscal.f b/lib/linalg/fortran/dscal.f similarity index 100% rename from lib/linalg/dscal.f rename to lib/linalg/fortran/dscal.f diff --git a/lib/linalg/dstedc.f b/lib/linalg/fortran/dstedc.f similarity index 100% rename from lib/linalg/dstedc.f rename to lib/linalg/fortran/dstedc.f diff --git a/lib/linalg/dsteqr.f b/lib/linalg/fortran/dsteqr.f similarity index 100% rename from lib/linalg/dsteqr.f rename to lib/linalg/fortran/dsteqr.f diff --git a/lib/linalg/dsterf.f b/lib/linalg/fortran/dsterf.f similarity index 100% rename from lib/linalg/dsterf.f rename to lib/linalg/fortran/dsterf.f diff --git a/lib/linalg/dswap.f b/lib/linalg/fortran/dswap.f similarity index 100% rename from lib/linalg/dswap.f rename to lib/linalg/fortran/dswap.f diff --git a/lib/linalg/dsyev.f b/lib/linalg/fortran/dsyev.f similarity index 100% rename from lib/linalg/dsyev.f rename to lib/linalg/fortran/dsyev.f diff --git a/lib/linalg/dsyevd.f b/lib/linalg/fortran/dsyevd.f similarity index 100% rename from lib/linalg/dsyevd.f rename to lib/linalg/fortran/dsyevd.f diff --git a/lib/linalg/dsygs2.f b/lib/linalg/fortran/dsygs2.f similarity index 100% rename from lib/linalg/dsygs2.f rename to lib/linalg/fortran/dsygs2.f diff --git a/lib/linalg/dsygst.f b/lib/linalg/fortran/dsygst.f similarity index 100% rename from lib/linalg/dsygst.f rename to lib/linalg/fortran/dsygst.f diff --git a/lib/linalg/dsygv.f b/lib/linalg/fortran/dsygv.f similarity index 100% rename from lib/linalg/dsygv.f rename to lib/linalg/fortran/dsygv.f diff --git a/lib/linalg/dsygvd.f b/lib/linalg/fortran/dsygvd.f similarity index 100% rename from lib/linalg/dsygvd.f rename to lib/linalg/fortran/dsygvd.f diff --git a/lib/linalg/dsymm.f b/lib/linalg/fortran/dsymm.f similarity index 100% rename from lib/linalg/dsymm.f rename to lib/linalg/fortran/dsymm.f diff --git a/lib/linalg/dsymv.f b/lib/linalg/fortran/dsymv.f similarity index 100% rename from lib/linalg/dsymv.f rename to lib/linalg/fortran/dsymv.f diff --git a/lib/linalg/dsyr2.f b/lib/linalg/fortran/dsyr2.f similarity index 100% rename from lib/linalg/dsyr2.f rename to lib/linalg/fortran/dsyr2.f diff --git a/lib/linalg/dsyr2k.f b/lib/linalg/fortran/dsyr2k.f similarity index 100% rename from lib/linalg/dsyr2k.f rename to lib/linalg/fortran/dsyr2k.f diff --git a/lib/linalg/dsyrk.f b/lib/linalg/fortran/dsyrk.f similarity index 100% rename from lib/linalg/dsyrk.f rename to lib/linalg/fortran/dsyrk.f diff --git a/lib/linalg/dsytd2.f b/lib/linalg/fortran/dsytd2.f similarity index 100% rename from lib/linalg/dsytd2.f rename to lib/linalg/fortran/dsytd2.f diff --git a/lib/linalg/dsytrd.f b/lib/linalg/fortran/dsytrd.f similarity index 100% rename from lib/linalg/dsytrd.f rename to lib/linalg/fortran/dsytrd.f diff --git a/lib/linalg/dtrmm.f b/lib/linalg/fortran/dtrmm.f similarity index 100% rename from lib/linalg/dtrmm.f rename to lib/linalg/fortran/dtrmm.f diff --git a/lib/linalg/dtrmv.f b/lib/linalg/fortran/dtrmv.f similarity index 100% rename from lib/linalg/dtrmv.f rename to lib/linalg/fortran/dtrmv.f diff --git a/lib/linalg/dtrsm.f b/lib/linalg/fortran/dtrsm.f similarity index 100% rename from lib/linalg/dtrsm.f rename to lib/linalg/fortran/dtrsm.f diff --git a/lib/linalg/dtrsv.f b/lib/linalg/fortran/dtrsv.f similarity index 100% rename from lib/linalg/dtrsv.f rename to lib/linalg/fortran/dtrsv.f diff --git a/lib/linalg/dtrti2.f b/lib/linalg/fortran/dtrti2.f similarity index 100% rename from lib/linalg/dtrti2.f rename to lib/linalg/fortran/dtrti2.f diff --git a/lib/linalg/dtrtri.f b/lib/linalg/fortran/dtrtri.f similarity index 100% rename from lib/linalg/dtrtri.f rename to lib/linalg/fortran/dtrtri.f diff --git a/lib/linalg/dznrm2.f b/lib/linalg/fortran/dznrm2.f similarity index 100% rename from lib/linalg/dznrm2.f rename to lib/linalg/fortran/dznrm2.f diff --git a/lib/linalg/idamax.f b/lib/linalg/fortran/idamax.f similarity index 100% rename from lib/linalg/idamax.f rename to lib/linalg/fortran/idamax.f diff --git a/lib/linalg/ieeeck.f b/lib/linalg/fortran/ieeeck.f similarity index 100% rename from lib/linalg/ieeeck.f rename to lib/linalg/fortran/ieeeck.f diff --git a/lib/linalg/iladlc.f b/lib/linalg/fortran/iladlc.f similarity index 100% rename from lib/linalg/iladlc.f rename to lib/linalg/fortran/iladlc.f diff --git a/lib/linalg/iladlr.f b/lib/linalg/fortran/iladlr.f similarity index 100% rename from lib/linalg/iladlr.f rename to lib/linalg/fortran/iladlr.f diff --git a/lib/linalg/ilaenv.f b/lib/linalg/fortran/ilaenv.f similarity index 100% rename from lib/linalg/ilaenv.f rename to lib/linalg/fortran/ilaenv.f diff --git a/lib/linalg/ilazlc.f b/lib/linalg/fortran/ilazlc.f similarity index 100% rename from lib/linalg/ilazlc.f rename to lib/linalg/fortran/ilazlc.f diff --git a/lib/linalg/ilazlr.f b/lib/linalg/fortran/ilazlr.f similarity index 100% rename from lib/linalg/ilazlr.f rename to lib/linalg/fortran/ilazlr.f diff --git a/lib/linalg/iparmq.f b/lib/linalg/fortran/iparmq.f similarity index 100% rename from lib/linalg/iparmq.f rename to lib/linalg/fortran/iparmq.f diff --git a/lib/linalg/lsame.f b/lib/linalg/fortran/lsame.f similarity index 100% rename from lib/linalg/lsame.f rename to lib/linalg/fortran/lsame.f diff --git a/lib/linalg/xerbla.f b/lib/linalg/fortran/xerbla.f similarity index 100% rename from lib/linalg/xerbla.f rename to lib/linalg/fortran/xerbla.f diff --git a/lib/linalg/zaxpy.f b/lib/linalg/fortran/zaxpy.f similarity index 100% rename from lib/linalg/zaxpy.f rename to lib/linalg/fortran/zaxpy.f diff --git a/lib/linalg/zcopy.f b/lib/linalg/fortran/zcopy.f similarity index 100% rename from lib/linalg/zcopy.f rename to lib/linalg/fortran/zcopy.f diff --git a/lib/linalg/zdotc.f b/lib/linalg/fortran/zdotc.f similarity index 100% rename from lib/linalg/zdotc.f rename to lib/linalg/fortran/zdotc.f diff --git a/lib/linalg/zdrot.f b/lib/linalg/fortran/zdrot.f similarity index 100% rename from lib/linalg/zdrot.f rename to lib/linalg/fortran/zdrot.f diff --git a/lib/linalg/zdscal.f b/lib/linalg/fortran/zdscal.f similarity index 100% rename from lib/linalg/zdscal.f rename to lib/linalg/fortran/zdscal.f diff --git a/lib/linalg/zgemm.f b/lib/linalg/fortran/zgemm.f similarity index 100% rename from lib/linalg/zgemm.f rename to lib/linalg/fortran/zgemm.f diff --git a/lib/linalg/zgemv.f b/lib/linalg/fortran/zgemv.f similarity index 100% rename from lib/linalg/zgemv.f rename to lib/linalg/fortran/zgemv.f diff --git a/lib/linalg/zgerc.f b/lib/linalg/fortran/zgerc.f similarity index 100% rename from lib/linalg/zgerc.f rename to lib/linalg/fortran/zgerc.f diff --git a/lib/linalg/zheev.f b/lib/linalg/fortran/zheev.f similarity index 100% rename from lib/linalg/zheev.f rename to lib/linalg/fortran/zheev.f diff --git a/lib/linalg/zheevd.f b/lib/linalg/fortran/zheevd.f similarity index 100% rename from lib/linalg/zheevd.f rename to lib/linalg/fortran/zheevd.f diff --git a/lib/linalg/zhemv.f b/lib/linalg/fortran/zhemv.f similarity index 100% rename from lib/linalg/zhemv.f rename to lib/linalg/fortran/zhemv.f diff --git a/lib/linalg/zher2.f b/lib/linalg/fortran/zher2.f similarity index 100% rename from lib/linalg/zher2.f rename to lib/linalg/fortran/zher2.f diff --git a/lib/linalg/zher2k.f b/lib/linalg/fortran/zher2k.f similarity index 100% rename from lib/linalg/zher2k.f rename to lib/linalg/fortran/zher2k.f diff --git a/lib/linalg/zhetd2.f b/lib/linalg/fortran/zhetd2.f similarity index 100% rename from lib/linalg/zhetd2.f rename to lib/linalg/fortran/zhetd2.f diff --git a/lib/linalg/zhetrd.f b/lib/linalg/fortran/zhetrd.f similarity index 100% rename from lib/linalg/zhetrd.f rename to lib/linalg/fortran/zhetrd.f diff --git a/lib/linalg/zhpr.f b/lib/linalg/fortran/zhpr.f similarity index 100% rename from lib/linalg/zhpr.f rename to lib/linalg/fortran/zhpr.f diff --git a/lib/linalg/zlacgv.f b/lib/linalg/fortran/zlacgv.f similarity index 100% rename from lib/linalg/zlacgv.f rename to lib/linalg/fortran/zlacgv.f diff --git a/lib/linalg/zlacpy.f b/lib/linalg/fortran/zlacpy.f similarity index 100% rename from lib/linalg/zlacpy.f rename to lib/linalg/fortran/zlacpy.f diff --git a/lib/linalg/zlacrm.f b/lib/linalg/fortran/zlacrm.f similarity index 100% rename from lib/linalg/zlacrm.f rename to lib/linalg/fortran/zlacrm.f diff --git a/lib/linalg/zladiv.f b/lib/linalg/fortran/zladiv.f similarity index 100% rename from lib/linalg/zladiv.f rename to lib/linalg/fortran/zladiv.f diff --git a/lib/linalg/zlaed0.f b/lib/linalg/fortran/zlaed0.f similarity index 100% rename from lib/linalg/zlaed0.f rename to lib/linalg/fortran/zlaed0.f diff --git a/lib/linalg/zlaed7.f b/lib/linalg/fortran/zlaed7.f similarity index 100% rename from lib/linalg/zlaed7.f rename to lib/linalg/fortran/zlaed7.f diff --git a/lib/linalg/zlaed8.f b/lib/linalg/fortran/zlaed8.f similarity index 100% rename from lib/linalg/zlaed8.f rename to lib/linalg/fortran/zlaed8.f diff --git a/lib/linalg/zlanhe.f b/lib/linalg/fortran/zlanhe.f similarity index 100% rename from lib/linalg/zlanhe.f rename to lib/linalg/fortran/zlanhe.f diff --git a/lib/linalg/zlarf.f b/lib/linalg/fortran/zlarf.f similarity index 100% rename from lib/linalg/zlarf.f rename to lib/linalg/fortran/zlarf.f diff --git a/lib/linalg/zlarfb.f b/lib/linalg/fortran/zlarfb.f similarity index 100% rename from lib/linalg/zlarfb.f rename to lib/linalg/fortran/zlarfb.f diff --git a/lib/linalg/zlarfg.f b/lib/linalg/fortran/zlarfg.f similarity index 100% rename from lib/linalg/zlarfg.f rename to lib/linalg/fortran/zlarfg.f diff --git a/lib/linalg/zlarft.f b/lib/linalg/fortran/zlarft.f similarity index 100% rename from lib/linalg/zlarft.f rename to lib/linalg/fortran/zlarft.f diff --git a/lib/linalg/zlascl.f b/lib/linalg/fortran/zlascl.f similarity index 100% rename from lib/linalg/zlascl.f rename to lib/linalg/fortran/zlascl.f diff --git a/lib/linalg/zlaset.f b/lib/linalg/fortran/zlaset.f similarity index 100% rename from lib/linalg/zlaset.f rename to lib/linalg/fortran/zlaset.f diff --git a/lib/linalg/zlasr.f b/lib/linalg/fortran/zlasr.f similarity index 100% rename from lib/linalg/zlasr.f rename to lib/linalg/fortran/zlasr.f diff --git a/lib/linalg/zlassq.f b/lib/linalg/fortran/zlassq.f similarity index 100% rename from lib/linalg/zlassq.f rename to lib/linalg/fortran/zlassq.f diff --git a/lib/linalg/zlatrd.f b/lib/linalg/fortran/zlatrd.f similarity index 100% rename from lib/linalg/zlatrd.f rename to lib/linalg/fortran/zlatrd.f diff --git a/lib/linalg/zpptrf.f b/lib/linalg/fortran/zpptrf.f similarity index 100% rename from lib/linalg/zpptrf.f rename to lib/linalg/fortran/zpptrf.f diff --git a/lib/linalg/zpptri.f b/lib/linalg/fortran/zpptri.f similarity index 100% rename from lib/linalg/zpptri.f rename to lib/linalg/fortran/zpptri.f diff --git a/lib/linalg/zscal.f b/lib/linalg/fortran/zscal.f similarity index 100% rename from lib/linalg/zscal.f rename to lib/linalg/fortran/zscal.f diff --git a/lib/linalg/zstedc.f b/lib/linalg/fortran/zstedc.f similarity index 100% rename from lib/linalg/zstedc.f rename to lib/linalg/fortran/zstedc.f diff --git a/lib/linalg/zsteqr.f b/lib/linalg/fortran/zsteqr.f similarity index 100% rename from lib/linalg/zsteqr.f rename to lib/linalg/fortran/zsteqr.f diff --git a/lib/linalg/zswap.f b/lib/linalg/fortran/zswap.f similarity index 100% rename from lib/linalg/zswap.f rename to lib/linalg/fortran/zswap.f diff --git a/lib/linalg/ztpmv.f b/lib/linalg/fortran/ztpmv.f similarity index 100% rename from lib/linalg/ztpmv.f rename to lib/linalg/fortran/ztpmv.f diff --git a/lib/linalg/ztpsv.f b/lib/linalg/fortran/ztpsv.f similarity index 100% rename from lib/linalg/ztpsv.f rename to lib/linalg/fortran/ztpsv.f diff --git a/lib/linalg/ztptri.f b/lib/linalg/fortran/ztptri.f similarity index 100% rename from lib/linalg/ztptri.f rename to lib/linalg/fortran/ztptri.f diff --git a/lib/linalg/ztrmm.f b/lib/linalg/fortran/ztrmm.f similarity index 100% rename from lib/linalg/ztrmm.f rename to lib/linalg/fortran/ztrmm.f diff --git a/lib/linalg/ztrmv.f b/lib/linalg/fortran/ztrmv.f similarity index 100% rename from lib/linalg/ztrmv.f rename to lib/linalg/fortran/ztrmv.f diff --git a/lib/linalg/zung2l.f b/lib/linalg/fortran/zung2l.f similarity index 100% rename from lib/linalg/zung2l.f rename to lib/linalg/fortran/zung2l.f diff --git a/lib/linalg/zung2r.f b/lib/linalg/fortran/zung2r.f similarity index 100% rename from lib/linalg/zung2r.f rename to lib/linalg/fortran/zung2r.f diff --git a/lib/linalg/zungl2.f b/lib/linalg/fortran/zungl2.f similarity index 100% rename from lib/linalg/zungl2.f rename to lib/linalg/fortran/zungl2.f diff --git a/lib/linalg/zungql.f b/lib/linalg/fortran/zungql.f similarity index 100% rename from lib/linalg/zungql.f rename to lib/linalg/fortran/zungql.f diff --git a/lib/linalg/zungqr.f b/lib/linalg/fortran/zungqr.f similarity index 100% rename from lib/linalg/zungqr.f rename to lib/linalg/fortran/zungqr.f diff --git a/lib/linalg/zungtr.f b/lib/linalg/fortran/zungtr.f similarity index 100% rename from lib/linalg/zungtr.f rename to lib/linalg/fortran/zungtr.f diff --git a/lib/linalg/zunm2l.f b/lib/linalg/fortran/zunm2l.f similarity index 100% rename from lib/linalg/zunm2l.f rename to lib/linalg/fortran/zunm2l.f diff --git a/lib/linalg/zunm2r.f b/lib/linalg/fortran/zunm2r.f similarity index 100% rename from lib/linalg/zunm2r.f rename to lib/linalg/fortran/zunm2r.f diff --git a/lib/linalg/zunmql.f b/lib/linalg/fortran/zunmql.f similarity index 100% rename from lib/linalg/zunmql.f rename to lib/linalg/fortran/zunmql.f diff --git a/lib/linalg/zunmqr.f b/lib/linalg/fortran/zunmqr.f similarity index 100% rename from lib/linalg/zunmqr.f rename to lib/linalg/fortran/zunmqr.f diff --git a/lib/linalg/zunmtr.f b/lib/linalg/fortran/zunmtr.f similarity index 100% rename from lib/linalg/zunmtr.f rename to lib/linalg/fortran/zunmtr.f diff --git a/lib/linalg/idamax.cpp b/lib/linalg/idamax.cpp new file mode 100644 index 0000000000..edcf861435 --- /dev/null +++ b/lib/linalg/idamax.cpp @@ -0,0 +1,161 @@ +/* fortran/idamax.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* > \brief \b IDAMAX */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* Definition: */ +/* =========== */ + +/* INTEGER FUNCTION IDAMAX(N,DX,INCX) */ + +/* .. Scalar Arguments .. */ +/* INTEGER INCX,N */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION DX(*) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > IDAMAX finds the index of the first element having maximum absolute value. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > number of elements in input vector(s) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DX */ +/* > \verbatim */ +/* > DX is DOUBLE PRECISION array, dimension ( 1 + ( N - 1 )*abs( INCX ) ) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] INCX */ +/* > \verbatim */ +/* > INCX is INTEGER */ +/* > storage spacing between elements of DX */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup aux_blas */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > jack dongarra, linpack, 3/11/78. */ +/* > modified 3/93 to return if incx .le. 0. */ +/* > modified 12/3/93, array(1) declarations changed to array(*) */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +integer idamax_(integer *n, doublereal *dx, integer *incx) +{ + /* System generated locals */ + integer ret_val, i__1; + doublereal d__1; + + /* Local variables */ + integer i__, ix; + doublereal dmax__; + + +/* -- Reference BLAS level1 routine -- */ +/* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ + /* Parameter adjustments */ + --dx; + + /* Function Body */ + ret_val = 0; + if (*n < 1 || *incx <= 0) { + return ret_val; + } + ret_val = 1; + if (*n == 1) { + return ret_val; + } + if (*incx == 1) { + +/* code for increment equal to 1 */ + + dmax__ = abs(dx[1]); + i__1 = *n; + for (i__ = 2; i__ <= i__1; ++i__) { + if ((d__1 = dx[i__], abs(d__1)) > dmax__) { + ret_val = i__; + dmax__ = (d__1 = dx[i__], abs(d__1)); + } + } + } else { + +/* code for increment not equal to 1 */ + + ix = 1; + dmax__ = abs(dx[1]); + ix += *incx; + i__1 = *n; + for (i__ = 2; i__ <= i__1; ++i__) { + if ((d__1 = dx[ix], abs(d__1)) > dmax__) { + ret_val = i__; + dmax__ = (d__1 = dx[ix], abs(d__1)); + } + ix += *incx; + } + } + return ret_val; + +/* End of IDAMAX */ + +} /* idamax_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/ieeeck.cpp b/lib/linalg/ieeeck.cpp new file mode 100644 index 0000000000..16626acf9d --- /dev/null +++ b/lib/linalg/ieeeck.cpp @@ -0,0 +1,228 @@ +/* fortran/ieeeck.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* > \brief \b IEEECK */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download IEEECK + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* INTEGER FUNCTION IEEECK( ISPEC, ZERO, ONE ) */ + +/* .. Scalar Arguments .. */ +/* INTEGER ISPEC */ +/* REAL ONE, ZERO */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > IEEECK is called from the ILAENV to verify that Infinity and */ +/* > possibly NaN arithmetic is safe (i.e. will not trap). */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] ISPEC */ +/* > \verbatim */ +/* > ISPEC is INTEGER */ +/* > Specifies whether to test just for infinity arithmetic */ +/* > or whether to test for infinity and NaN arithmetic. */ +/* > = 0: Verify infinity arithmetic only. */ +/* > = 1: Verify infinity and NaN arithmetic. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] ZERO */ +/* > \verbatim */ +/* > ZERO is REAL */ +/* > Must contain the value 0.0 */ +/* > This is passed to prevent the compiler from optimizing */ +/* > away this code. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] ONE */ +/* > \verbatim */ +/* > ONE is REAL */ +/* > Must contain the value 1.0 */ +/* > This is passed to prevent the compiler from optimizing */ +/* > away this code. */ +/* > */ +/* > RETURN VALUE: INTEGER */ +/* > = 0: Arithmetic failed to produce the correct answers */ +/* > = 1: Arithmetic produced the correct answers */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup OTHERauxiliary */ + +/* ===================================================================== */ +integer ieeeck_(integer *ispec, real *zero, real *one) +{ + /* System generated locals */ + integer ret_val; + + /* Local variables */ + real nan1, nan2, nan3, nan4, nan5, nan6, neginf, posinf, negzro, newzro; + + +/* -- LAPACK auxiliary routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. Executable Statements .. */ + ret_val = 1; + + posinf = *one / *zero; + if (posinf <= *one) { + ret_val = 0; + return ret_val; + } + + neginf = -(*one) / *zero; + if (neginf >= *zero) { + ret_val = 0; + return ret_val; + } + + negzro = *one / (neginf + *one); + if (negzro != *zero) { + ret_val = 0; + return ret_val; + } + + neginf = *one / negzro; + if (neginf >= *zero) { + ret_val = 0; + return ret_val; + } + + newzro = negzro + *zero; + if (newzro != *zero) { + ret_val = 0; + return ret_val; + } + + posinf = *one / newzro; + if (posinf <= *one) { + ret_val = 0; + return ret_val; + } + + neginf *= posinf; + if (neginf >= *zero) { + ret_val = 0; + return ret_val; + } + + posinf *= posinf; + if (posinf <= *one) { + ret_val = 0; + return ret_val; + } + + + + +/* Return if we were only asked to check infinity arithmetic */ + + if (*ispec == 0) { + return ret_val; + } + + nan1 = posinf + neginf; + + nan2 = posinf / neginf; + + nan3 = posinf / posinf; + + nan4 = posinf * *zero; + + nan5 = neginf * negzro; + + nan6 = nan5 * *zero; + + if (nan1 == nan1) { + ret_val = 0; + return ret_val; + } + + if (nan2 == nan2) { + ret_val = 0; + return ret_val; + } + + if (nan3 == nan3) { + ret_val = 0; + return ret_val; + } + + if (nan4 == nan4) { + ret_val = 0; + return ret_val; + } + + if (nan5 == nan5) { + ret_val = 0; + return ret_val; + } + + if (nan6 == nan6) { + ret_val = 0; + return ret_val; + } + + return ret_val; +} /* ieeeck_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/iladlc.cpp b/lib/linalg/iladlc.cpp new file mode 100644 index 0000000000..21b05e970a --- /dev/null +++ b/lib/linalg/iladlc.cpp @@ -0,0 +1,150 @@ +/* fortran/iladlc.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* > \brief \b ILADLC scans a matrix for its last non-zero column. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ILADLC + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* INTEGER FUNCTION ILADLC( M, N, A, LDA ) */ + +/* .. Scalar Arguments .. */ +/* INTEGER M, N, LDA */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION A( LDA, * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ILADLC scans A for its last non-zero column. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > The m by n matrix A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= max(1,M). */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup OTHERauxiliary */ + +/* ===================================================================== */ +integer iladlc_(integer *m, integer *n, doublereal *a, integer *lda) +{ + /* System generated locals */ + integer a_dim1, a_offset, ret_val, i__1; + + /* Local variables */ + integer i__; + + +/* -- LAPACK auxiliary routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Quick test for the common case where one corner is non-zero. */ + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + if (*n == 0) { + ret_val = *n; + } else if (a[*n * a_dim1 + 1] != 0. || a[*m + *n * a_dim1] != 0.) { + ret_val = *n; + } else { +/* Now scan each column from the end, returning with the first non-zero. */ + for (ret_val = *n; ret_val >= 1; --ret_val) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + if (a[i__ + ret_val * a_dim1] != 0.) { + return ret_val; + } + } + } + } + return ret_val; +} /* iladlc_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/iladlr.cpp b/lib/linalg/iladlr.cpp new file mode 100644 index 0000000000..a301b66170 --- /dev/null +++ b/lib/linalg/iladlr.cpp @@ -0,0 +1,151 @@ +/* fortran/iladlr.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* > \brief \b ILADLR scans a matrix for its last non-zero row. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ILADLR + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* INTEGER FUNCTION ILADLR( M, N, A, LDA ) */ + +/* .. Scalar Arguments .. */ +/* INTEGER M, N, LDA */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION A( LDA, * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ILADLR scans A for its last non-zero row. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is DOUBLE PRECISION array, dimension (LDA,N) */ +/* > The m by n matrix A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= max(1,M). */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup OTHERauxiliary */ + +/* ===================================================================== */ +integer iladlr_(integer *m, integer *n, doublereal *a, integer *lda) +{ + /* System generated locals */ + integer a_dim1, a_offset, ret_val, i__1; + + /* Local variables */ + integer i__, j; + + +/* -- LAPACK auxiliary routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Quick test for the common case where one corner is non-zero. */ + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + if (*m == 0) { + ret_val = *m; + } else if (a[*m + a_dim1] != 0. || a[*m + *n * a_dim1] != 0.) { + ret_val = *m; + } else { +/* Scan up each column tracking the last zero row seen. */ + ret_val = 0; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__ = *m; + while(a[max(i__,1) + j * a_dim1] == 0. && i__ >= 1) { + --i__; + } + ret_val = max(ret_val,i__); + } + } + return ret_val; +} /* iladlr_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/ilaenv.cpp b/lib/linalg/ilaenv.cpp new file mode 100644 index 0000000000..0e9a051222 --- /dev/null +++ b/lib/linalg/ilaenv.cpp @@ -0,0 +1,845 @@ +/* fortran/ilaenv.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static real c_b176 = (float)0.; +static real c_b177 = (float)1.; +static integer c__0 = 0; + +/* > \brief \b ILAENV */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ILAENV + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* INTEGER FUNCTION ILAENV( ISPEC, NAME, OPTS, N1, N2, N3, N4 ) */ + +/* .. Scalar Arguments .. */ +/* CHARACTER*( * ) NAME, OPTS */ +/* INTEGER ISPEC, N1, N2, N3, N4 */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ILAENV is called from the LAPACK routines to choose problem-dependent */ +/* > parameters for the local environment. See ISPEC for a description of */ +/* > the parameters. */ +/* > */ +/* > ILAENV returns an INTEGER */ +/* > if ILAENV >= 0: ILAENV returns the value of the parameter specified by ISPEC */ +/* > if ILAENV < 0: if ILAENV = -k, the k-th argument had an illegal value. */ +/* > */ +/* > This version provides a set of parameters which should give good, */ +/* > but not optimal, performance on many of the currently available */ +/* > computers. Users are encouraged to modify this subroutine to set */ +/* > the tuning parameters for their particular machine using the option */ +/* > and problem size information in the arguments. */ +/* > */ +/* > This routine will not function correctly if it is converted to all */ +/* > lower case. Converting it to all upper case is allowed. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] ISPEC */ +/* > \verbatim */ +/* > ISPEC is INTEGER */ +/* > Specifies the parameter to be returned as the value of */ +/* > ILAENV. */ +/* > = 1: the optimal blocksize; if this value is 1, an unblocked */ +/* > algorithm will give the best performance. */ +/* > = 2: the minimum block size for which the block routine */ +/* > should be used; if the usable block size is less than */ +/* > this value, an unblocked routine should be used. */ +/* > = 3: the crossover point (in a block routine, for N less */ +/* > than this value, an unblocked routine should be used) */ +/* > = 4: the number of shifts, used in the nonsymmetric */ +/* > eigenvalue routines (DEPRECATED) */ +/* > = 5: the minimum column dimension for blocking to be used; */ +/* > rectangular blocks must have dimension at least k by m, */ +/* > where k is given by ILAENV(2,...) and m by ILAENV(5,...) */ +/* > = 6: the crossover point for the SVD (when reducing an m by n */ +/* > matrix to bidiagonal form, if max(m,n)/min(m,n) exceeds */ +/* > this value, a QR factorization is used first to reduce */ +/* > the matrix to a triangular form.) */ +/* > = 7: the number of processors */ +/* > = 8: the crossover point for the multishift QR method */ +/* > for nonsymmetric eigenvalue problems (DEPRECATED) */ +/* > = 9: maximum size of the subproblems at the bottom of the */ +/* > computation tree in the divide-and-conquer algorithm */ +/* > (used by xGELSD and xGESDD) */ +/* > =10: ieee infinity and NaN arithmetic can be trusted not to trap */ +/* > =11: infinity arithmetic can be trusted not to trap */ +/* > 12 <= ISPEC <= 17: */ +/* > xHSEQR or related subroutines, */ +/* > see IPARMQ for detailed explanation */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NAME */ +/* > \verbatim */ +/* > NAME is CHARACTER*(*) */ +/* > The name of the calling subroutine, in either upper case or */ +/* > lower case. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] OPTS */ +/* > \verbatim */ +/* > OPTS is CHARACTER*(*) */ +/* > The character options to the subroutine NAME, concatenated */ +/* > into a single character string. For example, UPLO = 'U', */ +/* > TRANS = 'T', and DIAG = 'N' for a triangular routine would */ +/* > be specified as OPTS = 'UTN'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N1 */ +/* > \verbatim */ +/* > N1 is INTEGER */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N2 */ +/* > \verbatim */ +/* > N2 is INTEGER */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N3 */ +/* > \verbatim */ +/* > N3 is INTEGER */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N4 */ +/* > \verbatim */ +/* > N4 is INTEGER */ +/* > Problem dimensions for the subroutine NAME; these may not all */ +/* > be required. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup OTHERauxiliary */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The following conventions have been used when calling ILAENV from the */ +/* > LAPACK routines: */ +/* > 1) OPTS is a concatenation of all of the character options to */ +/* > subroutine NAME, in the same order that they appear in the */ +/* > argument list for NAME, even if they are not used in determining */ +/* > the value of the parameter specified by ISPEC. */ +/* > 2) The problem dimensions N1, N2, N3, N4 are specified in the order */ +/* > that they appear in the argument list for NAME. N1 is used */ +/* > first, N2 second, and so on, and unused problem dimensions are */ +/* > passed a value of -1. */ +/* > 3) The parameter value returned by ILAENV is checked for validity in */ +/* > the calling subroutine. For example, ILAENV is used to retrieve */ +/* > the optimal blocksize for STRTRI as follows: */ +/* > */ +/* > NB = ILAENV( 1, 'STRTRI', UPLO // DIAG, N, -1, -1, -1 ) */ +/* > IF( NB.LE.1 ) NB = MAX( 1, N ) */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +integer ilaenv_(integer *ispec, char *name__, char *opts, integer *n1, + integer *n2, integer *n3, integer *n4, ftnlen name_len, ftnlen + opts_len) +{ + /* System generated locals */ + integer ret_val, i__1, i__2, i__3; + + /* Builtin functions */ + /* Subroutine */ int s_copy(char *, char *, ftnlen, ftnlen); + integer i_len(char *, ftnlen), s_cmp(char *, char *, ftnlen, ftnlen); + + /* Local variables */ + logical twostage; + integer i__; + char c1[1], c2[2], c3[3], c4[2]; + integer ic, nb, iz, nx; + logical cname; + integer nbmin; + logical sname; + extern integer ieeeck_(integer *, real *, real *); + char subnam[16]; + extern integer iparmq_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + + +/* -- LAPACK auxiliary routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + switch (*ispec) { + case 1: goto L10; + case 2: goto L10; + case 3: goto L10; + case 4: goto L80; + case 5: goto L90; + case 6: goto L100; + case 7: goto L110; + case 8: goto L120; + case 9: goto L130; + case 10: goto L140; + case 11: goto L150; + case 12: goto L160; + case 13: goto L160; + case 14: goto L160; + case 15: goto L160; + case 16: goto L160; + case 17: goto L160; + } + +/* Invalid value for ISPEC */ + + ret_val = -1; + return ret_val; + +L10: + +/* Convert NAME to upper case if the first character is lower case. */ + + ret_val = 1; + s_copy(subnam, name__, (ftnlen)16, name_len); + ic = *(unsigned char *)subnam; + iz = 'Z'; + if (iz == 90 || iz == 122) { + +/* ASCII character set */ + + if (ic >= 97 && ic <= 122) { + *(unsigned char *)subnam = (char) (ic - 32); + for (i__ = 2; i__ <= 6; ++i__) { + ic = *(unsigned char *)&subnam[i__ - 1]; + if (ic >= 97 && ic <= 122) { + *(unsigned char *)&subnam[i__ - 1] = (char) (ic - 32); + } +/* L20: */ + } + } + + } else if (iz == 233 || iz == 169) { + +/* EBCDIC character set */ + + if (ic >= 129 && ic <= 137 || ic >= 145 && ic <= 153 || ic >= 162 && + ic <= 169) { + *(unsigned char *)subnam = (char) (ic + 64); + for (i__ = 2; i__ <= 6; ++i__) { + ic = *(unsigned char *)&subnam[i__ - 1]; + if (ic >= 129 && ic <= 137 || ic >= 145 && ic <= 153 || ic >= + 162 && ic <= 169) { + *(unsigned char *)&subnam[i__ - 1] = (char) (ic + 64); + } +/* L30: */ + } + } + + } else if (iz == 218 || iz == 250) { + +/* Prime machines: ASCII+128 */ + + if (ic >= 225 && ic <= 250) { + *(unsigned char *)subnam = (char) (ic - 32); + for (i__ = 2; i__ <= 6; ++i__) { + ic = *(unsigned char *)&subnam[i__ - 1]; + if (ic >= 225 && ic <= 250) { + *(unsigned char *)&subnam[i__ - 1] = (char) (ic - 32); + } +/* L40: */ + } + } + } + + *(unsigned char *)c1 = *(unsigned char *)subnam; + sname = *(unsigned char *)c1 == 'S' || *(unsigned char *)c1 == 'D'; + cname = *(unsigned char *)c1 == 'C' || *(unsigned char *)c1 == 'Z'; + if (! (cname || sname)) { + return ret_val; + } + s_copy(c2, subnam + 1, (ftnlen)2, (ftnlen)2); + s_copy(c3, subnam + 3, (ftnlen)3, (ftnlen)3); + s_copy(c4, c3 + 1, (ftnlen)2, (ftnlen)2); + twostage = i_len(subnam, (ftnlen)16) >= 11 && *(unsigned char *)&subnam[ + 10] == '2'; + + switch (*ispec) { + case 1: goto L50; + case 2: goto L60; + case 3: goto L70; + } + +L50: + +/* ISPEC = 1: block size */ + +/* In these examples, separate code is provided for setting NB for */ +/* real and complex. We assume that NB will take the same value in */ +/* single or double precision. */ + + nb = 1; + + if (s_cmp(subnam + 1, (char *)"LAORH", (ftnlen)5, (ftnlen)5) == 0) { + +/* This is for *LAORHR_GETRFNP routine */ + + if (sname) { + nb = 32; + } else { + nb = 32; + } + } else if (s_cmp(c2, (char *)"GE", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, (char *)"TRF", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nb = 64; + } else { + nb = 64; + } + } else if (s_cmp(c3, (char *)"QRF", (ftnlen)3, (ftnlen)3) == 0 || s_cmp(c3, + (char *)"RQF", (ftnlen)3, (ftnlen)3) == 0 || s_cmp(c3, (char *)"LQF", (ftnlen) + 3, (ftnlen)3) == 0 || s_cmp(c3, (char *)"QLF", (ftnlen)3, (ftnlen)3) + == 0) { + if (sname) { + nb = 32; + } else { + nb = 32; + } + } else if (s_cmp(c3, (char *)"QR ", (ftnlen)3, (ftnlen)3) == 0) { + if (*n3 == 1) { + if (sname) { +/* M*N */ + if (*n1 * *n2 <= 131072 || *n1 <= 8192) { + nb = *n1; + } else { + nb = 32768 / *n2; + } + } else { + if (*n1 * *n2 <= 131072 || *n1 <= 8192) { + nb = *n1; + } else { + nb = 32768 / *n2; + } + } + } else { + if (sname) { + nb = 1; + } else { + nb = 1; + } + } + } else if (s_cmp(c3, (char *)"LQ ", (ftnlen)3, (ftnlen)3) == 0) { + if (*n3 == 2) { + if (sname) { +/* M*N */ + if (*n1 * *n2 <= 131072 || *n1 <= 8192) { + nb = *n1; + } else { + nb = 32768 / *n2; + } + } else { + if (*n1 * *n2 <= 131072 || *n1 <= 8192) { + nb = *n1; + } else { + nb = 32768 / *n2; + } + } + } else { + if (sname) { + nb = 1; + } else { + nb = 1; + } + } + } else if (s_cmp(c3, (char *)"HRD", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nb = 32; + } else { + nb = 32; + } + } else if (s_cmp(c3, (char *)"BRD", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nb = 32; + } else { + nb = 32; + } + } else if (s_cmp(c3, (char *)"TRI", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nb = 64; + } else { + nb = 64; + } + } + } else if (s_cmp(c2, (char *)"PO", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, (char *)"TRF", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nb = 64; + } else { + nb = 64; + } + } + } else if (s_cmp(c2, (char *)"SY", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, (char *)"TRF", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + if (twostage) { + nb = 192; + } else { + nb = 64; + } + } else { + if (twostage) { + nb = 192; + } else { + nb = 64; + } + } + } else if (sname && s_cmp(c3, (char *)"TRD", (ftnlen)3, (ftnlen)3) == 0) { + nb = 32; + } else if (sname && s_cmp(c3, (char *)"GST", (ftnlen)3, (ftnlen)3) == 0) { + nb = 64; + } + } else if (cname && s_cmp(c2, (char *)"HE", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, (char *)"TRF", (ftnlen)3, (ftnlen)3) == 0) { + if (twostage) { + nb = 192; + } else { + nb = 64; + } + } else if (s_cmp(c3, (char *)"TRD", (ftnlen)3, (ftnlen)3) == 0) { + nb = 32; + } else if (s_cmp(c3, (char *)"GST", (ftnlen)3, (ftnlen)3) == 0) { + nb = 64; + } + } else if (sname && s_cmp(c2, (char *)"OR", (ftnlen)2, (ftnlen)2) == 0) { + if (*(unsigned char *)c3 == 'G') { + if (s_cmp(c4, (char *)"QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, (char *)"RQ", + (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, (char *)"LQ", (ftnlen)2, ( + ftnlen)2) == 0 || s_cmp(c4, (char *)"QL", (ftnlen)2, (ftnlen)2) == + 0 || s_cmp(c4, (char *)"HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( + c4, (char *)"TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, (char *)"BR", ( + ftnlen)2, (ftnlen)2) == 0) { + nb = 32; + } + } else if (*(unsigned char *)c3 == 'M') { + if (s_cmp(c4, (char *)"QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, (char *)"RQ", + (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, (char *)"LQ", (ftnlen)2, ( + ftnlen)2) == 0 || s_cmp(c4, (char *)"QL", (ftnlen)2, (ftnlen)2) == + 0 || s_cmp(c4, (char *)"HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( + c4, (char *)"TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, (char *)"BR", ( + ftnlen)2, (ftnlen)2) == 0) { + nb = 32; + } + } + } else if (cname && s_cmp(c2, (char *)"UN", (ftnlen)2, (ftnlen)2) == 0) { + if (*(unsigned char *)c3 == 'G') { + if (s_cmp(c4, (char *)"QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, (char *)"RQ", + (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, (char *)"LQ", (ftnlen)2, ( + ftnlen)2) == 0 || s_cmp(c4, (char *)"QL", (ftnlen)2, (ftnlen)2) == + 0 || s_cmp(c4, (char *)"HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( + c4, (char *)"TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, (char *)"BR", ( + ftnlen)2, (ftnlen)2) == 0) { + nb = 32; + } + } else if (*(unsigned char *)c3 == 'M') { + if (s_cmp(c4, (char *)"QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, (char *)"RQ", + (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, (char *)"LQ", (ftnlen)2, ( + ftnlen)2) == 0 || s_cmp(c4, (char *)"QL", (ftnlen)2, (ftnlen)2) == + 0 || s_cmp(c4, (char *)"HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( + c4, (char *)"TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, (char *)"BR", ( + ftnlen)2, (ftnlen)2) == 0) { + nb = 32; + } + } + } else if (s_cmp(c2, (char *)"GB", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, (char *)"TRF", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + if (*n4 <= 64) { + nb = 1; + } else { + nb = 32; + } + } else { + if (*n4 <= 64) { + nb = 1; + } else { + nb = 32; + } + } + } + } else if (s_cmp(c2, (char *)"PB", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, (char *)"TRF", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + if (*n2 <= 64) { + nb = 1; + } else { + nb = 32; + } + } else { + if (*n2 <= 64) { + nb = 1; + } else { + nb = 32; + } + } + } + } else if (s_cmp(c2, (char *)"TR", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, (char *)"TRI", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nb = 64; + } else { + nb = 64; + } + } else if (s_cmp(c3, (char *)"EVC", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nb = 64; + } else { + nb = 64; + } + } else if (s_cmp(c3, (char *)"SYL", (ftnlen)3, (ftnlen)3) == 0) { +/* The upper bound is to prevent overly aggressive scaling. */ + if (sname) { +/* Computing MIN */ +/* Computing MAX */ + i__2 = 48, i__3 = (min(*n1,*n2) << 4) / 100; + i__1 = max(i__2,i__3); + nb = min(i__1,240); + } else { +/* Computing MIN */ +/* Computing MAX */ + i__2 = 24, i__3 = (min(*n1,*n2) << 3) / 100; + i__1 = max(i__2,i__3); + nb = min(i__1,80); + } + } + } else if (s_cmp(c2, (char *)"LA", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, (char *)"UUM", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nb = 64; + } else { + nb = 64; + } + } else if (s_cmp(c3, (char *)"TRS", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nb = 32; + } else { + nb = 32; + } + } + } else if (sname && s_cmp(c2, (char *)"ST", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, (char *)"EBZ", (ftnlen)3, (ftnlen)3) == 0) { + nb = 1; + } + } else if (s_cmp(c2, (char *)"GG", (ftnlen)2, (ftnlen)2) == 0) { + nb = 32; + if (s_cmp(c3, (char *)"HD3", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nb = 32; + } else { + nb = 32; + } + } + } + ret_val = nb; + return ret_val; + +L60: + +/* ISPEC = 2: minimum block size */ + + nbmin = 2; + if (s_cmp(c2, (char *)"GE", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, (char *)"QRF", (ftnlen)3, (ftnlen)3) == 0 || s_cmp(c3, (char *)"RQF", ( + ftnlen)3, (ftnlen)3) == 0 || s_cmp(c3, (char *)"LQF", (ftnlen)3, ( + ftnlen)3) == 0 || s_cmp(c3, (char *)"QLF", (ftnlen)3, (ftnlen)3) == 0) + { + if (sname) { + nbmin = 2; + } else { + nbmin = 2; + } + } else if (s_cmp(c3, (char *)"HRD", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nbmin = 2; + } else { + nbmin = 2; + } + } else if (s_cmp(c3, (char *)"BRD", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nbmin = 2; + } else { + nbmin = 2; + } + } else if (s_cmp(c3, (char *)"TRI", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nbmin = 2; + } else { + nbmin = 2; + } + } + } else if (s_cmp(c2, (char *)"SY", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, (char *)"TRF", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nbmin = 8; + } else { + nbmin = 8; + } + } else if (sname && s_cmp(c3, (char *)"TRD", (ftnlen)3, (ftnlen)3) == 0) { + nbmin = 2; + } + } else if (cname && s_cmp(c2, (char *)"HE", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, (char *)"TRD", (ftnlen)3, (ftnlen)3) == 0) { + nbmin = 2; + } + } else if (sname && s_cmp(c2, (char *)"OR", (ftnlen)2, (ftnlen)2) == 0) { + if (*(unsigned char *)c3 == 'G') { + if (s_cmp(c4, (char *)"QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, (char *)"RQ", + (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, (char *)"LQ", (ftnlen)2, ( + ftnlen)2) == 0 || s_cmp(c4, (char *)"QL", (ftnlen)2, (ftnlen)2) == + 0 || s_cmp(c4, (char *)"HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( + c4, (char *)"TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, (char *)"BR", ( + ftnlen)2, (ftnlen)2) == 0) { + nbmin = 2; + } + } else if (*(unsigned char *)c3 == 'M') { + if (s_cmp(c4, (char *)"QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, (char *)"RQ", + (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, (char *)"LQ", (ftnlen)2, ( + ftnlen)2) == 0 || s_cmp(c4, (char *)"QL", (ftnlen)2, (ftnlen)2) == + 0 || s_cmp(c4, (char *)"HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( + c4, (char *)"TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, (char *)"BR", ( + ftnlen)2, (ftnlen)2) == 0) { + nbmin = 2; + } + } + } else if (cname && s_cmp(c2, (char *)"UN", (ftnlen)2, (ftnlen)2) == 0) { + if (*(unsigned char *)c3 == 'G') { + if (s_cmp(c4, (char *)"QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, (char *)"RQ", + (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, (char *)"LQ", (ftnlen)2, ( + ftnlen)2) == 0 || s_cmp(c4, (char *)"QL", (ftnlen)2, (ftnlen)2) == + 0 || s_cmp(c4, (char *)"HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( + c4, (char *)"TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, (char *)"BR", ( + ftnlen)2, (ftnlen)2) == 0) { + nbmin = 2; + } + } else if (*(unsigned char *)c3 == 'M') { + if (s_cmp(c4, (char *)"QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, (char *)"RQ", + (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, (char *)"LQ", (ftnlen)2, ( + ftnlen)2) == 0 || s_cmp(c4, (char *)"QL", (ftnlen)2, (ftnlen)2) == + 0 || s_cmp(c4, (char *)"HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( + c4, (char *)"TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, (char *)"BR", ( + ftnlen)2, (ftnlen)2) == 0) { + nbmin = 2; + } + } + } else if (s_cmp(c2, (char *)"GG", (ftnlen)2, (ftnlen)2) == 0) { + nbmin = 2; + if (s_cmp(c3, (char *)"HD3", (ftnlen)3, (ftnlen)3) == 0) { + nbmin = 2; + } + } + ret_val = nbmin; + return ret_val; + +L70: + +/* ISPEC = 3: crossover point */ + + nx = 0; + if (s_cmp(c2, (char *)"GE", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, (char *)"QRF", (ftnlen)3, (ftnlen)3) == 0 || s_cmp(c3, (char *)"RQF", ( + ftnlen)3, (ftnlen)3) == 0 || s_cmp(c3, (char *)"LQF", (ftnlen)3, ( + ftnlen)3) == 0 || s_cmp(c3, (char *)"QLF", (ftnlen)3, (ftnlen)3) == 0) + { + if (sname) { + nx = 128; + } else { + nx = 128; + } + } else if (s_cmp(c3, (char *)"HRD", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nx = 128; + } else { + nx = 128; + } + } else if (s_cmp(c3, (char *)"BRD", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nx = 128; + } else { + nx = 128; + } + } + } else if (s_cmp(c2, (char *)"SY", (ftnlen)2, (ftnlen)2) == 0) { + if (sname && s_cmp(c3, (char *)"TRD", (ftnlen)3, (ftnlen)3) == 0) { + nx = 32; + } + } else if (cname && s_cmp(c2, (char *)"HE", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, (char *)"TRD", (ftnlen)3, (ftnlen)3) == 0) { + nx = 32; + } + } else if (sname && s_cmp(c2, (char *)"OR", (ftnlen)2, (ftnlen)2) == 0) { + if (*(unsigned char *)c3 == 'G') { + if (s_cmp(c4, (char *)"QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, (char *)"RQ", + (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, (char *)"LQ", (ftnlen)2, ( + ftnlen)2) == 0 || s_cmp(c4, (char *)"QL", (ftnlen)2, (ftnlen)2) == + 0 || s_cmp(c4, (char *)"HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( + c4, (char *)"TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, (char *)"BR", ( + ftnlen)2, (ftnlen)2) == 0) { + nx = 128; + } + } + } else if (cname && s_cmp(c2, (char *)"UN", (ftnlen)2, (ftnlen)2) == 0) { + if (*(unsigned char *)c3 == 'G') { + if (s_cmp(c4, (char *)"QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, (char *)"RQ", + (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, (char *)"LQ", (ftnlen)2, ( + ftnlen)2) == 0 || s_cmp(c4, (char *)"QL", (ftnlen)2, (ftnlen)2) == + 0 || s_cmp(c4, (char *)"HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( + c4, (char *)"TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, (char *)"BR", ( + ftnlen)2, (ftnlen)2) == 0) { + nx = 128; + } + } + } else if (s_cmp(c2, (char *)"GG", (ftnlen)2, (ftnlen)2) == 0) { + nx = 128; + if (s_cmp(c3, (char *)"HD3", (ftnlen)3, (ftnlen)3) == 0) { + nx = 128; + } + } + ret_val = nx; + return ret_val; + +L80: + +/* ISPEC = 4: number of shifts (used by xHSEQR) */ + + ret_val = 6; + return ret_val; + +L90: + +/* ISPEC = 5: minimum column dimension (not used) */ + + ret_val = 2; + return ret_val; + +L100: + +/* ISPEC = 6: crossover point for SVD (used by xGELSS and xGESVD) */ + + ret_val = (integer) ((real) min(*n1,*n2) * (float)1.6); + return ret_val; + +L110: + +/* ISPEC = 7: number of processors (not used) */ + + ret_val = 1; + return ret_val; + +L120: + +/* ISPEC = 8: crossover point for multishift (used by xHSEQR) */ + + ret_val = 50; + return ret_val; + +L130: + +/* ISPEC = 9: maximum size of the subproblems at the bottom of the */ +/* computation tree in the divide-and-conquer algorithm */ +/* (used by xGELSD and xGESDD) */ + + ret_val = 25; + return ret_val; + +L140: + +/* ISPEC = 10: ieee and infinity NaN arithmetic can be trusted not to trap */ + +/* ILAENV = 0 */ + ret_val = 1; + if (ret_val == 1) { + ret_val = ieeeck_(&c__1, &c_b176, &c_b177); + } + return ret_val; + +L150: + +/* ISPEC = 11: ieee infinity arithmetic can be trusted not to trap */ + +/* ILAENV = 0 */ + ret_val = 1; + if (ret_val == 1) { + ret_val = ieeeck_(&c__0, &c_b176, &c_b177); + } + return ret_val; + +L160: + +/* 12 <= ISPEC <= 17: xHSEQR or related subroutines. */ + + ret_val = iparmq_(ispec, name__, opts, n1, n2, n3, n4, name_len, opts_len) + ; + return ret_val; + +/* End of ILAENV */ + +} /* ilaenv_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/ilazlc.cpp b/lib/linalg/ilazlc.cpp new file mode 100644 index 0000000000..309c9e5edb --- /dev/null +++ b/lib/linalg/ilazlc.cpp @@ -0,0 +1,156 @@ +/* fortran/ilazlc.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* > \brief \b ILAZLC scans a matrix for its last non-zero column. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ILAZLC + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* INTEGER FUNCTION ILAZLC( M, N, A, LDA ) */ + +/* .. Scalar Arguments .. */ +/* INTEGER M, N, LDA */ +/* .. */ +/* .. Array Arguments .. */ +/* COMPLEX*16 A( LDA, * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ILAZLC scans A for its last non-zero column. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA,N) */ +/* > The m by n matrix A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= max(1,M). */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup complex16OTHERauxiliary */ + +/* ===================================================================== */ +integer ilazlc_(integer *m, integer *n, doublecomplex *a, integer *lda) +{ + /* System generated locals */ + integer a_dim1, a_offset, ret_val, i__1, i__2; + + /* Local variables */ + integer i__; + + +/* -- LAPACK auxiliary routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Quick test for the common case where one corner is non-zero. */ + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + if (*n == 0) { + ret_val = *n; + } else /* if(complicated condition) */ { + i__1 = *n * a_dim1 + 1; + i__2 = *m + *n * a_dim1; + if (a[i__1].r != 0. || a[i__1].i != 0. || (a[i__2].r != 0. || a[i__2] + .i != 0.)) { + ret_val = *n; + } else { +/* Now scan each column from the end, returning with the first non-zero. */ + for (ret_val = *n; ret_val >= 1; --ret_val) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + ret_val * a_dim1; + if (a[i__2].r != 0. || a[i__2].i != 0.) { + return ret_val; + } + } + } + } + } + return ret_val; +} /* ilazlc_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/ilazlr.cpp b/lib/linalg/ilazlr.cpp new file mode 100644 index 0000000000..8ca4fd8029 --- /dev/null +++ b/lib/linalg/ilazlr.cpp @@ -0,0 +1,159 @@ +/* fortran/ilazlr.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* > \brief \b ILAZLR scans a matrix for its last non-zero row. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ILAZLR + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* INTEGER FUNCTION ILAZLR( M, N, A, LDA ) */ + +/* .. Scalar Arguments .. */ +/* INTEGER M, N, LDA */ +/* .. */ +/* .. Array Arguments .. */ +/* COMPLEX*16 A( LDA, * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ILAZLR scans A for its last non-zero row. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA,N) */ +/* > The m by n matrix A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= max(1,M). */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup complex16OTHERauxiliary */ + +/* ===================================================================== */ +integer ilazlr_(integer *m, integer *n, doublecomplex *a, integer *lda) +{ + /* System generated locals */ + integer a_dim1, a_offset, ret_val, i__1, i__2; + + /* Local variables */ + integer i__, j; + + +/* -- LAPACK auxiliary routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Quick test for the common case where one corner is non-zero. */ + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + if (*m == 0) { + ret_val = *m; + } else /* if(complicated condition) */ { + i__1 = *m + a_dim1; + i__2 = *m + *n * a_dim1; + if (a[i__1].r != 0. || a[i__1].i != 0. || (a[i__2].r != 0. || a[i__2] + .i != 0.)) { + ret_val = *m; + } else { +/* Scan up each column tracking the last zero row seen. */ + ret_val = 0; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__ = *m; + for(;;) { /* while(complicated condition) */ + i__2 = max(i__,1) + j * a_dim1; + if (!(a[i__2].r == 0. && a[i__2].i == 0. && i__ >= 1)) + break; + --i__; + } + ret_val = max(ret_val,i__); + } + } + } + return ret_val; +} /* ilazlr_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/iparmq.cpp b/lib/linalg/iparmq.cpp new file mode 100644 index 0000000000..12f3b7f7f6 --- /dev/null +++ b/lib/linalg/iparmq.cpp @@ -0,0 +1,449 @@ +/* fortran/iparmq.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* > \brief \b IPARMQ */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download IPARMQ + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* INTEGER FUNCTION IPARMQ( ISPEC, NAME, OPTS, N, ILO, IHI, LWORK ) */ + +/* .. Scalar Arguments .. */ +/* INTEGER IHI, ILO, ISPEC, LWORK, N */ +/* CHARACTER NAME*( * ), OPTS*( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > This program sets problem and machine dependent parameters */ +/* > useful for xHSEQR and related subroutines for eigenvalue */ +/* > problems. It is called whenever */ +/* > IPARMQ is called with 12 <= ISPEC <= 16 */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] ISPEC */ +/* > \verbatim */ +/* > ISPEC is INTEGER */ +/* > ISPEC specifies which tunable parameter IPARMQ should */ +/* > return. */ +/* > */ +/* > ISPEC=12: (INMIN) Matrices of order nmin or less */ +/* > are sent directly to xLAHQR, the implicit */ +/* > double shift QR algorithm. NMIN must be */ +/* > at least 11. */ +/* > */ +/* > ISPEC=13: (INWIN) Size of the deflation window. */ +/* > This is best set greater than or equal to */ +/* > the number of simultaneous shifts NS. */ +/* > Larger matrices benefit from larger deflation */ +/* > windows. */ +/* > */ +/* > ISPEC=14: (INIBL) Determines when to stop nibbling and */ +/* > invest in an (expensive) multi-shift QR sweep. */ +/* > If the aggressive early deflation subroutine */ +/* > finds LD converged eigenvalues from an order */ +/* > NW deflation window and LD > (NW*NIBBLE)/100, */ +/* > then the next QR sweep is skipped and early */ +/* > deflation is applied immediately to the */ +/* > remaining active diagonal block. Setting */ +/* > IPARMQ(ISPEC=14) = 0 causes TTQRE to skip a */ +/* > multi-shift QR sweep whenever early deflation */ +/* > finds a converged eigenvalue. Setting */ +/* > IPARMQ(ISPEC=14) greater than or equal to 100 */ +/* > prevents TTQRE from skipping a multi-shift */ +/* > QR sweep. */ +/* > */ +/* > ISPEC=15: (NSHFTS) The number of simultaneous shifts in */ +/* > a multi-shift QR iteration. */ +/* > */ +/* > ISPEC=16: (IACC22) IPARMQ is set to 0, 1 or 2 with the */ +/* > following meanings. */ +/* > 0: During the multi-shift QR/QZ sweep, */ +/* > blocked eigenvalue reordering, blocked */ +/* > Hessenberg-triangular reduction, */ +/* > reflections and/or rotations are not */ +/* > accumulated when updating the */ +/* > far-from-diagonal matrix entries. */ +/* > 1: During the multi-shift QR/QZ sweep, */ +/* > blocked eigenvalue reordering, blocked */ +/* > Hessenberg-triangular reduction, */ +/* > reflections and/or rotations are */ +/* > accumulated, and matrix-matrix */ +/* > multiplication is used to update the */ +/* > far-from-diagonal matrix entries. */ +/* > 2: During the multi-shift QR/QZ sweep, */ +/* > blocked eigenvalue reordering, blocked */ +/* > Hessenberg-triangular reduction, */ +/* > reflections and/or rotations are */ +/* > accumulated, and 2-by-2 block structure */ +/* > is exploited during matrix-matrix */ +/* > multiplies. */ +/* > (If xTRMM is slower than xGEMM, then */ +/* > IPARMQ(ISPEC=16)=1 may be more efficient than */ +/* > IPARMQ(ISPEC=16)=2 despite the greater level of */ +/* > arithmetic work implied by the latter choice.) */ +/* > */ +/* > ISPEC=17: (ICOST) An estimate of the relative cost of flops */ +/* > within the near-the-diagonal shift chase compared */ +/* > to flops within the BLAS calls of a QZ sweep. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NAME */ +/* > \verbatim */ +/* > NAME is CHARACTER string */ +/* > Name of the calling subroutine */ +/* > \endverbatim */ +/* > */ +/* > \param[in] OPTS */ +/* > \verbatim */ +/* > OPTS is CHARACTER string */ +/* > This is a concatenation of the string arguments to */ +/* > TTQRE. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > N is the order of the Hessenberg matrix H. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] ILO */ +/* > \verbatim */ +/* > ILO is INTEGER */ +/* > \endverbatim */ +/* > */ +/* > \param[in] IHI */ +/* > \verbatim */ +/* > IHI is INTEGER */ +/* > It is assumed that H is already upper triangular */ +/* > in rows and columns 1:ILO-1 and IHI+1:N. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The amount of workspace available. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup OTHERauxiliary */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > Little is known about how best to choose these parameters. */ +/* > It is possible to use different values of the parameters */ +/* > for each of CHSEQR, DHSEQR, SHSEQR and ZHSEQR. */ +/* > */ +/* > It is probably best to choose different parameters for */ +/* > different matrices and different parameters at different */ +/* > times during the iteration, but this has not been */ +/* > implemented --- yet. */ +/* > */ +/* > */ +/* > The best choices of most of the parameters depend */ +/* > in an ill-understood way on the relative execution */ +/* > rate of xLAQR3 and xLAQR5 and on the nature of each */ +/* > particular eigenvalue problem. Experiment may be the */ +/* > only practical way to determine which choices are most */ +/* > effective. */ +/* > */ +/* > Following is a list of default values supplied by IPARMQ. */ +/* > These defaults may be adjusted in order to attain better */ +/* > performance in any particular computational environment. */ +/* > */ +/* > IPARMQ(ISPEC=12) The xLAHQR vs xLAQR0 crossover point. */ +/* > Default: 75. (Must be at least 11.) */ +/* > */ +/* > IPARMQ(ISPEC=13) Recommended deflation window size. */ +/* > This depends on ILO, IHI and NS, the */ +/* > number of simultaneous shifts returned */ +/* > by IPARMQ(ISPEC=15). The default for */ +/* > (IHI-ILO+1) <= 500 is NS. The default */ +/* > for (IHI-ILO+1) > 500 is 3*NS/2. */ +/* > */ +/* > IPARMQ(ISPEC=14) Nibble crossover point. Default: 14. */ +/* > */ +/* > IPARMQ(ISPEC=15) Number of simultaneous shifts, NS. */ +/* > a multi-shift QR iteration. */ +/* > */ +/* > If IHI-ILO+1 is ... */ +/* > */ +/* > greater than ...but less ... the */ +/* > or equal to ... than default is */ +/* > */ +/* > 0 30 NS = 2+ */ +/* > 30 60 NS = 4+ */ +/* > 60 150 NS = 10 */ +/* > 150 590 NS = ** */ +/* > 590 3000 NS = 64 */ +/* > 3000 6000 NS = 128 */ +/* > 6000 infinity NS = 256 */ +/* > */ +/* > (+) By default matrices of this order are */ +/* > passed to the implicit double shift routine */ +/* > xLAHQR. See IPARMQ(ISPEC=12) above. These */ +/* > values of NS are used only in case of a rare */ +/* > xLAHQR failure. */ +/* > */ +/* > (**) The asterisks (**) indicate an ad-hoc */ +/* > function increasing from 10 to 64. */ +/* > */ +/* > IPARMQ(ISPEC=16) Select structured matrix multiply. */ +/* > (See ISPEC=16 above for details.) */ +/* > Default: 3. */ +/* > */ +/* > IPARMQ(ISPEC=17) Relative cost heuristic for blocksize selection. */ +/* > Expressed as a percentage. */ +/* > Default: 10. */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +integer iparmq_(integer *ispec, char *name__, char *opts, integer *n, integer + *ilo, integer *ihi, integer *lwork, ftnlen name_len, ftnlen opts_len) +{ + /* System generated locals */ + integer ret_val, i__1, i__2; + real r__1; + + /* Builtin functions */ + double log(doublereal); + integer i_nint(real *); + /* Subroutine */ int s_copy(char *, char *, ftnlen, ftnlen); + integer s_cmp(char *, char *, ftnlen, ftnlen); + + /* Local variables */ + integer i__, ic, nh, ns, iz; + char subnam[6]; + + +/* -- LAPACK auxiliary routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ + +/* ================================================================ */ +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + if (*ispec == 15 || *ispec == 13 || *ispec == 16) { + +/* ==== Set the number simultaneous shifts ==== */ + + nh = *ihi - *ilo + 1; + ns = 2; + if (nh >= 30) { + ns = 4; + } + if (nh >= 60) { + ns = 10; + } + if (nh >= 150) { +/* Computing MAX */ + r__1 = log((real) nh) / log((float)2.); + i__1 = 10, i__2 = nh / i_nint(&r__1); + ns = max(i__1,i__2); + } + if (nh >= 590) { + ns = 64; + } + if (nh >= 3000) { + ns = 128; + } + if (nh >= 6000) { + ns = 256; + } +/* Computing MAX */ + i__1 = 2, i__2 = ns - ns % 2; + ns = max(i__1,i__2); + } + + if (*ispec == 12) { + + +/* ===== Matrices of order smaller than NMIN get sent */ +/* . to xLAHQR, the classic double shift algorithm. */ +/* . This must be at least 11. ==== */ + + ret_val = 75; + + } else if (*ispec == 14) { + +/* ==== INIBL: skip a multi-shift qr iteration and */ +/* . whenever aggressive early deflation finds */ +/* . at least (NIBBLE*(window size)/100) deflations. ==== */ + + ret_val = 14; + + } else if (*ispec == 15) { + +/* ==== NSHFTS: The number of simultaneous shifts ===== */ + + ret_val = ns; + + } else if (*ispec == 13) { + +/* ==== NW: deflation window size. ==== */ + + if (nh <= 500) { + ret_val = ns; + } else { + ret_val = ns * 3 / 2; + } + + } else if (*ispec == 16) { + +/* ==== IACC22: Whether to accumulate reflections */ +/* . before updating the far-from-diagonal elements */ +/* . and whether to use 2-by-2 block structure while */ +/* . doing it. A small amount of work could be saved */ +/* . by making this choice dependent also upon the */ +/* . NH=IHI-ILO+1. */ + + +/* Convert NAME to upper case if the first character is lower case. */ + + ret_val = 0; + s_copy(subnam, name__, (ftnlen)6, name_len); + ic = *(unsigned char *)subnam; + iz = 'Z'; + if (iz == 90 || iz == 122) { + +/* ASCII character set */ + + if (ic >= 97 && ic <= 122) { + *(unsigned char *)subnam = (char) (ic - 32); + for (i__ = 2; i__ <= 6; ++i__) { + ic = *(unsigned char *)&subnam[i__ - 1]; + if (ic >= 97 && ic <= 122) { + *(unsigned char *)&subnam[i__ - 1] = (char) (ic - 32); + } + } + } + + } else if (iz == 233 || iz == 169) { + +/* EBCDIC character set */ + + if (ic >= 129 && ic <= 137 || ic >= 145 && ic <= 153 || ic >= 162 + && ic <= 169) { + *(unsigned char *)subnam = (char) (ic + 64); + for (i__ = 2; i__ <= 6; ++i__) { + ic = *(unsigned char *)&subnam[i__ - 1]; + if (ic >= 129 && ic <= 137 || ic >= 145 && ic <= 153 || + ic >= 162 && ic <= 169) { + *(unsigned char *)&subnam[i__ - 1] = (char) (ic + 64); + } + } + } + + } else if (iz == 218 || iz == 250) { + +/* Prime machines: ASCII+128 */ + + if (ic >= 225 && ic <= 250) { + *(unsigned char *)subnam = (char) (ic - 32); + for (i__ = 2; i__ <= 6; ++i__) { + ic = *(unsigned char *)&subnam[i__ - 1]; + if (ic >= 225 && ic <= 250) { + *(unsigned char *)&subnam[i__ - 1] = (char) (ic - 32); + } + } + } + } + + if (s_cmp(subnam + 1, (char *)"GGHRD", (ftnlen)5, (ftnlen)5) == 0 || s_cmp( + subnam + 1, (char *)"GGHD3", (ftnlen)5, (ftnlen)5) == 0) { + ret_val = 1; + if (nh >= 14) { + ret_val = 2; + } + } else if (s_cmp(subnam + 3, (char *)"EXC", (ftnlen)3, (ftnlen)3) == 0) { + if (nh >= 14) { + ret_val = 1; + } + if (nh >= 14) { + ret_val = 2; + } + } else if (s_cmp(subnam + 1, (char *)"HSEQR", (ftnlen)5, (ftnlen)5) == 0 || + s_cmp(subnam + 1, (char *)"LAQR", (ftnlen)4, (ftnlen)4) == 0) { + if (ns >= 14) { + ret_val = 1; + } + if (ns >= 14) { + ret_val = 2; + } + } + + } else if (*ispec == 17) { + +/* === Relative cost of near-the-diagonal chase vs */ +/* BLAS updates === */ + + ret_val = 10; + } else { +/* ===== invalid value of ispec ===== */ + ret_val = -1; + + } + +/* ==== End of IPARMQ ==== */ + + return ret_val; +} /* iparmq_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/lmp_f2c.h b/lib/linalg/lmp_f2c.h new file mode 100644 index 0000000000..0b1ebac99c --- /dev/null +++ b/lib/linalg/lmp_f2c.h @@ -0,0 +1,223 @@ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +typedef int integer; +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +typedef long int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; +#ifdef INTEGER_STAR_8 /* Adjust for integer*8. */ +typedef long long longint; /* system-dependent */ +typedef unsigned long long ulongint; /* system-dependent */ +#define qbit_clear(a,b) ((a) & ~((ulongint)1 << (b))) +#define qbit_set(a,b) ((a) | ((ulongint)1 << (b))) +#endif + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +#ifdef f2c_i2 +/* for -i2 */ +typedef short flag; +typedef short ftnlen; +typedef short ftnint; +#else +typedef long int flag; +typedef long int ftnlen; +typedef long int ftnint; +#endif + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +/*typedef long int Long;*/ /* No longer used; formerly in Namelist */ + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (doublereal)abs(x) +#define min(a,b) ((a) <= (b) ? (a) : (b)) +#define max(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (doublereal)min(a,b) +#define dmax(a,b) (doublereal)max(a,b) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef int /* Unknown procedure type */ (*U_fp)(...); +typedef shortint (*J_fp)(...); +typedef integer (*I_fp)(...); +typedef real (*R_fp)(...); +typedef doublereal (*D_fp)(...), (*E_fp)(...); +typedef /* Complex */ VOID (*C_fp)(...); +typedef /* Double Complex */ VOID (*Z_fp)(...); +typedef logical (*L_fp)(...); +typedef shortlogical (*K_fp)(...); +typedef /* Character */ VOID (*H_fp)(...); +typedef /* Subroutine */ int (*S_fp)(...); +#else +typedef int /* Unknown procedure type */ (*U_fp)(); +typedef shortint (*J_fp)(); +typedef integer (*I_fp)(); +typedef real (*R_fp)(); +typedef doublereal (*D_fp)(), (*E_fp)(); +typedef /* Complex */ VOID (*C_fp)(); +typedef /* Double Complex */ VOID (*Z_fp)(); +typedef logical (*L_fp)(); +typedef shortlogical (*K_fp)(); +typedef /* Character */ VOID (*H_fp)(); +typedef /* Subroutine */ int (*S_fp)(); +#endif +/* E_fp is for real functions when -R is not specified */ +typedef VOID C_f; /* complex function */ +typedef VOID H_f; /* character function */ +typedef VOID Z_f; /* double complex function */ +typedef doublereal E_f; /* real function with -R not specified */ + +/* undef any lower-case symbols that your C compiler predefines, e.g.: */ + +#ifndef Skip_f2c_Undefs +#undef cray +#undef gcos +#undef mc68010 +#undef mc68020 +#undef mips +#undef pdp11 +#undef sgi +#undef sparc +#undef sun +#undef sun2 +#undef sun3 +#undef sun4 +#undef u370 +#undef u3b +#undef u3b2 +#undef u3b5 +#undef unix +#undef vax +#endif +#endif diff --git a/lib/linalg/lsame.cpp b/lib/linalg/lsame.cpp new file mode 100644 index 0000000000..dcb89e3d00 --- /dev/null +++ b/lib/linalg/lsame.cpp @@ -0,0 +1,17 @@ + +#include + +extern "C" { + +#include "lmp_f2c.h" + +logical lsame_(const char *a, const char *b) +{ + char ua, ub; + if (!a || !b) return FALSE_; + + ua = toupper(*a); + ub = toupper(*b); + return (ua == ub) ? TRUE_ : FALSE_; +} +} diff --git a/lib/linalg/static/README b/lib/linalg/static/README new file mode 100644 index 0000000000..c06337c2dd --- /dev/null +++ b/lib/linalg/static/README @@ -0,0 +1,5 @@ +The C++ files in this folder are direct C++ implementations of their +Fortran equivalents using the C++ runtime. +. +The Fortran files in this folder are modified from their +original versions, so that f2c can correctly translate them. diff --git a/lib/linalg/static/dgetrf2.f b/lib/linalg/static/dgetrf2.f new file mode 100644 index 0000000000..e3d2aac299 --- /dev/null +++ b/lib/linalg/static/dgetrf2.f @@ -0,0 +1,269 @@ +*> \brief \b DGETRF2 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* RECURSIVE SUBROUTINE DGETRF2( M, N, A, LDA, IPIV, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* DOUBLE PRECISION A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGETRF2 computes an LU factorization of a general M-by-N matrix A +*> using partial pivoting with row interchanges. +*> +*> The factorization has the form +*> A = P * L * U +*> where P is a permutation matrix, L is lower triangular with unit +*> diagonal elements (lower trapezoidal if m > n), and U is upper +*> triangular (upper trapezoidal if m < n). +*> +*> This is the recursive version of the algorithm. It divides +*> the matrix into four submatrices: +*> +*> [ A11 | A12 ] where A11 is n1 by n1 and A22 is n2 by n2 +*> A = [ -----|----- ] with n1 = min(m,n)/2 +*> [ A21 | A22 ] n2 = n-n1 +*> +*> [ A11 ] +*> The subroutine calls itself to factor [ --- ], +*> [ A12 ] +*> [ A12 ] +*> do the swaps on [ --- ], solve A12, update A22, +*> [ A22 ] +*> +*> then calls itself to factor A22 and do the swaps on A21. +*> +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the M-by-N matrix to be factored. +*> On exit, the factors L and U from the factorization +*> A = P*L*U; the unit diagonal elements of L are not stored. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (min(M,N)) +*> The pivot indices; for 1 <= i <= min(M,N), row i of the +*> matrix was interchanged with row IPIV(i). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, U(i,i) is exactly zero. The factorization +*> has been completed, but the factor U is exactly +*> singular, and division by zero will occur if it is used +*> to solve a system of equations. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \ingroup doubleGEcomputational +* +* ===================================================================== + SUBROUTINE DGETRF2( M, N, A, LDA, IPIV, INFO ) +* +* -- LAPACK computational routine -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + DOUBLE PRECISION SFMIN, TEMP + INTEGER I, IINFO, N1, N2 +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + INTEGER IDAMAX + EXTERNAL DLAMCH, IDAMAX +* .. +* .. External Subroutines .. + EXTERNAL DGEMM, DSCAL, DLASWP, DTRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGETRF2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN + + IF ( M.EQ.1 ) THEN +* +* Use unblocked code for one row case +* Just need to handle IPIV and INFO +* + IPIV( 1 ) = 1 + IF ( A(1,1).EQ.ZERO ) + $ INFO = 1 +* + ELSE IF( N.EQ.1 ) THEN +* +* Use unblocked code for one column case +* +* +* Compute machine safe minimum +* + SFMIN = DLAMCH('S') +* +* Find pivot and test for singularity +* + I = IDAMAX( M, A( 1, 1 ), 1 ) + IPIV( 1 ) = I + IF( A( I, 1 ).NE.ZERO ) THEN +* +* Apply the interchange +* + IF( I.NE.1 ) THEN + TEMP = A( 1, 1 ) + A( 1, 1 ) = A( I, 1 ) + A( I, 1 ) = TEMP + END IF +* +* Compute elements 2:M of the column +* + IF( ABS(A( 1, 1 )) .GE. SFMIN ) THEN + CALL DSCAL( M-1, ONE / A( 1, 1 ), A( 2, 1 ), 1 ) + ELSE + DO 10 I = 1, M-1 + A( 1+I, 1 ) = A( 1+I, 1 ) / A( 1, 1 ) + 10 CONTINUE + END IF +* + ELSE + INFO = 1 + END IF +* + ELSE +* +* Use recursive code +* + N1 = MIN( M, N ) / 2 + N2 = N-N1 +* +* [ A11 ] +* Factor [ --- ] +* [ A21 ] +* + CALL DGETRF2( M, N1, A, LDA, IPIV, IINFO ) + + IF ( INFO.EQ.0 .AND. IINFO.GT.0 ) + $ INFO = IINFO +* +* [ A12 ] +* Apply interchanges to [ --- ] +* [ A22 ] +* + CALL DLASWP( N2, A( 1, N1+1 ), LDA, 1, N1, IPIV, 1 ) +* +* Solve A12 +* + CALL DTRSM( 'L', 'L', 'N', 'U', N1, N2, ONE, A, LDA, + $ A( 1, N1+1 ), LDA ) +* +* Update A22 +* + CALL DGEMM( 'N', 'N', M-N1, N2, N1, -ONE, A( N1+1, 1 ), LDA, + $ A( 1, N1+1 ), LDA, ONE, A( N1+1, N1+1 ), LDA ) +* +* Factor A22 +* + CALL DGETRF2( M-N1, N2, A( N1+1, N1+1 ), LDA, IPIV( N1+1 ), + $ IINFO ) +* +* Adjust INFO and the pivot indices +* + IF ( INFO.EQ.0 .AND. IINFO.GT.0 ) + $ INFO = IINFO + N1 + DO 20 I = N1+1, MIN( M, N ) + IPIV( I ) = IPIV( I ) + N1 + 20 CONTINUE +* +* Apply interchanges to A21 +* + CALL DLASWP( N1, A( 1, 1 ), LDA, N1+1, MIN( M, N), IPIV, 1 ) +* + END IF + RETURN +* +* End of DGETRF2 +* + END diff --git a/lib/linalg/static/disnan.cpp b/lib/linalg/static/disnan.cpp new file mode 100644 index 0000000000..9e5bc1094e --- /dev/null +++ b/lib/linalg/static/disnan.cpp @@ -0,0 +1,14 @@ + +#include + +extern "C" { + +#include "lmp_f2c.h" + +logical disnan_(const doublereal *din) +{ + if (!din) return TRUE_; + + return std::isnan(*din) ? TRUE_ : FALSE_; +} +} diff --git a/lib/linalg/static/dlamch.cpp b/lib/linalg/static/dlamch.cpp new file mode 100644 index 0000000000..3d616d95d2 --- /dev/null +++ b/lib/linalg/static/dlamch.cpp @@ -0,0 +1,45 @@ + +#include +#include + +extern "C" { + +#include "lmp_f2c.h" + +// undefine conflicting f2c macros +#undef min +#undef max + +doublereal dlamch_(const char *cmach) +{ + if (!cmach) return 0.0; + char select = toupper(*cmach); + + // BLAS assumes rounding not truncation => epsilon is half + const double eps = 0.5 * std::numeric_limits::epsilon(); + if (select == 'E') return eps; + + double min = std::numeric_limits::min(); + const double max = std::numeric_limits::max(); + double small = 1.0 / max; + if (small >= min) min = small * (1.0 + eps); + if (select == 'S') return min; + + const double radix = std::numeric_limits::radix; + if (select == 'B') return radix; + + if (select == 'P') return eps * radix; + + if (select == 'N') return std::numeric_limits::digits; + + if (select == 'M') return std::numeric_limits::min_exponent; + + if (select == 'U') return min; + + if (select == 'L') return std::numeric_limits::max_exponent; + + if (select == 'O') return max; + + return 0.0; +} +} diff --git a/lib/linalg/static/dlarft.f b/lib/linalg/static/dlarft.f new file mode 100644 index 0000000000..e92c927971 --- /dev/null +++ b/lib/linalg/static/dlarft.f @@ -0,0 +1,327 @@ +*> \brief \b DLARFT forms the triangular factor T of a block reflector H = I - vtvH +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLARFT + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) +* +* .. Scalar Arguments .. +* CHARACTER DIRECT, STOREV +* INTEGER K, LDT, LDV, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION T( LDT, * ), TAU( * ), V( LDV, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLARFT forms the triangular factor T of a real block reflector H +*> of order n, which is defined as a product of k elementary reflectors. +*> +*> If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; +*> +*> If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. +*> +*> If STOREV = 'C', the vector which defines the elementary reflector +*> H(i) is stored in the i-th column of the array V, and +*> +*> H = I - V * T * V**T +*> +*> If STOREV = 'R', the vector which defines the elementary reflector +*> H(i) is stored in the i-th row of the array V, and +*> +*> H = I - V**T * T * V +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] DIRECT +*> \verbatim +*> DIRECT is CHARACTER*1 +*> Specifies the order in which the elementary reflectors are +*> multiplied to form the block reflector: +*> = 'F': H = H(1) H(2) . . . H(k) (Forward) +*> = 'B': H = H(k) . . . H(2) H(1) (Backward) +*> \endverbatim +*> +*> \param[in] STOREV +*> \verbatim +*> STOREV is CHARACTER*1 +*> Specifies how the vectors which define the elementary +*> reflectors are stored (see also Further Details): +*> = 'C': columnwise +*> = 'R': rowwise +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the block reflector H. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The order of the triangular factor T (= the number of +*> elementary reflectors). K >= 1. +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is DOUBLE PRECISION array, dimension +*> (LDV,K) if STOREV = 'C' +*> (LDV,N) if STOREV = 'R' +*> The matrix V. See further details. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V. +*> If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K. +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i). +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is DOUBLE PRECISION array, dimension (LDT,K) +*> The k by k triangular factor T of the block reflector. +*> If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is +*> lower triangular. The rest of the array is not used. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= K. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \ingroup doubleOTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The shape of the matrix V and the storage of the vectors which define +*> the H(i) is best illustrated by the following example with n = 5 and +*> k = 3. The elements equal to 1 are not stored. +*> +*> DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': +*> +*> V = ( 1 ) V = ( 1 v1 v1 v1 v1 ) +*> ( v1 1 ) ( 1 v2 v2 v2 ) +*> ( v1 v2 1 ) ( 1 v3 v3 ) +*> ( v1 v2 v3 ) +*> ( v1 v2 v3 ) +*> +*> DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': +*> +*> V = ( v1 v2 v3 ) V = ( v1 v1 1 ) +*> ( v1 v2 v3 ) ( v2 v2 v2 1 ) +*> ( 1 v2 v3 ) ( v3 v3 v3 v3 1 ) +*> ( 1 v3 ) +*> ( 1 ) +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) +* +* -- LAPACK auxiliary routine -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* +* .. Scalar Arguments .. + CHARACTER DIRECT, STOREV + INTEGER K, LDT, LDV, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION T( LDT, * ), TAU( * ), V( LDV, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, PREVLASTV, LASTV +* .. +* .. External Subroutines .. + EXTERNAL DGEMV, DTRMV +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( LSAME( DIRECT, 'F' ) ) THEN + PREVLASTV = N + DO I = 1, K + PREVLASTV = MAX( I, PREVLASTV ) + IF( TAU( I ).EQ.ZERO ) THEN +* +* H(i) = I +* + DO J = 1, I + T( J, I ) = ZERO + END DO + ELSE +* +* general case +* + IF( LSAME( STOREV, 'C' ) ) THEN +* Skip any trailing zeros. + DO LASTV = N, I+1, -1 + IF( V( LASTV, I ).NE.ZERO ) GOTO 219 + END DO + 219 CONTINUE + DO J = 1, I-1 + T( J, I ) = -TAU( I ) * V( I , J ) + END DO + J = MIN( LASTV, PREVLASTV ) +* +* T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)**T * V(i:j,i) +* + CALL DGEMV( 'Transpose', J-I, I-1, -TAU( I ), + $ V( I+1, 1 ), LDV, V( I+1, I ), 1, ONE, + $ T( 1, I ), 1 ) + ELSE +* Skip any trailing zeros. + DO LASTV = N, I+1, -1 + IF( V( I, LASTV ).NE.ZERO ) GOTO 235 + END DO + 235 CONTINUE + DO J = 1, I-1 + T( J, I ) = -TAU( I ) * V( J , I ) + END DO + J = MIN( LASTV, PREVLASTV ) +* +* T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)**T +* + CALL DGEMV( 'No transpose', I-1, J-I, -TAU( I ), + $ V( 1, I+1 ), LDV, V( I, I+1 ), LDV, ONE, + $ T( 1, I ), 1 ) + END IF +* +* T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) +* + CALL DTRMV( 'Upper', 'No transpose', 'Non-unit', I-1, T, + $ LDT, T( 1, I ), 1 ) + T( I, I ) = TAU( I ) + IF( I.GT.1 ) THEN + PREVLASTV = MAX( PREVLASTV, LASTV ) + ELSE + PREVLASTV = LASTV + END IF + END IF + END DO + ELSE + PREVLASTV = 1 + DO I = K, 1, -1 + IF( TAU( I ).EQ.ZERO ) THEN +* +* H(i) = I +* + DO J = I, K + T( J, I ) = ZERO + END DO + ELSE +* +* general case +* + IF( I.LT.K ) THEN + IF( LSAME( STOREV, 'C' ) ) THEN +* Skip any leading zeros. + DO LASTV = 1, I-1 + IF( V( LASTV, I ).NE.ZERO ) GOTO 280 + END DO + 280 CONTINUE + DO J = I+1, K + T( J, I ) = -TAU( I ) * V( N-K+I , J ) + END DO + J = MAX( LASTV, PREVLASTV ) +* +* T(i+1:k,i) = -tau(i) * V(j:n-k+i,i+1:k)**T * V(j:n-k+i,i) +* + CALL DGEMV( 'Transpose', N-K+I-J, K-I, -TAU( I ), + $ V( J, I+1 ), LDV, V( J, I ), 1, ONE, + $ T( I+1, I ), 1 ) + ELSE +* Skip any leading zeros. + DO LASTV = 1, I-1 + IF( V( I, LASTV ).NE.ZERO ) GOTO 296 + END DO + 296 CONTINUE + DO J = I+1, K + T( J, I ) = -TAU( I ) * V( J, N-K+I ) + END DO + J = MAX( LASTV, PREVLASTV ) +* +* T(i+1:k,i) = -tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)**T +* + CALL DGEMV( 'No transpose', K-I, N-K+I-J, + $ -TAU( I ), V( I+1, J ), LDV, V( I, J ), LDV, + $ ONE, T( I+1, I ), 1 ) + END IF +* +* T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) +* + CALL DTRMV( 'Lower', 'No transpose', 'Non-unit', K-I, + $ T( I+1, I+1 ), LDT, T( I+1, I ), 1 ) + IF( I.GT.1 ) THEN + PREVLASTV = MIN( PREVLASTV, LASTV ) + ELSE + PREVLASTV = LASTV + END IF + END IF + T( I, I ) = TAU( I ) + END IF + END DO + END IF + RETURN +* +* End of DLARFT +* + END diff --git a/lib/linalg/static/dpotrf2.f b/lib/linalg/static/dpotrf2.f new file mode 100644 index 0000000000..ba827635a8 --- /dev/null +++ b/lib/linalg/static/dpotrf2.f @@ -0,0 +1,234 @@ +*> \brief \b DPOTRF2 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* RECURSIVE SUBROUTINE DPOTRF2( UPLO, N, A, LDA, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* REAL A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DPOTRF2 computes the Cholesky factorization of a real symmetric +*> positive definite matrix A using the recursive algorithm. +*> +*> The factorization has the form +*> A = U**T * U, if UPLO = 'U', or +*> A = L * L**T, if UPLO = 'L', +*> where U is an upper triangular matrix and L is lower triangular. +*> +*> This is the recursive version of the algorithm. It divides +*> the matrix into four submatrices: +*> +*> [ A11 | A12 ] where A11 is n1 by n1 and A22 is n2 by n2 +*> A = [ -----|----- ] with n1 = n/2 +*> [ A21 | A22 ] n2 = n-n1 +*> +*> The subroutine calls itself to factor A11. Update and scale A21 +*> or A12, update A22 then calls itself to factor A22. +*> +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, if INFO = 0, the factor U or L from the Cholesky +*> factorization A = U**T*U or A = L*L**T. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the leading minor of order i is not +*> positive definite, and the factorization could not be +*> completed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \ingroup doublePOcomputational +* +* ===================================================================== + SUBROUTINE DPOTRF2( UPLO, N, A, LDA, INFO ) +* +* -- LAPACK computational routine -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER N1, N2, IINFO +* .. +* .. External Functions .. + LOGICAL LSAME, DISNAN + EXTERNAL LSAME, DISNAN +* .. +* .. External Subroutines .. + EXTERNAL DSYRK, DTRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DPOTRF2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* N=1 case +* + IF( N.EQ.1 ) THEN +* +* Test for non-positive-definiteness +* + IF( A( 1, 1 ).LE.ZERO.OR.DISNAN( A( 1, 1 ) ) ) THEN + INFO = 1 + RETURN + END IF +* +* Factor +* + A( 1, 1 ) = SQRT( A( 1, 1 ) ) +* +* Use recursive code +* + ELSE + N1 = N/2 + N2 = N-N1 +* +* Factor A11 +* + CALL DPOTRF2( UPLO, N1, A( 1, 1 ), LDA, IINFO ) + IF ( IINFO.NE.0 ) THEN + INFO = IINFO + RETURN + END IF +* +* Compute the Cholesky factorization A = U**T*U +* + IF( UPPER ) THEN +* +* Update and scale A12 +* + CALL DTRSM( 'L', 'U', 'T', 'N', N1, N2, ONE, + $ A( 1, 1 ), LDA, A( 1, N1+1 ), LDA ) +* +* Update and factor A22 +* + CALL DSYRK( UPLO, 'T', N2, N1, -ONE, A( 1, N1+1 ), LDA, + $ ONE, A( N1+1, N1+1 ), LDA ) + CALL DPOTRF2( UPLO, N2, A( N1+1, N1+1 ), LDA, IINFO ) + IF ( IINFO.NE.0 ) THEN + INFO = IINFO + N1 + RETURN + END IF +* +* Compute the Cholesky factorization A = L*L**T +* + ELSE +* +* Update and scale A21 +* + CALL DTRSM( 'R', 'L', 'T', 'N', N2, N1, ONE, + $ A( 1, 1 ), LDA, A( N1+1, 1 ), LDA ) +* +* Update and factor A22 +* + CALL DSYRK( UPLO, 'N', N2, N1, -ONE, A( N1+1, 1 ), LDA, + $ ONE, A( N1+1, N1+1 ), LDA ) + CALL DPOTRF2( UPLO, N2, A( N1+1, N1+1 ), LDA, IINFO ) + IF ( IINFO.NE.0 ) THEN + INFO = IINFO + N1 + RETURN + END IF + END IF + END IF + RETURN +* +* End of DPOTRF2 +* + END diff --git a/lib/linalg/static/lsame.cpp b/lib/linalg/static/lsame.cpp new file mode 100644 index 0000000000..dcb89e3d00 --- /dev/null +++ b/lib/linalg/static/lsame.cpp @@ -0,0 +1,17 @@ + +#include + +extern "C" { + +#include "lmp_f2c.h" + +logical lsame_(const char *a, const char *b) +{ + char ua, ub; + if (!a || !b) return FALSE_; + + ua = toupper(*a); + ub = toupper(*b); + return (ua == ub) ? TRUE_ : FALSE_; +} +} diff --git a/lib/linalg/static/xerbla.cpp b/lib/linalg/static/xerbla.cpp new file mode 100644 index 0000000000..cf2f7b1c69 --- /dev/null +++ b/lib/linalg/static/xerbla.cpp @@ -0,0 +1,30 @@ + +#include +#include + +class LinalgException : public std::exception { + std::string message; + + public: + LinalgException() = delete; + + explicit LinalgException(const std::string &msg) { message = msg; } + const char *what() const noexcept override { return message.c_str(); } +}; + +extern "C" { + +#include "lmp_f2c.h" + +integer xerbla_(const char *srname, integer *info) +{ + std::string mesg = " ** On entry to "; + for (int i = 0; i < 1024; ++i) { + if ((srname[i] == '\0') || (srname[i] == ' ')) break; + mesg.push_back(srname[i]); + } + mesg += " parameter number " + std::to_string(*info) + " had an illegal value\n"; + throw LinalgException(mesg); + return 0; +} +} diff --git a/lib/linalg/static/zlarft.f b/lib/linalg/static/zlarft.f new file mode 100644 index 0000000000..b59ba93213 --- /dev/null +++ b/lib/linalg/static/zlarft.f @@ -0,0 +1,328 @@ +*> \brief \b ZLARFT forms the triangular factor T of a block reflector H = I - vtvH +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLARFT + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) +* +* .. Scalar Arguments .. +* CHARACTER DIRECT, STOREV +* INTEGER K, LDT, LDV, N +* .. +* .. Array Arguments .. +* COMPLEX*16 T( LDT, * ), TAU( * ), V( LDV, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLARFT forms the triangular factor T of a complex block reflector H +*> of order n, which is defined as a product of k elementary reflectors. +*> +*> If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; +*> +*> If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. +*> +*> If STOREV = 'C', the vector which defines the elementary reflector +*> H(i) is stored in the i-th column of the array V, and +*> +*> H = I - V * T * V**H +*> +*> If STOREV = 'R', the vector which defines the elementary reflector +*> H(i) is stored in the i-th row of the array V, and +*> +*> H = I - V**H * T * V +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] DIRECT +*> \verbatim +*> DIRECT is CHARACTER*1 +*> Specifies the order in which the elementary reflectors are +*> multiplied to form the block reflector: +*> = 'F': H = H(1) H(2) . . . H(k) (Forward) +*> = 'B': H = H(k) . . . H(2) H(1) (Backward) +*> \endverbatim +*> +*> \param[in] STOREV +*> \verbatim +*> STOREV is CHARACTER*1 +*> Specifies how the vectors which define the elementary +*> reflectors are stored (see also Further Details): +*> = 'C': columnwise +*> = 'R': rowwise +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the block reflector H. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The order of the triangular factor T (= the number of +*> elementary reflectors). K >= 1. +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is COMPLEX*16 array, dimension +*> (LDV,K) if STOREV = 'C' +*> (LDV,N) if STOREV = 'R' +*> The matrix V. See further details. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V. +*> If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K. +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is COMPLEX*16 array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i). +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is COMPLEX*16 array, dimension (LDT,K) +*> The k by k triangular factor T of the block reflector. +*> If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is +*> lower triangular. The rest of the array is not used. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= K. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \ingroup complex16OTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The shape of the matrix V and the storage of the vectors which define +*> the H(i) is best illustrated by the following example with n = 5 and +*> k = 3. The elements equal to 1 are not stored. +*> +*> DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': +*> +*> V = ( 1 ) V = ( 1 v1 v1 v1 v1 ) +*> ( v1 1 ) ( 1 v2 v2 v2 ) +*> ( v1 v2 1 ) ( 1 v3 v3 ) +*> ( v1 v2 v3 ) +*> ( v1 v2 v3 ) +*> +*> DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': +*> +*> V = ( v1 v2 v3 ) V = ( v1 v1 1 ) +*> ( v1 v2 v3 ) ( v2 v2 v2 1 ) +*> ( 1 v2 v3 ) ( v3 v3 v3 v3 1 ) +*> ( 1 v3 ) +*> ( 1 ) +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) +* +* -- LAPACK auxiliary routine -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* +* .. Scalar Arguments .. + CHARACTER DIRECT, STOREV + INTEGER K, LDT, LDV, N +* .. +* .. Array Arguments .. + COMPLEX*16 T( LDT, * ), TAU( * ), V( LDV, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE, ZERO + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), + $ ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, J, PREVLASTV, LASTV +* .. +* .. External Subroutines .. + EXTERNAL ZGEMV, ZTRMV, ZGEMM +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( LSAME( DIRECT, 'F' ) ) THEN + PREVLASTV = N + DO I = 1, K + PREVLASTV = MAX( PREVLASTV, I ) + IF( TAU( I ).EQ.ZERO ) THEN +* +* H(i) = I +* + DO J = 1, I + T( J, I ) = ZERO + END DO + ELSE +* +* general case +* + IF( LSAME( STOREV, 'C' ) ) THEN +* Skip any trailing zeros. + DO LASTV = N, I+1, -1 + IF( V( LASTV, I ).NE.ZERO ) GOTO 220 + END DO + 220 CONTINUE + DO J = 1, I-1 + T( J, I ) = -TAU( I ) * CONJG( V( I , J ) ) + END DO + J = MIN( LASTV, PREVLASTV ) +* +* T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)**H * V(i:j,i) +* + CALL ZGEMV( 'Conjugate transpose', J-I, I-1, + $ -TAU( I ), V( I+1, 1 ), LDV, + $ V( I+1, I ), 1, ONE, T( 1, I ), 1 ) + ELSE +* Skip any trailing zeros. + DO LASTV = N, I+1, -1 + IF( V( I, LASTV ).NE.ZERO ) GOTO 236 + END DO + 236 CONTINUE + DO J = 1, I-1 + T( J, I ) = -TAU( I ) * V( J , I ) + END DO + J = MIN( LASTV, PREVLASTV ) +* +* T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)**H +* + CALL ZGEMM( 'N', 'C', I-1, 1, J-I, -TAU( I ), + $ V( 1, I+1 ), LDV, V( I, I+1 ), LDV, + $ ONE, T( 1, I ), LDT ) + END IF +* +* T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) +* + CALL ZTRMV( 'Upper', 'No transpose', 'Non-unit', I-1, T, + $ LDT, T( 1, I ), 1 ) + T( I, I ) = TAU( I ) + IF( I.GT.1 ) THEN + PREVLASTV = MAX( PREVLASTV, LASTV ) + ELSE + PREVLASTV = LASTV + END IF + END IF + END DO + ELSE + PREVLASTV = 1 + DO I = K, 1, -1 + IF( TAU( I ).EQ.ZERO ) THEN +* +* H(i) = I +* + DO J = I, K + T( J, I ) = ZERO + END DO + ELSE +* +* general case +* + IF( I.LT.K ) THEN + IF( LSAME( STOREV, 'C' ) ) THEN +* Skip any leading zeros. + DO LASTV = 1, I-1 + IF( V( LASTV, I ).NE.ZERO ) GOTO 281 + END DO + 281 CONTINUE + DO J = I+1, K + T( J, I ) = -TAU( I ) * CONJG( V( N-K+I , J ) ) + END DO + J = MAX( LASTV, PREVLASTV ) +* +* T(i+1:k,i) = -tau(i) * V(j:n-k+i,i+1:k)**H * V(j:n-k+i,i) +* + CALL ZGEMV( 'Conjugate transpose', N-K+I-J, K-I, + $ -TAU( I ), V( J, I+1 ), LDV, V( J, I ), + $ 1, ONE, T( I+1, I ), 1 ) + ELSE +* Skip any leading zeros. + DO LASTV = 1, I-1 + IF( V( I, LASTV ).NE.ZERO ) GOTO 297 + END DO + 297 CONTINUE + DO J = I+1, K + T( J, I ) = -TAU( I ) * V( J, N-K+I ) + END DO + J = MAX( LASTV, PREVLASTV ) +* +* T(i+1:k,i) = -tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)**H +* + CALL ZGEMM( 'N', 'C', K-I, 1, N-K+I-J, -TAU( I ), + $ V( I+1, J ), LDV, V( I, J ), LDV, + $ ONE, T( I+1, I ), LDT ) + END IF +* +* T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) +* + CALL ZTRMV( 'Lower', 'No transpose', 'Non-unit', K-I, + $ T( I+1, I+1 ), LDT, T( I+1, I ), 1 ) + IF( I.GT.1 ) THEN + PREVLASTV = MIN( PREVLASTV, LASTV ) + ELSE + PREVLASTV = LASTV + END IF + END IF + T( I, I ) = TAU( I ) + END IF + END DO + END IF + RETURN +* +* End of ZLARFT +* + END diff --git a/lib/linalg/xerbla.cpp b/lib/linalg/xerbla.cpp new file mode 100644 index 0000000000..cf2f7b1c69 --- /dev/null +++ b/lib/linalg/xerbla.cpp @@ -0,0 +1,30 @@ + +#include +#include + +class LinalgException : public std::exception { + std::string message; + + public: + LinalgException() = delete; + + explicit LinalgException(const std::string &msg) { message = msg; } + const char *what() const noexcept override { return message.c_str(); } +}; + +extern "C" { + +#include "lmp_f2c.h" + +integer xerbla_(const char *srname, integer *info) +{ + std::string mesg = " ** On entry to "; + for (int i = 0; i < 1024; ++i) { + if ((srname[i] == '\0') || (srname[i] == ' ')) break; + mesg.push_back(srname[i]); + } + mesg += " parameter number " + std::to_string(*info) + " had an illegal value\n"; + throw LinalgException(mesg); + return 0; +} +} diff --git a/lib/linalg/zaxpy.cpp b/lib/linalg/zaxpy.cpp new file mode 100644 index 0000000000..2788743a40 --- /dev/null +++ b/lib/linalg/zaxpy.cpp @@ -0,0 +1,191 @@ +/* fortran/zaxpy.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* > \brief \b ZAXPY */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZAXPY(N,ZA,ZX,INCX,ZY,INCY) */ + +/* .. Scalar Arguments .. */ +/* COMPLEX*16 ZA */ +/* INTEGER INCX,INCY,N */ +/* .. */ +/* .. Array Arguments .. */ +/* COMPLEX*16 ZX(*),ZY(*) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZAXPY constant times a vector plus a vector. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > number of elements in input vector(s) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] ZA */ +/* > \verbatim */ +/* > ZA is COMPLEX*16 */ +/* > On entry, ZA specifies the scalar alpha. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] ZX */ +/* > \verbatim */ +/* > ZX is COMPLEX*16 array, dimension ( 1 + ( N - 1 )*abs( INCX ) ) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] INCX */ +/* > \verbatim */ +/* > INCX is INTEGER */ +/* > storage spacing between elements of ZX */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] ZY */ +/* > \verbatim */ +/* > ZY is COMPLEX*16 array, dimension ( 1 + ( N - 1 )*abs( INCY ) ) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] INCY */ +/* > \verbatim */ +/* > INCY is INTEGER */ +/* > storage spacing between elements of ZY */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup complex16_blas_level1 */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > jack dongarra, 3/11/78. */ +/* > modified 12/3/93, array(1) declarations changed to array(*) */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int zaxpy_(integer *n, doublecomplex *za, doublecomplex *zx, + integer *incx, doublecomplex *zy, integer *incy) +{ + /* System generated locals */ + integer i__1, i__2, i__3, i__4; + doublecomplex z__1, z__2; + + /* Local variables */ + integer i__, ix, iy; + extern doublereal dcabs1_(doublecomplex *); + + +/* -- Reference BLAS level1 routine -- */ +/* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ + /* Parameter adjustments */ + --zy; + --zx; + + /* Function Body */ + if (*n <= 0) { + return 0; + } + if (dcabs1_(za) == 0.) { + return 0; + } + if (*incx == 1 && *incy == 1) { + +/* code for both increments equal to 1 */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + i__3 = i__; + i__4 = i__; + z__2.r = za->r * zx[i__4].r - za->i * zx[i__4].i, z__2.i = za->r * + zx[i__4].i + za->i * zx[i__4].r; + z__1.r = zy[i__3].r + z__2.r, z__1.i = zy[i__3].i + z__2.i; + zy[i__2].r = z__1.r, zy[i__2].i = z__1.i; + } + } else { + +/* code for unequal increments or equal increments */ +/* not equal to 1 */ + + ix = 1; + iy = 1; + if (*incx < 0) { + ix = (-(*n) + 1) * *incx + 1; + } + if (*incy < 0) { + iy = (-(*n) + 1) * *incy + 1; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = iy; + i__3 = iy; + i__4 = ix; + z__2.r = za->r * zx[i__4].r - za->i * zx[i__4].i, z__2.i = za->r * + zx[i__4].i + za->i * zx[i__4].r; + z__1.r = zy[i__3].r + z__2.r, z__1.i = zy[i__3].i + z__2.i; + zy[i__2].r = z__1.r, zy[i__2].i = z__1.i; + ix += *incx; + iy += *incy; + } + } + + return 0; + +/* End of ZAXPY */ + +} /* zaxpy_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/zcopy.cpp b/lib/linalg/zcopy.cpp new file mode 100644 index 0000000000..357bd961a0 --- /dev/null +++ b/lib/linalg/zcopy.cpp @@ -0,0 +1,168 @@ +/* fortran/zcopy.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* > \brief \b ZCOPY */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZCOPY(N,ZX,INCX,ZY,INCY) */ + +/* .. Scalar Arguments .. */ +/* INTEGER INCX,INCY,N */ +/* .. */ +/* .. Array Arguments .. */ +/* COMPLEX*16 ZX(*),ZY(*) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZCOPY copies a vector, x, to a vector, y. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > number of elements in input vector(s) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] ZX */ +/* > \verbatim */ +/* > ZX is COMPLEX*16 array, dimension ( 1 + ( N - 1 )*abs( INCX ) ) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] INCX */ +/* > \verbatim */ +/* > INCX is INTEGER */ +/* > storage spacing between elements of ZX */ +/* > \endverbatim */ +/* > */ +/* > \param[out] ZY */ +/* > \verbatim */ +/* > ZY is COMPLEX*16 array, dimension ( 1 + ( N - 1 )*abs( INCY ) ) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] INCY */ +/* > \verbatim */ +/* > INCY is INTEGER */ +/* > storage spacing between elements of ZY */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup complex16_blas_level1 */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > jack dongarra, linpack, 4/11/78. */ +/* > modified 12/3/93, array(1) declarations changed to array(*) */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int zcopy_(integer *n, doublecomplex *zx, integer *incx, + doublecomplex *zy, integer *incy) +{ + /* System generated locals */ + integer i__1, i__2, i__3; + + /* Local variables */ + integer i__, ix, iy; + + +/* -- Reference BLAS level1 routine -- */ +/* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ + /* Parameter adjustments */ + --zy; + --zx; + + /* Function Body */ + if (*n <= 0) { + return 0; + } + if (*incx == 1 && *incy == 1) { + +/* code for both increments equal to 1 */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + i__3 = i__; + zy[i__2].r = zx[i__3].r, zy[i__2].i = zx[i__3].i; + } + } else { + +/* code for unequal increments or equal increments */ +/* not equal to 1 */ + + ix = 1; + iy = 1; + if (*incx < 0) { + ix = (-(*n) + 1) * *incx + 1; + } + if (*incy < 0) { + iy = (-(*n) + 1) * *incy + 1; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = iy; + i__3 = ix; + zy[i__2].r = zx[i__3].r, zy[i__2].i = zx[i__3].i; + ix += *incx; + iy += *incy; + } + } + return 0; + +/* End of ZCOPY */ + +} /* zcopy_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/zdotc.cpp b/lib/linalg/zdotc.cpp new file mode 100644 index 0000000000..943f923bea --- /dev/null +++ b/lib/linalg/zdotc.cpp @@ -0,0 +1,186 @@ +/* fortran/zdotc.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* > \brief \b ZDOTC */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* Definition: */ +/* =========== */ + +/* COMPLEX*16 FUNCTION ZDOTC(N,ZX,INCX,ZY,INCY) */ + +/* .. Scalar Arguments .. */ +/* INTEGER INCX,INCY,N */ +/* .. */ +/* .. Array Arguments .. */ +/* COMPLEX*16 ZX(*),ZY(*) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZDOTC forms the dot product of two complex vectors */ +/* > ZDOTC = X^H * Y */ +/* > */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > number of elements in input vector(s) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] ZX */ +/* > \verbatim */ +/* > ZX is COMPLEX*16 array, dimension ( 1 + ( N - 1 )*abs( INCX ) ) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] INCX */ +/* > \verbatim */ +/* > INCX is INTEGER */ +/* > storage spacing between elements of ZX */ +/* > \endverbatim */ +/* > */ +/* > \param[in] ZY */ +/* > \verbatim */ +/* > ZY is COMPLEX*16 array, dimension ( 1 + ( N - 1 )*abs( INCY ) ) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] INCY */ +/* > \verbatim */ +/* > INCY is INTEGER */ +/* > storage spacing between elements of ZY */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup complex16_blas_level1 */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > jack dongarra, 3/11/78. */ +/* > modified 12/3/93, array(1) declarations changed to array(*) */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Double Complex */ VOID zdotc_(doublecomplex * ret_val, integer *n, + doublecomplex *zx, integer *incx, doublecomplex *zy, integer *incy) +{ + /* System generated locals */ + integer i__1, i__2; + doublecomplex z__1, z__2, z__3; + + /* Builtin functions */ + void d_cnjg(doublecomplex *, doublecomplex *); + + /* Local variables */ + integer i__, ix, iy; + doublecomplex ztemp; + + +/* -- Reference BLAS level1 routine -- */ +/* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ + /* Parameter adjustments */ + --zy; + --zx; + + /* Function Body */ + ztemp.r = 0., ztemp.i = 0.; + ret_val->r = 0., ret_val->i = 0.; + if (*n <= 0) { + return ; + } + if (*incx == 1 && *incy == 1) { + +/* code for both increments equal to 1 */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + d_cnjg(&z__3, &zx[i__]); + i__2 = i__; + z__2.r = z__3.r * zy[i__2].r - z__3.i * zy[i__2].i, z__2.i = + z__3.r * zy[i__2].i + z__3.i * zy[i__2].r; + z__1.r = ztemp.r + z__2.r, z__1.i = ztemp.i + z__2.i; + ztemp.r = z__1.r, ztemp.i = z__1.i; + } + } else { + +/* code for unequal increments or equal increments */ +/* not equal to 1 */ + + ix = 1; + iy = 1; + if (*incx < 0) { + ix = (-(*n) + 1) * *incx + 1; + } + if (*incy < 0) { + iy = (-(*n) + 1) * *incy + 1; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + d_cnjg(&z__3, &zx[ix]); + i__2 = iy; + z__2.r = z__3.r * zy[i__2].r - z__3.i * zy[i__2].i, z__2.i = + z__3.r * zy[i__2].i + z__3.i * zy[i__2].r; + z__1.r = ztemp.r + z__2.r, z__1.i = ztemp.i + z__2.i; + ztemp.r = z__1.r, ztemp.i = z__1.i; + ix += *incx; + iy += *incy; + } + } + ret_val->r = ztemp.r, ret_val->i = ztemp.i; + return ; + +/* End of ZDOTC */ + +} /* zdotc_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/zdrot.cpp b/lib/linalg/zdrot.cpp new file mode 100644 index 0000000000..c6ec16b053 --- /dev/null +++ b/lib/linalg/zdrot.cpp @@ -0,0 +1,213 @@ +/* fortran/zdrot.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* > \brief \b ZDROT */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZDROT( N, ZX, INCX, ZY, INCY, C, S ) */ + +/* .. Scalar Arguments .. */ +/* INTEGER INCX, INCY, N */ +/* DOUBLE PRECISION C, S */ +/* .. */ +/* .. Array Arguments .. */ +/* COMPLEX*16 ZX( * ), ZY( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > Applies a plane rotation, where the cos and sin (c and s) are real */ +/* > and the vectors cx and cy are complex. */ +/* > jack dongarra, linpack, 3/11/78. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > On entry, N specifies the order of the vectors cx and cy. */ +/* > N must be at least zero. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] ZX */ +/* > \verbatim */ +/* > ZX is COMPLEX*16 array, dimension at least */ +/* > ( 1 + ( N - 1 )*abs( INCX ) ). */ +/* > Before entry, the incremented array ZX must contain the n */ +/* > element vector cx. On exit, ZX is overwritten by the updated */ +/* > vector cx. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] INCX */ +/* > \verbatim */ +/* > INCX is INTEGER */ +/* > On entry, INCX specifies the increment for the elements of */ +/* > ZX. INCX must not be zero. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] ZY */ +/* > \verbatim */ +/* > ZY is COMPLEX*16 array, dimension at least */ +/* > ( 1 + ( N - 1 )*abs( INCY ) ). */ +/* > Before entry, the incremented array ZY must contain the n */ +/* > element vector cy. On exit, ZY is overwritten by the updated */ +/* > vector cy. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] INCY */ +/* > \verbatim */ +/* > INCY is INTEGER */ +/* > On entry, INCY specifies the increment for the elements of */ +/* > ZY. INCY must not be zero. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] C */ +/* > \verbatim */ +/* > C is DOUBLE PRECISION */ +/* > On entry, C specifies the cosine, cos. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] S */ +/* > \verbatim */ +/* > S is DOUBLE PRECISION */ +/* > On entry, S specifies the sine, sin. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup complex16_blas_level1 */ + +/* ===================================================================== */ +/* Subroutine */ int zdrot_(integer *n, doublecomplex *zx, integer *incx, + doublecomplex *zy, integer *incy, doublereal *c__, doublereal *s) +{ + /* System generated locals */ + integer i__1, i__2, i__3, i__4; + doublecomplex z__1, z__2, z__3; + + /* Local variables */ + integer i__, ix, iy; + doublecomplex ctemp; + + +/* -- Reference BLAS level1 routine -- */ +/* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments */ + --zy; + --zx; + + /* Function Body */ + if (*n <= 0) { + return 0; + } + if (*incx == 1 && *incy == 1) { + +/* code for both increments equal to 1 */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + z__2.r = *c__ * zx[i__2].r, z__2.i = *c__ * zx[i__2].i; + i__3 = i__; + z__3.r = *s * zy[i__3].r, z__3.i = *s * zy[i__3].i; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; + ctemp.r = z__1.r, ctemp.i = z__1.i; + i__2 = i__; + i__3 = i__; + z__2.r = *c__ * zy[i__3].r, z__2.i = *c__ * zy[i__3].i; + i__4 = i__; + z__3.r = *s * zx[i__4].r, z__3.i = *s * zx[i__4].i; + z__1.r = z__2.r - z__3.r, z__1.i = z__2.i - z__3.i; + zy[i__2].r = z__1.r, zy[i__2].i = z__1.i; + i__2 = i__; + zx[i__2].r = ctemp.r, zx[i__2].i = ctemp.i; + } + } else { + +/* code for unequal increments or equal increments not equal */ +/* to 1 */ + + ix = 1; + iy = 1; + if (*incx < 0) { + ix = (-(*n) + 1) * *incx + 1; + } + if (*incy < 0) { + iy = (-(*n) + 1) * *incy + 1; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = ix; + z__2.r = *c__ * zx[i__2].r, z__2.i = *c__ * zx[i__2].i; + i__3 = iy; + z__3.r = *s * zy[i__3].r, z__3.i = *s * zy[i__3].i; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; + ctemp.r = z__1.r, ctemp.i = z__1.i; + i__2 = iy; + i__3 = iy; + z__2.r = *c__ * zy[i__3].r, z__2.i = *c__ * zy[i__3].i; + i__4 = ix; + z__3.r = *s * zx[i__4].r, z__3.i = *s * zx[i__4].i; + z__1.r = z__2.r - z__3.r, z__1.i = z__2.i - z__3.i; + zy[i__2].r = z__1.r, zy[i__2].i = z__1.i; + i__2 = ix; + zx[i__2].r = ctemp.r, zx[i__2].i = ctemp.i; + ix += *incx; + iy += *incy; + } + } + return 0; + +/* End of ZDROT */ + +} /* zdrot_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/zdscal.cpp b/lib/linalg/zdscal.cpp new file mode 100644 index 0000000000..c7c6406782 --- /dev/null +++ b/lib/linalg/zdscal.cpp @@ -0,0 +1,169 @@ +/* fortran/zdscal.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* > \brief \b ZDSCAL */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZDSCAL(N,DA,ZX,INCX) */ + +/* .. Scalar Arguments .. */ +/* DOUBLE PRECISION DA */ +/* INTEGER INCX,N */ +/* .. */ +/* .. Array Arguments .. */ +/* COMPLEX*16 ZX(*) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZDSCAL scales a vector by a constant. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > number of elements in input vector(s) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DA */ +/* > \verbatim */ +/* > DA is DOUBLE PRECISION */ +/* > On entry, DA specifies the scalar alpha. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] ZX */ +/* > \verbatim */ +/* > ZX is COMPLEX*16 array, dimension ( 1 + ( N - 1 )*abs( INCX ) ) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] INCX */ +/* > \verbatim */ +/* > INCX is INTEGER */ +/* > storage spacing between elements of ZX */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup complex16_blas_level1 */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > jack dongarra, 3/11/78. */ +/* > modified 3/93 to return if incx .le. 0. */ +/* > modified 12/3/93, array(1) declarations changed to array(*) */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int zdscal_(integer *n, doublereal *da, doublecomplex *zx, + integer *incx) +{ + /* System generated locals */ + integer i__1, i__2, i__3, i__4; + doublereal d__1, d__2; + doublecomplex z__1; + + /* Builtin functions */ + double d_imag(doublecomplex *); + + /* Local variables */ + integer i__, nincx; + + +/* -- Reference BLAS level1 routine -- */ +/* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. Parameters .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ + /* Parameter adjustments */ + --zx; + + /* Function Body */ + if (*n <= 0 || *incx <= 0 || *da == 1.) { + return 0; + } + if (*incx == 1) { + +/* code for increment equal to 1 */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + i__3 = i__; + d__1 = *da * zx[i__3].r; + d__2 = *da * d_imag(&zx[i__]); + z__1.r = d__1, z__1.i = d__2; + zx[i__2].r = z__1.r, zx[i__2].i = z__1.i; + } + } else { + +/* code for increment not equal to 1 */ + + nincx = *n * *incx; + i__1 = nincx; + i__2 = *incx; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { + i__3 = i__; + i__4 = i__; + d__1 = *da * zx[i__4].r; + d__2 = *da * d_imag(&zx[i__]); + z__1.r = d__1, z__1.i = d__2; + zx[i__3].r = z__1.r, zx[i__3].i = z__1.i; + } + } + return 0; + +/* End of ZDSCAL */ + +} /* zdscal_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/zgemm.cpp b/lib/linalg/zgemm.cpp new file mode 100644 index 0000000000..92434e720f --- /dev/null +++ b/lib/linalg/zgemm.cpp @@ -0,0 +1,763 @@ +/* fortran/zgemm.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* > \brief \b ZGEMM */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC) */ + +/* .. Scalar Arguments .. */ +/* COMPLEX*16 ALPHA,BETA */ +/* INTEGER K,LDA,LDB,LDC,M,N */ +/* CHARACTER TRANSA,TRANSB */ +/* .. */ +/* .. Array Arguments .. */ +/* COMPLEX*16 A(LDA,*),B(LDB,*),C(LDC,*) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZGEMM performs one of the matrix-matrix operations */ +/* > */ +/* > C := alpha*op( A )*op( B ) + beta*C, */ +/* > */ +/* > where op( X ) is one of */ +/* > */ +/* > op( X ) = X or op( X ) = X**T or op( X ) = X**H, */ +/* > */ +/* > alpha and beta are scalars, and A, B and C are matrices, with op( A ) */ +/* > an m by k matrix, op( B ) a k by n matrix and C an m by n matrix. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] TRANSA */ +/* > \verbatim */ +/* > TRANSA is CHARACTER*1 */ +/* > On entry, TRANSA specifies the form of op( A ) to be used in */ +/* > the matrix multiplication as follows: */ +/* > */ +/* > TRANSA = 'N' or 'n', op( A ) = A. */ +/* > */ +/* > TRANSA = 'T' or 't', op( A ) = A**T. */ +/* > */ +/* > TRANSA = 'C' or 'c', op( A ) = A**H. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TRANSB */ +/* > \verbatim */ +/* > TRANSB is CHARACTER*1 */ +/* > On entry, TRANSB specifies the form of op( B ) to be used in */ +/* > the matrix multiplication as follows: */ +/* > */ +/* > TRANSB = 'N' or 'n', op( B ) = B. */ +/* > */ +/* > TRANSB = 'T' or 't', op( B ) = B**T. */ +/* > */ +/* > TRANSB = 'C' or 'c', op( B ) = B**H. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > On entry, M specifies the number of rows of the matrix */ +/* > op( A ) and of the matrix C. M must be at least zero. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > On entry, N specifies the number of columns of the matrix */ +/* > op( B ) and the number of columns of the matrix C. N must be */ +/* > at least zero. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] K */ +/* > \verbatim */ +/* > K is INTEGER */ +/* > On entry, K specifies the number of columns of the matrix */ +/* > op( A ) and the number of rows of the matrix op( B ). K must */ +/* > be at least zero. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] ALPHA */ +/* > \verbatim */ +/* > ALPHA is COMPLEX*16 */ +/* > On entry, ALPHA specifies the scalar alpha. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension ( LDA, ka ), where ka is */ +/* > k when TRANSA = 'N' or 'n', and is m otherwise. */ +/* > Before entry with TRANSA = 'N' or 'n', the leading m by k */ +/* > part of the array A must contain the matrix A, otherwise */ +/* > the leading k by m part of the array A must contain the */ +/* > matrix A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > On entry, LDA specifies the first dimension of A as declared */ +/* > in the calling (sub) program. When TRANSA = 'N' or 'n' then */ +/* > LDA must be at least max( 1, m ), otherwise LDA must be at */ +/* > least max( 1, k ). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] B */ +/* > \verbatim */ +/* > B is COMPLEX*16 array, dimension ( LDB, kb ), where kb is */ +/* > n when TRANSB = 'N' or 'n', and is k otherwise. */ +/* > Before entry with TRANSB = 'N' or 'n', the leading k by n */ +/* > part of the array B must contain the matrix B, otherwise */ +/* > the leading n by k part of the array B must contain the */ +/* > matrix B. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > On entry, LDB specifies the first dimension of B as declared */ +/* > in the calling (sub) program. When TRANSB = 'N' or 'n' then */ +/* > LDB must be at least max( 1, k ), otherwise LDB must be at */ +/* > least max( 1, n ). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] BETA */ +/* > \verbatim */ +/* > BETA is COMPLEX*16 */ +/* > On entry, BETA specifies the scalar beta. When BETA is */ +/* > supplied as zero then C need not be set on input. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] C */ +/* > \verbatim */ +/* > C is COMPLEX*16 array, dimension ( LDC, N ) */ +/* > Before entry, the leading m by n part of the array C must */ +/* > contain the matrix C, except when beta is zero, in which */ +/* > case C need not be set on entry. */ +/* > On exit, the array C is overwritten by the m by n matrix */ +/* > ( alpha*op( A )*op( B ) + beta*C ). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDC */ +/* > \verbatim */ +/* > LDC is INTEGER */ +/* > On entry, LDC specifies the first dimension of C as declared */ +/* > in the calling (sub) program. LDC must be at least */ +/* > max( 1, m ). */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup complex16_blas_level3 */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > Level 3 Blas routine. */ +/* > */ +/* > -- Written on 8-February-1989. */ +/* > Jack Dongarra, Argonne National Laboratory. */ +/* > Iain Duff, AERE Harwell. */ +/* > Jeremy Du Croz, Numerical Algorithms Group Ltd. */ +/* > Sven Hammarling, Numerical Algorithms Group Ltd. */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int zgemm_(char *transa, char *transb, integer *m, integer * + n, integer *k, doublecomplex *alpha, doublecomplex *a, integer *lda, + doublecomplex *b, integer *ldb, doublecomplex *beta, doublecomplex * + c__, integer *ldc, ftnlen transa_len, ftnlen transb_len) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, + i__3, i__4, i__5, i__6; + doublecomplex z__1, z__2, z__3, z__4; + + /* Builtin functions */ + void d_cnjg(doublecomplex *, doublecomplex *); + + /* Local variables */ + integer i__, j, l, info; + logical nota, notb; + doublecomplex temp; + logical conja, conjb; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + integer nrowa, nrowb; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + + +/* -- Reference BLAS level3 routine -- */ +/* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Parameters .. */ +/* .. */ + +/* Set NOTA and NOTB as true if A and B respectively are not */ +/* conjugated or transposed, set CONJA and CONJB as true if A and */ +/* B respectively are to be transposed but not conjugated and set */ +/* NROWA and NROWB as the number of rows of A and B respectively. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + + /* Function Body */ + nota = lsame_(transa, (char *)"N", (ftnlen)1, (ftnlen)1); + notb = lsame_(transb, (char *)"N", (ftnlen)1, (ftnlen)1); + conja = lsame_(transa, (char *)"C", (ftnlen)1, (ftnlen)1); + conjb = lsame_(transb, (char *)"C", (ftnlen)1, (ftnlen)1); + if (nota) { + nrowa = *m; + } else { + nrowa = *k; + } + if (notb) { + nrowb = *k; + } else { + nrowb = *n; + } + +/* Test the input parameters. */ + + info = 0; + if (! nota && ! conja && ! lsame_(transa, (char *)"T", (ftnlen)1, (ftnlen)1)) { + info = 1; + } else if (! notb && ! conjb && ! lsame_(transb, (char *)"T", (ftnlen)1, (ftnlen) + 1)) { + info = 2; + } else if (*m < 0) { + info = 3; + } else if (*n < 0) { + info = 4; + } else if (*k < 0) { + info = 5; + } else if (*lda < max(1,nrowa)) { + info = 8; + } else if (*ldb < max(1,nrowb)) { + info = 10; + } else if (*ldc < max(1,*m)) { + info = 13; + } + if (info != 0) { + xerbla_((char *)"ZGEMM ", &info, (ftnlen)6); + return 0; + } + +/* Quick return if possible. */ + + if (*m == 0 || *n == 0 || (alpha->r == 0. && alpha->i == 0. || *k == 0) && + (beta->r == 1. && beta->i == 0.)) { + return 0; + } + +/* And when alpha.eq.zero. */ + + if (alpha->r == 0. && alpha->i == 0.) { + if (beta->r == 0. && beta->i == 0.) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + c__[i__3].r = 0., c__[i__3].i = 0.; +/* L10: */ + } +/* L20: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + z__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4].i, + z__1.i = beta->r * c__[i__4].i + beta->i * c__[ + i__4].r; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; +/* L30: */ + } +/* L40: */ + } + } + return 0; + } + +/* Start the operations. */ + + if (notb) { + if (nota) { + +/* Form C := alpha*A*B + beta*C. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (beta->r == 0. && beta->i == 0.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + c__[i__3].r = 0., c__[i__3].i = 0.; +/* L50: */ + } + } else if (beta->r != 1. || beta->i != 0.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + z__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4] + .i, z__1.i = beta->r * c__[i__4].i + beta->i * + c__[i__4].r; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; +/* L60: */ + } + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + i__3 = l + j * b_dim1; + z__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3].i, + z__1.i = alpha->r * b[i__3].i + alpha->i * b[i__3] + .r; + temp.r = z__1.r, temp.i = z__1.i; + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + i__4 = i__ + j * c_dim1; + i__5 = i__ + j * c_dim1; + i__6 = i__ + l * a_dim1; + z__2.r = temp.r * a[i__6].r - temp.i * a[i__6].i, + z__2.i = temp.r * a[i__6].i + temp.i * a[i__6] + .r; + z__1.r = c__[i__5].r + z__2.r, z__1.i = c__[i__5].i + + z__2.i; + c__[i__4].r = z__1.r, c__[i__4].i = z__1.i; +/* L70: */ + } +/* L80: */ + } +/* L90: */ + } + } else if (conja) { + +/* Form C := alpha*A**H*B + beta*C. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp.r = 0., temp.i = 0.; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + d_cnjg(&z__3, &a[l + i__ * a_dim1]); + i__4 = l + j * b_dim1; + z__2.r = z__3.r * b[i__4].r - z__3.i * b[i__4].i, + z__2.i = z__3.r * b[i__4].i + z__3.i * b[i__4] + .r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; +/* L100: */ + } + if (beta->r == 0. && beta->i == 0.) { + i__3 = i__ + j * c_dim1; + z__1.r = alpha->r * temp.r - alpha->i * temp.i, + z__1.i = alpha->r * temp.i + alpha->i * + temp.r; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } else { + i__3 = i__ + j * c_dim1; + z__2.r = alpha->r * temp.r - alpha->i * temp.i, + z__2.i = alpha->r * temp.i + alpha->i * + temp.r; + i__4 = i__ + j * c_dim1; + z__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4] + .i, z__3.i = beta->r * c__[i__4].i + beta->i * + c__[i__4].r; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } +/* L110: */ + } +/* L120: */ + } + } else { + +/* Form C := alpha*A**T*B + beta*C */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp.r = 0., temp.i = 0.; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + i__4 = l + i__ * a_dim1; + i__5 = l + j * b_dim1; + z__2.r = a[i__4].r * b[i__5].r - a[i__4].i * b[i__5] + .i, z__2.i = a[i__4].r * b[i__5].i + a[i__4] + .i * b[i__5].r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; +/* L130: */ + } + if (beta->r == 0. && beta->i == 0.) { + i__3 = i__ + j * c_dim1; + z__1.r = alpha->r * temp.r - alpha->i * temp.i, + z__1.i = alpha->r * temp.i + alpha->i * + temp.r; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } else { + i__3 = i__ + j * c_dim1; + z__2.r = alpha->r * temp.r - alpha->i * temp.i, + z__2.i = alpha->r * temp.i + alpha->i * + temp.r; + i__4 = i__ + j * c_dim1; + z__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4] + .i, z__3.i = beta->r * c__[i__4].i + beta->i * + c__[i__4].r; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } +/* L140: */ + } +/* L150: */ + } + } + } else if (nota) { + if (conjb) { + +/* Form C := alpha*A*B**H + beta*C. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (beta->r == 0. && beta->i == 0.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + c__[i__3].r = 0., c__[i__3].i = 0.; +/* L160: */ + } + } else if (beta->r != 1. || beta->i != 0.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + z__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4] + .i, z__1.i = beta->r * c__[i__4].i + beta->i * + c__[i__4].r; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; +/* L170: */ + } + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + d_cnjg(&z__2, &b[j + l * b_dim1]); + z__1.r = alpha->r * z__2.r - alpha->i * z__2.i, z__1.i = + alpha->r * z__2.i + alpha->i * z__2.r; + temp.r = z__1.r, temp.i = z__1.i; + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + i__4 = i__ + j * c_dim1; + i__5 = i__ + j * c_dim1; + i__6 = i__ + l * a_dim1; + z__2.r = temp.r * a[i__6].r - temp.i * a[i__6].i, + z__2.i = temp.r * a[i__6].i + temp.i * a[i__6] + .r; + z__1.r = c__[i__5].r + z__2.r, z__1.i = c__[i__5].i + + z__2.i; + c__[i__4].r = z__1.r, c__[i__4].i = z__1.i; +/* L180: */ + } +/* L190: */ + } +/* L200: */ + } + } else { + +/* Form C := alpha*A*B**T + beta*C */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (beta->r == 0. && beta->i == 0.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + c__[i__3].r = 0., c__[i__3].i = 0.; +/* L210: */ + } + } else if (beta->r != 1. || beta->i != 0.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + z__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4] + .i, z__1.i = beta->r * c__[i__4].i + beta->i * + c__[i__4].r; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; +/* L220: */ + } + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + i__3 = j + l * b_dim1; + z__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3].i, + z__1.i = alpha->r * b[i__3].i + alpha->i * b[i__3] + .r; + temp.r = z__1.r, temp.i = z__1.i; + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + i__4 = i__ + j * c_dim1; + i__5 = i__ + j * c_dim1; + i__6 = i__ + l * a_dim1; + z__2.r = temp.r * a[i__6].r - temp.i * a[i__6].i, + z__2.i = temp.r * a[i__6].i + temp.i * a[i__6] + .r; + z__1.r = c__[i__5].r + z__2.r, z__1.i = c__[i__5].i + + z__2.i; + c__[i__4].r = z__1.r, c__[i__4].i = z__1.i; +/* L230: */ + } +/* L240: */ + } +/* L250: */ + } + } + } else if (conja) { + if (conjb) { + +/* Form C := alpha*A**H*B**H + beta*C. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp.r = 0., temp.i = 0.; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + d_cnjg(&z__3, &a[l + i__ * a_dim1]); + d_cnjg(&z__4, &b[j + l * b_dim1]); + z__2.r = z__3.r * z__4.r - z__3.i * z__4.i, z__2.i = + z__3.r * z__4.i + z__3.i * z__4.r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; +/* L260: */ + } + if (beta->r == 0. && beta->i == 0.) { + i__3 = i__ + j * c_dim1; + z__1.r = alpha->r * temp.r - alpha->i * temp.i, + z__1.i = alpha->r * temp.i + alpha->i * + temp.r; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } else { + i__3 = i__ + j * c_dim1; + z__2.r = alpha->r * temp.r - alpha->i * temp.i, + z__2.i = alpha->r * temp.i + alpha->i * + temp.r; + i__4 = i__ + j * c_dim1; + z__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4] + .i, z__3.i = beta->r * c__[i__4].i + beta->i * + c__[i__4].r; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } +/* L270: */ + } +/* L280: */ + } + } else { + +/* Form C := alpha*A**H*B**T + beta*C */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp.r = 0., temp.i = 0.; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + d_cnjg(&z__3, &a[l + i__ * a_dim1]); + i__4 = j + l * b_dim1; + z__2.r = z__3.r * b[i__4].r - z__3.i * b[i__4].i, + z__2.i = z__3.r * b[i__4].i + z__3.i * b[i__4] + .r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; +/* L290: */ + } + if (beta->r == 0. && beta->i == 0.) { + i__3 = i__ + j * c_dim1; + z__1.r = alpha->r * temp.r - alpha->i * temp.i, + z__1.i = alpha->r * temp.i + alpha->i * + temp.r; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } else { + i__3 = i__ + j * c_dim1; + z__2.r = alpha->r * temp.r - alpha->i * temp.i, + z__2.i = alpha->r * temp.i + alpha->i * + temp.r; + i__4 = i__ + j * c_dim1; + z__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4] + .i, z__3.i = beta->r * c__[i__4].i + beta->i * + c__[i__4].r; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } +/* L300: */ + } +/* L310: */ + } + } + } else { + if (conjb) { + +/* Form C := alpha*A**T*B**H + beta*C */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp.r = 0., temp.i = 0.; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + i__4 = l + i__ * a_dim1; + d_cnjg(&z__3, &b[j + l * b_dim1]); + z__2.r = a[i__4].r * z__3.r - a[i__4].i * z__3.i, + z__2.i = a[i__4].r * z__3.i + a[i__4].i * + z__3.r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; +/* L320: */ + } + if (beta->r == 0. && beta->i == 0.) { + i__3 = i__ + j * c_dim1; + z__1.r = alpha->r * temp.r - alpha->i * temp.i, + z__1.i = alpha->r * temp.i + alpha->i * + temp.r; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } else { + i__3 = i__ + j * c_dim1; + z__2.r = alpha->r * temp.r - alpha->i * temp.i, + z__2.i = alpha->r * temp.i + alpha->i * + temp.r; + i__4 = i__ + j * c_dim1; + z__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4] + .i, z__3.i = beta->r * c__[i__4].i + beta->i * + c__[i__4].r; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } +/* L330: */ + } +/* L340: */ + } + } else { + +/* Form C := alpha*A**T*B**T + beta*C */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp.r = 0., temp.i = 0.; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + i__4 = l + i__ * a_dim1; + i__5 = j + l * b_dim1; + z__2.r = a[i__4].r * b[i__5].r - a[i__4].i * b[i__5] + .i, z__2.i = a[i__4].r * b[i__5].i + a[i__4] + .i * b[i__5].r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; +/* L350: */ + } + if (beta->r == 0. && beta->i == 0.) { + i__3 = i__ + j * c_dim1; + z__1.r = alpha->r * temp.r - alpha->i * temp.i, + z__1.i = alpha->r * temp.i + alpha->i * + temp.r; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } else { + i__3 = i__ + j * c_dim1; + z__2.r = alpha->r * temp.r - alpha->i * temp.i, + z__2.i = alpha->r * temp.i + alpha->i * + temp.r; + i__4 = i__ + j * c_dim1; + z__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4] + .i, z__3.i = beta->r * c__[i__4].i + beta->i * + c__[i__4].r; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } +/* L360: */ + } +/* L370: */ + } + } + } + + return 0; + +/* End of ZGEMM */ + +} /* zgemm_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/zgemv.cpp b/lib/linalg/zgemv.cpp new file mode 100644 index 0000000000..9524445dba --- /dev/null +++ b/lib/linalg/zgemv.cpp @@ -0,0 +1,477 @@ +/* fortran/zgemv.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* > \brief \b ZGEMV */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZGEMV(TRANS,M,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY) */ + +/* .. Scalar Arguments .. */ +/* COMPLEX*16 ALPHA,BETA */ +/* INTEGER INCX,INCY,LDA,M,N */ +/* CHARACTER TRANS */ +/* .. */ +/* .. Array Arguments .. */ +/* COMPLEX*16 A(LDA,*),X(*),Y(*) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZGEMV performs one of the matrix-vector operations */ +/* > */ +/* > y := alpha*A*x + beta*y, or y := alpha*A**T*x + beta*y, or */ +/* > */ +/* > y := alpha*A**H*x + beta*y, */ +/* > */ +/* > where alpha and beta are scalars, x and y are vectors and A is an */ +/* > m by n matrix. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] TRANS */ +/* > \verbatim */ +/* > TRANS is CHARACTER*1 */ +/* > On entry, TRANS specifies the operation to be performed as */ +/* > follows: */ +/* > */ +/* > TRANS = 'N' or 'n' y := alpha*A*x + beta*y. */ +/* > */ +/* > TRANS = 'T' or 't' y := alpha*A**T*x + beta*y. */ +/* > */ +/* > TRANS = 'C' or 'c' y := alpha*A**H*x + beta*y. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > On entry, M specifies the number of rows of the matrix A. */ +/* > M must be at least zero. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > On entry, N specifies the number of columns of the matrix A. */ +/* > N must be at least zero. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] ALPHA */ +/* > \verbatim */ +/* > ALPHA is COMPLEX*16 */ +/* > On entry, ALPHA specifies the scalar alpha. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension ( LDA, N ) */ +/* > Before entry, the leading m by n part of the array A must */ +/* > contain the matrix of coefficients. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > On entry, LDA specifies the first dimension of A as declared */ +/* > in the calling (sub) program. LDA must be at least */ +/* > max( 1, m ). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] X */ +/* > \verbatim */ +/* > X is COMPLEX*16 array, dimension at least */ +/* > ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' */ +/* > and at least */ +/* > ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. */ +/* > Before entry, the incremented array X must contain the */ +/* > vector x. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] INCX */ +/* > \verbatim */ +/* > INCX is INTEGER */ +/* > On entry, INCX specifies the increment for the elements of */ +/* > X. INCX must not be zero. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] BETA */ +/* > \verbatim */ +/* > BETA is COMPLEX*16 */ +/* > On entry, BETA specifies the scalar beta. When BETA is */ +/* > supplied as zero then Y need not be set on input. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] Y */ +/* > \verbatim */ +/* > Y is COMPLEX*16 array, dimension at least */ +/* > ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' */ +/* > and at least */ +/* > ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. */ +/* > Before entry with BETA non-zero, the incremented array Y */ +/* > must contain the vector y. On exit, Y is overwritten by the */ +/* > updated vector y. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] INCY */ +/* > \verbatim */ +/* > INCY is INTEGER */ +/* > On entry, INCY specifies the increment for the elements of */ +/* > Y. INCY must not be zero. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup complex16_blas_level2 */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > Level 2 Blas routine. */ +/* > The vector and matrix arguments are not referenced when N = 0, or M = 0 */ +/* > */ +/* > -- Written on 22-October-1986. */ +/* > Jack Dongarra, Argonne National Lab. */ +/* > Jeremy Du Croz, Nag Central Office. */ +/* > Sven Hammarling, Nag Central Office. */ +/* > Richard Hanson, Sandia National Labs. */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int zgemv_(char *trans, integer *m, integer *n, + doublecomplex *alpha, doublecomplex *a, integer *lda, doublecomplex * + x, integer *incx, doublecomplex *beta, doublecomplex *y, integer * + incy, ftnlen trans_len) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + doublecomplex z__1, z__2, z__3; + + /* Builtin functions */ + void d_cnjg(doublecomplex *, doublecomplex *); + + /* Local variables */ + integer i__, j, ix, iy, jx, jy, kx, ky, info; + doublecomplex temp; + integer lenx, leny; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + logical noconj; + + +/* -- Reference BLAS level2 routine -- */ +/* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --x; + --y; + + /* Function Body */ + info = 0; + if (! lsame_(trans, (char *)"N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, (char *)"T", ( + ftnlen)1, (ftnlen)1) && ! lsame_(trans, (char *)"C", (ftnlen)1, (ftnlen)1) + ) { + info = 1; + } else if (*m < 0) { + info = 2; + } else if (*n < 0) { + info = 3; + } else if (*lda < max(1,*m)) { + info = 6; + } else if (*incx == 0) { + info = 8; + } else if (*incy == 0) { + info = 11; + } + if (info != 0) { + xerbla_((char *)"ZGEMV ", &info, (ftnlen)6); + return 0; + } + +/* Quick return if possible. */ + + if (*m == 0 || *n == 0 || alpha->r == 0. && alpha->i == 0. && (beta->r == + 1. && beta->i == 0.)) { + return 0; + } + + noconj = lsame_(trans, (char *)"T", (ftnlen)1, (ftnlen)1); + +/* Set LENX and LENY, the lengths of the vectors x and y, and set */ +/* up the start points in X and Y. */ + + if (lsame_(trans, (char *)"N", (ftnlen)1, (ftnlen)1)) { + lenx = *n; + leny = *m; + } else { + lenx = *m; + leny = *n; + } + if (*incx > 0) { + kx = 1; + } else { + kx = 1 - (lenx - 1) * *incx; + } + if (*incy > 0) { + ky = 1; + } else { + ky = 1 - (leny - 1) * *incy; + } + +/* Start the operations. In this version the elements of A are */ +/* accessed sequentially with one pass through A. */ + +/* First form y := beta*y. */ + + if (beta->r != 1. || beta->i != 0.) { + if (*incy == 1) { + if (beta->r == 0. && beta->i == 0.) { + i__1 = leny; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + y[i__2].r = 0., y[i__2].i = 0.; +/* L10: */ + } + } else { + i__1 = leny; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + i__3 = i__; + z__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, + z__1.i = beta->r * y[i__3].i + beta->i * y[i__3] + .r; + y[i__2].r = z__1.r, y[i__2].i = z__1.i; +/* L20: */ + } + } + } else { + iy = ky; + if (beta->r == 0. && beta->i == 0.) { + i__1 = leny; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = iy; + y[i__2].r = 0., y[i__2].i = 0.; + iy += *incy; +/* L30: */ + } + } else { + i__1 = leny; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = iy; + i__3 = iy; + z__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, + z__1.i = beta->r * y[i__3].i + beta->i * y[i__3] + .r; + y[i__2].r = z__1.r, y[i__2].i = z__1.i; + iy += *incy; +/* L40: */ + } + } + } + } + if (alpha->r == 0. && alpha->i == 0.) { + return 0; + } + if (lsame_(trans, (char *)"N", (ftnlen)1, (ftnlen)1)) { + +/* Form y := alpha*A*x + y. */ + + jx = kx; + if (*incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jx; + z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, z__1.i = + alpha->r * x[i__2].i + alpha->i * x[i__2].r; + temp.r = z__1.r, temp.i = z__1.i; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__; + i__4 = i__; + i__5 = i__ + j * a_dim1; + z__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, z__2.i = + temp.r * a[i__5].i + temp.i * a[i__5].r; + z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i; + y[i__3].r = z__1.r, y[i__3].i = z__1.i; +/* L50: */ + } + jx += *incx; +/* L60: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jx; + z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, z__1.i = + alpha->r * x[i__2].i + alpha->i * x[i__2].r; + temp.r = z__1.r, temp.i = z__1.i; + iy = ky; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = iy; + i__4 = iy; + i__5 = i__ + j * a_dim1; + z__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, z__2.i = + temp.r * a[i__5].i + temp.i * a[i__5].r; + z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i; + y[i__3].r = z__1.r, y[i__3].i = z__1.i; + iy += *incy; +/* L70: */ + } + jx += *incx; +/* L80: */ + } + } + } else { + +/* Form y := alpha*A**T*x + y or y := alpha*A**H*x + y. */ + + jy = ky; + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp.r = 0., temp.i = 0.; + if (noconj) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__; + z__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[i__4] + .i, z__2.i = a[i__3].r * x[i__4].i + a[i__3] + .i * x[i__4].r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; +/* L90: */ + } + } else { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + d_cnjg(&z__3, &a[i__ + j * a_dim1]); + i__3 = i__; + z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, + z__2.i = z__3.r * x[i__3].i + z__3.i * x[i__3] + .r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; +/* L100: */ + } + } + i__2 = jy; + i__3 = jy; + z__2.r = alpha->r * temp.r - alpha->i * temp.i, z__2.i = + alpha->r * temp.i + alpha->i * temp.r; + z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i; + y[i__2].r = z__1.r, y[i__2].i = z__1.i; + jy += *incy; +/* L110: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp.r = 0., temp.i = 0.; + ix = kx; + if (noconj) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = ix; + z__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[i__4] + .i, z__2.i = a[i__3].r * x[i__4].i + a[i__3] + .i * x[i__4].r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + ix += *incx; +/* L120: */ + } + } else { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + d_cnjg(&z__3, &a[i__ + j * a_dim1]); + i__3 = ix; + z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, + z__2.i = z__3.r * x[i__3].i + z__3.i * x[i__3] + .r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + ix += *incx; +/* L130: */ + } + } + i__2 = jy; + i__3 = jy; + z__2.r = alpha->r * temp.r - alpha->i * temp.i, z__2.i = + alpha->r * temp.i + alpha->i * temp.r; + z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i; + y[i__2].r = z__1.r, y[i__2].i = z__1.i; + jy += *incy; +/* L140: */ + } + } + } + + return 0; + +/* End of ZGEMV */ + +} /* zgemv_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/zgerc.cpp b/lib/linalg/zgerc.cpp new file mode 100644 index 0000000000..db1e60a2eb --- /dev/null +++ b/lib/linalg/zgerc.cpp @@ -0,0 +1,289 @@ +/* fortran/zgerc.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* > \brief \b ZGERC */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZGERC(M,N,ALPHA,X,INCX,Y,INCY,A,LDA) */ + +/* .. Scalar Arguments .. */ +/* COMPLEX*16 ALPHA */ +/* INTEGER INCX,INCY,LDA,M,N */ +/* .. */ +/* .. Array Arguments .. */ +/* COMPLEX*16 A(LDA,*),X(*),Y(*) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZGERC performs the rank 1 operation */ +/* > */ +/* > A := alpha*x*y**H + A, */ +/* > */ +/* > where alpha is a scalar, x is an m element vector, y is an n element */ +/* > vector and A is an m by n matrix. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > On entry, M specifies the number of rows of the matrix A. */ +/* > M must be at least zero. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > On entry, N specifies the number of columns of the matrix A. */ +/* > N must be at least zero. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] ALPHA */ +/* > \verbatim */ +/* > ALPHA is COMPLEX*16 */ +/* > On entry, ALPHA specifies the scalar alpha. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] X */ +/* > \verbatim */ +/* > X is COMPLEX*16 array, dimension at least */ +/* > ( 1 + ( m - 1 )*abs( INCX ) ). */ +/* > Before entry, the incremented array X must contain the m */ +/* > element vector x. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] INCX */ +/* > \verbatim */ +/* > INCX is INTEGER */ +/* > On entry, INCX specifies the increment for the elements of */ +/* > X. INCX must not be zero. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] Y */ +/* > \verbatim */ +/* > Y is COMPLEX*16 array, dimension at least */ +/* > ( 1 + ( n - 1 )*abs( INCY ) ). */ +/* > Before entry, the incremented array Y must contain the n */ +/* > element vector y. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] INCY */ +/* > \verbatim */ +/* > INCY is INTEGER */ +/* > On entry, INCY specifies the increment for the elements of */ +/* > Y. INCY must not be zero. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension ( LDA, N ) */ +/* > Before entry, the leading m by n part of the array A must */ +/* > contain the matrix of coefficients. On exit, A is */ +/* > overwritten by the updated matrix. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > On entry, LDA specifies the first dimension of A as declared */ +/* > in the calling (sub) program. LDA must be at least */ +/* > max( 1, m ). */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup complex16_blas_level2 */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > Level 2 Blas routine. */ +/* > */ +/* > -- Written on 22-October-1986. */ +/* > Jack Dongarra, Argonne National Lab. */ +/* > Jeremy Du Croz, Nag Central Office. */ +/* > Sven Hammarling, Nag Central Office. */ +/* > Richard Hanson, Sandia National Labs. */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int zgerc_(integer *m, integer *n, doublecomplex *alpha, + doublecomplex *x, integer *incx, doublecomplex *y, integer *incy, + doublecomplex *a, integer *lda) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + doublecomplex z__1, z__2; + + /* Builtin functions */ + void d_cnjg(doublecomplex *, doublecomplex *); + + /* Local variables */ + integer i__, j, ix, jy, kx, info; + doublecomplex temp; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + + +/* -- Reference BLAS level2 routine -- */ +/* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + --x; + --y; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + info = 0; + if (*m < 0) { + info = 1; + } else if (*n < 0) { + info = 2; + } else if (*incx == 0) { + info = 5; + } else if (*incy == 0) { + info = 7; + } else if (*lda < max(1,*m)) { + info = 9; + } + if (info != 0) { + xerbla_((char *)"ZGERC ", &info, (ftnlen)6); + return 0; + } + +/* Quick return if possible. */ + + if (*m == 0 || *n == 0 || alpha->r == 0. && alpha->i == 0.) { + return 0; + } + +/* Start the operations. In this version the elements of A are */ +/* accessed sequentially with one pass through A. */ + + if (*incy > 0) { + jy = 1; + } else { + jy = 1 - (*n - 1) * *incy; + } + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jy; + if (y[i__2].r != 0. || y[i__2].i != 0.) { + d_cnjg(&z__2, &y[jy]); + z__1.r = alpha->r * z__2.r - alpha->i * z__2.i, z__1.i = + alpha->r * z__2.i + alpha->i * z__2.r; + temp.r = z__1.r, temp.i = z__1.i; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + i__5 = i__; + z__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, z__2.i = + x[i__5].r * temp.i + x[i__5].i * temp.r; + z__1.r = a[i__4].r + z__2.r, z__1.i = a[i__4].i + z__2.i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; +/* L10: */ + } + } + jy += *incy; +/* L20: */ + } + } else { + if (*incx > 0) { + kx = 1; + } else { + kx = 1 - (*m - 1) * *incx; + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jy; + if (y[i__2].r != 0. || y[i__2].i != 0.) { + d_cnjg(&z__2, &y[jy]); + z__1.r = alpha->r * z__2.r - alpha->i * z__2.i, z__1.i = + alpha->r * z__2.i + alpha->i * z__2.r; + temp.r = z__1.r, temp.i = z__1.i; + ix = kx; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + i__5 = ix; + z__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, z__2.i = + x[i__5].r * temp.i + x[i__5].i * temp.r; + z__1.r = a[i__4].r + z__2.r, z__1.i = a[i__4].i + z__2.i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; + ix += *incx; +/* L30: */ + } + } + jy += *incy; +/* L40: */ + } + } + + return 0; + +/* End of ZGERC */ + +} /* zgerc_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/zheev.cpp b/lib/linalg/zheev.cpp new file mode 100644 index 0000000000..f421e49090 --- /dev/null +++ b/lib/linalg/zheev.cpp @@ -0,0 +1,379 @@ +/* fortran/zheev.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c_n1 = -1; +static integer c__0 = 0; +static doublereal c_b18 = 1.; + +/* > \brief ZHEEV computes the eigenvalues and, optionally, the left and/or right eigenvectors for HE matr +ices */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZHEEV + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZHEEV( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, RWORK, */ +/* INFO ) */ + +/* .. Scalar Arguments .. */ +/* CHARACTER JOBZ, UPLO */ +/* INTEGER INFO, LDA, LWORK, N */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION RWORK( * ), W( * ) */ +/* COMPLEX*16 A( LDA, * ), WORK( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZHEEV computes all eigenvalues and, optionally, eigenvectors of a */ +/* > complex Hermitian matrix A. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOBZ */ +/* > \verbatim */ +/* > JOBZ is CHARACTER*1 */ +/* > = 'N': Compute eigenvalues only; */ +/* > = 'V': Compute eigenvalues and eigenvectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': Upper triangle of A is stored; */ +/* > = 'L': Lower triangle of A is stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA, N) */ +/* > On entry, the Hermitian matrix A. If UPLO = 'U', the */ +/* > leading N-by-N upper triangular part of A contains the */ +/* > upper triangular part of the matrix A. If UPLO = 'L', */ +/* > the leading N-by-N lower triangular part of A contains */ +/* > the lower triangular part of the matrix A. */ +/* > On exit, if JOBZ = 'V', then if INFO = 0, A contains the */ +/* > orthonormal eigenvectors of the matrix A. */ +/* > If JOBZ = 'N', then on exit the lower triangle (if UPLO='L') */ +/* > or the upper triangle (if UPLO='U') of A, including the */ +/* > diagonal, is destroyed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= max(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] W */ +/* > \verbatim */ +/* > W is DOUBLE PRECISION array, dimension (N) */ +/* > If INFO = 0, the eigenvalues in ascending order. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The length of the array WORK. LWORK >= max(1,2*N-1). */ +/* > For optimal efficiency, LWORK >= (NB+1)*N, */ +/* > where NB is the blocksize for ZHETRD returned by ILAENV. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RWORK */ +/* > \verbatim */ +/* > RWORK is DOUBLE PRECISION array, dimension (max(1, 3*N-2)) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i, the algorithm failed to converge; i */ +/* > off-diagonal elements of an intermediate tridiagonal */ +/* > form did not converge to zero. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup complex16HEeigen */ + +/* ===================================================================== */ +/* Subroutine */ int zheev_(char *jobz, char *uplo, integer *n, doublecomplex + *a, integer *lda, doublereal *w, doublecomplex *work, integer *lwork, + doublereal *rwork, integer *info, ftnlen jobz_len, ftnlen uplo_len) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + doublereal d__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + integer nb; + doublereal eps; + integer inde; + doublereal anrm; + integer imax; + doublereal rmin, rmax; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *); + doublereal sigma; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + integer iinfo; + logical lower, wantz; + extern doublereal dlamch_(char *, ftnlen); + integer iscale; + doublereal safmin; + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + doublereal bignum; + extern doublereal zlanhe_(char *, char *, integer *, doublecomplex *, + integer *, doublereal *, ftnlen, ftnlen); + integer indtau; + extern /* Subroutine */ int dsterf_(integer *, doublereal *, doublereal *, + integer *), zlascl_(char *, integer *, integer *, doublereal *, + doublereal *, integer *, integer *, doublecomplex *, integer *, + integer *, ftnlen); + integer indwrk; + extern /* Subroutine */ int zhetrd_(char *, integer *, doublecomplex *, + integer *, doublereal *, doublereal *, doublecomplex *, + doublecomplex *, integer *, integer *, ftnlen); + integer llwork; + doublereal smlnum; + integer lwkopt; + logical lquery; + extern /* Subroutine */ int zsteqr_(char *, integer *, doublereal *, + doublereal *, doublecomplex *, integer *, doublereal *, integer *, + ftnlen), zungtr_(char *, integer *, doublecomplex *, integer *, + doublecomplex *, doublecomplex *, integer *, integer *, ftnlen); + + +/* -- LAPACK driver routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --w; + --work; + --rwork; + + /* Function Body */ + wantz = lsame_(jobz, (char *)"V", (ftnlen)1, (ftnlen)1); + lower = lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1); + lquery = *lwork == -1; + + *info = 0; + if (! (wantz || lsame_(jobz, (char *)"N", (ftnlen)1, (ftnlen)1))) { + *info = -1; + } else if (! (lower || lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1))) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } + + if (*info == 0) { + nb = ilaenv_(&c__1, (char *)"ZHETRD", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, + (ftnlen)1); +/* Computing MAX */ + i__1 = 1, i__2 = (nb + 1) * *n; + lwkopt = max(i__1,i__2); + work[1].r = (doublereal) lwkopt, work[1].i = 0.; + +/* Computing MAX */ + i__1 = 1, i__2 = (*n << 1) - 1; + if (*lwork < max(i__1,i__2) && ! lquery) { + *info = -8; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"ZHEEV ", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + if (*n == 1) { + i__1 = a_dim1 + 1; + w[1] = a[i__1].r; + work[1].r = 1., work[1].i = 0.; + if (wantz) { + i__1 = a_dim1 + 1; + a[i__1].r = 1., a[i__1].i = 0.; + } + return 0; + } + +/* Get machine constants. */ + + safmin = dlamch_((char *)"Safe minimum", (ftnlen)12); + eps = dlamch_((char *)"Precision", (ftnlen)9); + smlnum = safmin / eps; + bignum = 1. / smlnum; + rmin = sqrt(smlnum); + rmax = sqrt(bignum); + +/* Scale matrix to allowable range, if necessary. */ + + anrm = zlanhe_((char *)"M", uplo, n, &a[a_offset], lda, &rwork[1], (ftnlen)1, ( + ftnlen)1); + iscale = 0; + if (anrm > 0. && anrm < rmin) { + iscale = 1; + sigma = rmin / anrm; + } else if (anrm > rmax) { + iscale = 1; + sigma = rmax / anrm; + } + if (iscale == 1) { + zlascl_(uplo, &c__0, &c__0, &c_b18, &sigma, n, n, &a[a_offset], lda, + info, (ftnlen)1); + } + +/* Call ZHETRD to reduce Hermitian matrix to tridiagonal form. */ + + inde = 1; + indtau = 1; + indwrk = indtau + *n; + llwork = *lwork - indwrk + 1; + zhetrd_(uplo, n, &a[a_offset], lda, &w[1], &rwork[inde], &work[indtau], & + work[indwrk], &llwork, &iinfo, (ftnlen)1); + +/* For eigenvalues only, call DSTERF. For eigenvectors, first call */ +/* ZUNGTR to generate the unitary matrix, then call ZSTEQR. */ + + if (! wantz) { + dsterf_(n, &w[1], &rwork[inde], info); + } else { + zungtr_(uplo, n, &a[a_offset], lda, &work[indtau], &work[indwrk], & + llwork, &iinfo, (ftnlen)1); + indwrk = inde + *n; + zsteqr_(jobz, n, &w[1], &rwork[inde], &a[a_offset], lda, &rwork[ + indwrk], info, (ftnlen)1); + } + +/* If matrix was scaled, then rescale eigenvalues appropriately. */ + + if (iscale == 1) { + if (*info == 0) { + imax = *n; + } else { + imax = *info - 1; + } + d__1 = 1. / sigma; + dscal_(&imax, &d__1, &w[1], &c__1); + } + +/* Set WORK(1) to optimal complex workspace size. */ + + work[1].r = (doublereal) lwkopt, work[1].i = 0.; + + return 0; + +/* End of ZHEEV */ + +} /* zheev_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/zheevd.cpp b/lib/linalg/zheevd.cpp new file mode 100644 index 0000000000..c340c17718 --- /dev/null +++ b/lib/linalg/zheevd.cpp @@ -0,0 +1,486 @@ +/* fortran/zheevd.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c_n1 = -1; +static integer c__0 = 0; +static doublereal c_b18 = 1.; + +/* > \brief ZHEEVD computes the eigenvalues and, optionally, the left and/or right eigenvectors for HE mat +rices */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZHEEVD + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZHEEVD( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, RWORK, */ +/* LRWORK, IWORK, LIWORK, INFO ) */ + +/* .. Scalar Arguments .. */ +/* CHARACTER JOBZ, UPLO */ +/* INTEGER INFO, LDA, LIWORK, LRWORK, LWORK, N */ +/* .. */ +/* .. Array Arguments .. */ +/* INTEGER IWORK( * ) */ +/* DOUBLE PRECISION RWORK( * ), W( * ) */ +/* COMPLEX*16 A( LDA, * ), WORK( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZHEEVD computes all eigenvalues and, optionally, eigenvectors of a */ +/* > complex Hermitian matrix A. If eigenvectors are desired, it uses a */ +/* > divide and conquer algorithm. */ +/* > */ +/* > The divide and conquer algorithm makes very mild assumptions about */ +/* > floating point arithmetic. It will work on machines with a guard */ +/* > digit in add/subtract, or on those binary machines without guard */ +/* > digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or */ +/* > Cray-2. It could conceivably fail on hexadecimal or decimal machines */ +/* > without guard digits, but we know of none. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] JOBZ */ +/* > \verbatim */ +/* > JOBZ is CHARACTER*1 */ +/* > = 'N': Compute eigenvalues only; */ +/* > = 'V': Compute eigenvalues and eigenvectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': Upper triangle of A is stored; */ +/* > = 'L': Lower triangle of A is stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA, N) */ +/* > On entry, the Hermitian matrix A. If UPLO = 'U', the */ +/* > leading N-by-N upper triangular part of A contains the */ +/* > upper triangular part of the matrix A. If UPLO = 'L', */ +/* > the leading N-by-N lower triangular part of A contains */ +/* > the lower triangular part of the matrix A. */ +/* > On exit, if JOBZ = 'V', then if INFO = 0, A contains the */ +/* > orthonormal eigenvectors of the matrix A. */ +/* > If JOBZ = 'N', then on exit the lower triangle (if UPLO='L') */ +/* > or the upper triangle (if UPLO='U') of A, including the */ +/* > diagonal, is destroyed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= max(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] W */ +/* > \verbatim */ +/* > W is DOUBLE PRECISION array, dimension (N) */ +/* > If INFO = 0, the eigenvalues in ascending order. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The length of the array WORK. */ +/* > If N <= 1, LWORK must be at least 1. */ +/* > If JOBZ = 'N' and N > 1, LWORK must be at least N + 1. */ +/* > If JOBZ = 'V' and N > 1, LWORK must be at least 2*N + N**2. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal sizes of the WORK, RWORK and */ +/* > IWORK arrays, returns these values as the first entries of */ +/* > the WORK, RWORK and IWORK arrays, and no error message */ +/* > related to LWORK or LRWORK or LIWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RWORK */ +/* > \verbatim */ +/* > RWORK is DOUBLE PRECISION array, */ +/* > dimension (LRWORK) */ +/* > On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LRWORK */ +/* > \verbatim */ +/* > LRWORK is INTEGER */ +/* > The dimension of the array RWORK. */ +/* > If N <= 1, LRWORK must be at least 1. */ +/* > If JOBZ = 'N' and N > 1, LRWORK must be at least N. */ +/* > If JOBZ = 'V' and N > 1, LRWORK must be at least */ +/* > 1 + 5*N + 2*N**2. */ +/* > */ +/* > If LRWORK = -1, then a workspace query is assumed; the */ +/* > routine only calculates the optimal sizes of the WORK, RWORK */ +/* > and IWORK arrays, returns these values as the first entries */ +/* > of the WORK, RWORK and IWORK arrays, and no error message */ +/* > related to LWORK or LRWORK or LIWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (MAX(1,LIWORK)) */ +/* > On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LIWORK */ +/* > \verbatim */ +/* > LIWORK is INTEGER */ +/* > The dimension of the array IWORK. */ +/* > If N <= 1, LIWORK must be at least 1. */ +/* > If JOBZ = 'N' and N > 1, LIWORK must be at least 1. */ +/* > If JOBZ = 'V' and N > 1, LIWORK must be at least 3 + 5*N. */ +/* > */ +/* > If LIWORK = -1, then a workspace query is assumed; the */ +/* > routine only calculates the optimal sizes of the WORK, RWORK */ +/* > and IWORK arrays, returns these values as the first entries */ +/* > of the WORK, RWORK and IWORK arrays, and no error message */ +/* > related to LWORK or LRWORK or LIWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i and JOBZ = 'N', then the algorithm failed */ +/* > to converge; i off-diagonal elements of an intermediate */ +/* > tridiagonal form did not converge to zero; */ +/* > if INFO = i and JOBZ = 'V', then the algorithm failed */ +/* > to compute an eigenvalue while working on the submatrix */ +/* > lying in rows and columns INFO/(N+1) through */ +/* > mod(INFO,N+1). */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup complex16HEeigen */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > Modified description of INFO. Sven, 16 Feb 05. */ + +/* > \par Contributors: */ +/* ================== */ +/* > */ +/* > Jeff Rutter, Computer Science Division, University of California */ +/* > at Berkeley, USA */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int zheevd_(char *jobz, char *uplo, integer *n, + doublecomplex *a, integer *lda, doublereal *w, doublecomplex *work, + integer *lwork, doublereal *rwork, integer *lrwork, integer *iwork, + integer *liwork, integer *info, ftnlen jobz_len, ftnlen uplo_len) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + doublereal d__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + doublereal eps; + integer inde; + doublereal anrm; + integer imax; + doublereal rmin, rmax; + integer lopt; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *); + doublereal sigma; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + integer iinfo, lwmin, liopt; + logical lower; + integer llrwk, lropt; + logical wantz; + integer indwk2, llwrk2; + extern doublereal dlamch_(char *, ftnlen); + integer iscale; + doublereal safmin; + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + doublereal bignum; + extern doublereal zlanhe_(char *, char *, integer *, doublecomplex *, + integer *, doublereal *, ftnlen, ftnlen); + integer indtau; + extern /* Subroutine */ int dsterf_(integer *, doublereal *, doublereal *, + integer *), zlascl_(char *, integer *, integer *, doublereal *, + doublereal *, integer *, integer *, doublecomplex *, integer *, + integer *, ftnlen), zstedc_(char *, integer *, doublereal *, + doublereal *, doublecomplex *, integer *, doublecomplex *, + integer *, doublereal *, integer *, integer *, integer *, integer + *, ftnlen); + integer indrwk, indwrk, liwmin; + extern /* Subroutine */ int zhetrd_(char *, integer *, doublecomplex *, + integer *, doublereal *, doublereal *, doublecomplex *, + doublecomplex *, integer *, integer *, ftnlen), zlacpy_(char *, + integer *, integer *, doublecomplex *, integer *, doublecomplex *, + integer *, ftnlen); + integer lrwmin, llwork; + doublereal smlnum; + logical lquery; + extern /* Subroutine */ int zunmtr_(char *, char *, char *, integer *, + integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, integer *, + ftnlen, ftnlen, ftnlen); + + +/* -- LAPACK driver routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --w; + --work; + --rwork; + --iwork; + + /* Function Body */ + wantz = lsame_(jobz, (char *)"V", (ftnlen)1, (ftnlen)1); + lower = lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1); + lquery = *lwork == -1 || *lrwork == -1 || *liwork == -1; + + *info = 0; + if (! (wantz || lsame_(jobz, (char *)"N", (ftnlen)1, (ftnlen)1))) { + *info = -1; + } else if (! (lower || lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1))) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } + + if (*info == 0) { + if (*n <= 1) { + lwmin = 1; + lrwmin = 1; + liwmin = 1; + lopt = lwmin; + lropt = lrwmin; + liopt = liwmin; + } else { + if (wantz) { + lwmin = (*n << 1) + *n * *n; +/* Computing 2nd power */ + i__1 = *n; + lrwmin = *n * 5 + 1 + (i__1 * i__1 << 1); + liwmin = *n * 5 + 3; + } else { + lwmin = *n + 1; + lrwmin = *n; + liwmin = 1; + } +/* Computing MAX */ + i__1 = lwmin, i__2 = *n + *n * ilaenv_(&c__1, (char *)"ZHETRD", uplo, n, & + c_n1, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + lopt = max(i__1,i__2); + lropt = lrwmin; + liopt = liwmin; + } + work[1].r = (doublereal) lopt, work[1].i = 0.; + rwork[1] = (doublereal) lropt; + iwork[1] = liopt; + + if (*lwork < lwmin && ! lquery) { + *info = -8; + } else if (*lrwork < lrwmin && ! lquery) { + *info = -10; + } else if (*liwork < liwmin && ! lquery) { + *info = -12; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"ZHEEVD", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + if (*n == 1) { + i__1 = a_dim1 + 1; + w[1] = a[i__1].r; + if (wantz) { + i__1 = a_dim1 + 1; + a[i__1].r = 1., a[i__1].i = 0.; + } + return 0; + } + +/* Get machine constants. */ + + safmin = dlamch_((char *)"Safe minimum", (ftnlen)12); + eps = dlamch_((char *)"Precision", (ftnlen)9); + smlnum = safmin / eps; + bignum = 1. / smlnum; + rmin = sqrt(smlnum); + rmax = sqrt(bignum); + +/* Scale matrix to allowable range, if necessary. */ + + anrm = zlanhe_((char *)"M", uplo, n, &a[a_offset], lda, &rwork[1], (ftnlen)1, ( + ftnlen)1); + iscale = 0; + if (anrm > 0. && anrm < rmin) { + iscale = 1; + sigma = rmin / anrm; + } else if (anrm > rmax) { + iscale = 1; + sigma = rmax / anrm; + } + if (iscale == 1) { + zlascl_(uplo, &c__0, &c__0, &c_b18, &sigma, n, n, &a[a_offset], lda, + info, (ftnlen)1); + } + +/* Call ZHETRD to reduce Hermitian matrix to tridiagonal form. */ + + inde = 1; + indtau = 1; + indwrk = indtau + *n; + indrwk = inde + *n; + indwk2 = indwrk + *n * *n; + llwork = *lwork - indwrk + 1; + llwrk2 = *lwork - indwk2 + 1; + llrwk = *lrwork - indrwk + 1; + zhetrd_(uplo, n, &a[a_offset], lda, &w[1], &rwork[inde], &work[indtau], & + work[indwrk], &llwork, &iinfo, (ftnlen)1); + +/* For eigenvalues only, call DSTERF. For eigenvectors, first call */ +/* ZSTEDC to generate the eigenvector matrix, WORK(INDWRK), of the */ +/* tridiagonal matrix, then call ZUNMTR to multiply it to the */ +/* Householder transformations represented as Householder vectors in */ +/* A. */ + + if (! wantz) { + dsterf_(n, &w[1], &rwork[inde], info); + } else { + zstedc_((char *)"I", n, &w[1], &rwork[inde], &work[indwrk], n, &work[indwk2], + &llwrk2, &rwork[indrwk], &llrwk, &iwork[1], liwork, info, ( + ftnlen)1); + zunmtr_((char *)"L", uplo, (char *)"N", n, n, &a[a_offset], lda, &work[indtau], &work[ + indwrk], n, &work[indwk2], &llwrk2, &iinfo, (ftnlen)1, ( + ftnlen)1, (ftnlen)1); + zlacpy_((char *)"A", n, n, &work[indwrk], n, &a[a_offset], lda, (ftnlen)1); + } + +/* If matrix was scaled, then rescale eigenvalues appropriately. */ + + if (iscale == 1) { + if (*info == 0) { + imax = *n; + } else { + imax = *info - 1; + } + d__1 = 1. / sigma; + dscal_(&imax, &d__1, &w[1], &c__1); + } + + work[1].r = (doublereal) lopt, work[1].i = 0.; + rwork[1] = (doublereal) lropt; + iwork[1] = liopt; + + return 0; + +/* End of ZHEEVD */ + +} /* zheevd_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/zhemv.cpp b/lib/linalg/zhemv.cpp new file mode 100644 index 0000000000..f965dcfcdf --- /dev/null +++ b/lib/linalg/zhemv.cpp @@ -0,0 +1,508 @@ +/* fortran/zhemv.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* > \brief \b ZHEMV */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZHEMV(UPLO,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY) */ + +/* .. Scalar Arguments .. */ +/* COMPLEX*16 ALPHA,BETA */ +/* INTEGER INCX,INCY,LDA,N */ +/* CHARACTER UPLO */ +/* .. */ +/* .. Array Arguments .. */ +/* COMPLEX*16 A(LDA,*),X(*),Y(*) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZHEMV performs the matrix-vector operation */ +/* > */ +/* > y := alpha*A*x + beta*y, */ +/* > */ +/* > where alpha and beta are scalars, x and y are n element vectors and */ +/* > A is an n by n hermitian matrix. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > On entry, UPLO specifies whether the upper or lower */ +/* > triangular part of the array A is to be referenced as */ +/* > follows: */ +/* > */ +/* > UPLO = 'U' or 'u' Only the upper triangular part of A */ +/* > is to be referenced. */ +/* > */ +/* > UPLO = 'L' or 'l' Only the lower triangular part of A */ +/* > is to be referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > On entry, N specifies the order of the matrix A. */ +/* > N must be at least zero. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] ALPHA */ +/* > \verbatim */ +/* > ALPHA is COMPLEX*16 */ +/* > On entry, ALPHA specifies the scalar alpha. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension ( LDA, N ) */ +/* > Before entry with UPLO = 'U' or 'u', the leading n by n */ +/* > upper triangular part of the array A must contain the upper */ +/* > triangular part of the hermitian matrix and the strictly */ +/* > lower triangular part of A is not referenced. */ +/* > Before entry with UPLO = 'L' or 'l', the leading n by n */ +/* > lower triangular part of the array A must contain the lower */ +/* > triangular part of the hermitian matrix and the strictly */ +/* > upper triangular part of A is not referenced. */ +/* > Note that the imaginary parts of the diagonal elements need */ +/* > not be set and are assumed to be zero. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > On entry, LDA specifies the first dimension of A as declared */ +/* > in the calling (sub) program. LDA must be at least */ +/* > max( 1, n ). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] X */ +/* > \verbatim */ +/* > X is COMPLEX*16 array, dimension at least */ +/* > ( 1 + ( n - 1 )*abs( INCX ) ). */ +/* > Before entry, the incremented array X must contain the n */ +/* > element vector x. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] INCX */ +/* > \verbatim */ +/* > INCX is INTEGER */ +/* > On entry, INCX specifies the increment for the elements of */ +/* > X. INCX must not be zero. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] BETA */ +/* > \verbatim */ +/* > BETA is COMPLEX*16 */ +/* > On entry, BETA specifies the scalar beta. When BETA is */ +/* > supplied as zero then Y need not be set on input. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] Y */ +/* > \verbatim */ +/* > Y is COMPLEX*16 array, dimension at least */ +/* > ( 1 + ( n - 1 )*abs( INCY ) ). */ +/* > Before entry, the incremented array Y must contain the n */ +/* > element vector y. On exit, Y is overwritten by the updated */ +/* > vector y. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] INCY */ +/* > \verbatim */ +/* > INCY is INTEGER */ +/* > On entry, INCY specifies the increment for the elements of */ +/* > Y. INCY must not be zero. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup complex16_blas_level2 */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > Level 2 Blas routine. */ +/* > The vector and matrix arguments are not referenced when N = 0, or M = 0 */ +/* > */ +/* > -- Written on 22-October-1986. */ +/* > Jack Dongarra, Argonne National Lab. */ +/* > Jeremy Du Croz, Nag Central Office. */ +/* > Sven Hammarling, Nag Central Office. */ +/* > Richard Hanson, Sandia National Labs. */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int zhemv_(char *uplo, integer *n, doublecomplex *alpha, + doublecomplex *a, integer *lda, doublecomplex *x, integer *incx, + doublecomplex *beta, doublecomplex *y, integer *incy, ftnlen uplo_len) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + doublereal d__1; + doublecomplex z__1, z__2, z__3, z__4; + + /* Builtin functions */ + void d_cnjg(doublecomplex *, doublecomplex *); + + /* Local variables */ + integer i__, j, ix, iy, jx, jy, kx, ky, info; + doublecomplex temp1, temp2; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + + +/* -- Reference BLAS level2 routine -- */ +/* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --x; + --y; + + /* Function Body */ + info = 0; + if (! lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, (char *)"L", ( + ftnlen)1, (ftnlen)1)) { + info = 1; + } else if (*n < 0) { + info = 2; + } else if (*lda < max(1,*n)) { + info = 5; + } else if (*incx == 0) { + info = 7; + } else if (*incy == 0) { + info = 10; + } + if (info != 0) { + xerbla_((char *)"ZHEMV ", &info, (ftnlen)6); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0 || alpha->r == 0. && alpha->i == 0. && (beta->r == 1. && + beta->i == 0.)) { + return 0; + } + +/* Set up the start points in X and Y. */ + + if (*incx > 0) { + kx = 1; + } else { + kx = 1 - (*n - 1) * *incx; + } + if (*incy > 0) { + ky = 1; + } else { + ky = 1 - (*n - 1) * *incy; + } + +/* Start the operations. In this version the elements of A are */ +/* accessed sequentially with one pass through the triangular part */ +/* of A. */ + +/* First form y := beta*y. */ + + if (beta->r != 1. || beta->i != 0.) { + if (*incy == 1) { + if (beta->r == 0. && beta->i == 0.) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + y[i__2].r = 0., y[i__2].i = 0.; +/* L10: */ + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + i__3 = i__; + z__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, + z__1.i = beta->r * y[i__3].i + beta->i * y[i__3] + .r; + y[i__2].r = z__1.r, y[i__2].i = z__1.i; +/* L20: */ + } + } + } else { + iy = ky; + if (beta->r == 0. && beta->i == 0.) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = iy; + y[i__2].r = 0., y[i__2].i = 0.; + iy += *incy; +/* L30: */ + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = iy; + i__3 = iy; + z__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, + z__1.i = beta->r * y[i__3].i + beta->i * y[i__3] + .r; + y[i__2].r = z__1.r, y[i__2].i = z__1.i; + iy += *incy; +/* L40: */ + } + } + } + } + if (alpha->r == 0. && alpha->i == 0.) { + return 0; + } + if (lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1)) { + +/* Form y when A is stored in upper triangle. */ + + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, z__1.i = + alpha->r * x[i__2].i + alpha->i * x[i__2].r; + temp1.r = z__1.r, temp1.i = z__1.i; + temp2.r = 0., temp2.i = 0.; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__; + i__4 = i__; + i__5 = i__ + j * a_dim1; + z__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, + z__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5] + .r; + z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i; + y[i__3].r = z__1.r, y[i__3].i = z__1.i; + d_cnjg(&z__3, &a[i__ + j * a_dim1]); + i__3 = i__; + z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, z__2.i = + z__3.r * x[i__3].i + z__3.i * x[i__3].r; + z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i; + temp2.r = z__1.r, temp2.i = z__1.i; +/* L50: */ + } + i__2 = j; + i__3 = j; + i__4 = j + j * a_dim1; + d__1 = a[i__4].r; + z__3.r = d__1 * temp1.r, z__3.i = d__1 * temp1.i; + z__2.r = y[i__3].r + z__3.r, z__2.i = y[i__3].i + z__3.i; + z__4.r = alpha->r * temp2.r - alpha->i * temp2.i, z__4.i = + alpha->r * temp2.i + alpha->i * temp2.r; + z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i; + y[i__2].r = z__1.r, y[i__2].i = z__1.i; +/* L60: */ + } + } else { + jx = kx; + jy = ky; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jx; + z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, z__1.i = + alpha->r * x[i__2].i + alpha->i * x[i__2].r; + temp1.r = z__1.r, temp1.i = z__1.i; + temp2.r = 0., temp2.i = 0.; + ix = kx; + iy = ky; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = iy; + i__4 = iy; + i__5 = i__ + j * a_dim1; + z__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, + z__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5] + .r; + z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i; + y[i__3].r = z__1.r, y[i__3].i = z__1.i; + d_cnjg(&z__3, &a[i__ + j * a_dim1]); + i__3 = ix; + z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, z__2.i = + z__3.r * x[i__3].i + z__3.i * x[i__3].r; + z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i; + temp2.r = z__1.r, temp2.i = z__1.i; + ix += *incx; + iy += *incy; +/* L70: */ + } + i__2 = jy; + i__3 = jy; + i__4 = j + j * a_dim1; + d__1 = a[i__4].r; + z__3.r = d__1 * temp1.r, z__3.i = d__1 * temp1.i; + z__2.r = y[i__3].r + z__3.r, z__2.i = y[i__3].i + z__3.i; + z__4.r = alpha->r * temp2.r - alpha->i * temp2.i, z__4.i = + alpha->r * temp2.i + alpha->i * temp2.r; + z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i; + y[i__2].r = z__1.r, y[i__2].i = z__1.i; + jx += *incx; + jy += *incy; +/* L80: */ + } + } + } else { + +/* Form y when A is stored in lower triangle. */ + + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, z__1.i = + alpha->r * x[i__2].i + alpha->i * x[i__2].r; + temp1.r = z__1.r, temp1.i = z__1.i; + temp2.r = 0., temp2.i = 0.; + i__2 = j; + i__3 = j; + i__4 = j + j * a_dim1; + d__1 = a[i__4].r; + z__2.r = d__1 * temp1.r, z__2.i = d__1 * temp1.i; + z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i; + y[i__2].r = z__1.r, y[i__2].i = z__1.i; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + i__3 = i__; + i__4 = i__; + i__5 = i__ + j * a_dim1; + z__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, + z__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5] + .r; + z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i; + y[i__3].r = z__1.r, y[i__3].i = z__1.i; + d_cnjg(&z__3, &a[i__ + j * a_dim1]); + i__3 = i__; + z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, z__2.i = + z__3.r * x[i__3].i + z__3.i * x[i__3].r; + z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i; + temp2.r = z__1.r, temp2.i = z__1.i; +/* L90: */ + } + i__2 = j; + i__3 = j; + z__2.r = alpha->r * temp2.r - alpha->i * temp2.i, z__2.i = + alpha->r * temp2.i + alpha->i * temp2.r; + z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i; + y[i__2].r = z__1.r, y[i__2].i = z__1.i; +/* L100: */ + } + } else { + jx = kx; + jy = ky; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jx; + z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, z__1.i = + alpha->r * x[i__2].i + alpha->i * x[i__2].r; + temp1.r = z__1.r, temp1.i = z__1.i; + temp2.r = 0., temp2.i = 0.; + i__2 = jy; + i__3 = jy; + i__4 = j + j * a_dim1; + d__1 = a[i__4].r; + z__2.r = d__1 * temp1.r, z__2.i = d__1 * temp1.i; + z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i; + y[i__2].r = z__1.r, y[i__2].i = z__1.i; + ix = jx; + iy = jy; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + ix += *incx; + iy += *incy; + i__3 = iy; + i__4 = iy; + i__5 = i__ + j * a_dim1; + z__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, + z__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5] + .r; + z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i; + y[i__3].r = z__1.r, y[i__3].i = z__1.i; + d_cnjg(&z__3, &a[i__ + j * a_dim1]); + i__3 = ix; + z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, z__2.i = + z__3.r * x[i__3].i + z__3.i * x[i__3].r; + z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i; + temp2.r = z__1.r, temp2.i = z__1.i; +/* L110: */ + } + i__2 = jy; + i__3 = jy; + z__2.r = alpha->r * temp2.r - alpha->i * temp2.i, z__2.i = + alpha->r * temp2.i + alpha->i * temp2.r; + z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i; + y[i__2].r = z__1.r, y[i__2].i = z__1.i; + jx += *incx; + jy += *incy; +/* L120: */ + } + } + } + + return 0; + +/* End of ZHEMV */ + +} /* zhemv_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/zher2.cpp b/lib/linalg/zher2.cpp new file mode 100644 index 0000000000..2f77acd257 --- /dev/null +++ b/lib/linalg/zher2.cpp @@ -0,0 +1,519 @@ +/* fortran/zher2.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* > \brief \b ZHER2 */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZHER2(UPLO,N,ALPHA,X,INCX,Y,INCY,A,LDA) */ + +/* .. Scalar Arguments .. */ +/* COMPLEX*16 ALPHA */ +/* INTEGER INCX,INCY,LDA,N */ +/* CHARACTER UPLO */ +/* .. */ +/* .. Array Arguments .. */ +/* COMPLEX*16 A(LDA,*),X(*),Y(*) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZHER2 performs the hermitian rank 2 operation */ +/* > */ +/* > A := alpha*x*y**H + conjg( alpha )*y*x**H + A, */ +/* > */ +/* > where alpha is a scalar, x and y are n element vectors and A is an n */ +/* > by n hermitian matrix. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > On entry, UPLO specifies whether the upper or lower */ +/* > triangular part of the array A is to be referenced as */ +/* > follows: */ +/* > */ +/* > UPLO = 'U' or 'u' Only the upper triangular part of A */ +/* > is to be referenced. */ +/* > */ +/* > UPLO = 'L' or 'l' Only the lower triangular part of A */ +/* > is to be referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > On entry, N specifies the order of the matrix A. */ +/* > N must be at least zero. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] ALPHA */ +/* > \verbatim */ +/* > ALPHA is COMPLEX*16 */ +/* > On entry, ALPHA specifies the scalar alpha. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] X */ +/* > \verbatim */ +/* > X is COMPLEX*16 array, dimension at least */ +/* > ( 1 + ( n - 1 )*abs( INCX ) ). */ +/* > Before entry, the incremented array X must contain the n */ +/* > element vector x. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] INCX */ +/* > \verbatim */ +/* > INCX is INTEGER */ +/* > On entry, INCX specifies the increment for the elements of */ +/* > X. INCX must not be zero. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] Y */ +/* > \verbatim */ +/* > Y is COMPLEX*16 array, dimension at least */ +/* > ( 1 + ( n - 1 )*abs( INCY ) ). */ +/* > Before entry, the incremented array Y must contain the n */ +/* > element vector y. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] INCY */ +/* > \verbatim */ +/* > INCY is INTEGER */ +/* > On entry, INCY specifies the increment for the elements of */ +/* > Y. INCY must not be zero. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension ( LDA, N ) */ +/* > Before entry with UPLO = 'U' or 'u', the leading n by n */ +/* > upper triangular part of the array A must contain the upper */ +/* > triangular part of the hermitian matrix and the strictly */ +/* > lower triangular part of A is not referenced. On exit, the */ +/* > upper triangular part of the array A is overwritten by the */ +/* > upper triangular part of the updated matrix. */ +/* > Before entry with UPLO = 'L' or 'l', the leading n by n */ +/* > lower triangular part of the array A must contain the lower */ +/* > triangular part of the hermitian matrix and the strictly */ +/* > upper triangular part of A is not referenced. On exit, the */ +/* > lower triangular part of the array A is overwritten by the */ +/* > lower triangular part of the updated matrix. */ +/* > Note that the imaginary parts of the diagonal elements need */ +/* > not be set, they are assumed to be zero, and on exit they */ +/* > are set to zero. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > On entry, LDA specifies the first dimension of A as declared */ +/* > in the calling (sub) program. LDA must be at least */ +/* > max( 1, n ). */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup complex16_blas_level2 */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > Level 2 Blas routine. */ +/* > */ +/* > -- Written on 22-October-1986. */ +/* > Jack Dongarra, Argonne National Lab. */ +/* > Jeremy Du Croz, Nag Central Office. */ +/* > Sven Hammarling, Nag Central Office. */ +/* > Richard Hanson, Sandia National Labs. */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int zher2_(char *uplo, integer *n, doublecomplex *alpha, + doublecomplex *x, integer *incx, doublecomplex *y, integer *incy, + doublecomplex *a, integer *lda, ftnlen uplo_len) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6; + doublereal d__1; + doublecomplex z__1, z__2, z__3, z__4; + + /* Builtin functions */ + void d_cnjg(doublecomplex *, doublecomplex *); + + /* Local variables */ + integer i__, j, ix, iy, jx, jy, kx, ky, info; + doublecomplex temp1, temp2; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + + +/* -- Reference BLAS level2 routine -- */ +/* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + --x; + --y; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + info = 0; + if (! lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, (char *)"L", ( + ftnlen)1, (ftnlen)1)) { + info = 1; + } else if (*n < 0) { + info = 2; + } else if (*incx == 0) { + info = 5; + } else if (*incy == 0) { + info = 7; + } else if (*lda < max(1,*n)) { + info = 9; + } + if (info != 0) { + xerbla_((char *)"ZHER2 ", &info, (ftnlen)6); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0 || alpha->r == 0. && alpha->i == 0.) { + return 0; + } + +/* Set up the start points in X and Y if the increments are not both */ +/* unity. */ + + if (*incx != 1 || *incy != 1) { + if (*incx > 0) { + kx = 1; + } else { + kx = 1 - (*n - 1) * *incx; + } + if (*incy > 0) { + ky = 1; + } else { + ky = 1 - (*n - 1) * *incy; + } + jx = kx; + jy = ky; + } + +/* Start the operations. In this version the elements of A are */ +/* accessed sequentially with one pass through the triangular part */ +/* of A. */ + + if (lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1)) { + +/* Form A when A is stored in the upper triangle. */ + + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + i__3 = j; + if (x[i__2].r != 0. || x[i__2].i != 0. || (y[i__3].r != 0. || + y[i__3].i != 0.)) { + d_cnjg(&z__2, &y[j]); + z__1.r = alpha->r * z__2.r - alpha->i * z__2.i, z__1.i = + alpha->r * z__2.i + alpha->i * z__2.r; + temp1.r = z__1.r, temp1.i = z__1.i; + i__2 = j; + z__2.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, + z__2.i = alpha->r * x[i__2].i + alpha->i * x[i__2] + .r; + d_cnjg(&z__1, &z__2); + temp2.r = z__1.r, temp2.i = z__1.i; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + i__5 = i__; + z__3.r = x[i__5].r * temp1.r - x[i__5].i * temp1.i, + z__3.i = x[i__5].r * temp1.i + x[i__5].i * + temp1.r; + z__2.r = a[i__4].r + z__3.r, z__2.i = a[i__4].i + + z__3.i; + i__6 = i__; + z__4.r = y[i__6].r * temp2.r - y[i__6].i * temp2.i, + z__4.i = y[i__6].r * temp2.i + y[i__6].i * + temp2.r; + z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; +/* L10: */ + } + i__2 = j + j * a_dim1; + i__3 = j + j * a_dim1; + i__4 = j; + z__2.r = x[i__4].r * temp1.r - x[i__4].i * temp1.i, + z__2.i = x[i__4].r * temp1.i + x[i__4].i * + temp1.r; + i__5 = j; + z__3.r = y[i__5].r * temp2.r - y[i__5].i * temp2.i, + z__3.i = y[i__5].r * temp2.i + y[i__5].i * + temp2.r; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; + d__1 = a[i__3].r + z__1.r; + a[i__2].r = d__1, a[i__2].i = 0.; + } else { + i__2 = j + j * a_dim1; + i__3 = j + j * a_dim1; + d__1 = a[i__3].r; + a[i__2].r = d__1, a[i__2].i = 0.; + } +/* L20: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jx; + i__3 = jy; + if (x[i__2].r != 0. || x[i__2].i != 0. || (y[i__3].r != 0. || + y[i__3].i != 0.)) { + d_cnjg(&z__2, &y[jy]); + z__1.r = alpha->r * z__2.r - alpha->i * z__2.i, z__1.i = + alpha->r * z__2.i + alpha->i * z__2.r; + temp1.r = z__1.r, temp1.i = z__1.i; + i__2 = jx; + z__2.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, + z__2.i = alpha->r * x[i__2].i + alpha->i * x[i__2] + .r; + d_cnjg(&z__1, &z__2); + temp2.r = z__1.r, temp2.i = z__1.i; + ix = kx; + iy = ky; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + i__5 = ix; + z__3.r = x[i__5].r * temp1.r - x[i__5].i * temp1.i, + z__3.i = x[i__5].r * temp1.i + x[i__5].i * + temp1.r; + z__2.r = a[i__4].r + z__3.r, z__2.i = a[i__4].i + + z__3.i; + i__6 = iy; + z__4.r = y[i__6].r * temp2.r - y[i__6].i * temp2.i, + z__4.i = y[i__6].r * temp2.i + y[i__6].i * + temp2.r; + z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; + ix += *incx; + iy += *incy; +/* L30: */ + } + i__2 = j + j * a_dim1; + i__3 = j + j * a_dim1; + i__4 = jx; + z__2.r = x[i__4].r * temp1.r - x[i__4].i * temp1.i, + z__2.i = x[i__4].r * temp1.i + x[i__4].i * + temp1.r; + i__5 = jy; + z__3.r = y[i__5].r * temp2.r - y[i__5].i * temp2.i, + z__3.i = y[i__5].r * temp2.i + y[i__5].i * + temp2.r; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; + d__1 = a[i__3].r + z__1.r; + a[i__2].r = d__1, a[i__2].i = 0.; + } else { + i__2 = j + j * a_dim1; + i__3 = j + j * a_dim1; + d__1 = a[i__3].r; + a[i__2].r = d__1, a[i__2].i = 0.; + } + jx += *incx; + jy += *incy; +/* L40: */ + } + } + } else { + +/* Form A when A is stored in the lower triangle. */ + + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + i__3 = j; + if (x[i__2].r != 0. || x[i__2].i != 0. || (y[i__3].r != 0. || + y[i__3].i != 0.)) { + d_cnjg(&z__2, &y[j]); + z__1.r = alpha->r * z__2.r - alpha->i * z__2.i, z__1.i = + alpha->r * z__2.i + alpha->i * z__2.r; + temp1.r = z__1.r, temp1.i = z__1.i; + i__2 = j; + z__2.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, + z__2.i = alpha->r * x[i__2].i + alpha->i * x[i__2] + .r; + d_cnjg(&z__1, &z__2); + temp2.r = z__1.r, temp2.i = z__1.i; + i__2 = j + j * a_dim1; + i__3 = j + j * a_dim1; + i__4 = j; + z__2.r = x[i__4].r * temp1.r - x[i__4].i * temp1.i, + z__2.i = x[i__4].r * temp1.i + x[i__4].i * + temp1.r; + i__5 = j; + z__3.r = y[i__5].r * temp2.r - y[i__5].i * temp2.i, + z__3.i = y[i__5].r * temp2.i + y[i__5].i * + temp2.r; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; + d__1 = a[i__3].r + z__1.r; + a[i__2].r = d__1, a[i__2].i = 0.; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + i__5 = i__; + z__3.r = x[i__5].r * temp1.r - x[i__5].i * temp1.i, + z__3.i = x[i__5].r * temp1.i + x[i__5].i * + temp1.r; + z__2.r = a[i__4].r + z__3.r, z__2.i = a[i__4].i + + z__3.i; + i__6 = i__; + z__4.r = y[i__6].r * temp2.r - y[i__6].i * temp2.i, + z__4.i = y[i__6].r * temp2.i + y[i__6].i * + temp2.r; + z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; +/* L50: */ + } + } else { + i__2 = j + j * a_dim1; + i__3 = j + j * a_dim1; + d__1 = a[i__3].r; + a[i__2].r = d__1, a[i__2].i = 0.; + } +/* L60: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jx; + i__3 = jy; + if (x[i__2].r != 0. || x[i__2].i != 0. || (y[i__3].r != 0. || + y[i__3].i != 0.)) { + d_cnjg(&z__2, &y[jy]); + z__1.r = alpha->r * z__2.r - alpha->i * z__2.i, z__1.i = + alpha->r * z__2.i + alpha->i * z__2.r; + temp1.r = z__1.r, temp1.i = z__1.i; + i__2 = jx; + z__2.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, + z__2.i = alpha->r * x[i__2].i + alpha->i * x[i__2] + .r; + d_cnjg(&z__1, &z__2); + temp2.r = z__1.r, temp2.i = z__1.i; + i__2 = j + j * a_dim1; + i__3 = j + j * a_dim1; + i__4 = jx; + z__2.r = x[i__4].r * temp1.r - x[i__4].i * temp1.i, + z__2.i = x[i__4].r * temp1.i + x[i__4].i * + temp1.r; + i__5 = jy; + z__3.r = y[i__5].r * temp2.r - y[i__5].i * temp2.i, + z__3.i = y[i__5].r * temp2.i + y[i__5].i * + temp2.r; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; + d__1 = a[i__3].r + z__1.r; + a[i__2].r = d__1, a[i__2].i = 0.; + ix = jx; + iy = jy; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + ix += *incx; + iy += *incy; + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + i__5 = ix; + z__3.r = x[i__5].r * temp1.r - x[i__5].i * temp1.i, + z__3.i = x[i__5].r * temp1.i + x[i__5].i * + temp1.r; + z__2.r = a[i__4].r + z__3.r, z__2.i = a[i__4].i + + z__3.i; + i__6 = iy; + z__4.r = y[i__6].r * temp2.r - y[i__6].i * temp2.i, + z__4.i = y[i__6].r * temp2.i + y[i__6].i * + temp2.r; + z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; +/* L70: */ + } + } else { + i__2 = j + j * a_dim1; + i__3 = j + j * a_dim1; + d__1 = a[i__3].r; + a[i__2].r = d__1, a[i__2].i = 0.; + } + jx += *incx; + jy += *incy; +/* L80: */ + } + } + } + + return 0; + +/* End of ZHER2 */ + +} /* zher2_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/zher2k.cpp b/lib/linalg/zher2k.cpp new file mode 100644 index 0000000000..1b94010d33 --- /dev/null +++ b/lib/linalg/zher2k.cpp @@ -0,0 +1,750 @@ +/* fortran/zher2k.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* > \brief \b ZHER2K */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZHER2K(UPLO,TRANS,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC) */ + +/* .. Scalar Arguments .. */ +/* COMPLEX*16 ALPHA */ +/* DOUBLE PRECISION BETA */ +/* INTEGER K,LDA,LDB,LDC,N */ +/* CHARACTER TRANS,UPLO */ +/* .. */ +/* .. Array Arguments .. */ +/* COMPLEX*16 A(LDA,*),B(LDB,*),C(LDC,*) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZHER2K performs one of the hermitian rank 2k operations */ +/* > */ +/* > C := alpha*A*B**H + conjg( alpha )*B*A**H + beta*C, */ +/* > */ +/* > or */ +/* > */ +/* > C := alpha*A**H*B + conjg( alpha )*B**H*A + beta*C, */ +/* > */ +/* > where alpha and beta are scalars with beta real, C is an n by n */ +/* > hermitian matrix and A and B are n by k matrices in the first case */ +/* > and k by n matrices in the second case. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > On entry, UPLO specifies whether the upper or lower */ +/* > triangular part of the array C is to be referenced as */ +/* > follows: */ +/* > */ +/* > UPLO = 'U' or 'u' Only the upper triangular part of C */ +/* > is to be referenced. */ +/* > */ +/* > UPLO = 'L' or 'l' Only the lower triangular part of C */ +/* > is to be referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TRANS */ +/* > \verbatim */ +/* > TRANS is CHARACTER*1 */ +/* > On entry, TRANS specifies the operation to be performed as */ +/* > follows: */ +/* > */ +/* > TRANS = 'N' or 'n' C := alpha*A*B**H + */ +/* > conjg( alpha )*B*A**H + */ +/* > beta*C. */ +/* > */ +/* > TRANS = 'C' or 'c' C := alpha*A**H*B + */ +/* > conjg( alpha )*B**H*A + */ +/* > beta*C. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > On entry, N specifies the order of the matrix C. N must be */ +/* > at least zero. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] K */ +/* > \verbatim */ +/* > K is INTEGER */ +/* > On entry with TRANS = 'N' or 'n', K specifies the number */ +/* > of columns of the matrices A and B, and on entry with */ +/* > TRANS = 'C' or 'c', K specifies the number of rows of the */ +/* > matrices A and B. K must be at least zero. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] ALPHA */ +/* > \verbatim */ +/* > ALPHA is COMPLEX*16 . */ +/* > On entry, ALPHA specifies the scalar alpha. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension ( LDA, ka ), where ka is */ +/* > k when TRANS = 'N' or 'n', and is n otherwise. */ +/* > Before entry with TRANS = 'N' or 'n', the leading n by k */ +/* > part of the array A must contain the matrix A, otherwise */ +/* > the leading k by n part of the array A must contain the */ +/* > matrix A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > On entry, LDA specifies the first dimension of A as declared */ +/* > in the calling (sub) program. When TRANS = 'N' or 'n' */ +/* > then LDA must be at least max( 1, n ), otherwise LDA must */ +/* > be at least max( 1, k ). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] B */ +/* > \verbatim */ +/* > B is COMPLEX*16 array, dimension ( LDB, kb ), where kb is */ +/* > k when TRANS = 'N' or 'n', and is n otherwise. */ +/* > Before entry with TRANS = 'N' or 'n', the leading n by k */ +/* > part of the array B must contain the matrix B, otherwise */ +/* > the leading k by n part of the array B must contain the */ +/* > matrix B. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > On entry, LDB specifies the first dimension of B as declared */ +/* > in the calling (sub) program. When TRANS = 'N' or 'n' */ +/* > then LDB must be at least max( 1, n ), otherwise LDB must */ +/* > be at least max( 1, k ). */ +/* > Unchanged on exit. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] BETA */ +/* > \verbatim */ +/* > BETA is DOUBLE PRECISION . */ +/* > On entry, BETA specifies the scalar beta. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] C */ +/* > \verbatim */ +/* > C is COMPLEX*16 array, dimension ( LDC, N ) */ +/* > Before entry with UPLO = 'U' or 'u', the leading n by n */ +/* > upper triangular part of the array C must contain the upper */ +/* > triangular part of the hermitian matrix and the strictly */ +/* > lower triangular part of C is not referenced. On exit, the */ +/* > upper triangular part of the array C is overwritten by the */ +/* > upper triangular part of the updated matrix. */ +/* > Before entry with UPLO = 'L' or 'l', the leading n by n */ +/* > lower triangular part of the array C must contain the lower */ +/* > triangular part of the hermitian matrix and the strictly */ +/* > upper triangular part of C is not referenced. On exit, the */ +/* > lower triangular part of the array C is overwritten by the */ +/* > lower triangular part of the updated matrix. */ +/* > Note that the imaginary parts of the diagonal elements need */ +/* > not be set, they are assumed to be zero, and on exit they */ +/* > are set to zero. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDC */ +/* > \verbatim */ +/* > LDC is INTEGER */ +/* > On entry, LDC specifies the first dimension of C as declared */ +/* > in the calling (sub) program. LDC must be at least */ +/* > max( 1, n ). */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup complex16_blas_level3 */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > Level 3 Blas routine. */ +/* > */ +/* > -- Written on 8-February-1989. */ +/* > Jack Dongarra, Argonne National Laboratory. */ +/* > Iain Duff, AERE Harwell. */ +/* > Jeremy Du Croz, Numerical Algorithms Group Ltd. */ +/* > Sven Hammarling, Numerical Algorithms Group Ltd. */ +/* > */ +/* > -- Modified 8-Nov-93 to set C(J,J) to DBLE( C(J,J) ) when BETA = 1. */ +/* > Ed Anderson, Cray Research Inc. */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int zher2k_(char *uplo, char *trans, integer *n, integer *k, + doublecomplex *alpha, doublecomplex *a, integer *lda, doublecomplex * + b, integer *ldb, doublereal *beta, doublecomplex *c__, integer *ldc, + ftnlen uplo_len, ftnlen trans_len) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, + i__3, i__4, i__5, i__6, i__7; + doublereal d__1; + doublecomplex z__1, z__2, z__3, z__4, z__5, z__6; + + /* Builtin functions */ + void d_cnjg(doublecomplex *, doublecomplex *); + + /* Local variables */ + integer i__, j, l, info; + doublecomplex temp1, temp2; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + integer nrowa; + logical upper; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + + +/* -- Reference BLAS level3 routine -- */ +/* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Parameters .. */ +/* .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + + /* Function Body */ + if (lsame_(trans, (char *)"N", (ftnlen)1, (ftnlen)1)) { + nrowa = *n; + } else { + nrowa = *k; + } + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + + info = 0; + if (! upper && ! lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + info = 1; + } else if (! lsame_(trans, (char *)"N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, + (char *)"C", (ftnlen)1, (ftnlen)1)) { + info = 2; + } else if (*n < 0) { + info = 3; + } else if (*k < 0) { + info = 4; + } else if (*lda < max(1,nrowa)) { + info = 7; + } else if (*ldb < max(1,nrowa)) { + info = 9; + } else if (*ldc < max(1,*n)) { + info = 12; + } + if (info != 0) { + xerbla_((char *)"ZHER2K", &info, (ftnlen)6); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0 || (alpha->r == 0. && alpha->i == 0. || *k == 0) && *beta == + 1.) { + return 0; + } + +/* And when alpha.eq.zero. */ + + if (alpha->r == 0. && alpha->i == 0.) { + if (upper) { + if (*beta == 0.) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + c__[i__3].r = 0., c__[i__3].i = 0.; +/* L10: */ + } +/* L20: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + z__1.r = *beta * c__[i__4].r, z__1.i = *beta * c__[ + i__4].i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; +/* L30: */ + } + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + d__1 = *beta * c__[i__3].r; + c__[i__2].r = d__1, c__[i__2].i = 0.; +/* L40: */ + } + } + } else { + if (*beta == 0.) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + c__[i__3].r = 0., c__[i__3].i = 0.; +/* L50: */ + } +/* L60: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + d__1 = *beta * c__[i__3].r; + c__[i__2].r = d__1, c__[i__2].i = 0.; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + z__1.r = *beta * c__[i__4].r, z__1.i = *beta * c__[ + i__4].i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; +/* L70: */ + } +/* L80: */ + } + } + } + return 0; + } + +/* Start the operations. */ + + if (lsame_(trans, (char *)"N", (ftnlen)1, (ftnlen)1)) { + +/* Form C := alpha*A*B**H + conjg( alpha )*B*A**H + */ +/* C. */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*beta == 0.) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + c__[i__3].r = 0., c__[i__3].i = 0.; +/* L90: */ + } + } else if (*beta != 1.) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + z__1.r = *beta * c__[i__4].r, z__1.i = *beta * c__[ + i__4].i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; +/* L100: */ + } + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + d__1 = *beta * c__[i__3].r; + c__[i__2].r = d__1, c__[i__2].i = 0.; + } else { + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + d__1 = c__[i__3].r; + c__[i__2].r = d__1, c__[i__2].i = 0.; + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + i__3 = j + l * a_dim1; + i__4 = j + l * b_dim1; + if (a[i__3].r != 0. || a[i__3].i != 0. || (b[i__4].r != + 0. || b[i__4].i != 0.)) { + d_cnjg(&z__2, &b[j + l * b_dim1]); + z__1.r = alpha->r * z__2.r - alpha->i * z__2.i, + z__1.i = alpha->r * z__2.i + alpha->i * + z__2.r; + temp1.r = z__1.r, temp1.i = z__1.i; + i__3 = j + l * a_dim1; + z__2.r = alpha->r * a[i__3].r - alpha->i * a[i__3].i, + z__2.i = alpha->r * a[i__3].i + alpha->i * a[ + i__3].r; + d_cnjg(&z__1, &z__2); + temp2.r = z__1.r, temp2.i = z__1.i; + i__3 = j - 1; + for (i__ = 1; i__ <= i__3; ++i__) { + i__4 = i__ + j * c_dim1; + i__5 = i__ + j * c_dim1; + i__6 = i__ + l * a_dim1; + z__3.r = a[i__6].r * temp1.r - a[i__6].i * + temp1.i, z__3.i = a[i__6].r * temp1.i + a[ + i__6].i * temp1.r; + z__2.r = c__[i__5].r + z__3.r, z__2.i = c__[i__5] + .i + z__3.i; + i__7 = i__ + l * b_dim1; + z__4.r = b[i__7].r * temp2.r - b[i__7].i * + temp2.i, z__4.i = b[i__7].r * temp2.i + b[ + i__7].i * temp2.r; + z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + + z__4.i; + c__[i__4].r = z__1.r, c__[i__4].i = z__1.i; +/* L110: */ + } + i__3 = j + j * c_dim1; + i__4 = j + j * c_dim1; + i__5 = j + l * a_dim1; + z__2.r = a[i__5].r * temp1.r - a[i__5].i * temp1.i, + z__2.i = a[i__5].r * temp1.i + a[i__5].i * + temp1.r; + i__6 = j + l * b_dim1; + z__3.r = b[i__6].r * temp2.r - b[i__6].i * temp2.i, + z__3.i = b[i__6].r * temp2.i + b[i__6].i * + temp2.r; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; + d__1 = c__[i__4].r + z__1.r; + c__[i__3].r = d__1, c__[i__3].i = 0.; + } +/* L120: */ + } +/* L130: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*beta == 0.) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + c__[i__3].r = 0., c__[i__3].i = 0.; +/* L140: */ + } + } else if (*beta != 1.) { + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + z__1.r = *beta * c__[i__4].r, z__1.i = *beta * c__[ + i__4].i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; +/* L150: */ + } + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + d__1 = *beta * c__[i__3].r; + c__[i__2].r = d__1, c__[i__2].i = 0.; + } else { + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + d__1 = c__[i__3].r; + c__[i__2].r = d__1, c__[i__2].i = 0.; + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + i__3 = j + l * a_dim1; + i__4 = j + l * b_dim1; + if (a[i__3].r != 0. || a[i__3].i != 0. || (b[i__4].r != + 0. || b[i__4].i != 0.)) { + d_cnjg(&z__2, &b[j + l * b_dim1]); + z__1.r = alpha->r * z__2.r - alpha->i * z__2.i, + z__1.i = alpha->r * z__2.i + alpha->i * + z__2.r; + temp1.r = z__1.r, temp1.i = z__1.i; + i__3 = j + l * a_dim1; + z__2.r = alpha->r * a[i__3].r - alpha->i * a[i__3].i, + z__2.i = alpha->r * a[i__3].i + alpha->i * a[ + i__3].r; + d_cnjg(&z__1, &z__2); + temp2.r = z__1.r, temp2.i = z__1.i; + i__3 = *n; + for (i__ = j + 1; i__ <= i__3; ++i__) { + i__4 = i__ + j * c_dim1; + i__5 = i__ + j * c_dim1; + i__6 = i__ + l * a_dim1; + z__3.r = a[i__6].r * temp1.r - a[i__6].i * + temp1.i, z__3.i = a[i__6].r * temp1.i + a[ + i__6].i * temp1.r; + z__2.r = c__[i__5].r + z__3.r, z__2.i = c__[i__5] + .i + z__3.i; + i__7 = i__ + l * b_dim1; + z__4.r = b[i__7].r * temp2.r - b[i__7].i * + temp2.i, z__4.i = b[i__7].r * temp2.i + b[ + i__7].i * temp2.r; + z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + + z__4.i; + c__[i__4].r = z__1.r, c__[i__4].i = z__1.i; +/* L160: */ + } + i__3 = j + j * c_dim1; + i__4 = j + j * c_dim1; + i__5 = j + l * a_dim1; + z__2.r = a[i__5].r * temp1.r - a[i__5].i * temp1.i, + z__2.i = a[i__5].r * temp1.i + a[i__5].i * + temp1.r; + i__6 = j + l * b_dim1; + z__3.r = b[i__6].r * temp2.r - b[i__6].i * temp2.i, + z__3.i = b[i__6].r * temp2.i + b[i__6].i * + temp2.r; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; + d__1 = c__[i__4].r + z__1.r; + c__[i__3].r = d__1, c__[i__3].i = 0.; + } +/* L170: */ + } +/* L180: */ + } + } + } else { + +/* Form C := alpha*A**H*B + conjg( alpha )*B**H*A + */ +/* C. */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + temp1.r = 0., temp1.i = 0.; + temp2.r = 0., temp2.i = 0.; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + d_cnjg(&z__3, &a[l + i__ * a_dim1]); + i__4 = l + j * b_dim1; + z__2.r = z__3.r * b[i__4].r - z__3.i * b[i__4].i, + z__2.i = z__3.r * b[i__4].i + z__3.i * b[i__4] + .r; + z__1.r = temp1.r + z__2.r, z__1.i = temp1.i + z__2.i; + temp1.r = z__1.r, temp1.i = z__1.i; + d_cnjg(&z__3, &b[l + i__ * b_dim1]); + i__4 = l + j * a_dim1; + z__2.r = z__3.r * a[i__4].r - z__3.i * a[i__4].i, + z__2.i = z__3.r * a[i__4].i + z__3.i * a[i__4] + .r; + z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i; + temp2.r = z__1.r, temp2.i = z__1.i; +/* L190: */ + } + if (i__ == j) { + if (*beta == 0.) { + i__3 = j + j * c_dim1; + z__2.r = alpha->r * temp1.r - alpha->i * temp1.i, + z__2.i = alpha->r * temp1.i + alpha->i * + temp1.r; + d_cnjg(&z__4, alpha); + z__3.r = z__4.r * temp2.r - z__4.i * temp2.i, + z__3.i = z__4.r * temp2.i + z__4.i * + temp2.r; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + + z__3.i; + d__1 = z__1.r; + c__[i__3].r = d__1, c__[i__3].i = 0.; + } else { + i__3 = j + j * c_dim1; + i__4 = j + j * c_dim1; + z__2.r = alpha->r * temp1.r - alpha->i * temp1.i, + z__2.i = alpha->r * temp1.i + alpha->i * + temp1.r; + d_cnjg(&z__4, alpha); + z__3.r = z__4.r * temp2.r - z__4.i * temp2.i, + z__3.i = z__4.r * temp2.i + z__4.i * + temp2.r; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + + z__3.i; + d__1 = *beta * c__[i__4].r + z__1.r; + c__[i__3].r = d__1, c__[i__3].i = 0.; + } + } else { + if (*beta == 0.) { + i__3 = i__ + j * c_dim1; + z__2.r = alpha->r * temp1.r - alpha->i * temp1.i, + z__2.i = alpha->r * temp1.i + alpha->i * + temp1.r; + d_cnjg(&z__4, alpha); + z__3.r = z__4.r * temp2.r - z__4.i * temp2.i, + z__3.i = z__4.r * temp2.i + z__4.i * + temp2.r; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + + z__3.i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } else { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + z__3.r = *beta * c__[i__4].r, z__3.i = *beta * + c__[i__4].i; + z__4.r = alpha->r * temp1.r - alpha->i * temp1.i, + z__4.i = alpha->r * temp1.i + alpha->i * + temp1.r; + z__2.r = z__3.r + z__4.r, z__2.i = z__3.i + + z__4.i; + d_cnjg(&z__6, alpha); + z__5.r = z__6.r * temp2.r - z__6.i * temp2.i, + z__5.i = z__6.r * temp2.i + z__6.i * + temp2.r; + z__1.r = z__2.r + z__5.r, z__1.i = z__2.i + + z__5.i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } + } +/* L200: */ + } +/* L210: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + temp1.r = 0., temp1.i = 0.; + temp2.r = 0., temp2.i = 0.; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + d_cnjg(&z__3, &a[l + i__ * a_dim1]); + i__4 = l + j * b_dim1; + z__2.r = z__3.r * b[i__4].r - z__3.i * b[i__4].i, + z__2.i = z__3.r * b[i__4].i + z__3.i * b[i__4] + .r; + z__1.r = temp1.r + z__2.r, z__1.i = temp1.i + z__2.i; + temp1.r = z__1.r, temp1.i = z__1.i; + d_cnjg(&z__3, &b[l + i__ * b_dim1]); + i__4 = l + j * a_dim1; + z__2.r = z__3.r * a[i__4].r - z__3.i * a[i__4].i, + z__2.i = z__3.r * a[i__4].i + z__3.i * a[i__4] + .r; + z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i; + temp2.r = z__1.r, temp2.i = z__1.i; +/* L220: */ + } + if (i__ == j) { + if (*beta == 0.) { + i__3 = j + j * c_dim1; + z__2.r = alpha->r * temp1.r - alpha->i * temp1.i, + z__2.i = alpha->r * temp1.i + alpha->i * + temp1.r; + d_cnjg(&z__4, alpha); + z__3.r = z__4.r * temp2.r - z__4.i * temp2.i, + z__3.i = z__4.r * temp2.i + z__4.i * + temp2.r; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + + z__3.i; + d__1 = z__1.r; + c__[i__3].r = d__1, c__[i__3].i = 0.; + } else { + i__3 = j + j * c_dim1; + i__4 = j + j * c_dim1; + z__2.r = alpha->r * temp1.r - alpha->i * temp1.i, + z__2.i = alpha->r * temp1.i + alpha->i * + temp1.r; + d_cnjg(&z__4, alpha); + z__3.r = z__4.r * temp2.r - z__4.i * temp2.i, + z__3.i = z__4.r * temp2.i + z__4.i * + temp2.r; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + + z__3.i; + d__1 = *beta * c__[i__4].r + z__1.r; + c__[i__3].r = d__1, c__[i__3].i = 0.; + } + } else { + if (*beta == 0.) { + i__3 = i__ + j * c_dim1; + z__2.r = alpha->r * temp1.r - alpha->i * temp1.i, + z__2.i = alpha->r * temp1.i + alpha->i * + temp1.r; + d_cnjg(&z__4, alpha); + z__3.r = z__4.r * temp2.r - z__4.i * temp2.i, + z__3.i = z__4.r * temp2.i + z__4.i * + temp2.r; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + + z__3.i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } else { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + z__3.r = *beta * c__[i__4].r, z__3.i = *beta * + c__[i__4].i; + z__4.r = alpha->r * temp1.r - alpha->i * temp1.i, + z__4.i = alpha->r * temp1.i + alpha->i * + temp1.r; + z__2.r = z__3.r + z__4.r, z__2.i = z__3.i + + z__4.i; + d_cnjg(&z__6, alpha); + z__5.r = z__6.r * temp2.r - z__6.i * temp2.i, + z__5.i = z__6.r * temp2.i + z__6.i * + temp2.r; + z__1.r = z__2.r + z__5.r, z__1.i = z__2.i + + z__5.i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } + } +/* L230: */ + } +/* L240: */ + } + } + } + + return 0; + +/* End of ZHER2K */ + +} /* zher2k_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/zhetd2.cpp b/lib/linalg/zhetd2.cpp new file mode 100644 index 0000000000..c6c59ad6d5 --- /dev/null +++ b/lib/linalg/zhetd2.cpp @@ -0,0 +1,439 @@ +/* fortran/zhetd2.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static doublecomplex c_b2 = {0.,0.}; +static integer c__1 = 1; + +/* > \brief \b ZHETD2 reduces a Hermitian matrix to real symmetric tridiagonal form by an unitary similarity t +ransformation (unblocked algorithm). */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZHETD2 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZHETD2( UPLO, N, A, LDA, D, E, TAU, INFO ) */ + +/* .. Scalar Arguments .. */ +/* CHARACTER UPLO */ +/* INTEGER INFO, LDA, N */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION D( * ), E( * ) */ +/* COMPLEX*16 A( LDA, * ), TAU( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZHETD2 reduces a complex Hermitian matrix A to real symmetric */ +/* > tridiagonal form T by a unitary similarity transformation: */ +/* > Q**H * A * Q = T. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > Specifies whether the upper or lower triangular part of the */ +/* > Hermitian matrix A is stored: */ +/* > = 'U': Upper triangular */ +/* > = 'L': Lower triangular */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA,N) */ +/* > On entry, the Hermitian matrix A. If UPLO = 'U', the leading */ +/* > n-by-n upper triangular part of A contains the upper */ +/* > triangular part of the matrix A, and the strictly lower */ +/* > triangular part of A is not referenced. If UPLO = 'L', the */ +/* > leading n-by-n lower triangular part of A contains the lower */ +/* > triangular part of the matrix A, and the strictly upper */ +/* > triangular part of A is not referenced. */ +/* > On exit, if UPLO = 'U', the diagonal and first superdiagonal */ +/* > of A are overwritten by the corresponding elements of the */ +/* > tridiagonal matrix T, and the elements above the first */ +/* > superdiagonal, with the array TAU, represent the unitary */ +/* > matrix Q as a product of elementary reflectors; if UPLO */ +/* > = 'L', the diagonal and first subdiagonal of A are over- */ +/* > written by the corresponding elements of the tridiagonal */ +/* > matrix T, and the elements below the first subdiagonal, with */ +/* > the array TAU, represent the unitary matrix Q as a product */ +/* > of elementary reflectors. See Further Details. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= max(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] D */ +/* > \verbatim */ +/* > D is DOUBLE PRECISION array, dimension (N) */ +/* > The diagonal elements of the tridiagonal matrix T: */ +/* > D(i) = A(i,i). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] E */ +/* > \verbatim */ +/* > E is DOUBLE PRECISION array, dimension (N-1) */ +/* > The off-diagonal elements of the tridiagonal matrix T: */ +/* > E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] TAU */ +/* > \verbatim */ +/* > TAU is COMPLEX*16 array, dimension (N-1) */ +/* > The scalar factors of the elementary reflectors (see Further */ +/* > Details). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup complex16HEcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > If UPLO = 'U', the matrix Q is represented as a product of elementary */ +/* > reflectors */ +/* > */ +/* > Q = H(n-1) . . . H(2) H(1). */ +/* > */ +/* > Each H(i) has the form */ +/* > */ +/* > H(i) = I - tau * v * v**H */ +/* > */ +/* > where tau is a complex scalar, and v is a complex vector with */ +/* > v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in */ +/* > A(1:i-1,i+1), and tau in TAU(i). */ +/* > */ +/* > If UPLO = 'L', the matrix Q is represented as a product of elementary */ +/* > reflectors */ +/* > */ +/* > Q = H(1) H(2) . . . H(n-1). */ +/* > */ +/* > Each H(i) has the form */ +/* > */ +/* > H(i) = I - tau * v * v**H */ +/* > */ +/* > where tau is a complex scalar, and v is a complex vector with */ +/* > v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i), */ +/* > and tau in TAU(i). */ +/* > */ +/* > The contents of A on exit are illustrated by the following examples */ +/* > with n = 5: */ +/* > */ +/* > if UPLO = 'U': if UPLO = 'L': */ +/* > */ +/* > ( d e v2 v3 v4 ) ( d ) */ +/* > ( d e v3 v4 ) ( e d ) */ +/* > ( d e v4 ) ( v1 e d ) */ +/* > ( d e ) ( v1 v2 e d ) */ +/* > ( d ) ( v1 v2 v3 e d ) */ +/* > */ +/* > where d and e denote diagonal and off-diagonal elements of T, and vi */ +/* > denotes an element of the vector defining H(i). */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int zhetd2_(char *uplo, integer *n, doublecomplex *a, + integer *lda, doublereal *d__, doublereal *e, doublecomplex *tau, + integer *info, ftnlen uplo_len) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + doublereal d__1; + doublecomplex z__1, z__2, z__3, z__4; + + /* Local variables */ + integer i__; + doublecomplex taui; + extern /* Subroutine */ int zher2_(char *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *, ftnlen); + doublecomplex alpha; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern /* Double Complex */ VOID zdotc_(doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *); + extern /* Subroutine */ int zhemv_(char *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, + doublecomplex *, doublecomplex *, integer *, ftnlen); + logical upper; + extern /* Subroutine */ int zaxpy_(integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *), xerbla_( + char *, integer *, ftnlen), zlarfg_(integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *); + + +/* -- LAPACK computational routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --d__; + --e; + --tau; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + if (! upper && ! lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"ZHETD2", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + if (*n <= 0) { + return 0; + } + + if (upper) { + +/* Reduce the upper triangle of A */ + + i__1 = *n + *n * a_dim1; + i__2 = *n + *n * a_dim1; + d__1 = a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; + for (i__ = *n - 1; i__ >= 1; --i__) { + +/* Generate elementary reflector H(i) = I - tau * v * v**H */ +/* to annihilate A(1:i-1,i+1) */ + + i__1 = i__ + (i__ + 1) * a_dim1; + alpha.r = a[i__1].r, alpha.i = a[i__1].i; + zlarfg_(&i__, &alpha, &a[(i__ + 1) * a_dim1 + 1], &c__1, &taui); + e[i__] = alpha.r; + + if (taui.r != 0. || taui.i != 0.) { + +/* Apply H(i) from both sides to A(1:i,1:i) */ + + i__1 = i__ + (i__ + 1) * a_dim1; + a[i__1].r = 1., a[i__1].i = 0.; + +/* Compute x := tau * A * v storing x in TAU(1:i) */ + + zhemv_(uplo, &i__, &taui, &a[a_offset], lda, &a[(i__ + 1) * + a_dim1 + 1], &c__1, &c_b2, &tau[1], &c__1, (ftnlen)1); + +/* Compute w := x - 1/2 * tau * (x**H * v) * v */ + + z__3.r = -.5, z__3.i = -0.; + z__2.r = z__3.r * taui.r - z__3.i * taui.i, z__2.i = z__3.r * + taui.i + z__3.i * taui.r; + zdotc_(&z__4, &i__, &tau[1], &c__1, &a[(i__ + 1) * a_dim1 + 1] + , &c__1); + z__1.r = z__2.r * z__4.r - z__2.i * z__4.i, z__1.i = z__2.r * + z__4.i + z__2.i * z__4.r; + alpha.r = z__1.r, alpha.i = z__1.i; + zaxpy_(&i__, &alpha, &a[(i__ + 1) * a_dim1 + 1], &c__1, &tau[ + 1], &c__1); + +/* Apply the transformation as a rank-2 update: */ +/* A := A - v * w**H - w * v**H */ + + z__1.r = -1., z__1.i = -0.; + zher2_(uplo, &i__, &z__1, &a[(i__ + 1) * a_dim1 + 1], &c__1, & + tau[1], &c__1, &a[a_offset], lda, (ftnlen)1); + + } else { + i__1 = i__ + i__ * a_dim1; + i__2 = i__ + i__ * a_dim1; + d__1 = a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; + } + i__1 = i__ + (i__ + 1) * a_dim1; + i__2 = i__; + a[i__1].r = e[i__2], a[i__1].i = 0.; + i__1 = i__ + 1 + (i__ + 1) * a_dim1; + d__[i__ + 1] = a[i__1].r; + i__1 = i__; + tau[i__1].r = taui.r, tau[i__1].i = taui.i; +/* L10: */ + } + i__1 = a_dim1 + 1; + d__[1] = a[i__1].r; + } else { + +/* Reduce the lower triangle of A */ + + i__1 = a_dim1 + 1; + i__2 = a_dim1 + 1; + d__1 = a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Generate elementary reflector H(i) = I - tau * v * v**H */ +/* to annihilate A(i+2:n,i) */ + + i__2 = i__ + 1 + i__ * a_dim1; + alpha.r = a[i__2].r, alpha.i = a[i__2].i; + i__2 = *n - i__; +/* Computing MIN */ + i__3 = i__ + 2; + zlarfg_(&i__2, &alpha, &a[min(i__3,*n) + i__ * a_dim1], &c__1, & + taui); + e[i__] = alpha.r; + + if (taui.r != 0. || taui.i != 0.) { + +/* Apply H(i) from both sides to A(i+1:n,i+1:n) */ + + i__2 = i__ + 1 + i__ * a_dim1; + a[i__2].r = 1., a[i__2].i = 0.; + +/* Compute x := tau * A * v storing y in TAU(i:n-1) */ + + i__2 = *n - i__; + zhemv_(uplo, &i__2, &taui, &a[i__ + 1 + (i__ + 1) * a_dim1], + lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b2, &tau[ + i__], &c__1, (ftnlen)1); + +/* Compute w := x - 1/2 * tau * (x**H * v) * v */ + + z__3.r = -.5, z__3.i = -0.; + z__2.r = z__3.r * taui.r - z__3.i * taui.i, z__2.i = z__3.r * + taui.i + z__3.i * taui.r; + i__2 = *n - i__; + zdotc_(&z__4, &i__2, &tau[i__], &c__1, &a[i__ + 1 + i__ * + a_dim1], &c__1); + z__1.r = z__2.r * z__4.r - z__2.i * z__4.i, z__1.i = z__2.r * + z__4.i + z__2.i * z__4.r; + alpha.r = z__1.r, alpha.i = z__1.i; + i__2 = *n - i__; + zaxpy_(&i__2, &alpha, &a[i__ + 1 + i__ * a_dim1], &c__1, &tau[ + i__], &c__1); + +/* Apply the transformation as a rank-2 update: */ +/* A := A - v * w**H - w * v**H */ + + i__2 = *n - i__; + z__1.r = -1., z__1.i = -0.; + zher2_(uplo, &i__2, &z__1, &a[i__ + 1 + i__ * a_dim1], &c__1, + &tau[i__], &c__1, &a[i__ + 1 + (i__ + 1) * a_dim1], + lda, (ftnlen)1); + + } else { + i__2 = i__ + 1 + (i__ + 1) * a_dim1; + i__3 = i__ + 1 + (i__ + 1) * a_dim1; + d__1 = a[i__3].r; + a[i__2].r = d__1, a[i__2].i = 0.; + } + i__2 = i__ + 1 + i__ * a_dim1; + i__3 = i__; + a[i__2].r = e[i__3], a[i__2].i = 0.; + i__2 = i__ + i__ * a_dim1; + d__[i__] = a[i__2].r; + i__2 = i__; + tau[i__2].r = taui.r, tau[i__2].i = taui.i; +/* L20: */ + } + i__1 = *n + *n * a_dim1; + d__[*n] = a[i__1].r; + } + + return 0; + +/* End of ZHETD2 */ + +} /* zhetd2_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/zhetrd.cpp b/lib/linalg/zhetrd.cpp new file mode 100644 index 0000000000..e1c8481bbb --- /dev/null +++ b/lib/linalg/zhetrd.cpp @@ -0,0 +1,463 @@ +/* fortran/zhetrd.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c_n1 = -1; +static integer c__3 = 3; +static integer c__2 = 2; +static doublereal c_b23 = 1.; + +/* > \brief \b ZHETRD */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZHETRD + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZHETRD( UPLO, N, A, LDA, D, E, TAU, WORK, LWORK, INFO ) */ + +/* .. Scalar Arguments .. */ +/* CHARACTER UPLO */ +/* INTEGER INFO, LDA, LWORK, N */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION D( * ), E( * ) */ +/* COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZHETRD reduces a complex Hermitian matrix A to real symmetric */ +/* > tridiagonal form T by a unitary similarity transformation: */ +/* > Q**H * A * Q = T. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': Upper triangle of A is stored; */ +/* > = 'L': Lower triangle of A is stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA,N) */ +/* > On entry, the Hermitian matrix A. If UPLO = 'U', the leading */ +/* > N-by-N upper triangular part of A contains the upper */ +/* > triangular part of the matrix A, and the strictly lower */ +/* > triangular part of A is not referenced. If UPLO = 'L', the */ +/* > leading N-by-N lower triangular part of A contains the lower */ +/* > triangular part of the matrix A, and the strictly upper */ +/* > triangular part of A is not referenced. */ +/* > On exit, if UPLO = 'U', the diagonal and first superdiagonal */ +/* > of A are overwritten by the corresponding elements of the */ +/* > tridiagonal matrix T, and the elements above the first */ +/* > superdiagonal, with the array TAU, represent the unitary */ +/* > matrix Q as a product of elementary reflectors; if UPLO */ +/* > = 'L', the diagonal and first subdiagonal of A are over- */ +/* > written by the corresponding elements of the tridiagonal */ +/* > matrix T, and the elements below the first subdiagonal, with */ +/* > the array TAU, represent the unitary matrix Q as a product */ +/* > of elementary reflectors. See Further Details. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= max(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] D */ +/* > \verbatim */ +/* > D is DOUBLE PRECISION array, dimension (N) */ +/* > The diagonal elements of the tridiagonal matrix T: */ +/* > D(i) = A(i,i). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] E */ +/* > \verbatim */ +/* > E is DOUBLE PRECISION array, dimension (N-1) */ +/* > The off-diagonal elements of the tridiagonal matrix T: */ +/* > E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] TAU */ +/* > \verbatim */ +/* > TAU is COMPLEX*16 array, dimension (N-1) */ +/* > The scalar factors of the elementary reflectors (see Further */ +/* > Details). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. LWORK >= 1. */ +/* > For optimum performance LWORK >= N*NB, where NB is the */ +/* > optimal blocksize. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup complex16HEcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > If UPLO = 'U', the matrix Q is represented as a product of elementary */ +/* > reflectors */ +/* > */ +/* > Q = H(n-1) . . . H(2) H(1). */ +/* > */ +/* > Each H(i) has the form */ +/* > */ +/* > H(i) = I - tau * v * v**H */ +/* > */ +/* > where tau is a complex scalar, and v is a complex vector with */ +/* > v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in */ +/* > A(1:i-1,i+1), and tau in TAU(i). */ +/* > */ +/* > If UPLO = 'L', the matrix Q is represented as a product of elementary */ +/* > reflectors */ +/* > */ +/* > Q = H(1) H(2) . . . H(n-1). */ +/* > */ +/* > Each H(i) has the form */ +/* > */ +/* > H(i) = I - tau * v * v**H */ +/* > */ +/* > where tau is a complex scalar, and v is a complex vector with */ +/* > v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i), */ +/* > and tau in TAU(i). */ +/* > */ +/* > The contents of A on exit are illustrated by the following examples */ +/* > with n = 5: */ +/* > */ +/* > if UPLO = 'U': if UPLO = 'L': */ +/* > */ +/* > ( d e v2 v3 v4 ) ( d ) */ +/* > ( d e v3 v4 ) ( e d ) */ +/* > ( d e v4 ) ( v1 e d ) */ +/* > ( d e ) ( v1 v2 e d ) */ +/* > ( d ) ( v1 v2 v3 e d ) */ +/* > */ +/* > where d and e denote diagonal and off-diagonal elements of T, and vi */ +/* > denotes an element of the vector defining H(i). */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int zhetrd_(char *uplo, integer *n, doublecomplex *a, + integer *lda, doublereal *d__, doublereal *e, doublecomplex *tau, + doublecomplex *work, integer *lwork, integer *info, ftnlen uplo_len) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + doublecomplex z__1; + + /* Local variables */ + integer i__, j, nb, kk, nx, iws; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + integer nbmin, iinfo; + logical upper; + extern /* Subroutine */ int zhetd2_(char *, integer *, doublecomplex *, + integer *, doublereal *, doublereal *, doublecomplex *, integer *, + ftnlen), zher2k_(char *, char *, integer *, integer *, + doublecomplex *, doublecomplex *, integer *, doublecomplex *, + integer *, doublereal *, doublecomplex *, integer *, ftnlen, + ftnlen), xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int zlatrd_(char *, integer *, integer *, + doublecomplex *, integer *, doublereal *, doublecomplex *, + doublecomplex *, integer *, ftnlen); + integer ldwork, lwkopt; + logical lquery; + + +/* -- LAPACK computational routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --d__; + --e; + --tau; + --work; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + lquery = *lwork == -1; + if (! upper && ! lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } else if (*lwork < 1 && ! lquery) { + *info = -9; + } + + if (*info == 0) { + +/* Determine the block size. */ + + nb = ilaenv_(&c__1, (char *)"ZHETRD", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, + (ftnlen)1); + lwkopt = *n * nb; + work[1].r = (doublereal) lwkopt, work[1].i = 0.; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"ZHETRD", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + work[1].r = 1., work[1].i = 0.; + return 0; + } + + nx = *n; + iws = 1; + if (nb > 1 && nb < *n) { + +/* Determine when to cross over from blocked to unblocked code */ +/* (last block is always handled by unblocked code). */ + +/* Computing MAX */ + i__1 = nb, i__2 = ilaenv_(&c__3, (char *)"ZHETRD", uplo, n, &c_n1, &c_n1, & + c_n1, (ftnlen)6, (ftnlen)1); + nx = max(i__1,i__2); + if (nx < *n) { + +/* Determine if workspace is large enough for blocked code. */ + + ldwork = *n; + iws = ldwork * nb; + if (*lwork < iws) { + +/* Not enough workspace to use optimal NB: determine the */ +/* minimum value of NB, and reduce NB or force use of */ +/* unblocked code by setting NX = N. */ + +/* Computing MAX */ + i__1 = *lwork / ldwork; + nb = max(i__1,1); + nbmin = ilaenv_(&c__2, (char *)"ZHETRD", uplo, n, &c_n1, &c_n1, &c_n1, + (ftnlen)6, (ftnlen)1); + if (nb < nbmin) { + nx = *n; + } + } + } else { + nx = *n; + } + } else { + nb = 1; + } + + if (upper) { + +/* Reduce the upper triangle of A. */ +/* Columns 1:kk are handled by the unblocked method. */ + + kk = *n - (*n - nx + nb - 1) / nb * nb; + i__1 = kk + 1; + i__2 = -nb; + for (i__ = *n - nb + 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += + i__2) { + +/* Reduce columns i:i+nb-1 to tridiagonal form and form the */ +/* matrix W which is needed to update the unreduced part of */ +/* the matrix */ + + i__3 = i__ + nb - 1; + zlatrd_(uplo, &i__3, &nb, &a[a_offset], lda, &e[1], &tau[1], & + work[1], &ldwork, (ftnlen)1); + +/* Update the unreduced submatrix A(1:i-1,1:i-1), using an */ +/* update of the form: A := A - V*W**H - W*V**H */ + + i__3 = i__ - 1; + z__1.r = -1., z__1.i = -0.; + zher2k_(uplo, (char *)"No transpose", &i__3, &nb, &z__1, &a[i__ * a_dim1 + + 1], lda, &work[1], &ldwork, &c_b23, &a[a_offset], lda, ( + ftnlen)1, (ftnlen)12); + +/* Copy superdiagonal elements back into A, and diagonal */ +/* elements into D */ + + i__3 = i__ + nb - 1; + for (j = i__; j <= i__3; ++j) { + i__4 = j - 1 + j * a_dim1; + i__5 = j - 1; + a[i__4].r = e[i__5], a[i__4].i = 0.; + i__4 = j + j * a_dim1; + d__[j] = a[i__4].r; +/* L10: */ + } +/* L20: */ + } + +/* Use unblocked code to reduce the last or only block */ + + zhetd2_(uplo, &kk, &a[a_offset], lda, &d__[1], &e[1], &tau[1], &iinfo, + (ftnlen)1); + } else { + +/* Reduce the lower triangle of A */ + + i__2 = *n - nx; + i__1 = nb; + for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) { + +/* Reduce columns i:i+nb-1 to tridiagonal form and form the */ +/* matrix W which is needed to update the unreduced part of */ +/* the matrix */ + + i__3 = *n - i__ + 1; + zlatrd_(uplo, &i__3, &nb, &a[i__ + i__ * a_dim1], lda, &e[i__], & + tau[i__], &work[1], &ldwork, (ftnlen)1); + +/* Update the unreduced submatrix A(i+nb:n,i+nb:n), using */ +/* an update of the form: A := A - V*W**H - W*V**H */ + + i__3 = *n - i__ - nb + 1; + z__1.r = -1., z__1.i = -0.; + zher2k_(uplo, (char *)"No transpose", &i__3, &nb, &z__1, &a[i__ + nb + + i__ * a_dim1], lda, &work[nb + 1], &ldwork, &c_b23, &a[ + i__ + nb + (i__ + nb) * a_dim1], lda, (ftnlen)1, (ftnlen) + 12); + +/* Copy subdiagonal elements back into A, and diagonal */ +/* elements into D */ + + i__3 = i__ + nb - 1; + for (j = i__; j <= i__3; ++j) { + i__4 = j + 1 + j * a_dim1; + i__5 = j; + a[i__4].r = e[i__5], a[i__4].i = 0.; + i__4 = j + j * a_dim1; + d__[j] = a[i__4].r; +/* L30: */ + } +/* L40: */ + } + +/* Use unblocked code to reduce the last or only block */ + + i__1 = *n - i__ + 1; + zhetd2_(uplo, &i__1, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[i__], + &tau[i__], &iinfo, (ftnlen)1); + } + + work[1].r = (doublereal) lwkopt, work[1].i = 0.; + return 0; + +/* End of ZHETRD */ + +} /* zhetrd_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/zhpr.cpp b/lib/linalg/zhpr.cpp new file mode 100644 index 0000000000..f3c36d5826 --- /dev/null +++ b/lib/linalg/zhpr.cpp @@ -0,0 +1,405 @@ +/* fortran/zhpr.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* > \brief \b ZHPR */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZHPR(UPLO,N,ALPHA,X,INCX,AP) */ + +/* .. Scalar Arguments .. */ +/* DOUBLE PRECISION ALPHA */ +/* INTEGER INCX,N */ +/* CHARACTER UPLO */ +/* .. */ +/* .. Array Arguments .. */ +/* COMPLEX*16 AP(*),X(*) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZHPR performs the hermitian rank 1 operation */ +/* > */ +/* > A := alpha*x*x**H + A, */ +/* > */ +/* > where alpha is a real scalar, x is an n element vector and A is an */ +/* > n by n hermitian matrix, supplied in packed form. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > On entry, UPLO specifies whether the upper or lower */ +/* > triangular part of the matrix A is supplied in the packed */ +/* > array AP as follows: */ +/* > */ +/* > UPLO = 'U' or 'u' The upper triangular part of A is */ +/* > supplied in AP. */ +/* > */ +/* > UPLO = 'L' or 'l' The lower triangular part of A is */ +/* > supplied in AP. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > On entry, N specifies the order of the matrix A. */ +/* > N must be at least zero. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] ALPHA */ +/* > \verbatim */ +/* > ALPHA is DOUBLE PRECISION. */ +/* > On entry, ALPHA specifies the scalar alpha. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] X */ +/* > \verbatim */ +/* > X is COMPLEX*16 array, dimension at least */ +/* > ( 1 + ( n - 1 )*abs( INCX ) ). */ +/* > Before entry, the incremented array X must contain the n */ +/* > element vector x. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] INCX */ +/* > \verbatim */ +/* > INCX is INTEGER */ +/* > On entry, INCX specifies the increment for the elements of */ +/* > X. INCX must not be zero. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] AP */ +/* > \verbatim */ +/* > AP is COMPLEX*16 array, dimension at least */ +/* > ( ( n*( n + 1 ) )/2 ). */ +/* > Before entry with UPLO = 'U' or 'u', the array AP must */ +/* > contain the upper triangular part of the hermitian matrix */ +/* > packed sequentially, column by column, so that AP( 1 ) */ +/* > contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) */ +/* > and a( 2, 2 ) respectively, and so on. On exit, the array */ +/* > AP is overwritten by the upper triangular part of the */ +/* > updated matrix. */ +/* > Before entry with UPLO = 'L' or 'l', the array AP must */ +/* > contain the lower triangular part of the hermitian matrix */ +/* > packed sequentially, column by column, so that AP( 1 ) */ +/* > contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) */ +/* > and a( 3, 1 ) respectively, and so on. On exit, the array */ +/* > AP is overwritten by the lower triangular part of the */ +/* > updated matrix. */ +/* > Note that the imaginary parts of the diagonal elements need */ +/* > not be set, they are assumed to be zero, and on exit they */ +/* > are set to zero. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup complex16_blas_level2 */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > Level 2 Blas routine. */ +/* > */ +/* > -- Written on 22-October-1986. */ +/* > Jack Dongarra, Argonne National Lab. */ +/* > Jeremy Du Croz, Nag Central Office. */ +/* > Sven Hammarling, Nag Central Office. */ +/* > Richard Hanson, Sandia National Labs. */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int zhpr_(char *uplo, integer *n, doublereal *alpha, + doublecomplex *x, integer *incx, doublecomplex *ap, ftnlen uplo_len) +{ + /* System generated locals */ + integer i__1, i__2, i__3, i__4, i__5; + doublereal d__1; + doublecomplex z__1, z__2; + + /* Builtin functions */ + void d_cnjg(doublecomplex *, doublecomplex *); + + /* Local variables */ + integer i__, j, k, kk, ix, jx, kx, info; + doublecomplex temp; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + + +/* -- Reference BLAS level2 routine -- */ +/* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + --ap; + --x; + + /* Function Body */ + info = 0; + if (! lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, (char *)"L", ( + ftnlen)1, (ftnlen)1)) { + info = 1; + } else if (*n < 0) { + info = 2; + } else if (*incx == 0) { + info = 5; + } + if (info != 0) { + xerbla_((char *)"ZHPR ", &info, (ftnlen)6); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0 || *alpha == 0.) { + return 0; + } + +/* Set the start point in X if the increment is not unity. */ + + if (*incx <= 0) { + kx = 1 - (*n - 1) * *incx; + } else if (*incx != 1) { + kx = 1; + } + +/* Start the operations. In this version the elements of the array AP */ +/* are accessed sequentially with one pass through AP. */ + + kk = 1; + if (lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1)) { + +/* Form A when upper triangle is stored in AP. */ + + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + if (x[i__2].r != 0. || x[i__2].i != 0.) { + d_cnjg(&z__2, &x[j]); + z__1.r = *alpha * z__2.r, z__1.i = *alpha * z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + k = kk; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = k; + i__4 = k; + i__5 = i__; + z__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, + z__2.i = x[i__5].r * temp.i + x[i__5].i * + temp.r; + z__1.r = ap[i__4].r + z__2.r, z__1.i = ap[i__4].i + + z__2.i; + ap[i__3].r = z__1.r, ap[i__3].i = z__1.i; + ++k; +/* L10: */ + } + i__2 = kk + j - 1; + i__3 = kk + j - 1; + i__4 = j; + z__1.r = x[i__4].r * temp.r - x[i__4].i * temp.i, z__1.i = + x[i__4].r * temp.i + x[i__4].i * temp.r; + d__1 = ap[i__3].r + z__1.r; + ap[i__2].r = d__1, ap[i__2].i = 0.; + } else { + i__2 = kk + j - 1; + i__3 = kk + j - 1; + d__1 = ap[i__3].r; + ap[i__2].r = d__1, ap[i__2].i = 0.; + } + kk += j; +/* L20: */ + } + } else { + jx = kx; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jx; + if (x[i__2].r != 0. || x[i__2].i != 0.) { + d_cnjg(&z__2, &x[jx]); + z__1.r = *alpha * z__2.r, z__1.i = *alpha * z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + ix = kx; + i__2 = kk + j - 2; + for (k = kk; k <= i__2; ++k) { + i__3 = k; + i__4 = k; + i__5 = ix; + z__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, + z__2.i = x[i__5].r * temp.i + x[i__5].i * + temp.r; + z__1.r = ap[i__4].r + z__2.r, z__1.i = ap[i__4].i + + z__2.i; + ap[i__3].r = z__1.r, ap[i__3].i = z__1.i; + ix += *incx; +/* L30: */ + } + i__2 = kk + j - 1; + i__3 = kk + j - 1; + i__4 = jx; + z__1.r = x[i__4].r * temp.r - x[i__4].i * temp.i, z__1.i = + x[i__4].r * temp.i + x[i__4].i * temp.r; + d__1 = ap[i__3].r + z__1.r; + ap[i__2].r = d__1, ap[i__2].i = 0.; + } else { + i__2 = kk + j - 1; + i__3 = kk + j - 1; + d__1 = ap[i__3].r; + ap[i__2].r = d__1, ap[i__2].i = 0.; + } + jx += *incx; + kk += j; +/* L40: */ + } + } + } else { + +/* Form A when lower triangle is stored in AP. */ + + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + if (x[i__2].r != 0. || x[i__2].i != 0.) { + d_cnjg(&z__2, &x[j]); + z__1.r = *alpha * z__2.r, z__1.i = *alpha * z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + i__2 = kk; + i__3 = kk; + i__4 = j; + z__1.r = temp.r * x[i__4].r - temp.i * x[i__4].i, z__1.i = + temp.r * x[i__4].i + temp.i * x[i__4].r; + d__1 = ap[i__3].r + z__1.r; + ap[i__2].r = d__1, ap[i__2].i = 0.; + k = kk + 1; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + i__3 = k; + i__4 = k; + i__5 = i__; + z__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, + z__2.i = x[i__5].r * temp.i + x[i__5].i * + temp.r; + z__1.r = ap[i__4].r + z__2.r, z__1.i = ap[i__4].i + + z__2.i; + ap[i__3].r = z__1.r, ap[i__3].i = z__1.i; + ++k; +/* L50: */ + } + } else { + i__2 = kk; + i__3 = kk; + d__1 = ap[i__3].r; + ap[i__2].r = d__1, ap[i__2].i = 0.; + } + kk = kk + *n - j + 1; +/* L60: */ + } + } else { + jx = kx; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jx; + if (x[i__2].r != 0. || x[i__2].i != 0.) { + d_cnjg(&z__2, &x[jx]); + z__1.r = *alpha * z__2.r, z__1.i = *alpha * z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + i__2 = kk; + i__3 = kk; + i__4 = jx; + z__1.r = temp.r * x[i__4].r - temp.i * x[i__4].i, z__1.i = + temp.r * x[i__4].i + temp.i * x[i__4].r; + d__1 = ap[i__3].r + z__1.r; + ap[i__2].r = d__1, ap[i__2].i = 0.; + ix = jx; + i__2 = kk + *n - j; + for (k = kk + 1; k <= i__2; ++k) { + ix += *incx; + i__3 = k; + i__4 = k; + i__5 = ix; + z__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, + z__2.i = x[i__5].r * temp.i + x[i__5].i * + temp.r; + z__1.r = ap[i__4].r + z__2.r, z__1.i = ap[i__4].i + + z__2.i; + ap[i__3].r = z__1.r, ap[i__3].i = z__1.i; +/* L70: */ + } + } else { + i__2 = kk; + i__3 = kk; + d__1 = ap[i__3].r; + ap[i__2].r = d__1, ap[i__2].i = 0.; + } + jx += *incx; + kk = kk + *n - j + 1; +/* L80: */ + } + } + } + + return 0; + +/* End of ZHPR */ + +} /* zhpr_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/zlacgv.cpp b/lib/linalg/zlacgv.cpp new file mode 100644 index 0000000000..46c1c5c0ce --- /dev/null +++ b/lib/linalg/zlacgv.cpp @@ -0,0 +1,157 @@ +/* fortran/zlacgv.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* > \brief \b ZLACGV conjugates a complex vector. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZLACGV + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZLACGV( N, X, INCX ) */ + +/* .. Scalar Arguments .. */ +/* INTEGER INCX, N */ +/* .. */ +/* .. Array Arguments .. */ +/* COMPLEX*16 X( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZLACGV conjugates a complex vector of length N. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The length of the vector X. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] X */ +/* > \verbatim */ +/* > X is COMPLEX*16 array, dimension */ +/* > (1+(N-1)*abs(INCX)) */ +/* > On entry, the vector of length N to be conjugated. */ +/* > On exit, X is overwritten with conjg(X). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] INCX */ +/* > \verbatim */ +/* > INCX is INTEGER */ +/* > The spacing between successive elements of X. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup complex16OTHERauxiliary */ + +/* ===================================================================== */ +/* Subroutine */ int zlacgv_(integer *n, doublecomplex *x, integer *incx) +{ + /* System generated locals */ + integer i__1, i__2; + doublecomplex z__1; + + /* Builtin functions */ + void d_cnjg(doublecomplex *, doublecomplex *); + + /* Local variables */ + integer i__, ioff; + + +/* -- LAPACK auxiliary routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments */ + --x; + + /* Function Body */ + if (*incx == 1) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + d_cnjg(&z__1, &x[i__]); + x[i__2].r = z__1.r, x[i__2].i = z__1.i; +/* L10: */ + } + } else { + ioff = 1; + if (*incx < 0) { + ioff = 1 - (*n - 1) * *incx; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = ioff; + d_cnjg(&z__1, &x[ioff]); + x[i__2].r = z__1.r, x[i__2].i = z__1.i; + ioff += *incx; +/* L20: */ + } + } + return 0; + +/* End of ZLACGV */ + +} /* zlacgv_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/zlacpy.cpp b/lib/linalg/zlacpy.cpp new file mode 100644 index 0000000000..1cb52d4c0a --- /dev/null +++ b/lib/linalg/zlacpy.cpp @@ -0,0 +1,210 @@ +/* fortran/zlacpy.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* > \brief \b ZLACPY copies all or part of one two-dimensional array to another. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZLACPY + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZLACPY( UPLO, M, N, A, LDA, B, LDB ) */ + +/* .. Scalar Arguments .. */ +/* CHARACTER UPLO */ +/* INTEGER LDA, LDB, M, N */ +/* .. */ +/* .. Array Arguments .. */ +/* COMPLEX*16 A( LDA, * ), B( LDB, * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZLACPY copies all or part of a two-dimensional matrix A to another */ +/* > matrix B. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > Specifies the part of the matrix A to be copied to B. */ +/* > = 'U': Upper triangular part */ +/* > = 'L': Lower triangular part */ +/* > Otherwise: All of the matrix A */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA,N) */ +/* > The m by n matrix A. If UPLO = 'U', only the upper trapezium */ +/* > is accessed; if UPLO = 'L', only the lower trapezium is */ +/* > accessed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= max(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] B */ +/* > \verbatim */ +/* > B is COMPLEX*16 array, dimension (LDB,N) */ +/* > On exit, B = A in the locations specified by UPLO. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >= max(1,M). */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup complex16OTHERauxiliary */ + +/* ===================================================================== */ +/* Subroutine */ int zlacpy_(char *uplo, integer *m, integer *n, + doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, + ftnlen uplo_len) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + integer i__, j; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + + +/* -- LAPACK auxiliary routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + if (lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1)) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = min(j,*m); + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * a_dim1; + b[i__3].r = a[i__4].r, b[i__3].i = a[i__4].i; +/* L10: */ + } +/* L20: */ + } + + } else if (lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = j; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * a_dim1; + b[i__3].r = a[i__4].r, b[i__3].i = a[i__4].i; +/* L30: */ + } +/* L40: */ + } + + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * a_dim1; + b[i__3].r = a[i__4].r, b[i__3].i = a[i__4].i; +/* L50: */ + } +/* L60: */ + } + } + + return 0; + +/* End of ZLACPY */ + +} /* zlacpy_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/zlacrm.cpp b/lib/linalg/zlacrm.cpp new file mode 100644 index 0000000000..e993f79aa5 --- /dev/null +++ b/lib/linalg/zlacrm.cpp @@ -0,0 +1,258 @@ +/* fortran/zlacrm.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static doublereal c_b6 = 1.; +static doublereal c_b7 = 0.; + +/* > \brief \b ZLACRM multiplies a complex matrix by a square real matrix. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZLACRM + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZLACRM( M, N, A, LDA, B, LDB, C, LDC, RWORK ) */ + +/* .. Scalar Arguments .. */ +/* INTEGER LDA, LDB, LDC, M, N */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION B( LDB, * ), RWORK( * ) */ +/* COMPLEX*16 A( LDA, * ), C( LDC, * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZLACRM performs a very simple matrix-matrix multiplication: */ +/* > C := A * B, */ +/* > where A is M by N and complex; B is N by N and real; */ +/* > C is M by N and complex. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A and of the matrix C. */ +/* > M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns and rows of the matrix B and */ +/* > the number of columns of the matrix C. */ +/* > N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA, N) */ +/* > On entry, A contains the M by N matrix A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >=max(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] B */ +/* > \verbatim */ +/* > B is DOUBLE PRECISION array, dimension (LDB, N) */ +/* > On entry, B contains the N by N matrix B. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > The leading dimension of the array B. LDB >=max(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] C */ +/* > \verbatim */ +/* > C is COMPLEX*16 array, dimension (LDC, N) */ +/* > On exit, C contains the M by N matrix C. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDC */ +/* > \verbatim */ +/* > LDC is INTEGER */ +/* > The leading dimension of the array C. LDC >=max(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RWORK */ +/* > \verbatim */ +/* > RWORK is DOUBLE PRECISION array, dimension (2*M*N) */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup complex16OTHERauxiliary */ + +/* ===================================================================== */ +/* Subroutine */ int zlacrm_(integer *m, integer *n, doublecomplex *a, + integer *lda, doublereal *b, integer *ldb, doublecomplex *c__, + integer *ldc, doublereal *rwork) +{ + /* System generated locals */ + integer b_dim1, b_offset, a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, + i__3, i__4, i__5; + doublereal d__1; + doublecomplex z__1; + + /* Builtin functions */ + double d_imag(doublecomplex *); + + /* Local variables */ + integer i__, j, l; + extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, ftnlen, ftnlen); + + +/* -- LAPACK auxiliary routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Quick return if possible. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --rwork; + + /* Function Body */ + if (*m == 0 || *n == 0) { + return 0; + } + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + rwork[(j - 1) * *m + i__] = a[i__3].r; +/* L10: */ + } +/* L20: */ + } + + l = *m * *n + 1; + dgemm_((char *)"N", (char *)"N", m, n, n, &c_b6, &rwork[1], m, &b[b_offset], ldb, &c_b7, & + rwork[l], m, (ftnlen)1, (ftnlen)1); + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = l + (j - 1) * *m + i__ - 1; + c__[i__3].r = rwork[i__4], c__[i__3].i = 0.; +/* L30: */ + } +/* L40: */ + } + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + rwork[(j - 1) * *m + i__] = d_imag(&a[i__ + j * a_dim1]); +/* L50: */ + } +/* L60: */ + } + dgemm_((char *)"N", (char *)"N", m, n, n, &c_b6, &rwork[1], m, &b[b_offset], ldb, &c_b7, & + rwork[l], m, (ftnlen)1, (ftnlen)1); + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + d__1 = c__[i__4].r; + i__5 = l + (j - 1) * *m + i__ - 1; + z__1.r = d__1, z__1.i = rwork[i__5]; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; +/* L70: */ + } +/* L80: */ + } + + return 0; + +/* End of ZLACRM */ + +} /* zlacrm_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/zladiv.cpp b/lib/linalg/zladiv.cpp new file mode 100644 index 0000000000..6e80b3371a --- /dev/null +++ b/lib/linalg/zladiv.cpp @@ -0,0 +1,132 @@ +/* fortran/zladiv.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* > \brief \b ZLADIV performs complex division in real arithmetic, avoiding unnecessary overflow. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZLADIV + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* COMPLEX*16 FUNCTION ZLADIV( X, Y ) */ + +/* .. Scalar Arguments .. */ +/* COMPLEX*16 X, Y */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZLADIV := X / Y, where X and Y are complex. The computation of X / Y */ +/* > will not overflow on an intermediary step unless the results */ +/* > overflows. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] X */ +/* > \verbatim */ +/* > X is COMPLEX*16 */ +/* > \endverbatim */ +/* > */ +/* > \param[in] Y */ +/* > \verbatim */ +/* > Y is COMPLEX*16 */ +/* > The complex scalars X and Y. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup complex16OTHERauxiliary */ + +/* ===================================================================== */ +/* Double Complex */ VOID zladiv_(doublecomplex * ret_val, doublecomplex *x, + doublecomplex *y) +{ + /* System generated locals */ + doublereal d__1, d__2, d__3, d__4; + doublecomplex z__1; + + /* Builtin functions */ + double d_imag(doublecomplex *); + + /* Local variables */ + doublereal zi, zr; + extern /* Subroutine */ int dladiv_(doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *); + + +/* -- LAPACK auxiliary routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + d__1 = x->r; + d__2 = d_imag(x); + d__3 = y->r; + d__4 = d_imag(y); + dladiv_(&d__1, &d__2, &d__3, &d__4, &zr, &zi); + z__1.r = zr, z__1.i = zi; + ret_val->r = z__1.r, ret_val->i = z__1.i; + + return ; + +/* End of ZLADIV */ + +} /* zladiv_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/zlaed0.cpp b/lib/linalg/zlaed0.cpp new file mode 100644 index 0000000000..680dd85054 --- /dev/null +++ b/lib/linalg/zlaed0.cpp @@ -0,0 +1,458 @@ +/* fortran/zlaed0.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static integer c__9 = 9; +static integer c__0 = 0; +static integer c__2 = 2; +static integer c__1 = 1; + +/* > \brief \b ZLAED0 used by ZSTEDC. Computes all eigenvalues and corresponding eigenvectors of an unreduced +symmetric tridiagonal matrix using the divide and conquer method. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZLAED0 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZLAED0( QSIZ, N, D, E, Q, LDQ, QSTORE, LDQS, RWORK, */ +/* IWORK, INFO ) */ + +/* .. Scalar Arguments .. */ +/* INTEGER INFO, LDQ, LDQS, N, QSIZ */ +/* .. */ +/* .. Array Arguments .. */ +/* INTEGER IWORK( * ) */ +/* DOUBLE PRECISION D( * ), E( * ), RWORK( * ) */ +/* COMPLEX*16 Q( LDQ, * ), QSTORE( LDQS, * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > Using the divide and conquer method, ZLAED0 computes all eigenvalues */ +/* > of a symmetric tridiagonal matrix which is one diagonal block of */ +/* > those from reducing a dense or band Hermitian matrix and */ +/* > corresponding eigenvectors of the dense or band matrix. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] QSIZ */ +/* > \verbatim */ +/* > QSIZ is INTEGER */ +/* > The dimension of the unitary matrix used to reduce */ +/* > the full matrix to tridiagonal form. QSIZ >= N if ICOMPQ = 1. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The dimension of the symmetric tridiagonal matrix. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] D */ +/* > \verbatim */ +/* > D is DOUBLE PRECISION array, dimension (N) */ +/* > On entry, the diagonal elements of the tridiagonal matrix. */ +/* > On exit, the eigenvalues in ascending order. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] E */ +/* > \verbatim */ +/* > E is DOUBLE PRECISION array, dimension (N-1) */ +/* > On entry, the off-diagonal elements of the tridiagonal matrix. */ +/* > On exit, E has been destroyed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] Q */ +/* > \verbatim */ +/* > Q is COMPLEX*16 array, dimension (LDQ,N) */ +/* > On entry, Q must contain an QSIZ x N matrix whose columns */ +/* > unitarily orthonormal. It is a part of the unitary matrix */ +/* > that reduces the full dense Hermitian matrix to a */ +/* > (reducible) symmetric tridiagonal matrix. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDQ */ +/* > \verbatim */ +/* > LDQ is INTEGER */ +/* > The leading dimension of the array Q. LDQ >= max(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, */ +/* > the dimension of IWORK must be at least */ +/* > 6 + 6*N + 5*N*lg N */ +/* > ( lg( N ) = smallest integer k */ +/* > such that 2^k >= N ) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RWORK */ +/* > \verbatim */ +/* > RWORK is DOUBLE PRECISION array, */ +/* > dimension (1 + 3*N + 2*N*lg N + 3*N**2) */ +/* > ( lg( N ) = smallest integer k */ +/* > such that 2^k >= N ) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] QSTORE */ +/* > \verbatim */ +/* > QSTORE is COMPLEX*16 array, dimension (LDQS, N) */ +/* > Used to store parts of */ +/* > the eigenvector matrix when the updating matrix multiplies */ +/* > take place. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDQS */ +/* > \verbatim */ +/* > LDQS is INTEGER */ +/* > The leading dimension of the array QSTORE. */ +/* > LDQS >= max(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit. */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > > 0: The algorithm failed to compute an eigenvalue while */ +/* > working on the submatrix lying in rows and columns */ +/* > INFO/(N+1) through mod(INFO,N+1). */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup complex16OTHERcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int zlaed0_(integer *qsiz, integer *n, doublereal *d__, + doublereal *e, doublecomplex *q, integer *ldq, doublecomplex *qstore, + integer *ldqs, doublereal *rwork, integer *iwork, integer *info) +{ + /* System generated locals */ + integer q_dim1, q_offset, qstore_dim1, qstore_offset, i__1, i__2; + doublereal d__1; + + /* Builtin functions */ + double log(doublereal); + integer pow_ii(integer *, integer *); + + /* Local variables */ + integer i__, j, k, ll, iq, lgn, msd2, smm1, spm1, spm2; + doublereal temp; + integer curr, iperm; + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *); + integer indxq, iwrem, iqptr, tlvls; + extern /* Subroutine */ int zcopy_(integer *, doublecomplex *, integer *, + doublecomplex *, integer *), zlaed7_(integer *, integer *, + integer *, integer *, integer *, integer *, doublereal *, + doublecomplex *, integer *, doublereal *, integer *, doublereal *, + integer *, integer *, integer *, integer *, integer *, + doublereal *, doublecomplex *, doublereal *, integer *, integer *) + ; + integer igivcl; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int zlacrm_(integer *, integer *, doublecomplex *, + integer *, doublereal *, integer *, doublecomplex *, integer *, + doublereal *); + integer igivnm, submat, curprb, subpbs, igivpt; + extern /* Subroutine */ int dsteqr_(char *, integer *, doublereal *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + ftnlen); + integer curlvl, matsiz, iprmpt, smlsiz; + + +/* -- LAPACK computational routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* Warning: N could be as big as QSIZ! */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + --d__; + --e; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + qstore_dim1 = *ldqs; + qstore_offset = 1 + qstore_dim1; + qstore -= qstore_offset; + --rwork; + --iwork; + + /* Function Body */ + *info = 0; + +/* IF( ICOMPQ .LT. 0 .OR. ICOMPQ .GT. 2 ) THEN */ +/* INFO = -1 */ +/* ELSE IF( ( ICOMPQ .EQ. 1 ) .AND. ( QSIZ .LT. MAX( 0, N ) ) ) */ +/* $ THEN */ + if (*qsiz < max(0,*n)) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*ldq < max(1,*n)) { + *info = -6; + } else if (*ldqs < max(1,*n)) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"ZLAED0", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + smlsiz = ilaenv_(&c__9, (char *)"ZLAED0", (char *)" ", &c__0, &c__0, &c__0, &c__0, ( + ftnlen)6, (ftnlen)1); + +/* Determine the size and placement of the submatrices, and save in */ +/* the leading elements of IWORK. */ + + iwork[1] = *n; + subpbs = 1; + tlvls = 0; +L10: + if (iwork[subpbs] > smlsiz) { + for (j = subpbs; j >= 1; --j) { + iwork[j * 2] = (iwork[j] + 1) / 2; + iwork[(j << 1) - 1] = iwork[j] / 2; +/* L20: */ + } + ++tlvls; + subpbs <<= 1; + goto L10; + } + i__1 = subpbs; + for (j = 2; j <= i__1; ++j) { + iwork[j] += iwork[j - 1]; +/* L30: */ + } + +/* Divide the matrix into SUBPBS submatrices of size at most SMLSIZ+1 */ +/* using rank-1 modifications (cuts). */ + + spm1 = subpbs - 1; + i__1 = spm1; + for (i__ = 1; i__ <= i__1; ++i__) { + submat = iwork[i__] + 1; + smm1 = submat - 1; + d__[smm1] -= (d__1 = e[smm1], abs(d__1)); + d__[submat] -= (d__1 = e[smm1], abs(d__1)); +/* L40: */ + } + + indxq = (*n << 2) + 3; + +/* Set up workspaces for eigenvalues only/accumulate new vectors */ +/* routine */ + + temp = log((doublereal) (*n)) / log(2.); + lgn = (integer) temp; + if (pow_ii(&c__2, &lgn) < *n) { + ++lgn; + } + if (pow_ii(&c__2, &lgn) < *n) { + ++lgn; + } + iprmpt = indxq + *n + 1; + iperm = iprmpt + *n * lgn; + iqptr = iperm + *n * lgn; + igivpt = iqptr + *n + 2; + igivcl = igivpt + *n * lgn; + + igivnm = 1; + iq = igivnm + (*n << 1) * lgn; +/* Computing 2nd power */ + i__1 = *n; + iwrem = iq + i__1 * i__1 + 1; +/* Initialize pointers */ + i__1 = subpbs; + for (i__ = 0; i__ <= i__1; ++i__) { + iwork[iprmpt + i__] = 1; + iwork[igivpt + i__] = 1; +/* L50: */ + } + iwork[iqptr] = 1; + +/* Solve each submatrix eigenproblem at the bottom of the divide and */ +/* conquer tree. */ + + curr = 0; + i__1 = spm1; + for (i__ = 0; i__ <= i__1; ++i__) { + if (i__ == 0) { + submat = 1; + matsiz = iwork[1]; + } else { + submat = iwork[i__] + 1; + matsiz = iwork[i__ + 1] - iwork[i__]; + } + ll = iq - 1 + iwork[iqptr + curr]; + dsteqr_((char *)"I", &matsiz, &d__[submat], &e[submat], &rwork[ll], &matsiz, & + rwork[1], info, (ftnlen)1); + zlacrm_(qsiz, &matsiz, &q[submat * q_dim1 + 1], ldq, &rwork[ll], & + matsiz, &qstore[submat * qstore_dim1 + 1], ldqs, &rwork[iwrem] + ); +/* Computing 2nd power */ + i__2 = matsiz; + iwork[iqptr + curr + 1] = iwork[iqptr + curr] + i__2 * i__2; + ++curr; + if (*info > 0) { + *info = submat * (*n + 1) + submat + matsiz - 1; + return 0; + } + k = 1; + i__2 = iwork[i__ + 1]; + for (j = submat; j <= i__2; ++j) { + iwork[indxq + j] = k; + ++k; +/* L60: */ + } +/* L70: */ + } + +/* Successively merge eigensystems of adjacent submatrices */ +/* into eigensystem for the corresponding larger matrix. */ + +/* while ( SUBPBS > 1 ) */ + + curlvl = 1; +L80: + if (subpbs > 1) { + spm2 = subpbs - 2; + i__1 = spm2; + for (i__ = 0; i__ <= i__1; i__ += 2) { + if (i__ == 0) { + submat = 1; + matsiz = iwork[2]; + msd2 = iwork[1]; + curprb = 0; + } else { + submat = iwork[i__] + 1; + matsiz = iwork[i__ + 2] - iwork[i__]; + msd2 = matsiz / 2; + ++curprb; + } + +/* Merge lower order eigensystems (of size MSD2 and MATSIZ - MSD2) */ +/* into an eigensystem of size MATSIZ. ZLAED7 handles the case */ +/* when the eigenvectors of a full or band Hermitian matrix (which */ +/* was reduced to tridiagonal form) are desired. */ + +/* I am free to use Q as a valuable working space until Loop 150. */ + + zlaed7_(&matsiz, &msd2, qsiz, &tlvls, &curlvl, &curprb, &d__[ + submat], &qstore[submat * qstore_dim1 + 1], ldqs, &e[ + submat + msd2 - 1], &iwork[indxq + submat], &rwork[iq], & + iwork[iqptr], &iwork[iprmpt], &iwork[iperm], &iwork[ + igivpt], &iwork[igivcl], &rwork[igivnm], &q[submat * + q_dim1 + 1], &rwork[iwrem], &iwork[subpbs + 1], info); + if (*info > 0) { + *info = submat * (*n + 1) + submat + matsiz - 1; + return 0; + } + iwork[i__ / 2 + 1] = iwork[i__ + 2]; +/* L90: */ + } + subpbs /= 2; + ++curlvl; + goto L80; + } + +/* end while */ + +/* Re-merge the eigenvalues/vectors which were deflated at the final */ +/* merge step. */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + j = iwork[indxq + i__]; + rwork[i__] = d__[j]; + zcopy_(qsiz, &qstore[j * qstore_dim1 + 1], &c__1, &q[i__ * q_dim1 + 1] + , &c__1); +/* L100: */ + } + dcopy_(n, &rwork[1], &c__1, &d__[1], &c__1); + + return 0; + +/* End of ZLAED0 */ + +} /* zlaed0_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/zlaed7.cpp b/lib/linalg/zlaed7.cpp new file mode 100644 index 0000000000..9c06a0c334 --- /dev/null +++ b/lib/linalg/zlaed7.cpp @@ -0,0 +1,456 @@ +/* fortran/zlaed7.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static integer c__2 = 2; +static integer c__1 = 1; +static integer c_n1 = -1; + +/* > \brief \b ZLAED7 used by ZSTEDC. Computes the updated eigensystem of a diagonal matrix after modification + by a rank-one symmetric matrix. Used when the original matrix is dense. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZLAED7 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZLAED7( N, CUTPNT, QSIZ, TLVLS, CURLVL, CURPBM, D, Q, */ +/* LDQ, RHO, INDXQ, QSTORE, QPTR, PRMPTR, PERM, */ +/* GIVPTR, GIVCOL, GIVNUM, WORK, RWORK, IWORK, */ +/* INFO ) */ + +/* .. Scalar Arguments .. */ +/* INTEGER CURLVL, CURPBM, CUTPNT, INFO, LDQ, N, QSIZ, */ +/* $ TLVLS */ +/* DOUBLE PRECISION RHO */ +/* .. */ +/* .. Array Arguments .. */ +/* INTEGER GIVCOL( 2, * ), GIVPTR( * ), INDXQ( * ), */ +/* $ IWORK( * ), PERM( * ), PRMPTR( * ), QPTR( * ) */ +/* DOUBLE PRECISION D( * ), GIVNUM( 2, * ), QSTORE( * ), RWORK( * ) */ +/* COMPLEX*16 Q( LDQ, * ), WORK( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZLAED7 computes the updated eigensystem of a diagonal */ +/* > matrix after modification by a rank-one symmetric matrix. This */ +/* > routine is used only for the eigenproblem which requires all */ +/* > eigenvalues and optionally eigenvectors of a dense or banded */ +/* > Hermitian matrix that has been reduced to tridiagonal form. */ +/* > */ +/* > T = Q(in) ( D(in) + RHO * Z*Z**H ) Q**H(in) = Q(out) * D(out) * Q**H(out) */ +/* > */ +/* > where Z = Q**Hu, u is a vector of length N with ones in the */ +/* > CUTPNT and CUTPNT + 1 th elements and zeros elsewhere. */ +/* > */ +/* > The eigenvectors of the original matrix are stored in Q, and the */ +/* > eigenvalues are in D. The algorithm consists of three stages: */ +/* > */ +/* > The first stage consists of deflating the size of the problem */ +/* > when there are multiple eigenvalues or if there is a zero in */ +/* > the Z vector. For each such occurrence the dimension of the */ +/* > secular equation problem is reduced by one. This stage is */ +/* > performed by the routine DLAED2. */ +/* > */ +/* > The second stage consists of calculating the updated */ +/* > eigenvalues. This is done by finding the roots of the secular */ +/* > equation via the routine DLAED4 (as called by SLAED3). */ +/* > This routine also calculates the eigenvectors of the current */ +/* > problem. */ +/* > */ +/* > The final stage consists of computing the updated eigenvectors */ +/* > directly using the updated eigenvalues. The eigenvectors for */ +/* > the current problem are multiplied with the eigenvectors from */ +/* > the overall problem. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The dimension of the symmetric tridiagonal matrix. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] CUTPNT */ +/* > \verbatim */ +/* > CUTPNT is INTEGER */ +/* > Contains the location of the last eigenvalue in the leading */ +/* > sub-matrix. min(1,N) <= CUTPNT <= N. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] QSIZ */ +/* > \verbatim */ +/* > QSIZ is INTEGER */ +/* > The dimension of the unitary matrix used to reduce */ +/* > the full matrix to tridiagonal form. QSIZ >= N. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TLVLS */ +/* > \verbatim */ +/* > TLVLS is INTEGER */ +/* > The total number of merging levels in the overall divide and */ +/* > conquer tree. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] CURLVL */ +/* > \verbatim */ +/* > CURLVL is INTEGER */ +/* > The current level in the overall merge routine, */ +/* > 0 <= curlvl <= tlvls. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] CURPBM */ +/* > \verbatim */ +/* > CURPBM is INTEGER */ +/* > The current problem in the current level in the overall */ +/* > merge routine (counting from upper left to lower right). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] D */ +/* > \verbatim */ +/* > D is DOUBLE PRECISION array, dimension (N) */ +/* > On entry, the eigenvalues of the rank-1-perturbed matrix. */ +/* > On exit, the eigenvalues of the repaired matrix. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] Q */ +/* > \verbatim */ +/* > Q is COMPLEX*16 array, dimension (LDQ,N) */ +/* > On entry, the eigenvectors of the rank-1-perturbed matrix. */ +/* > On exit, the eigenvectors of the repaired tridiagonal matrix. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDQ */ +/* > \verbatim */ +/* > LDQ is INTEGER */ +/* > The leading dimension of the array Q. LDQ >= max(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] RHO */ +/* > \verbatim */ +/* > RHO is DOUBLE PRECISION */ +/* > Contains the subdiagonal element used to create the rank-1 */ +/* > modification. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INDXQ */ +/* > \verbatim */ +/* > INDXQ is INTEGER array, dimension (N) */ +/* > This contains the permutation which will reintegrate the */ +/* > subproblem just solved back into sorted order, */ +/* > ie. D( INDXQ( I = 1, N ) ) will be in ascending order. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (4*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RWORK */ +/* > \verbatim */ +/* > RWORK is DOUBLE PRECISION array, */ +/* > dimension (3*N+2*QSIZ*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX*16 array, dimension (QSIZ*N) */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] QSTORE */ +/* > \verbatim */ +/* > QSTORE is DOUBLE PRECISION array, dimension (N**2+1) */ +/* > Stores eigenvectors of submatrices encountered during */ +/* > divide and conquer, packed together. QPTR points to */ +/* > beginning of the submatrices. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] QPTR */ +/* > \verbatim */ +/* > QPTR is INTEGER array, dimension (N+2) */ +/* > List of indices pointing to beginning of submatrices stored */ +/* > in QSTORE. The submatrices are numbered starting at the */ +/* > bottom left of the divide and conquer tree, from left to */ +/* > right and bottom to top. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] PRMPTR */ +/* > \verbatim */ +/* > PRMPTR is INTEGER array, dimension (N lg N) */ +/* > Contains a list of pointers which indicate where in PERM a */ +/* > level's permutation is stored. PRMPTR(i+1) - PRMPTR(i) */ +/* > indicates the size of the permutation and also the size of */ +/* > the full, non-deflated problem. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] PERM */ +/* > \verbatim */ +/* > PERM is INTEGER array, dimension (N lg N) */ +/* > Contains the permutations (from deflation and sorting) to be */ +/* > applied to each eigenblock. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] GIVPTR */ +/* > \verbatim */ +/* > GIVPTR is INTEGER array, dimension (N lg N) */ +/* > Contains a list of pointers which indicate where in GIVCOL a */ +/* > level's Givens rotations are stored. GIVPTR(i+1) - GIVPTR(i) */ +/* > indicates the number of Givens rotations. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] GIVCOL */ +/* > \verbatim */ +/* > GIVCOL is INTEGER array, dimension (2, N lg N) */ +/* > Each pair of numbers indicates a pair of columns to take place */ +/* > in a Givens rotation. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] GIVNUM */ +/* > \verbatim */ +/* > GIVNUM is DOUBLE PRECISION array, dimension (2, N lg N) */ +/* > Each number indicates the S value to be used in the */ +/* > corresponding Givens rotation. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit. */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > > 0: if INFO = 1, an eigenvalue did not converge */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup complex16OTHERcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int zlaed7_(integer *n, integer *cutpnt, integer *qsiz, + integer *tlvls, integer *curlvl, integer *curpbm, doublereal *d__, + doublecomplex *q, integer *ldq, doublereal *rho, integer *indxq, + doublereal *qstore, integer *qptr, integer *prmptr, integer *perm, + integer *givptr, integer *givcol, doublereal *givnum, doublecomplex * + work, doublereal *rwork, integer *iwork, integer *info) +{ + /* System generated locals */ + integer q_dim1, q_offset, i__1, i__2; + + /* Builtin functions */ + integer pow_ii(integer *, integer *); + + /* Local variables */ + integer i__, k, n1, n2, iq, iw, iz, ptr, indx, curr, indxc, indxp; + extern /* Subroutine */ int dlaed9_(integer *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + doublereal *, doublereal *, doublereal *, integer *, integer *), + zlaed8_(integer *, integer *, integer *, doublecomplex *, integer + *, doublereal *, doublereal *, integer *, doublereal *, + doublereal *, doublecomplex *, integer *, doublereal *, integer *, + integer *, integer *, integer *, integer *, integer *, + doublereal *, integer *), dlaeda_(integer *, integer *, integer *, + integer *, integer *, integer *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, doublereal *, + integer *); + integer idlmda; + extern /* Subroutine */ int dlamrg_(integer *, integer *, doublereal *, + integer *, integer *, integer *), xerbla_(char *, integer *, + ftnlen), zlacrm_(integer *, integer *, doublecomplex *, integer *, + doublereal *, integer *, doublecomplex *, integer *, doublereal * + ); + integer coltyp; + + +/* -- LAPACK computational routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + --d__; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --indxq; + --qstore; + --qptr; + --prmptr; + --perm; + --givptr; + givcol -= 3; + givnum -= 3; + --work; + --rwork; + --iwork; + + /* Function Body */ + *info = 0; + +/* IF( ICOMPQ.LT.0 .OR. ICOMPQ.GT.1 ) THEN */ +/* INFO = -1 */ +/* ELSE IF( N.LT.0 ) THEN */ + if (*n < 0) { + *info = -1; + } else if (min(1,*n) > *cutpnt || *n < *cutpnt) { + *info = -2; + } else if (*qsiz < *n) { + *info = -3; + } else if (*ldq < max(1,*n)) { + *info = -9; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"ZLAED7", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* The following values are for bookkeeping purposes only. They are */ +/* integer pointers which indicate the portion of the workspace */ +/* used by a particular array in DLAED2 and SLAED3. */ + + iz = 1; + idlmda = iz + *n; + iw = idlmda + *n; + iq = iw + *n; + + indx = 1; + indxc = indx + *n; + coltyp = indxc + *n; + indxp = coltyp + *n; + +/* Form the z-vector which consists of the last row of Q_1 and the */ +/* first row of Q_2. */ + + ptr = pow_ii(&c__2, tlvls) + 1; + i__1 = *curlvl - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = *tlvls - i__; + ptr += pow_ii(&c__2, &i__2); +/* L10: */ + } + curr = ptr + *curpbm; + dlaeda_(n, tlvls, curlvl, curpbm, &prmptr[1], &perm[1], &givptr[1], & + givcol[3], &givnum[3], &qstore[1], &qptr[1], &rwork[iz], &rwork[ + iz + *n], info); + +/* When solving the final problem, we no longer need the stored data, */ +/* so we will overwrite the data from this level onto the previously */ +/* used storage space. */ + + if (*curlvl == *tlvls) { + qptr[curr] = 1; + prmptr[curr] = 1; + givptr[curr] = 1; + } + +/* Sort and Deflate eigenvalues. */ + + zlaed8_(&k, n, qsiz, &q[q_offset], ldq, &d__[1], rho, cutpnt, &rwork[iz], + &rwork[idlmda], &work[1], qsiz, &rwork[iw], &iwork[indxp], &iwork[ + indx], &indxq[1], &perm[prmptr[curr]], &givptr[curr + 1], &givcol[ + (givptr[curr] << 1) + 1], &givnum[(givptr[curr] << 1) + 1], info); + prmptr[curr + 1] = prmptr[curr] + *n; + givptr[curr + 1] += givptr[curr]; + +/* Solve Secular Equation. */ + + if (k != 0) { + dlaed9_(&k, &c__1, &k, n, &d__[1], &rwork[iq], &k, rho, &rwork[idlmda] + , &rwork[iw], &qstore[qptr[curr]], &k, info); + zlacrm_(qsiz, &k, &work[1], qsiz, &qstore[qptr[curr]], &k, &q[ + q_offset], ldq, &rwork[iq]); +/* Computing 2nd power */ + i__1 = k; + qptr[curr + 1] = qptr[curr] + i__1 * i__1; + if (*info != 0) { + return 0; + } + +/* Prepare the INDXQ sorting premutation. */ + + n1 = k; + n2 = *n - k; + dlamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &indxq[1]); + } else { + qptr[curr + 1] = qptr[curr]; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + indxq[i__] = i__; +/* L20: */ + } + } + + return 0; + +/* End of ZLAED7 */ + +} /* zlaed7_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/zlaed8.cpp b/lib/linalg/zlaed8.cpp new file mode 100644 index 0000000000..00f4db857b --- /dev/null +++ b/lib/linalg/zlaed8.cpp @@ -0,0 +1,569 @@ +/* fortran/zlaed8.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static doublereal c_b3 = -1.; +static integer c__1 = 1; + +/* > \brief \b ZLAED8 used by ZSTEDC. Merges eigenvalues and deflates secular equation. Used when the original + matrix is dense. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZLAED8 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZLAED8( K, N, QSIZ, Q, LDQ, D, RHO, CUTPNT, Z, DLAMDA, */ +/* Q2, LDQ2, W, INDXP, INDX, INDXQ, PERM, GIVPTR, */ +/* GIVCOL, GIVNUM, INFO ) */ + +/* .. Scalar Arguments .. */ +/* INTEGER CUTPNT, GIVPTR, INFO, K, LDQ, LDQ2, N, QSIZ */ +/* DOUBLE PRECISION RHO */ +/* .. */ +/* .. Array Arguments .. */ +/* INTEGER GIVCOL( 2, * ), INDX( * ), INDXP( * ), */ +/* $ INDXQ( * ), PERM( * ) */ +/* DOUBLE PRECISION D( * ), DLAMDA( * ), GIVNUM( 2, * ), W( * ), */ +/* $ Z( * ) */ +/* COMPLEX*16 Q( LDQ, * ), Q2( LDQ2, * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZLAED8 merges the two sets of eigenvalues together into a single */ +/* > sorted set. Then it tries to deflate the size of the problem. */ +/* > There are two ways in which deflation can occur: when two or more */ +/* > eigenvalues are close together or if there is a tiny element in the */ +/* > Z vector. For each such occurrence the order of the related secular */ +/* > equation problem is reduced by one. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[out] K */ +/* > \verbatim */ +/* > K is INTEGER */ +/* > Contains the number of non-deflated eigenvalues. */ +/* > This is the order of the related secular equation. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The dimension of the symmetric tridiagonal matrix. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] QSIZ */ +/* > \verbatim */ +/* > QSIZ is INTEGER */ +/* > The dimension of the unitary matrix used to reduce */ +/* > the dense or band matrix to tridiagonal form. */ +/* > QSIZ >= N if ICOMPQ = 1. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] Q */ +/* > \verbatim */ +/* > Q is COMPLEX*16 array, dimension (LDQ,N) */ +/* > On entry, Q contains the eigenvectors of the partially solved */ +/* > system which has been previously updated in matrix */ +/* > multiplies with other partially solved eigensystems. */ +/* > On exit, Q contains the trailing (N-K) updated eigenvectors */ +/* > (those which were deflated) in its last N-K columns. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDQ */ +/* > \verbatim */ +/* > LDQ is INTEGER */ +/* > The leading dimension of the array Q. LDQ >= max( 1, N ). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] D */ +/* > \verbatim */ +/* > D is DOUBLE PRECISION array, dimension (N) */ +/* > On entry, D contains the eigenvalues of the two submatrices to */ +/* > be combined. On exit, D contains the trailing (N-K) updated */ +/* > eigenvalues (those which were deflated) sorted into increasing */ +/* > order. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] RHO */ +/* > \verbatim */ +/* > RHO is DOUBLE PRECISION */ +/* > Contains the off diagonal element associated with the rank-1 */ +/* > cut which originally split the two submatrices which are now */ +/* > being recombined. RHO is modified during the computation to */ +/* > the value required by DLAED3. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] CUTPNT */ +/* > \verbatim */ +/* > CUTPNT is INTEGER */ +/* > Contains the location of the last eigenvalue in the leading */ +/* > sub-matrix. MIN(1,N) <= CUTPNT <= N. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] Z */ +/* > \verbatim */ +/* > Z is DOUBLE PRECISION array, dimension (N) */ +/* > On input this vector contains the updating vector (the last */ +/* > row of the first sub-eigenvector matrix and the first row of */ +/* > the second sub-eigenvector matrix). The contents of Z are */ +/* > destroyed during the updating process. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] DLAMDA */ +/* > \verbatim */ +/* > DLAMDA is DOUBLE PRECISION array, dimension (N) */ +/* > Contains a copy of the first K eigenvalues which will be used */ +/* > by DLAED3 to form the secular equation. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] Q2 */ +/* > \verbatim */ +/* > Q2 is COMPLEX*16 array, dimension (LDQ2,N) */ +/* > If ICOMPQ = 0, Q2 is not referenced. Otherwise, */ +/* > Contains a copy of the first K eigenvectors which will be used */ +/* > by DLAED7 in a matrix multiply (DGEMM) to update the new */ +/* > eigenvectors. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDQ2 */ +/* > \verbatim */ +/* > LDQ2 is INTEGER */ +/* > The leading dimension of the array Q2. LDQ2 >= max( 1, N ). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] W */ +/* > \verbatim */ +/* > W is DOUBLE PRECISION array, dimension (N) */ +/* > This will hold the first k values of the final */ +/* > deflation-altered z-vector and will be passed to DLAED3. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INDXP */ +/* > \verbatim */ +/* > INDXP is INTEGER array, dimension (N) */ +/* > This will contain the permutation used to place deflated */ +/* > values of D at the end of the array. On output INDXP(1:K) */ +/* > points to the nondeflated D-values and INDXP(K+1:N) */ +/* > points to the deflated eigenvalues. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INDX */ +/* > \verbatim */ +/* > INDX is INTEGER array, dimension (N) */ +/* > This will contain the permutation used to sort the contents of */ +/* > D into ascending order. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] INDXQ */ +/* > \verbatim */ +/* > INDXQ is INTEGER array, dimension (N) */ +/* > This contains the permutation which separately sorts the two */ +/* > sub-problems in D into ascending order. Note that elements in */ +/* > the second half of this permutation must first have CUTPNT */ +/* > added to their values in order to be accurate. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] PERM */ +/* > \verbatim */ +/* > PERM is INTEGER array, dimension (N) */ +/* > Contains the permutations (from deflation and sorting) to be */ +/* > applied to each eigenblock. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] GIVPTR */ +/* > \verbatim */ +/* > GIVPTR is INTEGER */ +/* > Contains the number of Givens rotations which took place in */ +/* > this subproblem. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] GIVCOL */ +/* > \verbatim */ +/* > GIVCOL is INTEGER array, dimension (2, N) */ +/* > Each pair of numbers indicates a pair of columns to take place */ +/* > in a Givens rotation. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] GIVNUM */ +/* > \verbatim */ +/* > GIVNUM is DOUBLE PRECISION array, dimension (2, N) */ +/* > Each number indicates the S value to be used in the */ +/* > corresponding Givens rotation. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit. */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup complex16OTHERcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int zlaed8_(integer *k, integer *n, integer *qsiz, + doublecomplex *q, integer *ldq, doublereal *d__, doublereal *rho, + integer *cutpnt, doublereal *z__, doublereal *dlamda, doublecomplex * + q2, integer *ldq2, doublereal *w, integer *indxp, integer *indx, + integer *indxq, integer *perm, integer *givptr, integer *givcol, + doublereal *givnum, integer *info) +{ + /* System generated locals */ + integer q_dim1, q_offset, q2_dim1, q2_offset, i__1; + doublereal d__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + doublereal c__; + integer i__, j; + doublereal s, t; + integer k2, n1, n2, jp, n1p1; + doublereal eps, tau, tol; + integer jlam, imax, jmax; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *), dcopy_(integer *, doublereal *, integer *, doublereal + *, integer *), zdrot_(integer *, doublecomplex *, integer *, + doublecomplex *, integer *, doublereal *, doublereal *), zcopy_( + integer *, doublecomplex *, integer *, doublecomplex *, integer *) + ; + extern doublereal dlapy2_(doublereal *, doublereal *), dlamch_(char *, + ftnlen); + extern integer idamax_(integer *, doublereal *, integer *); + extern /* Subroutine */ int dlamrg_(integer *, integer *, doublereal *, + integer *, integer *, integer *), xerbla_(char *, integer *, + ftnlen), zlacpy_(char *, integer *, integer *, doublecomplex *, + integer *, doublecomplex *, integer *, ftnlen); + + +/* -- LAPACK computational routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --d__; + --z__; + --dlamda; + q2_dim1 = *ldq2; + q2_offset = 1 + q2_dim1; + q2 -= q2_offset; + --w; + --indxp; + --indx; + --indxq; + --perm; + givcol -= 3; + givnum -= 3; + + /* Function Body */ + *info = 0; + + if (*n < 0) { + *info = -2; + } else if (*qsiz < *n) { + *info = -3; + } else if (*ldq < max(1,*n)) { + *info = -5; + } else if (*cutpnt < min(1,*n) || *cutpnt > *n) { + *info = -8; + } else if (*ldq2 < max(1,*n)) { + *info = -12; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"ZLAED8", &i__1, (ftnlen)6); + return 0; + } + +/* Need to initialize GIVPTR to O here in case of quick exit */ +/* to prevent an unspecified code behavior (usually sigfault) */ +/* when IWORK array on entry to *stedc is not zeroed */ +/* (or at least some IWORK entries which used in *laed7 for GIVPTR). */ + + *givptr = 0; + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + n1 = *cutpnt; + n2 = *n - n1; + n1p1 = n1 + 1; + + if (*rho < 0.) { + dscal_(&n2, &c_b3, &z__[n1p1], &c__1); + } + +/* Normalize z so that norm(z) = 1 */ + + t = 1. / sqrt(2.); + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + indx[j] = j; +/* L10: */ + } + dscal_(n, &t, &z__[1], &c__1); + *rho = (d__1 = *rho * 2., abs(d__1)); + +/* Sort the eigenvalues into increasing order */ + + i__1 = *n; + for (i__ = *cutpnt + 1; i__ <= i__1; ++i__) { + indxq[i__] += *cutpnt; +/* L20: */ + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + dlamda[i__] = d__[indxq[i__]]; + w[i__] = z__[indxq[i__]]; +/* L30: */ + } + i__ = 1; + j = *cutpnt + 1; + dlamrg_(&n1, &n2, &dlamda[1], &c__1, &c__1, &indx[1]); + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + d__[i__] = dlamda[indx[i__]]; + z__[i__] = w[indx[i__]]; +/* L40: */ + } + +/* Calculate the allowable deflation tolerance */ + + imax = idamax_(n, &z__[1], &c__1); + jmax = idamax_(n, &d__[1], &c__1); + eps = dlamch_((char *)"Epsilon", (ftnlen)7); + tol = eps * 8. * (d__1 = d__[jmax], abs(d__1)); + +/* If the rank-1 modifier is small enough, no more needs to be done */ +/* -- except to reorganize Q so that its columns correspond with the */ +/* elements in D. */ + + if (*rho * (d__1 = z__[imax], abs(d__1)) <= tol) { + *k = 0; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + perm[j] = indxq[indx[j]]; + zcopy_(qsiz, &q[perm[j] * q_dim1 + 1], &c__1, &q2[j * q2_dim1 + 1] + , &c__1); +/* L50: */ + } + zlacpy_((char *)"A", qsiz, n, &q2[q2_dim1 + 1], ldq2, &q[q_dim1 + 1], ldq, ( + ftnlen)1); + return 0; + } + +/* If there are multiple eigenvalues then the problem deflates. Here */ +/* the number of equal eigenvalues are found. As each equal */ +/* eigenvalue is found, an elementary reflector is computed to rotate */ +/* the corresponding eigensubspace so that the corresponding */ +/* components of Z are zero in this new basis. */ + + *k = 0; + k2 = *n + 1; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*rho * (d__1 = z__[j], abs(d__1)) <= tol) { + +/* Deflate due to small z component. */ + + --k2; + indxp[k2] = j; + if (j == *n) { + goto L100; + } + } else { + jlam = j; + goto L70; + } +/* L60: */ + } +L70: + ++j; + if (j > *n) { + goto L90; + } + if (*rho * (d__1 = z__[j], abs(d__1)) <= tol) { + +/* Deflate due to small z component. */ + + --k2; + indxp[k2] = j; + } else { + +/* Check if eigenvalues are close enough to allow deflation. */ + + s = z__[jlam]; + c__ = z__[j]; + +/* Find sqrt(a**2+b**2) without overflow or */ +/* destructive underflow. */ + + tau = dlapy2_(&c__, &s); + t = d__[j] - d__[jlam]; + c__ /= tau; + s = -s / tau; + if ((d__1 = t * c__ * s, abs(d__1)) <= tol) { + +/* Deflation is possible. */ + + z__[j] = tau; + z__[jlam] = 0.; + +/* Record the appropriate Givens rotation */ + + ++(*givptr); + givcol[(*givptr << 1) + 1] = indxq[indx[jlam]]; + givcol[(*givptr << 1) + 2] = indxq[indx[j]]; + givnum[(*givptr << 1) + 1] = c__; + givnum[(*givptr << 1) + 2] = s; + zdrot_(qsiz, &q[indxq[indx[jlam]] * q_dim1 + 1], &c__1, &q[indxq[ + indx[j]] * q_dim1 + 1], &c__1, &c__, &s); + t = d__[jlam] * c__ * c__ + d__[j] * s * s; + d__[j] = d__[jlam] * s * s + d__[j] * c__ * c__; + d__[jlam] = t; + --k2; + i__ = 1; +L80: + if (k2 + i__ <= *n) { + if (d__[jlam] < d__[indxp[k2 + i__]]) { + indxp[k2 + i__ - 1] = indxp[k2 + i__]; + indxp[k2 + i__] = jlam; + ++i__; + goto L80; + } else { + indxp[k2 + i__ - 1] = jlam; + } + } else { + indxp[k2 + i__ - 1] = jlam; + } + jlam = j; + } else { + ++(*k); + w[*k] = z__[jlam]; + dlamda[*k] = d__[jlam]; + indxp[*k] = jlam; + jlam = j; + } + } + goto L70; +L90: + +/* Record the last eigenvalue. */ + + ++(*k); + w[*k] = z__[jlam]; + dlamda[*k] = d__[jlam]; + indxp[*k] = jlam; + +L100: + +/* Sort the eigenvalues and corresponding eigenvectors into DLAMDA */ +/* and Q2 respectively. The eigenvalues/vectors which were not */ +/* deflated go into the first K slots of DLAMDA and Q2 respectively, */ +/* while those which were deflated go into the last N - K slots. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + jp = indxp[j]; + dlamda[j] = d__[jp]; + perm[j] = indxq[indx[jp]]; + zcopy_(qsiz, &q[perm[j] * q_dim1 + 1], &c__1, &q2[j * q2_dim1 + 1], & + c__1); +/* L110: */ + } + +/* The deflated eigenvalues and their corresponding vectors go back */ +/* into the last N - K slots of D and Q respectively. */ + + if (*k < *n) { + i__1 = *n - *k; + dcopy_(&i__1, &dlamda[*k + 1], &c__1, &d__[*k + 1], &c__1); + i__1 = *n - *k; + zlacpy_((char *)"A", qsiz, &i__1, &q2[(*k + 1) * q2_dim1 + 1], ldq2, &q[(*k + + 1) * q_dim1 + 1], ldq, (ftnlen)1); + } + + return 0; + +/* End of ZLAED8 */ + +} /* zlaed8_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/zlanhe.cpp b/lib/linalg/zlanhe.cpp new file mode 100644 index 0000000000..2522e8d500 --- /dev/null +++ b/lib/linalg/zlanhe.cpp @@ -0,0 +1,348 @@ +/* fortran/zlanhe.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +/* > \brief \b ZLANHE returns the value of the 1-norm, or the Frobenius norm, or the infinity norm, or the ele +ment of largest absolute value of a complex Hermitian matrix. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZLANHE + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* DOUBLE PRECISION FUNCTION ZLANHE( NORM, UPLO, N, A, LDA, WORK ) */ + +/* .. Scalar Arguments .. */ +/* CHARACTER NORM, UPLO */ +/* INTEGER LDA, N */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION WORK( * ) */ +/* COMPLEX*16 A( LDA, * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZLANHE returns the value of the one norm, or the Frobenius norm, or */ +/* > the infinity norm, or the element of largest absolute value of a */ +/* > complex hermitian matrix A. */ +/* > \endverbatim */ +/* > */ +/* > \return ZLANHE */ +/* > \verbatim */ +/* > */ +/* > ZLANHE = ( max(abs(A(i,j))), NORM = 'M' or 'm' */ +/* > ( */ +/* > ( norm1(A), NORM = '1', 'O' or 'o' */ +/* > ( */ +/* > ( normI(A), NORM = 'I' or 'i' */ +/* > ( */ +/* > ( normF(A), NORM = 'F', 'f', 'E' or 'e' */ +/* > */ +/* > where norm1 denotes the one norm of a matrix (maximum column sum), */ +/* > normI denotes the infinity norm of a matrix (maximum row sum) and */ +/* > normF denotes the Frobenius norm of a matrix (square root of sum of */ +/* > squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] NORM */ +/* > \verbatim */ +/* > NORM is CHARACTER*1 */ +/* > Specifies the value to be returned in ZLANHE as described */ +/* > above. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > Specifies whether the upper or lower triangular part of the */ +/* > hermitian matrix A is to be referenced. */ +/* > = 'U': Upper triangular part of A is referenced */ +/* > = 'L': Lower triangular part of A is referenced */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. When N = 0, ZLANHE is */ +/* > set to zero. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA,N) */ +/* > The hermitian matrix A. If UPLO = 'U', the leading n by n */ +/* > upper triangular part of A contains the upper triangular part */ +/* > of the matrix A, and the strictly lower triangular part of A */ +/* > is not referenced. If UPLO = 'L', the leading n by n lower */ +/* > triangular part of A contains the lower triangular part of */ +/* > the matrix A, and the strictly upper triangular part of A is */ +/* > not referenced. Note that the imaginary parts of the diagonal */ +/* > elements need not be set and are assumed to be zero. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= max(N,1). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)), */ +/* > where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise, */ +/* > WORK is not referenced. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup complex16HEauxiliary */ + +/* ===================================================================== */ +doublereal zlanhe_(char *norm, char *uplo, integer *n, doublecomplex *a, + integer *lda, doublereal *work, ftnlen norm_len, ftnlen uplo_len) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + doublereal ret_val, d__1; + + /* Builtin functions */ + double z_abs(doublecomplex *), sqrt(doublereal); + + /* Local variables */ + integer i__, j; + doublereal sum, absa, scale; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + doublereal value; + extern logical disnan_(doublereal *); + extern /* Subroutine */ int zlassq_(integer *, doublecomplex *, integer *, + doublereal *, doublereal *); + + +/* -- LAPACK auxiliary routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --work; + + /* Function Body */ + if (*n == 0) { + value = 0.; + } else if (lsame_(norm, (char *)"M", (ftnlen)1, (ftnlen)1)) { + +/* Find max(abs(A(i,j))). */ + + value = 0.; + if (lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1)) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + sum = z_abs(&a[i__ + j * a_dim1]); + if (value < sum || disnan_(&sum)) { + value = sum; + } +/* L10: */ + } + i__2 = j + j * a_dim1; + sum = (d__1 = a[i__2].r, abs(d__1)); + if (value < sum || disnan_(&sum)) { + value = sum; + } +/* L20: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j + j * a_dim1; + sum = (d__1 = a[i__2].r, abs(d__1)); + if (value < sum || disnan_(&sum)) { + value = sum; + } + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + sum = z_abs(&a[i__ + j * a_dim1]); + if (value < sum || disnan_(&sum)) { + value = sum; + } +/* L30: */ + } +/* L40: */ + } + } + } else if (lsame_(norm, (char *)"I", (ftnlen)1, (ftnlen)1) || lsame_(norm, (char *)"O", ( + ftnlen)1, (ftnlen)1) || *(unsigned char *)norm == '1') { + +/* Find normI(A) ( = norm1(A), since A is hermitian). */ + + value = 0.; + if (lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1)) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = 0.; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + absa = z_abs(&a[i__ + j * a_dim1]); + sum += absa; + work[i__] += absa; +/* L50: */ + } + i__2 = j + j * a_dim1; + work[j] = sum + (d__1 = a[i__2].r, abs(d__1)); +/* L60: */ + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + sum = work[i__]; + if (value < sum || disnan_(&sum)) { + value = sum; + } +/* L70: */ + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] = 0.; +/* L80: */ + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j + j * a_dim1; + sum = work[j] + (d__1 = a[i__2].r, abs(d__1)); + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + absa = z_abs(&a[i__ + j * a_dim1]); + sum += absa; + work[i__] += absa; +/* L90: */ + } + if (value < sum || disnan_(&sum)) { + value = sum; + } +/* L100: */ + } + } + } else if (lsame_(norm, (char *)"F", (ftnlen)1, (ftnlen)1) || lsame_(norm, (char *)"E", ( + ftnlen)1, (ftnlen)1)) { + +/* Find normF(A). */ + + scale = 0.; + sum = 1.; + if (lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1)) { + i__1 = *n; + for (j = 2; j <= i__1; ++j) { + i__2 = j - 1; + zlassq_(&i__2, &a[j * a_dim1 + 1], &c__1, &scale, &sum); +/* L110: */ + } + } else { + i__1 = *n - 1; + for (j = 1; j <= i__1; ++j) { + i__2 = *n - j; + zlassq_(&i__2, &a[j + 1 + j * a_dim1], &c__1, &scale, &sum); +/* L120: */ + } + } + sum *= 2; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + i__ * a_dim1; + if (a[i__2].r != 0.) { + i__2 = i__ + i__ * a_dim1; + absa = (d__1 = a[i__2].r, abs(d__1)); + if (scale < absa) { +/* Computing 2nd power */ + d__1 = scale / absa; + sum = sum * (d__1 * d__1) + 1.; + scale = absa; + } else { +/* Computing 2nd power */ + d__1 = absa / scale; + sum += d__1 * d__1; + } + } +/* L130: */ + } + value = scale * sqrt(sum); + } + + ret_val = value; + return ret_val; + +/* End of ZLANHE */ + +} /* zlanhe_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/zlarf.cpp b/lib/linalg/zlarf.cpp new file mode 100644 index 0000000000..453644e7d9 --- /dev/null +++ b/lib/linalg/zlarf.cpp @@ -0,0 +1,283 @@ +/* fortran/zlarf.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static doublecomplex c_b1 = {1.,0.}; +static doublecomplex c_b2 = {0.,0.}; +static integer c__1 = 1; + +/* > \brief \b ZLARF applies an elementary reflector to a general rectangular matrix. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZLARF + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZLARF( SIDE, M, N, V, INCV, TAU, C, LDC, WORK ) */ + +/* .. Scalar Arguments .. */ +/* CHARACTER SIDE */ +/* INTEGER INCV, LDC, M, N */ +/* COMPLEX*16 TAU */ +/* .. */ +/* .. Array Arguments .. */ +/* COMPLEX*16 C( LDC, * ), V( * ), WORK( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZLARF applies a complex elementary reflector H to a complex M-by-N */ +/* > matrix C, from either the left or the right. H is represented in the */ +/* > form */ +/* > */ +/* > H = I - tau * v * v**H */ +/* > */ +/* > where tau is a complex scalar and v is a complex vector. */ +/* > */ +/* > If tau = 0, then H is taken to be the unit matrix. */ +/* > */ +/* > To apply H**H, supply conjg(tau) instead */ +/* > tau. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] SIDE */ +/* > \verbatim */ +/* > SIDE is CHARACTER*1 */ +/* > = 'L': form H * C */ +/* > = 'R': form C * H */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix C. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix C. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] V */ +/* > \verbatim */ +/* > V is COMPLEX*16 array, dimension */ +/* > (1 + (M-1)*abs(INCV)) if SIDE = 'L' */ +/* > or (1 + (N-1)*abs(INCV)) if SIDE = 'R' */ +/* > The vector v in the representation of H. V is not used if */ +/* > TAU = 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] INCV */ +/* > \verbatim */ +/* > INCV is INTEGER */ +/* > The increment between elements of v. INCV <> 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TAU */ +/* > \verbatim */ +/* > TAU is COMPLEX*16 */ +/* > The value tau in the representation of H. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] C */ +/* > \verbatim */ +/* > C is COMPLEX*16 array, dimension (LDC,N) */ +/* > On entry, the M-by-N matrix C. */ +/* > On exit, C is overwritten by the matrix H * C if SIDE = 'L', */ +/* > or C * H if SIDE = 'R'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDC */ +/* > \verbatim */ +/* > LDC is INTEGER */ +/* > The leading dimension of the array C. LDC >= max(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX*16 array, dimension */ +/* > (N) if SIDE = 'L' */ +/* > or (M) if SIDE = 'R' */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup complex16OTHERauxiliary */ + +/* ===================================================================== */ +/* Subroutine */ int zlarf_(char *side, integer *m, integer *n, doublecomplex + *v, integer *incv, doublecomplex *tau, doublecomplex *c__, integer * + ldc, doublecomplex *work, ftnlen side_len) +{ + /* System generated locals */ + integer c_dim1, c_offset, i__1; + doublecomplex z__1; + + /* Local variables */ + integer i__; + logical applyleft; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + integer lastc; + extern /* Subroutine */ int zgerc_(integer *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *), zgemv_(char *, integer *, integer *, + doublecomplex *, doublecomplex *, integer *, doublecomplex *, + integer *, doublecomplex *, doublecomplex *, integer *, ftnlen); + integer lastv; + extern integer ilazlc_(integer *, integer *, doublecomplex *, integer *), + ilazlr_(integer *, integer *, doublecomplex *, integer *); + + +/* -- LAPACK auxiliary routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments */ + --v; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + applyleft = lsame_(side, (char *)"L", (ftnlen)1, (ftnlen)1); + lastv = 0; + lastc = 0; + if (tau->r != 0. || tau->i != 0.) { +/* Set up variables for scanning V. LASTV begins pointing to the end */ +/* of V. */ + if (applyleft) { + lastv = *m; + } else { + lastv = *n; + } + if (*incv > 0) { + i__ = (lastv - 1) * *incv + 1; + } else { + i__ = 1; + } +/* Look for the last non-zero row in V. */ + for(;;) { /* while(complicated condition) */ + i__1 = i__; + if (!(lastv > 0 && (v[i__1].r == 0. && v[i__1].i == 0.))) + break; + --lastv; + i__ -= *incv; + } + if (applyleft) { +/* Scan for the last non-zero column in C(1:lastv,:). */ + lastc = ilazlc_(&lastv, n, &c__[c_offset], ldc); + } else { +/* Scan for the last non-zero row in C(:,1:lastv). */ + lastc = ilazlr_(m, &lastv, &c__[c_offset], ldc); + } + } +/* Note that lastc.eq.0 renders the BLAS operations null; no special */ +/* case is needed at this level. */ + if (applyleft) { + +/* Form H * C */ + + if (lastv > 0) { + +/* w(1:lastc,1) := C(1:lastv,1:lastc)**H * v(1:lastv,1) */ + + zgemv_((char *)"Conjugate transpose", &lastv, &lastc, &c_b1, &c__[ + c_offset], ldc, &v[1], incv, &c_b2, &work[1], &c__1, ( + ftnlen)19); + +/* C(1:lastv,1:lastc) := C(...) - v(1:lastv,1) * w(1:lastc,1)**H */ + + z__1.r = -tau->r, z__1.i = -tau->i; + zgerc_(&lastv, &lastc, &z__1, &v[1], incv, &work[1], &c__1, &c__[ + c_offset], ldc); + } + } else { + +/* Form C * H */ + + if (lastv > 0) { + +/* w(1:lastc,1) := C(1:lastc,1:lastv) * v(1:lastv,1) */ + + zgemv_((char *)"No transpose", &lastc, &lastv, &c_b1, &c__[c_offset], ldc, + &v[1], incv, &c_b2, &work[1], &c__1, (ftnlen)12); + +/* C(1:lastc,1:lastv) := C(...) - w(1:lastc,1) * v(1:lastv,1)**H */ + + z__1.r = -tau->r, z__1.i = -tau->i; + zgerc_(&lastc, &lastv, &z__1, &work[1], &c__1, &v[1], incv, &c__[ + c_offset], ldc); + } + } + return 0; + +/* End of ZLARF */ + +} /* zlarf_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/zlarfb.cpp b/lib/linalg/zlarfb.cpp new file mode 100644 index 0000000000..0ea0c17bd4 --- /dev/null +++ b/lib/linalg/zlarfb.cpp @@ -0,0 +1,949 @@ +/* fortran/zlarfb.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static doublecomplex c_b1 = {1.,0.}; +static integer c__1 = 1; + +/* > \brief \b ZLARFB applies a block reflector or its conjugate-transpose to a general rectangular matrix. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZLARFB + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZLARFB( SIDE, TRANS, DIRECT, STOREV, M, N, K, V, LDV, */ +/* T, LDT, C, LDC, WORK, LDWORK ) */ + +/* .. Scalar Arguments .. */ +/* CHARACTER DIRECT, SIDE, STOREV, TRANS */ +/* INTEGER K, LDC, LDT, LDV, LDWORK, M, N */ +/* .. */ +/* .. Array Arguments .. */ +/* COMPLEX*16 C( LDC, * ), T( LDT, * ), V( LDV, * ), */ +/* $ WORK( LDWORK, * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZLARFB applies a complex block reflector H or its transpose H**H to a */ +/* > complex M-by-N matrix C, from either the left or the right. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] SIDE */ +/* > \verbatim */ +/* > SIDE is CHARACTER*1 */ +/* > = 'L': apply H or H**H from the Left */ +/* > = 'R': apply H or H**H from the Right */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TRANS */ +/* > \verbatim */ +/* > TRANS is CHARACTER*1 */ +/* > = 'N': apply H (No transpose) */ +/* > = 'C': apply H**H (Conjugate transpose) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DIRECT */ +/* > \verbatim */ +/* > DIRECT is CHARACTER*1 */ +/* > Indicates how H is formed from a product of elementary */ +/* > reflectors */ +/* > = 'F': H = H(1) H(2) . . . H(k) (Forward) */ +/* > = 'B': H = H(k) . . . H(2) H(1) (Backward) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] STOREV */ +/* > \verbatim */ +/* > STOREV is CHARACTER*1 */ +/* > Indicates how the vectors which define the elementary */ +/* > reflectors are stored: */ +/* > = 'C': Columnwise */ +/* > = 'R': Rowwise */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix C. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix C. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] K */ +/* > \verbatim */ +/* > K is INTEGER */ +/* > The order of the matrix T (= the number of elementary */ +/* > reflectors whose product defines the block reflector). */ +/* > If SIDE = 'L', M >= K >= 0; */ +/* > if SIDE = 'R', N >= K >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] V */ +/* > \verbatim */ +/* > V is COMPLEX*16 array, dimension */ +/* > (LDV,K) if STOREV = 'C' */ +/* > (LDV,M) if STOREV = 'R' and SIDE = 'L' */ +/* > (LDV,N) if STOREV = 'R' and SIDE = 'R' */ +/* > See Further Details. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDV */ +/* > \verbatim */ +/* > LDV is INTEGER */ +/* > The leading dimension of the array V. */ +/* > If STOREV = 'C' and SIDE = 'L', LDV >= max(1,M); */ +/* > if STOREV = 'C' and SIDE = 'R', LDV >= max(1,N); */ +/* > if STOREV = 'R', LDV >= K. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] T */ +/* > \verbatim */ +/* > T is COMPLEX*16 array, dimension (LDT,K) */ +/* > The triangular K-by-K matrix T in the representation of the */ +/* > block reflector. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDT */ +/* > \verbatim */ +/* > LDT is INTEGER */ +/* > The leading dimension of the array T. LDT >= K. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] C */ +/* > \verbatim */ +/* > C is COMPLEX*16 array, dimension (LDC,N) */ +/* > On entry, the M-by-N matrix C. */ +/* > On exit, C is overwritten by H*C or H**H*C or C*H or C*H**H. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDC */ +/* > \verbatim */ +/* > LDC is INTEGER */ +/* > The leading dimension of the array C. LDC >= max(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX*16 array, dimension (LDWORK,K) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDWORK */ +/* > \verbatim */ +/* > LDWORK is INTEGER */ +/* > The leading dimension of the array WORK. */ +/* > If SIDE = 'L', LDWORK >= max(1,N); */ +/* > if SIDE = 'R', LDWORK >= max(1,M). */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup complex16OTHERauxiliary */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The shape of the matrix V and the storage of the vectors which define */ +/* > the H(i) is best illustrated by the following example with n = 5 and */ +/* > k = 3. The elements equal to 1 are not stored; the corresponding */ +/* > array elements are modified but restored on exit. The rest of the */ +/* > array is not used. */ +/* > */ +/* > DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': */ +/* > */ +/* > V = ( 1 ) V = ( 1 v1 v1 v1 v1 ) */ +/* > ( v1 1 ) ( 1 v2 v2 v2 ) */ +/* > ( v1 v2 1 ) ( 1 v3 v3 ) */ +/* > ( v1 v2 v3 ) */ +/* > ( v1 v2 v3 ) */ +/* > */ +/* > DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': */ +/* > */ +/* > V = ( v1 v2 v3 ) V = ( v1 v1 1 ) */ +/* > ( v1 v2 v3 ) ( v2 v2 v2 1 ) */ +/* > ( 1 v2 v3 ) ( v3 v3 v3 v3 1 ) */ +/* > ( 1 v3 ) */ +/* > ( 1 ) */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int zlarfb_(char *side, char *trans, char *direct, char * + storev, integer *m, integer *n, integer *k, doublecomplex *v, integer + *ldv, doublecomplex *t, integer *ldt, doublecomplex *c__, integer * + ldc, doublecomplex *work, integer *ldwork, ftnlen side_len, ftnlen + trans_len, ftnlen direct_len, ftnlen storev_len) +{ + /* System generated locals */ + integer c_dim1, c_offset, t_dim1, t_offset, v_dim1, v_offset, work_dim1, + work_offset, i__1, i__2, i__3, i__4, i__5; + doublecomplex z__1, z__2; + + /* Builtin functions */ + void d_cnjg(doublecomplex *, doublecomplex *); + + /* Local variables */ + integer i__, j; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern /* Subroutine */ int zgemm_(char *, char *, integer *, integer *, + integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *, ftnlen, ftnlen), zcopy_(integer *, doublecomplex *, + integer *, doublecomplex *, integer *), ztrmm_(char *, char *, + char *, char *, integer *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, ftnlen, + ftnlen, ftnlen, ftnlen), zlacgv_(integer *, doublecomplex *, + integer *); + char transt[1]; + + +/* -- LAPACK auxiliary routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Quick return if possible */ + + /* Parameter adjustments */ + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + work_dim1 = *ldwork; + work_offset = 1 + work_dim1; + work -= work_offset; + + /* Function Body */ + if (*m <= 0 || *n <= 0) { + return 0; + } + + if (lsame_(trans, (char *)"N", (ftnlen)1, (ftnlen)1)) { + *(unsigned char *)transt = 'C'; + } else { + *(unsigned char *)transt = 'N'; + } + + if (lsame_(storev, (char *)"C", (ftnlen)1, (ftnlen)1)) { + + if (lsame_(direct, (char *)"F", (ftnlen)1, (ftnlen)1)) { + +/* Let V = ( V1 ) (first K rows) */ +/* ( V2 ) */ +/* where V1 is unit lower triangular. */ + + if (lsame_(side, (char *)"L", (ftnlen)1, (ftnlen)1)) { + +/* Form H * C or H**H * C where C = ( C1 ) */ +/* ( C2 ) */ + +/* W := C**H * V = (C1**H * V1 + C2**H * V2) (stored in WORK) */ + +/* W := C1**H */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + zcopy_(n, &c__[j + c_dim1], ldc, &work[j * work_dim1 + 1], + &c__1); + zlacgv_(n, &work[j * work_dim1 + 1], &c__1); +/* L10: */ + } + +/* W := W * V1 */ + + ztrmm_((char *)"Right", (char *)"Lower", (char *)"No transpose", (char *)"Unit", n, k, &c_b1, + &v[v_offset], ldv, &work[work_offset], ldwork, ( + ftnlen)5, (ftnlen)5, (ftnlen)12, (ftnlen)4); + if (*m > *k) { + +/* W := W + C2**H * V2 */ + + i__1 = *m - *k; + zgemm_((char *)"Conjugate transpose", (char *)"No transpose", n, k, &i__1, + &c_b1, &c__[*k + 1 + c_dim1], ldc, &v[*k + 1 + + v_dim1], ldv, &c_b1, &work[work_offset], ldwork, ( + ftnlen)19, (ftnlen)12); + } + +/* W := W * T**H or W * T */ + + ztrmm_((char *)"Right", (char *)"Upper", transt, (char *)"Non-unit", n, k, &c_b1, &t[ + t_offset], ldt, &work[work_offset], ldwork, (ftnlen)5, + (ftnlen)5, (ftnlen)1, (ftnlen)8); + +/* C := C - V * W**H */ + + if (*m > *k) { + +/* C2 := C2 - V2 * W**H */ + + i__1 = *m - *k; + z__1.r = -1., z__1.i = -0.; + zgemm_((char *)"No transpose", (char *)"Conjugate transpose", &i__1, n, k, + &z__1, &v[*k + 1 + v_dim1], ldv, &work[ + work_offset], ldwork, &c_b1, &c__[*k + 1 + c_dim1] + , ldc, (ftnlen)12, (ftnlen)19); + } + +/* W := W * V1**H */ + + ztrmm_((char *)"Right", (char *)"Lower", (char *)"Conjugate transpose", (char *)"Unit", n, k, + &c_b1, &v[v_offset], ldv, &work[work_offset], ldwork, + (ftnlen)5, (ftnlen)5, (ftnlen)19, (ftnlen)4); + +/* C1 := C1 - W**H */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = j + i__ * c_dim1; + i__4 = j + i__ * c_dim1; + d_cnjg(&z__2, &work[i__ + j * work_dim1]); + z__1.r = c__[i__4].r - z__2.r, z__1.i = c__[i__4].i - + z__2.i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; +/* L20: */ + } +/* L30: */ + } + + } else if (lsame_(side, (char *)"R", (ftnlen)1, (ftnlen)1)) { + +/* Form C * H or C * H**H where C = ( C1 C2 ) */ + +/* W := C * V = (C1*V1 + C2*V2) (stored in WORK) */ + +/* W := C1 */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + zcopy_(m, &c__[j * c_dim1 + 1], &c__1, &work[j * + work_dim1 + 1], &c__1); +/* L40: */ + } + +/* W := W * V1 */ + + ztrmm_((char *)"Right", (char *)"Lower", (char *)"No transpose", (char *)"Unit", m, k, &c_b1, + &v[v_offset], ldv, &work[work_offset], ldwork, ( + ftnlen)5, (ftnlen)5, (ftnlen)12, (ftnlen)4); + if (*n > *k) { + +/* W := W + C2 * V2 */ + + i__1 = *n - *k; + zgemm_((char *)"No transpose", (char *)"No transpose", m, k, &i__1, &c_b1, + &c__[(*k + 1) * c_dim1 + 1], ldc, &v[*k + 1 + + v_dim1], ldv, &c_b1, &work[work_offset], ldwork, ( + ftnlen)12, (ftnlen)12); + } + +/* W := W * T or W * T**H */ + + ztrmm_((char *)"Right", (char *)"Upper", trans, (char *)"Non-unit", m, k, &c_b1, &t[ + t_offset], ldt, &work[work_offset], ldwork, (ftnlen)5, + (ftnlen)5, (ftnlen)1, (ftnlen)8); + +/* C := C - W * V**H */ + + if (*n > *k) { + +/* C2 := C2 - W * V2**H */ + + i__1 = *n - *k; + z__1.r = -1., z__1.i = -0.; + zgemm_((char *)"No transpose", (char *)"Conjugate transpose", m, &i__1, k, + &z__1, &work[work_offset], ldwork, &v[*k + 1 + + v_dim1], ldv, &c_b1, &c__[(*k + 1) * c_dim1 + 1], + ldc, (ftnlen)12, (ftnlen)19); + } + +/* W := W * V1**H */ + + ztrmm_((char *)"Right", (char *)"Lower", (char *)"Conjugate transpose", (char *)"Unit", m, k, + &c_b1, &v[v_offset], ldv, &work[work_offset], ldwork, + (ftnlen)5, (ftnlen)5, (ftnlen)19, (ftnlen)4); + +/* C1 := C1 - W */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + i__5 = i__ + j * work_dim1; + z__1.r = c__[i__4].r - work[i__5].r, z__1.i = c__[ + i__4].i - work[i__5].i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; +/* L50: */ + } +/* L60: */ + } + } + + } else { + +/* Let V = ( V1 ) */ +/* ( V2 ) (last K rows) */ +/* where V2 is unit upper triangular. */ + + if (lsame_(side, (char *)"L", (ftnlen)1, (ftnlen)1)) { + +/* Form H * C or H**H * C where C = ( C1 ) */ +/* ( C2 ) */ + +/* W := C**H * V = (C1**H * V1 + C2**H * V2) (stored in WORK) */ + +/* W := C2**H */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + zcopy_(n, &c__[*m - *k + j + c_dim1], ldc, &work[j * + work_dim1 + 1], &c__1); + zlacgv_(n, &work[j * work_dim1 + 1], &c__1); +/* L70: */ + } + +/* W := W * V2 */ + + ztrmm_((char *)"Right", (char *)"Upper", (char *)"No transpose", (char *)"Unit", n, k, &c_b1, + &v[*m - *k + 1 + v_dim1], ldv, &work[work_offset], + ldwork, (ftnlen)5, (ftnlen)5, (ftnlen)12, (ftnlen)4); + if (*m > *k) { + +/* W := W + C1**H * V1 */ + + i__1 = *m - *k; + zgemm_((char *)"Conjugate transpose", (char *)"No transpose", n, k, &i__1, + &c_b1, &c__[c_offset], ldc, &v[v_offset], ldv, & + c_b1, &work[work_offset], ldwork, (ftnlen)19, ( + ftnlen)12); + } + +/* W := W * T**H or W * T */ + + ztrmm_((char *)"Right", (char *)"Lower", transt, (char *)"Non-unit", n, k, &c_b1, &t[ + t_offset], ldt, &work[work_offset], ldwork, (ftnlen)5, + (ftnlen)5, (ftnlen)1, (ftnlen)8); + +/* C := C - V * W**H */ + + if (*m > *k) { + +/* C1 := C1 - V1 * W**H */ + + i__1 = *m - *k; + z__1.r = -1., z__1.i = -0.; + zgemm_((char *)"No transpose", (char *)"Conjugate transpose", &i__1, n, k, + &z__1, &v[v_offset], ldv, &work[work_offset], + ldwork, &c_b1, &c__[c_offset], ldc, (ftnlen)12, ( + ftnlen)19); + } + +/* W := W * V2**H */ + + ztrmm_((char *)"Right", (char *)"Upper", (char *)"Conjugate transpose", (char *)"Unit", n, k, + &c_b1, &v[*m - *k + 1 + v_dim1], ldv, &work[ + work_offset], ldwork, (ftnlen)5, (ftnlen)5, (ftnlen) + 19, (ftnlen)4); + +/* C2 := C2 - W**H */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = *m - *k + j + i__ * c_dim1; + i__4 = *m - *k + j + i__ * c_dim1; + d_cnjg(&z__2, &work[i__ + j * work_dim1]); + z__1.r = c__[i__4].r - z__2.r, z__1.i = c__[i__4].i - + z__2.i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; +/* L80: */ + } +/* L90: */ + } + + } else if (lsame_(side, (char *)"R", (ftnlen)1, (ftnlen)1)) { + +/* Form C * H or C * H**H where C = ( C1 C2 ) */ + +/* W := C * V = (C1*V1 + C2*V2) (stored in WORK) */ + +/* W := C2 */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + zcopy_(m, &c__[(*n - *k + j) * c_dim1 + 1], &c__1, &work[ + j * work_dim1 + 1], &c__1); +/* L100: */ + } + +/* W := W * V2 */ + + ztrmm_((char *)"Right", (char *)"Upper", (char *)"No transpose", (char *)"Unit", m, k, &c_b1, + &v[*n - *k + 1 + v_dim1], ldv, &work[work_offset], + ldwork, (ftnlen)5, (ftnlen)5, (ftnlen)12, (ftnlen)4); + if (*n > *k) { + +/* W := W + C1 * V1 */ + + i__1 = *n - *k; + zgemm_((char *)"No transpose", (char *)"No transpose", m, k, &i__1, &c_b1, + &c__[c_offset], ldc, &v[v_offset], ldv, &c_b1, & + work[work_offset], ldwork, (ftnlen)12, (ftnlen)12) + ; + } + +/* W := W * T or W * T**H */ + + ztrmm_((char *)"Right", (char *)"Lower", trans, (char *)"Non-unit", m, k, &c_b1, &t[ + t_offset], ldt, &work[work_offset], ldwork, (ftnlen)5, + (ftnlen)5, (ftnlen)1, (ftnlen)8); + +/* C := C - W * V**H */ + + if (*n > *k) { + +/* C1 := C1 - W * V1**H */ + + i__1 = *n - *k; + z__1.r = -1., z__1.i = -0.; + zgemm_((char *)"No transpose", (char *)"Conjugate transpose", m, &i__1, k, + &z__1, &work[work_offset], ldwork, &v[v_offset], + ldv, &c_b1, &c__[c_offset], ldc, (ftnlen)12, ( + ftnlen)19); + } + +/* W := W * V2**H */ + + ztrmm_((char *)"Right", (char *)"Upper", (char *)"Conjugate transpose", (char *)"Unit", m, k, + &c_b1, &v[*n - *k + 1 + v_dim1], ldv, &work[ + work_offset], ldwork, (ftnlen)5, (ftnlen)5, (ftnlen) + 19, (ftnlen)4); + +/* C2 := C2 - W */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + (*n - *k + j) * c_dim1; + i__4 = i__ + (*n - *k + j) * c_dim1; + i__5 = i__ + j * work_dim1; + z__1.r = c__[i__4].r - work[i__5].r, z__1.i = c__[ + i__4].i - work[i__5].i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; +/* L110: */ + } +/* L120: */ + } + } + } + + } else if (lsame_(storev, (char *)"R", (ftnlen)1, (ftnlen)1)) { + + if (lsame_(direct, (char *)"F", (ftnlen)1, (ftnlen)1)) { + +/* Let V = ( V1 V2 ) (V1: first K columns) */ +/* where V1 is unit upper triangular. */ + + if (lsame_(side, (char *)"L", (ftnlen)1, (ftnlen)1)) { + +/* Form H * C or H**H * C where C = ( C1 ) */ +/* ( C2 ) */ + +/* W := C**H * V**H = (C1**H * V1**H + C2**H * V2**H) (stored in WORK) */ + +/* W := C1**H */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + zcopy_(n, &c__[j + c_dim1], ldc, &work[j * work_dim1 + 1], + &c__1); + zlacgv_(n, &work[j * work_dim1 + 1], &c__1); +/* L130: */ + } + +/* W := W * V1**H */ + + ztrmm_((char *)"Right", (char *)"Upper", (char *)"Conjugate transpose", (char *)"Unit", n, k, + &c_b1, &v[v_offset], ldv, &work[work_offset], ldwork, + (ftnlen)5, (ftnlen)5, (ftnlen)19, (ftnlen)4); + if (*m > *k) { + +/* W := W + C2**H * V2**H */ + + i__1 = *m - *k; + zgemm_((char *)"Conjugate transpose", (char *)"Conjugate transpose", n, k, + &i__1, &c_b1, &c__[*k + 1 + c_dim1], ldc, &v[(*k + + 1) * v_dim1 + 1], ldv, &c_b1, &work[work_offset] + , ldwork, (ftnlen)19, (ftnlen)19); + } + +/* W := W * T**H or W * T */ + + ztrmm_((char *)"Right", (char *)"Upper", transt, (char *)"Non-unit", n, k, &c_b1, &t[ + t_offset], ldt, &work[work_offset], ldwork, (ftnlen)5, + (ftnlen)5, (ftnlen)1, (ftnlen)8); + +/* C := C - V**H * W**H */ + + if (*m > *k) { + +/* C2 := C2 - V2**H * W**H */ + + i__1 = *m - *k; + z__1.r = -1., z__1.i = -0.; + zgemm_((char *)"Conjugate transpose", (char *)"Conjugate transpose", & + i__1, n, k, &z__1, &v[(*k + 1) * v_dim1 + 1], ldv, + &work[work_offset], ldwork, &c_b1, &c__[*k + 1 + + c_dim1], ldc, (ftnlen)19, (ftnlen)19); + } + +/* W := W * V1 */ + + ztrmm_((char *)"Right", (char *)"Upper", (char *)"No transpose", (char *)"Unit", n, k, &c_b1, + &v[v_offset], ldv, &work[work_offset], ldwork, ( + ftnlen)5, (ftnlen)5, (ftnlen)12, (ftnlen)4); + +/* C1 := C1 - W**H */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = j + i__ * c_dim1; + i__4 = j + i__ * c_dim1; + d_cnjg(&z__2, &work[i__ + j * work_dim1]); + z__1.r = c__[i__4].r - z__2.r, z__1.i = c__[i__4].i - + z__2.i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; +/* L140: */ + } +/* L150: */ + } + + } else if (lsame_(side, (char *)"R", (ftnlen)1, (ftnlen)1)) { + +/* Form C * H or C * H**H where C = ( C1 C2 ) */ + +/* W := C * V**H = (C1*V1**H + C2*V2**H) (stored in WORK) */ + +/* W := C1 */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + zcopy_(m, &c__[j * c_dim1 + 1], &c__1, &work[j * + work_dim1 + 1], &c__1); +/* L160: */ + } + +/* W := W * V1**H */ + + ztrmm_((char *)"Right", (char *)"Upper", (char *)"Conjugate transpose", (char *)"Unit", m, k, + &c_b1, &v[v_offset], ldv, &work[work_offset], ldwork, + (ftnlen)5, (ftnlen)5, (ftnlen)19, (ftnlen)4); + if (*n > *k) { + +/* W := W + C2 * V2**H */ + + i__1 = *n - *k; + zgemm_((char *)"No transpose", (char *)"Conjugate transpose", m, k, &i__1, + &c_b1, &c__[(*k + 1) * c_dim1 + 1], ldc, &v[(*k + + 1) * v_dim1 + 1], ldv, &c_b1, &work[work_offset] + , ldwork, (ftnlen)12, (ftnlen)19); + } + +/* W := W * T or W * T**H */ + + ztrmm_((char *)"Right", (char *)"Upper", trans, (char *)"Non-unit", m, k, &c_b1, &t[ + t_offset], ldt, &work[work_offset], ldwork, (ftnlen)5, + (ftnlen)5, (ftnlen)1, (ftnlen)8); + +/* C := C - W * V */ + + if (*n > *k) { + +/* C2 := C2 - W * V2 */ + + i__1 = *n - *k; + z__1.r = -1., z__1.i = -0.; + zgemm_((char *)"No transpose", (char *)"No transpose", m, &i__1, k, &z__1, + &work[work_offset], ldwork, &v[(*k + 1) * v_dim1 + + 1], ldv, &c_b1, &c__[(*k + 1) * c_dim1 + 1], + ldc, (ftnlen)12, (ftnlen)12); + } + +/* W := W * V1 */ + + ztrmm_((char *)"Right", (char *)"Upper", (char *)"No transpose", (char *)"Unit", m, k, &c_b1, + &v[v_offset], ldv, &work[work_offset], ldwork, ( + ftnlen)5, (ftnlen)5, (ftnlen)12, (ftnlen)4); + +/* C1 := C1 - W */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + i__5 = i__ + j * work_dim1; + z__1.r = c__[i__4].r - work[i__5].r, z__1.i = c__[ + i__4].i - work[i__5].i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; +/* L170: */ + } +/* L180: */ + } + + } + + } else { + +/* Let V = ( V1 V2 ) (V2: last K columns) */ +/* where V2 is unit lower triangular. */ + + if (lsame_(side, (char *)"L", (ftnlen)1, (ftnlen)1)) { + +/* Form H * C or H**H * C where C = ( C1 ) */ +/* ( C2 ) */ + +/* W := C**H * V**H = (C1**H * V1**H + C2**H * V2**H) (stored in WORK) */ + +/* W := C2**H */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + zcopy_(n, &c__[*m - *k + j + c_dim1], ldc, &work[j * + work_dim1 + 1], &c__1); + zlacgv_(n, &work[j * work_dim1 + 1], &c__1); +/* L190: */ + } + +/* W := W * V2**H */ + + ztrmm_((char *)"Right", (char *)"Lower", (char *)"Conjugate transpose", (char *)"Unit", n, k, + &c_b1, &v[(*m - *k + 1) * v_dim1 + 1], ldv, &work[ + work_offset], ldwork, (ftnlen)5, (ftnlen)5, (ftnlen) + 19, (ftnlen)4); + if (*m > *k) { + +/* W := W + C1**H * V1**H */ + + i__1 = *m - *k; + zgemm_((char *)"Conjugate transpose", (char *)"Conjugate transpose", n, k, + &i__1, &c_b1, &c__[c_offset], ldc, &v[v_offset], + ldv, &c_b1, &work[work_offset], ldwork, (ftnlen) + 19, (ftnlen)19); + } + +/* W := W * T**H or W * T */ + + ztrmm_((char *)"Right", (char *)"Lower", transt, (char *)"Non-unit", n, k, &c_b1, &t[ + t_offset], ldt, &work[work_offset], ldwork, (ftnlen)5, + (ftnlen)5, (ftnlen)1, (ftnlen)8); + +/* C := C - V**H * W**H */ + + if (*m > *k) { + +/* C1 := C1 - V1**H * W**H */ + + i__1 = *m - *k; + z__1.r = -1., z__1.i = -0.; + zgemm_((char *)"Conjugate transpose", (char *)"Conjugate transpose", & + i__1, n, k, &z__1, &v[v_offset], ldv, &work[ + work_offset], ldwork, &c_b1, &c__[c_offset], ldc, + (ftnlen)19, (ftnlen)19); + } + +/* W := W * V2 */ + + ztrmm_((char *)"Right", (char *)"Lower", (char *)"No transpose", (char *)"Unit", n, k, &c_b1, + &v[(*m - *k + 1) * v_dim1 + 1], ldv, &work[ + work_offset], ldwork, (ftnlen)5, (ftnlen)5, (ftnlen) + 12, (ftnlen)4); + +/* C2 := C2 - W**H */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = *m - *k + j + i__ * c_dim1; + i__4 = *m - *k + j + i__ * c_dim1; + d_cnjg(&z__2, &work[i__ + j * work_dim1]); + z__1.r = c__[i__4].r - z__2.r, z__1.i = c__[i__4].i - + z__2.i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; +/* L200: */ + } +/* L210: */ + } + + } else if (lsame_(side, (char *)"R", (ftnlen)1, (ftnlen)1)) { + +/* Form C * H or C * H**H where C = ( C1 C2 ) */ + +/* W := C * V**H = (C1*V1**H + C2*V2**H) (stored in WORK) */ + +/* W := C2 */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + zcopy_(m, &c__[(*n - *k + j) * c_dim1 + 1], &c__1, &work[ + j * work_dim1 + 1], &c__1); +/* L220: */ + } + +/* W := W * V2**H */ + + ztrmm_((char *)"Right", (char *)"Lower", (char *)"Conjugate transpose", (char *)"Unit", m, k, + &c_b1, &v[(*n - *k + 1) * v_dim1 + 1], ldv, &work[ + work_offset], ldwork, (ftnlen)5, (ftnlen)5, (ftnlen) + 19, (ftnlen)4); + if (*n > *k) { + +/* W := W + C1 * V1**H */ + + i__1 = *n - *k; + zgemm_((char *)"No transpose", (char *)"Conjugate transpose", m, k, &i__1, + &c_b1, &c__[c_offset], ldc, &v[v_offset], ldv, & + c_b1, &work[work_offset], ldwork, (ftnlen)12, ( + ftnlen)19); + } + +/* W := W * T or W * T**H */ + + ztrmm_((char *)"Right", (char *)"Lower", trans, (char *)"Non-unit", m, k, &c_b1, &t[ + t_offset], ldt, &work[work_offset], ldwork, (ftnlen)5, + (ftnlen)5, (ftnlen)1, (ftnlen)8); + +/* C := C - W * V */ + + if (*n > *k) { + +/* C1 := C1 - W * V1 */ + + i__1 = *n - *k; + z__1.r = -1., z__1.i = -0.; + zgemm_((char *)"No transpose", (char *)"No transpose", m, &i__1, k, &z__1, + &work[work_offset], ldwork, &v[v_offset], ldv, & + c_b1, &c__[c_offset], ldc, (ftnlen)12, (ftnlen)12) + ; + } + +/* W := W * V2 */ + + ztrmm_((char *)"Right", (char *)"Lower", (char *)"No transpose", (char *)"Unit", m, k, &c_b1, + &v[(*n - *k + 1) * v_dim1 + 1], ldv, &work[ + work_offset], ldwork, (ftnlen)5, (ftnlen)5, (ftnlen) + 12, (ftnlen)4); + +/* C1 := C1 - W */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + (*n - *k + j) * c_dim1; + i__4 = i__ + (*n - *k + j) * c_dim1; + i__5 = i__ + j * work_dim1; + z__1.r = c__[i__4].r - work[i__5].r, z__1.i = c__[ + i__4].i - work[i__5].i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; +/* L230: */ + } +/* L240: */ + } + + } + + } + } + + return 0; + +/* End of ZLARFB */ + +} /* zlarfb_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/zlarfg.cpp b/lib/linalg/zlarfg.cpp new file mode 100644 index 0000000000..e3d58be892 --- /dev/null +++ b/lib/linalg/zlarfg.cpp @@ -0,0 +1,261 @@ +/* fortran/zlarfg.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static doublecomplex c_b5 = {1.,0.}; + +/* > \brief \b ZLARFG generates an elementary reflector (Householder matrix). */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZLARFG + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZLARFG( N, ALPHA, X, INCX, TAU ) */ + +/* .. Scalar Arguments .. */ +/* INTEGER INCX, N */ +/* COMPLEX*16 ALPHA, TAU */ +/* .. */ +/* .. Array Arguments .. */ +/* COMPLEX*16 X( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZLARFG generates a complex elementary reflector H of order n, such */ +/* > that */ +/* > */ +/* > H**H * ( alpha ) = ( beta ), H**H * H = I. */ +/* > ( x ) ( 0 ) */ +/* > */ +/* > where alpha and beta are scalars, with beta real, and x is an */ +/* > (n-1)-element complex vector. H is represented in the form */ +/* > */ +/* > H = I - tau * ( 1 ) * ( 1 v**H ) , */ +/* > ( v ) */ +/* > */ +/* > where tau is a complex scalar and v is a complex (n-1)-element */ +/* > vector. Note that H is not hermitian. */ +/* > */ +/* > If the elements of x are all zero and alpha is real, then tau = 0 */ +/* > and H is taken to be the unit matrix. */ +/* > */ +/* > Otherwise 1 <= real(tau) <= 2 and abs(tau-1) <= 1 . */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the elementary reflector. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] ALPHA */ +/* > \verbatim */ +/* > ALPHA is COMPLEX*16 */ +/* > On entry, the value alpha. */ +/* > On exit, it is overwritten with the value beta. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] X */ +/* > \verbatim */ +/* > X is COMPLEX*16 array, dimension */ +/* > (1+(N-2)*abs(INCX)) */ +/* > On entry, the vector x. */ +/* > On exit, it is overwritten with the vector v. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] INCX */ +/* > \verbatim */ +/* > INCX is INTEGER */ +/* > The increment between elements of X. INCX > 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] TAU */ +/* > \verbatim */ +/* > TAU is COMPLEX*16 */ +/* > The value tau. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup complex16OTHERauxiliary */ + +/* ===================================================================== */ +/* Subroutine */ int zlarfg_(integer *n, doublecomplex *alpha, doublecomplex * + x, integer *incx, doublecomplex *tau) +{ + /* System generated locals */ + integer i__1; + doublereal d__1, d__2; + doublecomplex z__1, z__2; + + /* Builtin functions */ + double d_imag(doublecomplex *), d_sign(doublereal *, doublereal *); + + /* Local variables */ + integer j, knt; + doublereal beta, alphi, alphr; + extern /* Subroutine */ int zscal_(integer *, doublecomplex *, + doublecomplex *, integer *); + doublereal xnorm; + extern doublereal dlapy3_(doublereal *, doublereal *, doublereal *), + dznrm2_(integer *, doublecomplex *, integer *), dlamch_(char *, + ftnlen); + doublereal safmin; + extern /* Subroutine */ int zdscal_(integer *, doublereal *, + doublecomplex *, integer *); + doublereal rsafmn; + extern /* Double Complex */ VOID zladiv_(doublecomplex *, doublecomplex *, + doublecomplex *); + + +/* -- LAPACK auxiliary routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments */ + --x; + + /* Function Body */ + if (*n <= 0) { + tau->r = 0., tau->i = 0.; + return 0; + } + + i__1 = *n - 1; + xnorm = dznrm2_(&i__1, &x[1], incx); + alphr = alpha->r; + alphi = d_imag(alpha); + + if (xnorm == 0. && alphi == 0.) { + +/* H = I */ + + tau->r = 0., tau->i = 0.; + } else { + +/* general case */ + + d__1 = dlapy3_(&alphr, &alphi, &xnorm); + beta = -d_sign(&d__1, &alphr); + safmin = dlamch_((char *)"S", (ftnlen)1) / dlamch_((char *)"E", (ftnlen)1); + rsafmn = 1. / safmin; + + knt = 0; + if (abs(beta) < safmin) { + +/* XNORM, BETA may be inaccurate; scale X and recompute them */ + +L10: + ++knt; + i__1 = *n - 1; + zdscal_(&i__1, &rsafmn, &x[1], incx); + beta *= rsafmn; + alphi *= rsafmn; + alphr *= rsafmn; + if (abs(beta) < safmin && knt < 20) { + goto L10; + } + +/* New BETA is at most 1, at least SAFMIN */ + + i__1 = *n - 1; + xnorm = dznrm2_(&i__1, &x[1], incx); + z__1.r = alphr, z__1.i = alphi; + alpha->r = z__1.r, alpha->i = z__1.i; + d__1 = dlapy3_(&alphr, &alphi, &xnorm); + beta = -d_sign(&d__1, &alphr); + } + d__1 = (beta - alphr) / beta; + d__2 = -alphi / beta; + z__1.r = d__1, z__1.i = d__2; + tau->r = z__1.r, tau->i = z__1.i; + z__2.r = alpha->r - beta, z__2.i = alpha->i; + zladiv_(&z__1, &c_b5, &z__2); + alpha->r = z__1.r, alpha->i = z__1.i; + i__1 = *n - 1; + zscal_(&i__1, alpha, &x[1], incx); + +/* If ALPHA is subnormal, it may lose relative accuracy */ + + i__1 = knt; + for (j = 1; j <= i__1; ++j) { + beta *= safmin; +/* L20: */ + } + alpha->r = beta, alpha->i = 0.; + } + + return 0; + +/* End of ZLARFG */ + +} /* zlarfg_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/zlarft.cpp b/lib/linalg/zlarft.cpp new file mode 100644 index 0000000000..d907f6792d --- /dev/null +++ b/lib/linalg/zlarft.cpp @@ -0,0 +1,465 @@ +/* static/zlarft.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static doublecomplex c_b1 = {1.,0.}; +static integer c__1 = 1; + +/* > \brief \b ZLARFT forms the triangular factor T of a block reflector H = I - vtvH */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZLARFT + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) */ + +/* .. Scalar Arguments .. */ +/* CHARACTER DIRECT, STOREV */ +/* INTEGER K, LDT, LDV, N */ +/* .. */ +/* .. Array Arguments .. */ +/* COMPLEX*16 T( LDT, * ), TAU( * ), V( LDV, * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZLARFT forms the triangular factor T of a complex block reflector H */ +/* > of order n, which is defined as a product of k elementary reflectors. */ +/* > */ +/* > If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; */ +/* > */ +/* > If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. */ +/* > */ +/* > If STOREV = 'C', the vector which defines the elementary reflector */ +/* > H(i) is stored in the i-th column of the array V, and */ +/* > */ +/* > H = I - V * T * V**H */ +/* > */ +/* > If STOREV = 'R', the vector which defines the elementary reflector */ +/* > H(i) is stored in the i-th row of the array V, and */ +/* > */ +/* > H = I - V**H * T * V */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] DIRECT */ +/* > \verbatim */ +/* > DIRECT is CHARACTER*1 */ +/* > Specifies the order in which the elementary reflectors are */ +/* > multiplied to form the block reflector: */ +/* > = 'F': H = H(1) H(2) . . . H(k) (Forward) */ +/* > = 'B': H = H(k) . . . H(2) H(1) (Backward) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] STOREV */ +/* > \verbatim */ +/* > STOREV is CHARACTER*1 */ +/* > Specifies how the vectors which define the elementary */ +/* > reflectors are stored (see also Further Details): */ +/* > = 'C': columnwise */ +/* > = 'R': rowwise */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the block reflector H. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] K */ +/* > \verbatim */ +/* > K is INTEGER */ +/* > The order of the triangular factor T (= the number of */ +/* > elementary reflectors). K >= 1. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] V */ +/* > \verbatim */ +/* > V is COMPLEX*16 array, dimension */ +/* > (LDV,K) if STOREV = 'C' */ +/* > (LDV,N) if STOREV = 'R' */ +/* > The matrix V. See further details. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDV */ +/* > \verbatim */ +/* > LDV is INTEGER */ +/* > The leading dimension of the array V. */ +/* > If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TAU */ +/* > \verbatim */ +/* > TAU is COMPLEX*16 array, dimension (K) */ +/* > TAU(i) must contain the scalar factor of the elementary */ +/* > reflector H(i). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] T */ +/* > \verbatim */ +/* > T is COMPLEX*16 array, dimension (LDT,K) */ +/* > The k by k triangular factor T of the block reflector. */ +/* > If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is */ +/* > lower triangular. The rest of the array is not used. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDT */ +/* > \verbatim */ +/* > LDT is INTEGER */ +/* > The leading dimension of the array T. LDT >= K. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup complex16OTHERauxiliary */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The shape of the matrix V and the storage of the vectors which define */ +/* > the H(i) is best illustrated by the following example with n = 5 and */ +/* > k = 3. The elements equal to 1 are not stored. */ +/* > */ +/* > DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': */ +/* > */ +/* > V = ( 1 ) V = ( 1 v1 v1 v1 v1 ) */ +/* > ( v1 1 ) ( 1 v2 v2 v2 ) */ +/* > ( v1 v2 1 ) ( 1 v3 v3 ) */ +/* > ( v1 v2 v3 ) */ +/* > ( v1 v2 v3 ) */ +/* > */ +/* > DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': */ +/* > */ +/* > V = ( v1 v2 v3 ) V = ( v1 v1 1 ) */ +/* > ( v1 v2 v3 ) ( v2 v2 v2 1 ) */ +/* > ( 1 v2 v3 ) ( v3 v3 v3 v3 1 ) */ +/* > ( 1 v3 ) */ +/* > ( 1 ) */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int zlarft_(char *direct, char *storev, integer *n, integer * + k, doublecomplex *v, integer *ldv, doublecomplex *tau, doublecomplex * + t, integer *ldt, ftnlen direct_len, ftnlen storev_len) +{ + /* System generated locals */ + integer t_dim1, t_offset, v_dim1, v_offset, i__1, i__2, i__3, i__4, i__5; + doublecomplex z__1, z__2, z__3; + + /* Builtin functions */ + void d_cnjg(doublecomplex *, doublecomplex *); + + /* Local variables */ + integer i__, j, prevlastv; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern /* Subroutine */ int zgemm_(char *, char *, integer *, integer *, + integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *, ftnlen, ftnlen), zgemv_(char *, integer *, integer *, + doublecomplex *, doublecomplex *, integer *, doublecomplex *, + integer *, doublecomplex *, doublecomplex *, integer *, ftnlen); + integer lastv; + extern /* Subroutine */ int ztrmv_(char *, char *, char *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *, ftnlen, + ftnlen, ftnlen); + + +/* -- LAPACK auxiliary routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Quick return if possible */ + + /* Parameter adjustments */ + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + --tau; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + + /* Function Body */ + if (*n == 0) { + return 0; + } + + if (lsame_(direct, (char *)"F", (ftnlen)1, (ftnlen)1)) { + prevlastv = *n; + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + prevlastv = max(prevlastv,i__); + i__2 = i__; + if (tau[i__2].r == 0. && tau[i__2].i == 0.) { + +/* H(i) = I */ + + i__2 = i__; + for (j = 1; j <= i__2; ++j) { + i__3 = j + i__ * t_dim1; + t[i__3].r = 0., t[i__3].i = 0.; + } + } else { + +/* general case */ + + if (lsame_(storev, (char *)"C", (ftnlen)1, (ftnlen)1)) { +/* Skip any trailing zeros. */ + i__2 = i__ + 1; + for (lastv = *n; lastv >= i__2; --lastv) { + i__3 = lastv + i__ * v_dim1; + if (v[i__3].r != 0. || v[i__3].i != 0.) { + goto L220; + } + } +L220: + i__2 = i__ - 1; + for (j = 1; j <= i__2; ++j) { + i__3 = j + i__ * t_dim1; + i__4 = i__; + z__2.r = -tau[i__4].r, z__2.i = -tau[i__4].i; + d_cnjg(&z__3, &v[i__ + j * v_dim1]); + z__1.r = z__2.r * z__3.r - z__2.i * z__3.i, z__1.i = + z__2.r * z__3.i + z__2.i * z__3.r; + t[i__3].r = z__1.r, t[i__3].i = z__1.i; + } + j = min(lastv,prevlastv); + +/* T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)**H * V(i:j,i) */ + + i__2 = j - i__; + i__3 = i__ - 1; + i__4 = i__; + z__1.r = -tau[i__4].r, z__1.i = -tau[i__4].i; + zgemv_((char *)"Conjugate transpose", &i__2, &i__3, &z__1, &v[i__ + + 1 + v_dim1], ldv, &v[i__ + 1 + i__ * v_dim1], & + c__1, &c_b1, &t[i__ * t_dim1 + 1], &c__1, (ftnlen) + 19); + } else { +/* Skip any trailing zeros. */ + i__2 = i__ + 1; + for (lastv = *n; lastv >= i__2; --lastv) { + i__3 = i__ + lastv * v_dim1; + if (v[i__3].r != 0. || v[i__3].i != 0.) { + goto L236; + } + } +L236: + i__2 = i__ - 1; + for (j = 1; j <= i__2; ++j) { + i__3 = j + i__ * t_dim1; + i__4 = i__; + z__2.r = -tau[i__4].r, z__2.i = -tau[i__4].i; + i__5 = j + i__ * v_dim1; + z__1.r = z__2.r * v[i__5].r - z__2.i * v[i__5].i, + z__1.i = z__2.r * v[i__5].i + z__2.i * v[i__5] + .r; + t[i__3].r = z__1.r, t[i__3].i = z__1.i; + } + j = min(lastv,prevlastv); + +/* T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)**H */ + + i__2 = i__ - 1; + i__3 = j - i__; + i__4 = i__; + z__1.r = -tau[i__4].r, z__1.i = -tau[i__4].i; + zgemm_((char *)"N", (char *)"C", &i__2, &c__1, &i__3, &z__1, &v[(i__ + 1) + * v_dim1 + 1], ldv, &v[i__ + (i__ + 1) * v_dim1], + ldv, &c_b1, &t[i__ * t_dim1 + 1], ldt, (ftnlen)1, + (ftnlen)1); + } + +/* T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) */ + + i__2 = i__ - 1; + ztrmv_((char *)"Upper", (char *)"No transpose", (char *)"Non-unit", &i__2, &t[ + t_offset], ldt, &t[i__ * t_dim1 + 1], &c__1, (ftnlen) + 5, (ftnlen)12, (ftnlen)8); + i__2 = i__ + i__ * t_dim1; + i__3 = i__; + t[i__2].r = tau[i__3].r, t[i__2].i = tau[i__3].i; + if (i__ > 1) { + prevlastv = max(prevlastv,lastv); + } else { + prevlastv = lastv; + } + } + } + } else { + prevlastv = 1; + for (i__ = *k; i__ >= 1; --i__) { + i__1 = i__; + if (tau[i__1].r == 0. && tau[i__1].i == 0.) { + +/* H(i) = I */ + + i__1 = *k; + for (j = i__; j <= i__1; ++j) { + i__2 = j + i__ * t_dim1; + t[i__2].r = 0., t[i__2].i = 0.; + } + } else { + +/* general case */ + + if (i__ < *k) { + if (lsame_(storev, (char *)"C", (ftnlen)1, (ftnlen)1)) { +/* Skip any leading zeros. */ + i__1 = i__ - 1; + for (lastv = 1; lastv <= i__1; ++lastv) { + i__2 = lastv + i__ * v_dim1; + if (v[i__2].r != 0. || v[i__2].i != 0.) { + goto L281; + } + } +L281: + i__1 = *k; + for (j = i__ + 1; j <= i__1; ++j) { + i__2 = j + i__ * t_dim1; + i__3 = i__; + z__2.r = -tau[i__3].r, z__2.i = -tau[i__3].i; + d_cnjg(&z__3, &v[*n - *k + i__ + j * v_dim1]); + z__1.r = z__2.r * z__3.r - z__2.i * z__3.i, + z__1.i = z__2.r * z__3.i + z__2.i * + z__3.r; + t[i__2].r = z__1.r, t[i__2].i = z__1.i; + } + j = max(lastv,prevlastv); + +/* T(i+1:k,i) = -tau(i) * V(j:n-k+i,i+1:k)**H * V(j:n-k+i,i) */ + + i__1 = *n - *k + i__ - j; + i__2 = *k - i__; + i__3 = i__; + z__1.r = -tau[i__3].r, z__1.i = -tau[i__3].i; + zgemv_((char *)"Conjugate transpose", &i__1, &i__2, &z__1, &v[ + j + (i__ + 1) * v_dim1], ldv, &v[j + i__ * + v_dim1], &c__1, &c_b1, &t[i__ + 1 + i__ * + t_dim1], &c__1, (ftnlen)19); + } else { +/* Skip any leading zeros. */ + i__1 = i__ - 1; + for (lastv = 1; lastv <= i__1; ++lastv) { + i__2 = i__ + lastv * v_dim1; + if (v[i__2].r != 0. || v[i__2].i != 0.) { + goto L297; + } + } +L297: + i__1 = *k; + for (j = i__ + 1; j <= i__1; ++j) { + i__2 = j + i__ * t_dim1; + i__3 = i__; + z__2.r = -tau[i__3].r, z__2.i = -tau[i__3].i; + i__4 = j + (*n - *k + i__) * v_dim1; + z__1.r = z__2.r * v[i__4].r - z__2.i * v[i__4].i, + z__1.i = z__2.r * v[i__4].i + z__2.i * v[ + i__4].r; + t[i__2].r = z__1.r, t[i__2].i = z__1.i; + } + j = max(lastv,prevlastv); + +/* T(i+1:k,i) = -tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)**H */ + + i__1 = *k - i__; + i__2 = *n - *k + i__ - j; + i__3 = i__; + z__1.r = -tau[i__3].r, z__1.i = -tau[i__3].i; + zgemm_((char *)"N", (char *)"C", &i__1, &c__1, &i__2, &z__1, &v[i__ + + 1 + j * v_dim1], ldv, &v[i__ + j * v_dim1], + ldv, &c_b1, &t[i__ + 1 + i__ * t_dim1], ldt, ( + ftnlen)1, (ftnlen)1); + } + +/* T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) */ + + i__1 = *k - i__; + ztrmv_((char *)"Lower", (char *)"No transpose", (char *)"Non-unit", &i__1, &t[i__ + + 1 + (i__ + 1) * t_dim1], ldt, &t[i__ + 1 + i__ * + t_dim1], &c__1, (ftnlen)5, (ftnlen)12, (ftnlen)8) + ; + if (i__ > 1) { + prevlastv = min(prevlastv,lastv); + } else { + prevlastv = lastv; + } + } + i__1 = i__ + i__ * t_dim1; + i__2 = i__; + t[i__1].r = tau[i__2].r, t[i__1].i = tau[i__2].i; + } + } + } + return 0; + +/* End of ZLARFT */ + +} /* zlarft_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/zlascl.cpp b/lib/linalg/zlascl.cpp new file mode 100644 index 0000000000..337dd64793 --- /dev/null +++ b/lib/linalg/zlascl.cpp @@ -0,0 +1,470 @@ +/* fortran/zlascl.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* > \brief \b ZLASCL multiplies a general rectangular matrix by a real scalar defined as cto/cfrom. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZLASCL + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZLASCL( TYPE, KL, KU, CFROM, CTO, M, N, A, LDA, INFO ) */ + +/* .. Scalar Arguments .. */ +/* CHARACTER TYPE */ +/* INTEGER INFO, KL, KU, LDA, M, N */ +/* DOUBLE PRECISION CFROM, CTO */ +/* .. */ +/* .. Array Arguments .. */ +/* COMPLEX*16 A( LDA, * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZLASCL multiplies the M by N complex matrix A by the real scalar */ +/* > CTO/CFROM. This is done without over/underflow as long as the final */ +/* > result CTO*A(I,J)/CFROM does not over/underflow. TYPE specifies that */ +/* > A may be full, upper triangular, lower triangular, upper Hessenberg, */ +/* > or banded. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] TYPE */ +/* > \verbatim */ +/* > TYPE is CHARACTER*1 */ +/* > TYPE indices the storage type of the input matrix. */ +/* > = 'G': A is a full matrix. */ +/* > = 'L': A is a lower triangular matrix. */ +/* > = 'U': A is an upper triangular matrix. */ +/* > = 'H': A is an upper Hessenberg matrix. */ +/* > = 'B': A is a symmetric band matrix with lower bandwidth KL */ +/* > and upper bandwidth KU and with the only the lower */ +/* > half stored. */ +/* > = 'Q': A is a symmetric band matrix with lower bandwidth KL */ +/* > and upper bandwidth KU and with the only the upper */ +/* > half stored. */ +/* > = 'Z': A is a band matrix with lower bandwidth KL and upper */ +/* > bandwidth KU. See ZGBTRF for storage details. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] KL */ +/* > \verbatim */ +/* > KL is INTEGER */ +/* > The lower bandwidth of A. Referenced only if TYPE = 'B', */ +/* > 'Q' or 'Z'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] KU */ +/* > \verbatim */ +/* > KU is INTEGER */ +/* > The upper bandwidth of A. Referenced only if TYPE = 'B', */ +/* > 'Q' or 'Z'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] CFROM */ +/* > \verbatim */ +/* > CFROM is DOUBLE PRECISION */ +/* > \endverbatim */ +/* > */ +/* > \param[in] CTO */ +/* > \verbatim */ +/* > CTO is DOUBLE PRECISION */ +/* > */ +/* > The matrix A is multiplied by CTO/CFROM. A(I,J) is computed */ +/* > without over/underflow if the final result CTO*A(I,J)/CFROM */ +/* > can be represented without over/underflow. CFROM must be */ +/* > nonzero. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA,N) */ +/* > The matrix to be multiplied by CTO/CFROM. See TYPE for the */ +/* > storage type. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. */ +/* > If TYPE = 'G', 'L', 'U', 'H', LDA >= max(1,M); */ +/* > TYPE = 'B', LDA >= KL+1; */ +/* > TYPE = 'Q', LDA >= KU+1; */ +/* > TYPE = 'Z', LDA >= 2*KL+KU+1. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > 0 - successful exit */ +/* > <0 - if INFO = -i, the i-th argument had an illegal value. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup complex16OTHERauxiliary */ + +/* ===================================================================== */ +/* Subroutine */ int zlascl_(char *type__, integer *kl, integer *ku, + doublereal *cfrom, doublereal *cto, integer *m, integer *n, + doublecomplex *a, integer *lda, integer *info, ftnlen type_len) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + doublecomplex z__1; + + /* Local variables */ + integer i__, j, k1, k2, k3, k4; + doublereal mul, cto1; + logical done; + doublereal ctoc; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + integer itype; + doublereal cfrom1; + extern doublereal dlamch_(char *, ftnlen); + doublereal cfromc; + extern logical disnan_(doublereal *); + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + doublereal bignum, smlnum; + + +/* -- LAPACK auxiliary routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + + if (lsame_(type__, (char *)"G", (ftnlen)1, (ftnlen)1)) { + itype = 0; + } else if (lsame_(type__, (char *)"L", (ftnlen)1, (ftnlen)1)) { + itype = 1; + } else if (lsame_(type__, (char *)"U", (ftnlen)1, (ftnlen)1)) { + itype = 2; + } else if (lsame_(type__, (char *)"H", (ftnlen)1, (ftnlen)1)) { + itype = 3; + } else if (lsame_(type__, (char *)"B", (ftnlen)1, (ftnlen)1)) { + itype = 4; + } else if (lsame_(type__, (char *)"Q", (ftnlen)1, (ftnlen)1)) { + itype = 5; + } else if (lsame_(type__, (char *)"Z", (ftnlen)1, (ftnlen)1)) { + itype = 6; + } else { + itype = -1; + } + + if (itype == -1) { + *info = -1; + } else if (*cfrom == 0. || disnan_(cfrom)) { + *info = -4; + } else if (disnan_(cto)) { + *info = -5; + } else if (*m < 0) { + *info = -6; + } else if (*n < 0 || itype == 4 && *n != *m || itype == 5 && *n != *m) { + *info = -7; + } else if (itype <= 3 && *lda < max(1,*m)) { + *info = -9; + } else if (itype >= 4) { +/* Computing MAX */ + i__1 = *m - 1; + if (*kl < 0 || *kl > max(i__1,0)) { + *info = -2; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = *n - 1; + if (*ku < 0 || *ku > max(i__1,0) || (itype == 4 || itype == 5) && + *kl != *ku) { + *info = -3; + } else if (itype == 4 && *lda < *kl + 1 || itype == 5 && *lda < * + ku + 1 || itype == 6 && *lda < (*kl << 1) + *ku + 1) { + *info = -9; + } + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"ZLASCL", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0 || *m == 0) { + return 0; + } + +/* Get machine parameters */ + + smlnum = dlamch_((char *)"S", (ftnlen)1); + bignum = 1. / smlnum; + + cfromc = *cfrom; + ctoc = *cto; + +L10: + cfrom1 = cfromc * smlnum; + if (cfrom1 == cfromc) { +/* CFROMC is an inf. Multiply by a correctly signed zero for */ +/* finite CTOC, or a NaN if CTOC is infinite. */ + mul = ctoc / cfromc; + done = TRUE_; + cto1 = ctoc; + } else { + cto1 = ctoc / bignum; + if (cto1 == ctoc) { +/* CTOC is either 0 or an inf. In both cases, CTOC itself */ +/* serves as the correct multiplication factor. */ + mul = ctoc; + done = TRUE_; + cfromc = 1.; + } else if (abs(cfrom1) > abs(ctoc) && ctoc != 0.) { + mul = smlnum; + done = FALSE_; + cfromc = cfrom1; + } else if (abs(cto1) > abs(cfromc)) { + mul = bignum; + done = FALSE_; + ctoc = cto1; + } else { + mul = ctoc / cfromc; + done = TRUE_; + if (mul == 1.) { + return 0; + } + } + } + + if (itype == 0) { + +/* Full matrix */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + z__1.r = mul * a[i__4].r, z__1.i = mul * a[i__4].i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; +/* L20: */ + } +/* L30: */ + } + + } else if (itype == 1) { + +/* Lower triangular matrix */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = j; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + z__1.r = mul * a[i__4].r, z__1.i = mul * a[i__4].i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; +/* L40: */ + } +/* L50: */ + } + + } else if (itype == 2) { + +/* Upper triangular matrix */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = min(j,*m); + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + z__1.r = mul * a[i__4].r, z__1.i = mul * a[i__4].i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; +/* L60: */ + } +/* L70: */ + } + + } else if (itype == 3) { + +/* Upper Hessenberg matrix */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + i__3 = j + 1; + i__2 = min(i__3,*m); + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + z__1.r = mul * a[i__4].r, z__1.i = mul * a[i__4].i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; +/* L80: */ + } +/* L90: */ + } + + } else if (itype == 4) { + +/* Lower half of a symmetric band matrix */ + + k3 = *kl + 1; + k4 = *n + 1; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + i__3 = k3, i__4 = k4 - j; + i__2 = min(i__3,i__4); + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + z__1.r = mul * a[i__4].r, z__1.i = mul * a[i__4].i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; +/* L100: */ + } +/* L110: */ + } + + } else if (itype == 5) { + +/* Upper half of a symmetric band matrix */ + + k1 = *ku + 2; + k3 = *ku + 1; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MAX */ + i__2 = k1 - j; + i__3 = k3; + for (i__ = max(i__2,1); i__ <= i__3; ++i__) { + i__2 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + z__1.r = mul * a[i__4].r, z__1.i = mul * a[i__4].i; + a[i__2].r = z__1.r, a[i__2].i = z__1.i; +/* L120: */ + } +/* L130: */ + } + + } else if (itype == 6) { + +/* Band matrix */ + + k1 = *kl + *ku + 2; + k2 = *kl + 1; + k3 = (*kl << 1) + *ku + 1; + k4 = *kl + *ku + 1 + *m; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MAX */ + i__3 = k1 - j; +/* Computing MIN */ + i__4 = k3, i__5 = k4 - j; + i__2 = min(i__4,i__5); + for (i__ = max(i__3,k2); i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + z__1.r = mul * a[i__4].r, z__1.i = mul * a[i__4].i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; +/* L140: */ + } +/* L150: */ + } + + } + + if (! done) { + goto L10; + } + + return 0; + +/* End of ZLASCL */ + +} /* zlascl_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/zlaset.cpp b/lib/linalg/zlaset.cpp new file mode 100644 index 0000000000..e402ea02fe --- /dev/null +++ b/lib/linalg/zlaset.cpp @@ -0,0 +1,240 @@ +/* fortran/zlaset.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* > \brief \b ZLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given val +ues. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZLASET + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZLASET( UPLO, M, N, ALPHA, BETA, A, LDA ) */ + +/* .. Scalar Arguments .. */ +/* CHARACTER UPLO */ +/* INTEGER LDA, M, N */ +/* COMPLEX*16 ALPHA, BETA */ +/* .. */ +/* .. Array Arguments .. */ +/* COMPLEX*16 A( LDA, * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZLASET initializes a 2-D array A to BETA on the diagonal and */ +/* > ALPHA on the offdiagonals. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > Specifies the part of the matrix A to be set. */ +/* > = 'U': Upper triangular part is set. The lower triangle */ +/* > is unchanged. */ +/* > = 'L': Lower triangular part is set. The upper triangle */ +/* > is unchanged. */ +/* > Otherwise: All of the matrix A is set. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > On entry, M specifies the number of rows of A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > On entry, N specifies the number of columns of A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] ALPHA */ +/* > \verbatim */ +/* > ALPHA is COMPLEX*16 */ +/* > All the offdiagonal array elements are set to ALPHA. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] BETA */ +/* > \verbatim */ +/* > BETA is COMPLEX*16 */ +/* > All the diagonal array elements are set to BETA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA,N) */ +/* > On entry, the m by n matrix A. */ +/* > On exit, A(i,j) = ALPHA, 1 <= i <= m, 1 <= j <= n, i.ne.j; */ +/* > A(i,i) = BETA , 1 <= i <= min(m,n) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= max(1,M). */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup complex16OTHERauxiliary */ + +/* ===================================================================== */ +/* Subroutine */ int zlaset_(char *uplo, integer *m, integer *n, + doublecomplex *alpha, doublecomplex *beta, doublecomplex *a, integer * + lda, ftnlen uplo_len) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + integer i__, j; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + + +/* -- LAPACK auxiliary routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + if (lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1)) { + +/* Set the diagonal to BETA and the strictly upper triangular */ +/* part of the array to ALPHA. */ + + i__1 = *n; + for (j = 2; j <= i__1; ++j) { +/* Computing MIN */ + i__3 = j - 1; + i__2 = min(i__3,*m); + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + a[i__3].r = alpha->r, a[i__3].i = alpha->i; +/* L10: */ + } +/* L20: */ + } + i__1 = min(*n,*m); + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + i__ * a_dim1; + a[i__2].r = beta->r, a[i__2].i = beta->i; +/* L30: */ + } + + } else if (lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + +/* Set the diagonal to BETA and the strictly lower triangular */ +/* part of the array to ALPHA. */ + + i__1 = min(*m,*n); + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = j + 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + a[i__3].r = alpha->r, a[i__3].i = alpha->i; +/* L40: */ + } +/* L50: */ + } + i__1 = min(*n,*m); + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + i__ * a_dim1; + a[i__2].r = beta->r, a[i__2].i = beta->i; +/* L60: */ + } + + } else { + +/* Set the array to BETA on the diagonal and ALPHA on the */ +/* offdiagonal. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + a[i__3].r = alpha->r, a[i__3].i = alpha->i; +/* L70: */ + } +/* L80: */ + } + i__1 = min(*m,*n); + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + i__ * a_dim1; + a[i__2].r = beta->r, a[i__2].i = beta->i; +/* L90: */ + } + } + + return 0; + +/* End of ZLASET */ + +} /* zlaset_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/zlasr.cpp b/lib/linalg/zlasr.cpp new file mode 100644 index 0000000000..23d68bf82f --- /dev/null +++ b/lib/linalg/zlasr.cpp @@ -0,0 +1,694 @@ +/* fortran/zlasr.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* > \brief \b ZLASR applies a sequence of plane rotations to a general rectangular matrix. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZLASR + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZLASR( SIDE, PIVOT, DIRECT, M, N, C, S, A, LDA ) */ + +/* .. Scalar Arguments .. */ +/* CHARACTER DIRECT, PIVOT, SIDE */ +/* INTEGER LDA, M, N */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION C( * ), S( * ) */ +/* COMPLEX*16 A( LDA, * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZLASR applies a sequence of real plane rotations to a complex matrix */ +/* > A, from either the left or the right. */ +/* > */ +/* > When SIDE = 'L', the transformation takes the form */ +/* > */ +/* > A := P*A */ +/* > */ +/* > and when SIDE = 'R', the transformation takes the form */ +/* > */ +/* > A := A*P**T */ +/* > */ +/* > where P is an orthogonal matrix consisting of a sequence of z plane */ +/* > rotations, with z = M when SIDE = 'L' and z = N when SIDE = 'R', */ +/* > and P**T is the transpose of P. */ +/* > */ +/* > When DIRECT = 'F' (Forward sequence), then */ +/* > */ +/* > P = P(z-1) * ... * P(2) * P(1) */ +/* > */ +/* > and when DIRECT = 'B' (Backward sequence), then */ +/* > */ +/* > P = P(1) * P(2) * ... * P(z-1) */ +/* > */ +/* > where P(k) is a plane rotation matrix defined by the 2-by-2 rotation */ +/* > */ +/* > R(k) = ( c(k) s(k) ) */ +/* > = ( -s(k) c(k) ). */ +/* > */ +/* > When PIVOT = 'V' (Variable pivot), the rotation is performed */ +/* > for the plane (k,k+1), i.e., P(k) has the form */ +/* > */ +/* > P(k) = ( 1 ) */ +/* > ( ... ) */ +/* > ( 1 ) */ +/* > ( c(k) s(k) ) */ +/* > ( -s(k) c(k) ) */ +/* > ( 1 ) */ +/* > ( ... ) */ +/* > ( 1 ) */ +/* > */ +/* > where R(k) appears as a rank-2 modification to the identity matrix in */ +/* > rows and columns k and k+1. */ +/* > */ +/* > When PIVOT = 'T' (Top pivot), the rotation is performed for the */ +/* > plane (1,k+1), so P(k) has the form */ +/* > */ +/* > P(k) = ( c(k) s(k) ) */ +/* > ( 1 ) */ +/* > ( ... ) */ +/* > ( 1 ) */ +/* > ( -s(k) c(k) ) */ +/* > ( 1 ) */ +/* > ( ... ) */ +/* > ( 1 ) */ +/* > */ +/* > where R(k) appears in rows and columns 1 and k+1. */ +/* > */ +/* > Similarly, when PIVOT = 'B' (Bottom pivot), the rotation is */ +/* > performed for the plane (k,z), giving P(k) the form */ +/* > */ +/* > P(k) = ( 1 ) */ +/* > ( ... ) */ +/* > ( 1 ) */ +/* > ( c(k) s(k) ) */ +/* > ( 1 ) */ +/* > ( ... ) */ +/* > ( 1 ) */ +/* > ( -s(k) c(k) ) */ +/* > */ +/* > where R(k) appears in rows and columns k and z. The rotations are */ +/* > performed without ever forming P(k) explicitly. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] SIDE */ +/* > \verbatim */ +/* > SIDE is CHARACTER*1 */ +/* > Specifies whether the plane rotation matrix P is applied to */ +/* > A on the left or the right. */ +/* > = 'L': Left, compute A := P*A */ +/* > = 'R': Right, compute A:= A*P**T */ +/* > \endverbatim */ +/* > */ +/* > \param[in] PIVOT */ +/* > \verbatim */ +/* > PIVOT is CHARACTER*1 */ +/* > Specifies the plane for which P(k) is a plane rotation */ +/* > matrix. */ +/* > = 'V': Variable pivot, the plane (k,k+1) */ +/* > = 'T': Top pivot, the plane (1,k+1) */ +/* > = 'B': Bottom pivot, the plane (k,z) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DIRECT */ +/* > \verbatim */ +/* > DIRECT is CHARACTER*1 */ +/* > Specifies whether P is a forward or backward sequence of */ +/* > plane rotations. */ +/* > = 'F': Forward, P = P(z-1)*...*P(2)*P(1) */ +/* > = 'B': Backward, P = P(1)*P(2)*...*P(z-1) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix A. If m <= 1, an immediate */ +/* > return is effected. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix A. If n <= 1, an */ +/* > immediate return is effected. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] C */ +/* > \verbatim */ +/* > C is DOUBLE PRECISION array, dimension */ +/* > (M-1) if SIDE = 'L' */ +/* > (N-1) if SIDE = 'R' */ +/* > The cosines c(k) of the plane rotations. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] S */ +/* > \verbatim */ +/* > S is DOUBLE PRECISION array, dimension */ +/* > (M-1) if SIDE = 'L' */ +/* > (N-1) if SIDE = 'R' */ +/* > The sines s(k) of the plane rotations. The 2-by-2 plane */ +/* > rotation part of the matrix P(k), R(k), has the form */ +/* > R(k) = ( c(k) s(k) ) */ +/* > ( -s(k) c(k) ). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA,N) */ +/* > The M-by-N matrix A. On exit, A is overwritten by P*A if */ +/* > SIDE = 'R' or by A*P**T if SIDE = 'L'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= max(1,M). */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup complex16OTHERauxiliary */ + +/* ===================================================================== */ +/* Subroutine */ int zlasr_(char *side, char *pivot, char *direct, integer *m, + integer *n, doublereal *c__, doublereal *s, doublecomplex *a, + integer *lda, ftnlen side_len, ftnlen pivot_len, ftnlen direct_len) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + doublecomplex z__1, z__2, z__3; + + /* Local variables */ + integer i__, j, info; + doublecomplex temp; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + doublereal ctemp, stemp; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + + +/* -- LAPACK auxiliary routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters */ + + /* Parameter adjustments */ + --c__; + --s; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + info = 0; + if (! (lsame_(side, (char *)"L", (ftnlen)1, (ftnlen)1) || lsame_(side, (char *)"R", ( + ftnlen)1, (ftnlen)1))) { + info = 1; + } else if (! (lsame_(pivot, (char *)"V", (ftnlen)1, (ftnlen)1) || lsame_(pivot, + (char *)"T", (ftnlen)1, (ftnlen)1) || lsame_(pivot, (char *)"B", (ftnlen)1, ( + ftnlen)1))) { + info = 2; + } else if (! (lsame_(direct, (char *)"F", (ftnlen)1, (ftnlen)1) || lsame_(direct, + (char *)"B", (ftnlen)1, (ftnlen)1))) { + info = 3; + } else if (*m < 0) { + info = 4; + } else if (*n < 0) { + info = 5; + } else if (*lda < max(1,*m)) { + info = 9; + } + if (info != 0) { + xerbla_((char *)"ZLASR ", &info, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + return 0; + } + if (lsame_(side, (char *)"L", (ftnlen)1, (ftnlen)1)) { + +/* Form P * A */ + + if (lsame_(pivot, (char *)"V", (ftnlen)1, (ftnlen)1)) { + if (lsame_(direct, (char *)"F", (ftnlen)1, (ftnlen)1)) { + i__1 = *m - 1; + for (j = 1; j <= i__1; ++j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1. || stemp != 0.) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = j + 1 + i__ * a_dim1; + temp.r = a[i__3].r, temp.i = a[i__3].i; + i__3 = j + 1 + i__ * a_dim1; + z__2.r = ctemp * temp.r, z__2.i = ctemp * temp.i; + i__4 = j + i__ * a_dim1; + z__3.r = stemp * a[i__4].r, z__3.i = stemp * a[ + i__4].i; + z__1.r = z__2.r - z__3.r, z__1.i = z__2.i - + z__3.i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; + i__3 = j + i__ * a_dim1; + z__2.r = stemp * temp.r, z__2.i = stemp * temp.i; + i__4 = j + i__ * a_dim1; + z__3.r = ctemp * a[i__4].r, z__3.i = ctemp * a[ + i__4].i; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + + z__3.i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; +/* L10: */ + } + } +/* L20: */ + } + } else if (lsame_(direct, (char *)"B", (ftnlen)1, (ftnlen)1)) { + for (j = *m - 1; j >= 1; --j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1. || stemp != 0.) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = j + 1 + i__ * a_dim1; + temp.r = a[i__2].r, temp.i = a[i__2].i; + i__2 = j + 1 + i__ * a_dim1; + z__2.r = ctemp * temp.r, z__2.i = ctemp * temp.i; + i__3 = j + i__ * a_dim1; + z__3.r = stemp * a[i__3].r, z__3.i = stemp * a[ + i__3].i; + z__1.r = z__2.r - z__3.r, z__1.i = z__2.i - + z__3.i; + a[i__2].r = z__1.r, a[i__2].i = z__1.i; + i__2 = j + i__ * a_dim1; + z__2.r = stemp * temp.r, z__2.i = stemp * temp.i; + i__3 = j + i__ * a_dim1; + z__3.r = ctemp * a[i__3].r, z__3.i = ctemp * a[ + i__3].i; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + + z__3.i; + a[i__2].r = z__1.r, a[i__2].i = z__1.i; +/* L30: */ + } + } +/* L40: */ + } + } + } else if (lsame_(pivot, (char *)"T", (ftnlen)1, (ftnlen)1)) { + if (lsame_(direct, (char *)"F", (ftnlen)1, (ftnlen)1)) { + i__1 = *m; + for (j = 2; j <= i__1; ++j) { + ctemp = c__[j - 1]; + stemp = s[j - 1]; + if (ctemp != 1. || stemp != 0.) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = j + i__ * a_dim1; + temp.r = a[i__3].r, temp.i = a[i__3].i; + i__3 = j + i__ * a_dim1; + z__2.r = ctemp * temp.r, z__2.i = ctemp * temp.i; + i__4 = i__ * a_dim1 + 1; + z__3.r = stemp * a[i__4].r, z__3.i = stemp * a[ + i__4].i; + z__1.r = z__2.r - z__3.r, z__1.i = z__2.i - + z__3.i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; + i__3 = i__ * a_dim1 + 1; + z__2.r = stemp * temp.r, z__2.i = stemp * temp.i; + i__4 = i__ * a_dim1 + 1; + z__3.r = ctemp * a[i__4].r, z__3.i = ctemp * a[ + i__4].i; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + + z__3.i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; +/* L50: */ + } + } +/* L60: */ + } + } else if (lsame_(direct, (char *)"B", (ftnlen)1, (ftnlen)1)) { + for (j = *m; j >= 2; --j) { + ctemp = c__[j - 1]; + stemp = s[j - 1]; + if (ctemp != 1. || stemp != 0.) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = j + i__ * a_dim1; + temp.r = a[i__2].r, temp.i = a[i__2].i; + i__2 = j + i__ * a_dim1; + z__2.r = ctemp * temp.r, z__2.i = ctemp * temp.i; + i__3 = i__ * a_dim1 + 1; + z__3.r = stemp * a[i__3].r, z__3.i = stemp * a[ + i__3].i; + z__1.r = z__2.r - z__3.r, z__1.i = z__2.i - + z__3.i; + a[i__2].r = z__1.r, a[i__2].i = z__1.i; + i__2 = i__ * a_dim1 + 1; + z__2.r = stemp * temp.r, z__2.i = stemp * temp.i; + i__3 = i__ * a_dim1 + 1; + z__3.r = ctemp * a[i__3].r, z__3.i = ctemp * a[ + i__3].i; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + + z__3.i; + a[i__2].r = z__1.r, a[i__2].i = z__1.i; +/* L70: */ + } + } +/* L80: */ + } + } + } else if (lsame_(pivot, (char *)"B", (ftnlen)1, (ftnlen)1)) { + if (lsame_(direct, (char *)"F", (ftnlen)1, (ftnlen)1)) { + i__1 = *m - 1; + for (j = 1; j <= i__1; ++j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1. || stemp != 0.) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = j + i__ * a_dim1; + temp.r = a[i__3].r, temp.i = a[i__3].i; + i__3 = j + i__ * a_dim1; + i__4 = *m + i__ * a_dim1; + z__2.r = stemp * a[i__4].r, z__2.i = stemp * a[ + i__4].i; + z__3.r = ctemp * temp.r, z__3.i = ctemp * temp.i; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + + z__3.i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; + i__3 = *m + i__ * a_dim1; + i__4 = *m + i__ * a_dim1; + z__2.r = ctemp * a[i__4].r, z__2.i = ctemp * a[ + i__4].i; + z__3.r = stemp * temp.r, z__3.i = stemp * temp.i; + z__1.r = z__2.r - z__3.r, z__1.i = z__2.i - + z__3.i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; +/* L90: */ + } + } +/* L100: */ + } + } else if (lsame_(direct, (char *)"B", (ftnlen)1, (ftnlen)1)) { + for (j = *m - 1; j >= 1; --j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1. || stemp != 0.) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = j + i__ * a_dim1; + temp.r = a[i__2].r, temp.i = a[i__2].i; + i__2 = j + i__ * a_dim1; + i__3 = *m + i__ * a_dim1; + z__2.r = stemp * a[i__3].r, z__2.i = stemp * a[ + i__3].i; + z__3.r = ctemp * temp.r, z__3.i = ctemp * temp.i; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + + z__3.i; + a[i__2].r = z__1.r, a[i__2].i = z__1.i; + i__2 = *m + i__ * a_dim1; + i__3 = *m + i__ * a_dim1; + z__2.r = ctemp * a[i__3].r, z__2.i = ctemp * a[ + i__3].i; + z__3.r = stemp * temp.r, z__3.i = stemp * temp.i; + z__1.r = z__2.r - z__3.r, z__1.i = z__2.i - + z__3.i; + a[i__2].r = z__1.r, a[i__2].i = z__1.i; +/* L110: */ + } + } +/* L120: */ + } + } + } + } else if (lsame_(side, (char *)"R", (ftnlen)1, (ftnlen)1)) { + +/* Form A * P**T */ + + if (lsame_(pivot, (char *)"V", (ftnlen)1, (ftnlen)1)) { + if (lsame_(direct, (char *)"F", (ftnlen)1, (ftnlen)1)) { + i__1 = *n - 1; + for (j = 1; j <= i__1; ++j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1. || stemp != 0.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + (j + 1) * a_dim1; + temp.r = a[i__3].r, temp.i = a[i__3].i; + i__3 = i__ + (j + 1) * a_dim1; + z__2.r = ctemp * temp.r, z__2.i = ctemp * temp.i; + i__4 = i__ + j * a_dim1; + z__3.r = stemp * a[i__4].r, z__3.i = stemp * a[ + i__4].i; + z__1.r = z__2.r - z__3.r, z__1.i = z__2.i - + z__3.i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; + i__3 = i__ + j * a_dim1; + z__2.r = stemp * temp.r, z__2.i = stemp * temp.i; + i__4 = i__ + j * a_dim1; + z__3.r = ctemp * a[i__4].r, z__3.i = ctemp * a[ + i__4].i; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + + z__3.i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; +/* L130: */ + } + } +/* L140: */ + } + } else if (lsame_(direct, (char *)"B", (ftnlen)1, (ftnlen)1)) { + for (j = *n - 1; j >= 1; --j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1. || stemp != 0.) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + (j + 1) * a_dim1; + temp.r = a[i__2].r, temp.i = a[i__2].i; + i__2 = i__ + (j + 1) * a_dim1; + z__2.r = ctemp * temp.r, z__2.i = ctemp * temp.i; + i__3 = i__ + j * a_dim1; + z__3.r = stemp * a[i__3].r, z__3.i = stemp * a[ + i__3].i; + z__1.r = z__2.r - z__3.r, z__1.i = z__2.i - + z__3.i; + a[i__2].r = z__1.r, a[i__2].i = z__1.i; + i__2 = i__ + j * a_dim1; + z__2.r = stemp * temp.r, z__2.i = stemp * temp.i; + i__3 = i__ + j * a_dim1; + z__3.r = ctemp * a[i__3].r, z__3.i = ctemp * a[ + i__3].i; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + + z__3.i; + a[i__2].r = z__1.r, a[i__2].i = z__1.i; +/* L150: */ + } + } +/* L160: */ + } + } + } else if (lsame_(pivot, (char *)"T", (ftnlen)1, (ftnlen)1)) { + if (lsame_(direct, (char *)"F", (ftnlen)1, (ftnlen)1)) { + i__1 = *n; + for (j = 2; j <= i__1; ++j) { + ctemp = c__[j - 1]; + stemp = s[j - 1]; + if (ctemp != 1. || stemp != 0.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + temp.r = a[i__3].r, temp.i = a[i__3].i; + i__3 = i__ + j * a_dim1; + z__2.r = ctemp * temp.r, z__2.i = ctemp * temp.i; + i__4 = i__ + a_dim1; + z__3.r = stemp * a[i__4].r, z__3.i = stemp * a[ + i__4].i; + z__1.r = z__2.r - z__3.r, z__1.i = z__2.i - + z__3.i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; + i__3 = i__ + a_dim1; + z__2.r = stemp * temp.r, z__2.i = stemp * temp.i; + i__4 = i__ + a_dim1; + z__3.r = ctemp * a[i__4].r, z__3.i = ctemp * a[ + i__4].i; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + + z__3.i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; +/* L170: */ + } + } +/* L180: */ + } + } else if (lsame_(direct, (char *)"B", (ftnlen)1, (ftnlen)1)) { + for (j = *n; j >= 2; --j) { + ctemp = c__[j - 1]; + stemp = s[j - 1]; + if (ctemp != 1. || stemp != 0.) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + j * a_dim1; + temp.r = a[i__2].r, temp.i = a[i__2].i; + i__2 = i__ + j * a_dim1; + z__2.r = ctemp * temp.r, z__2.i = ctemp * temp.i; + i__3 = i__ + a_dim1; + z__3.r = stemp * a[i__3].r, z__3.i = stemp * a[ + i__3].i; + z__1.r = z__2.r - z__3.r, z__1.i = z__2.i - + z__3.i; + a[i__2].r = z__1.r, a[i__2].i = z__1.i; + i__2 = i__ + a_dim1; + z__2.r = stemp * temp.r, z__2.i = stemp * temp.i; + i__3 = i__ + a_dim1; + z__3.r = ctemp * a[i__3].r, z__3.i = ctemp * a[ + i__3].i; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + + z__3.i; + a[i__2].r = z__1.r, a[i__2].i = z__1.i; +/* L190: */ + } + } +/* L200: */ + } + } + } else if (lsame_(pivot, (char *)"B", (ftnlen)1, (ftnlen)1)) { + if (lsame_(direct, (char *)"F", (ftnlen)1, (ftnlen)1)) { + i__1 = *n - 1; + for (j = 1; j <= i__1; ++j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1. || stemp != 0.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + temp.r = a[i__3].r, temp.i = a[i__3].i; + i__3 = i__ + j * a_dim1; + i__4 = i__ + *n * a_dim1; + z__2.r = stemp * a[i__4].r, z__2.i = stemp * a[ + i__4].i; + z__3.r = ctemp * temp.r, z__3.i = ctemp * temp.i; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + + z__3.i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; + i__3 = i__ + *n * a_dim1; + i__4 = i__ + *n * a_dim1; + z__2.r = ctemp * a[i__4].r, z__2.i = ctemp * a[ + i__4].i; + z__3.r = stemp * temp.r, z__3.i = stemp * temp.i; + z__1.r = z__2.r - z__3.r, z__1.i = z__2.i - + z__3.i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; +/* L210: */ + } + } +/* L220: */ + } + } else if (lsame_(direct, (char *)"B", (ftnlen)1, (ftnlen)1)) { + for (j = *n - 1; j >= 1; --j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1. || stemp != 0.) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + j * a_dim1; + temp.r = a[i__2].r, temp.i = a[i__2].i; + i__2 = i__ + j * a_dim1; + i__3 = i__ + *n * a_dim1; + z__2.r = stemp * a[i__3].r, z__2.i = stemp * a[ + i__3].i; + z__3.r = ctemp * temp.r, z__3.i = ctemp * temp.i; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + + z__3.i; + a[i__2].r = z__1.r, a[i__2].i = z__1.i; + i__2 = i__ + *n * a_dim1; + i__3 = i__ + *n * a_dim1; + z__2.r = ctemp * a[i__3].r, z__2.i = ctemp * a[ + i__3].i; + z__3.r = stemp * temp.r, z__3.i = stemp * temp.i; + z__1.r = z__2.r - z__3.r, z__1.i = z__2.i - + z__3.i; + a[i__2].r = z__1.r, a[i__2].i = z__1.i; +/* L230: */ + } + } +/* L240: */ + } + } + } + } + + return 0; + +/* End of ZLASR */ + +} /* zlasr_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/zlassq.cpp b/lib/linalg/zlassq.cpp new file mode 100644 index 0000000000..1fa44450fe --- /dev/null +++ b/lib/linalg/zlassq.cpp @@ -0,0 +1,212 @@ +/* fortran/zlassq.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* > \brief \b ZLASSQ updates a sum of squares represented in scaled form. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZLASSQ + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZLASSQ( N, X, INCX, SCALE, SUMSQ ) */ + +/* .. Scalar Arguments .. */ +/* INTEGER INCX, N */ +/* DOUBLE PRECISION SCALE, SUMSQ */ +/* .. */ +/* .. Array Arguments .. */ +/* COMPLEX*16 X( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZLASSQ returns the values scl and ssq such that */ +/* > */ +/* > ( scl**2 )*ssq = x( 1 )**2 +...+ x( n )**2 + ( scale**2 )*sumsq, */ +/* > */ +/* > where x( i ) = abs( X( 1 + ( i - 1 )*INCX ) ). The value of sumsq is */ +/* > assumed to be at least unity and the value of ssq will then satisfy */ +/* > */ +/* > 1.0 .le. ssq .le. ( sumsq + 2*n ). */ +/* > */ +/* > scale is assumed to be non-negative and scl returns the value */ +/* > */ +/* > scl = max( scale, abs( real( x( i ) ) ), abs( aimag( x( i ) ) ) ), */ +/* > i */ +/* > */ +/* > scale and sumsq must be supplied in SCALE and SUMSQ respectively. */ +/* > SCALE and SUMSQ are overwritten by scl and ssq respectively. */ +/* > */ +/* > The routine makes only one pass through the vector X. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of elements to be used from the vector X. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] X */ +/* > \verbatim */ +/* > X is COMPLEX*16 array, dimension (N) */ +/* > The vector x as described above. */ +/* > x( i ) = X( 1 + ( i - 1 )*INCX ), 1 <= i <= n. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] INCX */ +/* > \verbatim */ +/* > INCX is INTEGER */ +/* > The increment between successive values of the vector X. */ +/* > INCX > 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] SCALE */ +/* > \verbatim */ +/* > SCALE is DOUBLE PRECISION */ +/* > On entry, the value scale in the equation above. */ +/* > On exit, SCALE is overwritten with the value scl . */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] SUMSQ */ +/* > \verbatim */ +/* > SUMSQ is DOUBLE PRECISION */ +/* > On entry, the value sumsq in the equation above. */ +/* > On exit, SUMSQ is overwritten with the value ssq . */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \date December 2016 */ + +/* > \ingroup complex16OTHERauxiliary */ + +/* ===================================================================== */ +/* Subroutine */ int zlassq_(integer *n, doublecomplex *x, integer *incx, + doublereal *scale, doublereal *sumsq) +{ + /* System generated locals */ + integer i__1, i__2, i__3; + doublereal d__1; + + /* Builtin functions */ + double d_imag(doublecomplex *); + + /* Local variables */ + integer ix; + doublereal temp1; + extern logical disnan_(doublereal *); + + +/* -- LAPACK auxiliary routine (version 3.7.0) -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ +/* December 2016 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + /* Parameter adjustments */ + --x; + + /* Function Body */ + if (*n > 0) { + i__1 = (*n - 1) * *incx + 1; + i__2 = *incx; + for (ix = 1; i__2 < 0 ? ix >= i__1 : ix <= i__1; ix += i__2) { + i__3 = ix; + temp1 = (d__1 = x[i__3].r, abs(d__1)); + if (temp1 > 0. || disnan_(&temp1)) { + if (*scale < temp1) { +/* Computing 2nd power */ + d__1 = *scale / temp1; + *sumsq = *sumsq * (d__1 * d__1) + 1; + *scale = temp1; + } else { +/* Computing 2nd power */ + d__1 = temp1 / *scale; + *sumsq += d__1 * d__1; + } + } + temp1 = (d__1 = d_imag(&x[ix]), abs(d__1)); + if (temp1 > 0. || disnan_(&temp1)) { + if (*scale < temp1) { +/* Computing 2nd power */ + d__1 = *scale / temp1; + *sumsq = *sumsq * (d__1 * d__1) + 1; + *scale = temp1; + } else { +/* Computing 2nd power */ + d__1 = temp1 / *scale; + *sumsq += d__1 * d__1; + } + } +/* L10: */ + } + } + + return 0; + +/* End of ZLASSQ */ + +} /* zlassq_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/zlatrd.cpp b/lib/linalg/zlatrd.cpp new file mode 100644 index 0000000000..eaa0fc1826 --- /dev/null +++ b/lib/linalg/zlatrd.cpp @@ -0,0 +1,509 @@ +/* fortran/zlatrd.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static doublecomplex c_b1 = {0.,0.}; +static doublecomplex c_b2 = {1.,0.}; +static integer c__1 = 1; + +/* > \brief \b ZLATRD reduces the first nb rows and columns of a symmetric/Hermitian matrix A to real tridiago +nal form by an unitary similarity transformation. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZLATRD + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZLATRD( UPLO, N, NB, A, LDA, E, TAU, W, LDW ) */ + +/* .. Scalar Arguments .. */ +/* CHARACTER UPLO */ +/* INTEGER LDA, LDW, N, NB */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION E( * ) */ +/* COMPLEX*16 A( LDA, * ), TAU( * ), W( LDW, * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZLATRD reduces NB rows and columns of a complex Hermitian matrix A to */ +/* > Hermitian tridiagonal form by a unitary similarity */ +/* > transformation Q**H * A * Q, and returns the matrices V and W which are */ +/* > needed to apply the transformation to the unreduced part of A. */ +/* > */ +/* > If UPLO = 'U', ZLATRD reduces the last NB rows and columns of a */ +/* > matrix, of which the upper triangle is supplied; */ +/* > if UPLO = 'L', ZLATRD reduces the first NB rows and columns of a */ +/* > matrix, of which the lower triangle is supplied. */ +/* > */ +/* > This is an auxiliary routine called by ZHETRD. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > Specifies whether the upper or lower triangular part of the */ +/* > Hermitian matrix A is stored: */ +/* > = 'U': Upper triangular */ +/* > = 'L': Lower triangular */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] NB */ +/* > \verbatim */ +/* > NB is INTEGER */ +/* > The number of rows and columns to be reduced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA,N) */ +/* > On entry, the Hermitian matrix A. If UPLO = 'U', the leading */ +/* > n-by-n upper triangular part of A contains the upper */ +/* > triangular part of the matrix A, and the strictly lower */ +/* > triangular part of A is not referenced. If UPLO = 'L', the */ +/* > leading n-by-n lower triangular part of A contains the lower */ +/* > triangular part of the matrix A, and the strictly upper */ +/* > triangular part of A is not referenced. */ +/* > On exit: */ +/* > if UPLO = 'U', the last NB columns have been reduced to */ +/* > tridiagonal form, with the diagonal elements overwriting */ +/* > the diagonal elements of A; the elements above the diagonal */ +/* > with the array TAU, represent the unitary matrix Q as a */ +/* > product of elementary reflectors; */ +/* > if UPLO = 'L', the first NB columns have been reduced to */ +/* > tridiagonal form, with the diagonal elements overwriting */ +/* > the diagonal elements of A; the elements below the diagonal */ +/* > with the array TAU, represent the unitary matrix Q as a */ +/* > product of elementary reflectors. */ +/* > See Further Details. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= max(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] E */ +/* > \verbatim */ +/* > E is DOUBLE PRECISION array, dimension (N-1) */ +/* > If UPLO = 'U', E(n-nb:n-1) contains the superdiagonal */ +/* > elements of the last NB columns of the reduced matrix; */ +/* > if UPLO = 'L', E(1:nb) contains the subdiagonal elements of */ +/* > the first NB columns of the reduced matrix. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] TAU */ +/* > \verbatim */ +/* > TAU is COMPLEX*16 array, dimension (N-1) */ +/* > The scalar factors of the elementary reflectors, stored in */ +/* > TAU(n-nb:n-1) if UPLO = 'U', and in TAU(1:nb) if UPLO = 'L'. */ +/* > See Further Details. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] W */ +/* > \verbatim */ +/* > W is COMPLEX*16 array, dimension (LDW,NB) */ +/* > The n-by-nb matrix W required to update the unreduced part */ +/* > of A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDW */ +/* > \verbatim */ +/* > LDW is INTEGER */ +/* > The leading dimension of the array W. LDW >= max(1,N). */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup complex16OTHERauxiliary */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > If UPLO = 'U', the matrix Q is represented as a product of elementary */ +/* > reflectors */ +/* > */ +/* > Q = H(n) H(n-1) . . . H(n-nb+1). */ +/* > */ +/* > Each H(i) has the form */ +/* > */ +/* > H(i) = I - tau * v * v**H */ +/* > */ +/* > where tau is a complex scalar, and v is a complex vector with */ +/* > v(i:n) = 0 and v(i-1) = 1; v(1:i-1) is stored on exit in A(1:i-1,i), */ +/* > and tau in TAU(i-1). */ +/* > */ +/* > If UPLO = 'L', the matrix Q is represented as a product of elementary */ +/* > reflectors */ +/* > */ +/* > Q = H(1) H(2) . . . H(nb). */ +/* > */ +/* > Each H(i) has the form */ +/* > */ +/* > H(i) = I - tau * v * v**H */ +/* > */ +/* > where tau is a complex scalar, and v is a complex vector with */ +/* > v(1:i) = 0 and v(i+1) = 1; v(i+1:n) is stored on exit in A(i+1:n,i), */ +/* > and tau in TAU(i). */ +/* > */ +/* > The elements of the vectors v together form the n-by-nb matrix V */ +/* > which is needed, with W, to apply the transformation to the unreduced */ +/* > part of the matrix, using a Hermitian rank-2k update of the form: */ +/* > A := A - V*W**H - W*V**H. */ +/* > */ +/* > The contents of A on exit are illustrated by the following examples */ +/* > with n = 5 and nb = 2: */ +/* > */ +/* > if UPLO = 'U': if UPLO = 'L': */ +/* > */ +/* > ( a a a v4 v5 ) ( d ) */ +/* > ( a a v4 v5 ) ( 1 d ) */ +/* > ( a 1 v5 ) ( v1 1 a ) */ +/* > ( d 1 ) ( v1 v2 a a ) */ +/* > ( d ) ( v1 v2 a a a ) */ +/* > */ +/* > where d denotes a diagonal element of the reduced matrix, a denotes */ +/* > an element of the original matrix that is unchanged, and vi denotes */ +/* > an element of the vector defining H(i). */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int zlatrd_(char *uplo, integer *n, integer *nb, + doublecomplex *a, integer *lda, doublereal *e, doublecomplex *tau, + doublecomplex *w, integer *ldw, ftnlen uplo_len) +{ + /* System generated locals */ + integer a_dim1, a_offset, w_dim1, w_offset, i__1, i__2, i__3; + doublereal d__1; + doublecomplex z__1, z__2, z__3, z__4; + + /* Local variables */ + integer i__, iw; + doublecomplex alpha; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern /* Subroutine */ int zscal_(integer *, doublecomplex *, + doublecomplex *, integer *); + extern /* Double Complex */ VOID zdotc_(doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *); + extern /* Subroutine */ int zgemv_(char *, integer *, integer *, + doublecomplex *, doublecomplex *, integer *, doublecomplex *, + integer *, doublecomplex *, doublecomplex *, integer *, ftnlen), + zhemv_(char *, integer *, doublecomplex *, doublecomplex *, + integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, integer *, ftnlen), zaxpy_(integer *, + doublecomplex *, doublecomplex *, integer *, doublecomplex *, + integer *), zlarfg_(integer *, doublecomplex *, doublecomplex *, + integer *, doublecomplex *), zlacgv_(integer *, doublecomplex *, + integer *); + + +/* -- LAPACK auxiliary routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Quick return if possible */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --e; + --tau; + w_dim1 = *ldw; + w_offset = 1 + w_dim1; + w -= w_offset; + + /* Function Body */ + if (*n <= 0) { + return 0; + } + + if (lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1)) { + +/* Reduce last NB columns of upper triangle */ + + i__1 = *n - *nb + 1; + for (i__ = *n; i__ >= i__1; --i__) { + iw = i__ - *n + *nb; + if (i__ < *n) { + +/* Update A(1:i,i) */ + + i__2 = i__ + i__ * a_dim1; + i__3 = i__ + i__ * a_dim1; + d__1 = a[i__3].r; + a[i__2].r = d__1, a[i__2].i = 0.; + i__2 = *n - i__; + zlacgv_(&i__2, &w[i__ + (iw + 1) * w_dim1], ldw); + i__2 = *n - i__; + z__1.r = -1., z__1.i = -0.; + zgemv_((char *)"No transpose", &i__, &i__2, &z__1, &a[(i__ + 1) * + a_dim1 + 1], lda, &w[i__ + (iw + 1) * w_dim1], ldw, & + c_b2, &a[i__ * a_dim1 + 1], &c__1, (ftnlen)12); + i__2 = *n - i__; + zlacgv_(&i__2, &w[i__ + (iw + 1) * w_dim1], ldw); + i__2 = *n - i__; + zlacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda); + i__2 = *n - i__; + z__1.r = -1., z__1.i = -0.; + zgemv_((char *)"No transpose", &i__, &i__2, &z__1, &w[(iw + 1) * + w_dim1 + 1], ldw, &a[i__ + (i__ + 1) * a_dim1], lda, & + c_b2, &a[i__ * a_dim1 + 1], &c__1, (ftnlen)12); + i__2 = *n - i__; + zlacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda); + i__2 = i__ + i__ * a_dim1; + i__3 = i__ + i__ * a_dim1; + d__1 = a[i__3].r; + a[i__2].r = d__1, a[i__2].i = 0.; + } + if (i__ > 1) { + +/* Generate elementary reflector H(i) to annihilate */ +/* A(1:i-2,i) */ + + i__2 = i__ - 1 + i__ * a_dim1; + alpha.r = a[i__2].r, alpha.i = a[i__2].i; + i__2 = i__ - 1; + zlarfg_(&i__2, &alpha, &a[i__ * a_dim1 + 1], &c__1, &tau[i__ + - 1]); + e[i__ - 1] = alpha.r; + i__2 = i__ - 1 + i__ * a_dim1; + a[i__2].r = 1., a[i__2].i = 0.; + +/* Compute W(1:i-1,i) */ + + i__2 = i__ - 1; + zhemv_((char *)"Upper", &i__2, &c_b2, &a[a_offset], lda, &a[i__ * + a_dim1 + 1], &c__1, &c_b1, &w[iw * w_dim1 + 1], &c__1, + (ftnlen)5); + if (i__ < *n) { + i__2 = i__ - 1; + i__3 = *n - i__; + zgemv_((char *)"Conjugate transpose", &i__2, &i__3, &c_b2, &w[(iw + + 1) * w_dim1 + 1], ldw, &a[i__ * a_dim1 + 1], & + c__1, &c_b1, &w[i__ + 1 + iw * w_dim1], &c__1, ( + ftnlen)19); + i__2 = i__ - 1; + i__3 = *n - i__; + z__1.r = -1., z__1.i = -0.; + zgemv_((char *)"No transpose", &i__2, &i__3, &z__1, &a[(i__ + 1) * + a_dim1 + 1], lda, &w[i__ + 1 + iw * w_dim1], & + c__1, &c_b2, &w[iw * w_dim1 + 1], &c__1, (ftnlen) + 12); + i__2 = i__ - 1; + i__3 = *n - i__; + zgemv_((char *)"Conjugate transpose", &i__2, &i__3, &c_b2, &a[( + i__ + 1) * a_dim1 + 1], lda, &a[i__ * a_dim1 + 1], + &c__1, &c_b1, &w[i__ + 1 + iw * w_dim1], &c__1, ( + ftnlen)19); + i__2 = i__ - 1; + i__3 = *n - i__; + z__1.r = -1., z__1.i = -0.; + zgemv_((char *)"No transpose", &i__2, &i__3, &z__1, &w[(iw + 1) * + w_dim1 + 1], ldw, &w[i__ + 1 + iw * w_dim1], & + c__1, &c_b2, &w[iw * w_dim1 + 1], &c__1, (ftnlen) + 12); + } + i__2 = i__ - 1; + zscal_(&i__2, &tau[i__ - 1], &w[iw * w_dim1 + 1], &c__1); + z__3.r = -.5, z__3.i = -0.; + i__2 = i__ - 1; + z__2.r = z__3.r * tau[i__2].r - z__3.i * tau[i__2].i, z__2.i = + z__3.r * tau[i__2].i + z__3.i * tau[i__2].r; + i__3 = i__ - 1; + zdotc_(&z__4, &i__3, &w[iw * w_dim1 + 1], &c__1, &a[i__ * + a_dim1 + 1], &c__1); + z__1.r = z__2.r * z__4.r - z__2.i * z__4.i, z__1.i = z__2.r * + z__4.i + z__2.i * z__4.r; + alpha.r = z__1.r, alpha.i = z__1.i; + i__2 = i__ - 1; + zaxpy_(&i__2, &alpha, &a[i__ * a_dim1 + 1], &c__1, &w[iw * + w_dim1 + 1], &c__1); + } + +/* L10: */ + } + } else { + +/* Reduce first NB columns of lower triangle */ + + i__1 = *nb; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Update A(i:n,i) */ + + i__2 = i__ + i__ * a_dim1; + i__3 = i__ + i__ * a_dim1; + d__1 = a[i__3].r; + a[i__2].r = d__1, a[i__2].i = 0.; + i__2 = i__ - 1; + zlacgv_(&i__2, &w[i__ + w_dim1], ldw); + i__2 = *n - i__ + 1; + i__3 = i__ - 1; + z__1.r = -1., z__1.i = -0.; + zgemv_((char *)"No transpose", &i__2, &i__3, &z__1, &a[i__ + a_dim1], lda, + &w[i__ + w_dim1], ldw, &c_b2, &a[i__ + i__ * a_dim1], & + c__1, (ftnlen)12); + i__2 = i__ - 1; + zlacgv_(&i__2, &w[i__ + w_dim1], ldw); + i__2 = i__ - 1; + zlacgv_(&i__2, &a[i__ + a_dim1], lda); + i__2 = *n - i__ + 1; + i__3 = i__ - 1; + z__1.r = -1., z__1.i = -0.; + zgemv_((char *)"No transpose", &i__2, &i__3, &z__1, &w[i__ + w_dim1], ldw, + &a[i__ + a_dim1], lda, &c_b2, &a[i__ + i__ * a_dim1], & + c__1, (ftnlen)12); + i__2 = i__ - 1; + zlacgv_(&i__2, &a[i__ + a_dim1], lda); + i__2 = i__ + i__ * a_dim1; + i__3 = i__ + i__ * a_dim1; + d__1 = a[i__3].r; + a[i__2].r = d__1, a[i__2].i = 0.; + if (i__ < *n) { + +/* Generate elementary reflector H(i) to annihilate */ +/* A(i+2:n,i) */ + + i__2 = i__ + 1 + i__ * a_dim1; + alpha.r = a[i__2].r, alpha.i = a[i__2].i; + i__2 = *n - i__; +/* Computing MIN */ + i__3 = i__ + 2; + zlarfg_(&i__2, &alpha, &a[min(i__3,*n) + i__ * a_dim1], &c__1, + &tau[i__]); + e[i__] = alpha.r; + i__2 = i__ + 1 + i__ * a_dim1; + a[i__2].r = 1., a[i__2].i = 0.; + +/* Compute W(i+1:n,i) */ + + i__2 = *n - i__; + zhemv_((char *)"Lower", &i__2, &c_b2, &a[i__ + 1 + (i__ + 1) * a_dim1] + , lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b1, &w[ + i__ + 1 + i__ * w_dim1], &c__1, (ftnlen)5); + i__2 = *n - i__; + i__3 = i__ - 1; + zgemv_((char *)"Conjugate transpose", &i__2, &i__3, &c_b2, &w[i__ + 1 + + w_dim1], ldw, &a[i__ + 1 + i__ * a_dim1], &c__1, & + c_b1, &w[i__ * w_dim1 + 1], &c__1, (ftnlen)19); + i__2 = *n - i__; + i__3 = i__ - 1; + z__1.r = -1., z__1.i = -0.; + zgemv_((char *)"No transpose", &i__2, &i__3, &z__1, &a[i__ + 1 + + a_dim1], lda, &w[i__ * w_dim1 + 1], &c__1, &c_b2, &w[ + i__ + 1 + i__ * w_dim1], &c__1, (ftnlen)12); + i__2 = *n - i__; + i__3 = i__ - 1; + zgemv_((char *)"Conjugate transpose", &i__2, &i__3, &c_b2, &a[i__ + 1 + + a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, & + c_b1, &w[i__ * w_dim1 + 1], &c__1, (ftnlen)19); + i__2 = *n - i__; + i__3 = i__ - 1; + z__1.r = -1., z__1.i = -0.; + zgemv_((char *)"No transpose", &i__2, &i__3, &z__1, &w[i__ + 1 + + w_dim1], ldw, &w[i__ * w_dim1 + 1], &c__1, &c_b2, &w[ + i__ + 1 + i__ * w_dim1], &c__1, (ftnlen)12); + i__2 = *n - i__; + zscal_(&i__2, &tau[i__], &w[i__ + 1 + i__ * w_dim1], &c__1); + z__3.r = -.5, z__3.i = -0.; + i__2 = i__; + z__2.r = z__3.r * tau[i__2].r - z__3.i * tau[i__2].i, z__2.i = + z__3.r * tau[i__2].i + z__3.i * tau[i__2].r; + i__3 = *n - i__; + zdotc_(&z__4, &i__3, &w[i__ + 1 + i__ * w_dim1], &c__1, &a[ + i__ + 1 + i__ * a_dim1], &c__1); + z__1.r = z__2.r * z__4.r - z__2.i * z__4.i, z__1.i = z__2.r * + z__4.i + z__2.i * z__4.r; + alpha.r = z__1.r, alpha.i = z__1.i; + i__2 = *n - i__; + zaxpy_(&i__2, &alpha, &a[i__ + 1 + i__ * a_dim1], &c__1, &w[ + i__ + 1 + i__ * w_dim1], &c__1); + } + +/* L20: */ + } + } + + return 0; + +/* End of ZLATRD */ + +} /* zlatrd_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/zpptrf.cpp b/lib/linalg/zpptrf.cpp new file mode 100644 index 0000000000..ebf3b7df9a --- /dev/null +++ b/lib/linalg/zpptrf.cpp @@ -0,0 +1,301 @@ +/* fortran/zpptrf.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static doublereal c_b16 = -1.; + +/* > \brief \b ZPPTRF */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZPPTRF + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZPPTRF( UPLO, N, AP, INFO ) */ + +/* .. Scalar Arguments .. */ +/* CHARACTER UPLO */ +/* INTEGER INFO, N */ +/* .. */ +/* .. Array Arguments .. */ +/* COMPLEX*16 AP( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZPPTRF computes the Cholesky factorization of a complex Hermitian */ +/* > positive definite matrix A stored in packed format. */ +/* > */ +/* > The factorization has the form */ +/* > A = U**H * U, if UPLO = 'U', or */ +/* > A = L * L**H, if UPLO = 'L', */ +/* > where U is an upper triangular matrix and L is lower triangular. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': Upper triangle of A is stored; */ +/* > = 'L': Lower triangle of A is stored. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] AP */ +/* > \verbatim */ +/* > AP is COMPLEX*16 array, dimension (N*(N+1)/2) */ +/* > On entry, the upper or lower triangle of the Hermitian matrix */ +/* > A, packed columnwise in a linear array. The j-th column of A */ +/* > is stored in the array AP as follows: */ +/* > if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; */ +/* > if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. */ +/* > See below for further details. */ +/* > */ +/* > On exit, if INFO = 0, the triangular factor U or L from the */ +/* > Cholesky factorization A = U**H*U or A = L*L**H, in the same */ +/* > storage format as A. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i, the leading minor of order i is not */ +/* > positive definite, and the factorization could not be */ +/* > completed. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup complex16OTHERcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > The packed storage scheme is illustrated by the following example */ +/* > when N = 4, UPLO = 'U': */ +/* > */ +/* > Two-dimensional storage of the Hermitian matrix A: */ +/* > */ +/* > a11 a12 a13 a14 */ +/* > a22 a23 a24 */ +/* > a33 a34 (aij = conjg(aji)) */ +/* > a44 */ +/* > */ +/* > Packed storage of the upper triangle of A: */ +/* > */ +/* > AP = [ a11, a12, a22, a13, a23, a33, a14, a24, a34, a44 ] */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int zpptrf_(char *uplo, integer *n, doublecomplex *ap, + integer *info, ftnlen uplo_len) +{ + /* System generated locals */ + integer i__1, i__2, i__3; + doublereal d__1; + doublecomplex z__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + integer j, jc, jj; + doublereal ajj; + extern /* Subroutine */ int zhpr_(char *, integer *, doublereal *, + doublecomplex *, integer *, doublecomplex *, ftnlen); + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern /* Double Complex */ VOID zdotc_(doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *); + logical upper; + extern /* Subroutine */ int ztpsv_(char *, char *, char *, integer *, + doublecomplex *, doublecomplex *, integer *, ftnlen, ftnlen, + ftnlen), xerbla_(char *, integer *, ftnlen), zdscal_(integer *, + doublereal *, doublecomplex *, integer *); + + +/* -- LAPACK computational routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + --ap; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + if (! upper && ! lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"ZPPTRF", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + if (upper) { + +/* Compute the Cholesky factorization A = U**H * U. */ + + jj = 0; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + jc = jj + 1; + jj += j; + +/* Compute elements 1:J-1 of column J. */ + + if (j > 1) { + i__2 = j - 1; + ztpsv_((char *)"Upper", (char *)"Conjugate transpose", (char *)"Non-unit", &i__2, &ap[ + 1], &ap[jc], &c__1, (ftnlen)5, (ftnlen)19, (ftnlen)8); + } + +/* Compute U(J,J) and test for non-positive-definiteness. */ + + i__2 = jj; + i__3 = j - 1; + zdotc_(&z__1, &i__3, &ap[jc], &c__1, &ap[jc], &c__1); + ajj = ap[i__2].r - z__1.r; + if (ajj <= 0.) { + i__2 = jj; + ap[i__2].r = ajj, ap[i__2].i = 0.; + goto L30; + } + i__2 = jj; + d__1 = sqrt(ajj); + ap[i__2].r = d__1, ap[i__2].i = 0.; +/* L10: */ + } + } else { + +/* Compute the Cholesky factorization A = L * L**H. */ + + jj = 1; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + +/* Compute L(J,J) and test for non-positive-definiteness. */ + + i__2 = jj; + ajj = ap[i__2].r; + if (ajj <= 0.) { + i__2 = jj; + ap[i__2].r = ajj, ap[i__2].i = 0.; + goto L30; + } + ajj = sqrt(ajj); + i__2 = jj; + ap[i__2].r = ajj, ap[i__2].i = 0.; + +/* Compute elements J+1:N of column J and update the trailing */ +/* submatrix. */ + + if (j < *n) { + i__2 = *n - j; + d__1 = 1. / ajj; + zdscal_(&i__2, &d__1, &ap[jj + 1], &c__1); + i__2 = *n - j; + zhpr_((char *)"Lower", &i__2, &c_b16, &ap[jj + 1], &c__1, &ap[jj + *n + - j + 1], (ftnlen)5); + jj = jj + *n - j + 1; + } +/* L20: */ + } + } + goto L40; + +L30: + *info = j; + +L40: + return 0; + +/* End of ZPPTRF */ + +} /* zpptrf_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/zpptri.cpp b/lib/linalg/zpptri.cpp new file mode 100644 index 0000000000..fc11e435b1 --- /dev/null +++ b/lib/linalg/zpptri.cpp @@ -0,0 +1,248 @@ +/* fortran/zpptri.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static doublereal c_b8 = 1.; +static integer c__1 = 1; + +/* > \brief \b ZPPTRI */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZPPTRI + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZPPTRI( UPLO, N, AP, INFO ) */ + +/* .. Scalar Arguments .. */ +/* CHARACTER UPLO */ +/* INTEGER INFO, N */ +/* .. */ +/* .. Array Arguments .. */ +/* COMPLEX*16 AP( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZPPTRI computes the inverse of a complex Hermitian positive definite */ +/* > matrix A using the Cholesky factorization A = U**H*U or A = L*L**H */ +/* > computed by ZPPTRF. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': Upper triangular factor is stored in AP; */ +/* > = 'L': Lower triangular factor is stored in AP. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] AP */ +/* > \verbatim */ +/* > AP is COMPLEX*16 array, dimension (N*(N+1)/2) */ +/* > On entry, the triangular factor U or L from the Cholesky */ +/* > factorization A = U**H*U or A = L*L**H, packed columnwise as */ +/* > a linear array. The j-th column of U or L is stored in the */ +/* > array AP as follows: */ +/* > if UPLO = 'U', AP(i + (j-1)*j/2) = U(i,j) for 1<=i<=j; */ +/* > if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = L(i,j) for j<=i<=n. */ +/* > */ +/* > On exit, the upper or lower triangle of the (Hermitian) */ +/* > inverse of A, overwriting the input factor U or L. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i, the (i,i) element of the factor U or L is */ +/* > zero, and the inverse could not be computed. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup complex16OTHERcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int zpptri_(char *uplo, integer *n, doublecomplex *ap, + integer *info, ftnlen uplo_len) +{ + /* System generated locals */ + integer i__1, i__2, i__3; + doublereal d__1; + doublecomplex z__1; + + /* Local variables */ + integer j, jc, jj; + doublereal ajj; + integer jjn; + extern /* Subroutine */ int zhpr_(char *, integer *, doublereal *, + doublecomplex *, integer *, doublecomplex *, ftnlen); + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern /* Double Complex */ VOID zdotc_(doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *); + logical upper; + extern /* Subroutine */ int ztpmv_(char *, char *, char *, integer *, + doublecomplex *, doublecomplex *, integer *, ftnlen, ftnlen, + ftnlen), xerbla_(char *, integer *, ftnlen), zdscal_(integer *, + doublereal *, doublecomplex *, integer *), ztptri_(char *, char *, + integer *, doublecomplex *, integer *, ftnlen, ftnlen); + + +/* -- LAPACK computational routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + --ap; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + if (! upper && ! lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"ZPPTRI", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Invert the triangular Cholesky factor U or L. */ + + ztptri_(uplo, (char *)"Non-unit", n, &ap[1], info, (ftnlen)1, (ftnlen)8); + if (*info > 0) { + return 0; + } + if (upper) { + +/* Compute the product inv(U) * inv(U)**H. */ + + jj = 0; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + jc = jj + 1; + jj += j; + if (j > 1) { + i__2 = j - 1; + zhpr_((char *)"Upper", &i__2, &c_b8, &ap[jc], &c__1, &ap[1], (ftnlen) + 5); + } + i__2 = jj; + ajj = ap[i__2].r; + zdscal_(&j, &ajj, &ap[jc], &c__1); +/* L10: */ + } + + } else { + +/* Compute the product inv(L)**H * inv(L). */ + + jj = 1; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + jjn = jj + *n - j + 1; + i__2 = jj; + i__3 = *n - j + 1; + zdotc_(&z__1, &i__3, &ap[jj], &c__1, &ap[jj], &c__1); + d__1 = z__1.r; + ap[i__2].r = d__1, ap[i__2].i = 0.; + if (j < *n) { + i__2 = *n - j; + ztpmv_((char *)"Lower", (char *)"Conjugate transpose", (char *)"Non-unit", &i__2, &ap[ + jjn], &ap[jj + 1], &c__1, (ftnlen)5, (ftnlen)19, ( + ftnlen)8); + } + jj = jjn; +/* L20: */ + } + } + + return 0; + +/* End of ZPPTRI */ + +} /* zpptri_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/zscal.cpp b/lib/linalg/zscal.cpp new file mode 100644 index 0000000000..5985adebaf --- /dev/null +++ b/lib/linalg/zscal.cpp @@ -0,0 +1,162 @@ +/* fortran/zscal.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* > \brief \b ZSCAL */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZSCAL(N,ZA,ZX,INCX) */ + +/* .. Scalar Arguments .. */ +/* COMPLEX*16 ZA */ +/* INTEGER INCX,N */ +/* .. */ +/* .. Array Arguments .. */ +/* COMPLEX*16 ZX(*) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZSCAL scales a vector by a constant. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > number of elements in input vector(s) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] ZA */ +/* > \verbatim */ +/* > ZA is COMPLEX*16 */ +/* > On entry, ZA specifies the scalar alpha. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] ZX */ +/* > \verbatim */ +/* > ZX is COMPLEX*16 array, dimension ( 1 + ( N - 1 )*abs( INCX ) ) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] INCX */ +/* > \verbatim */ +/* > INCX is INTEGER */ +/* > storage spacing between elements of ZX */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup complex16_blas_level1 */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > jack dongarra, 3/11/78. */ +/* > modified 3/93 to return if incx .le. 0. */ +/* > modified 12/3/93, array(1) declarations changed to array(*) */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int zscal_(integer *n, doublecomplex *za, doublecomplex *zx, + integer *incx) +{ + /* System generated locals */ + integer i__1, i__2, i__3, i__4; + doublecomplex z__1; + + /* Local variables */ + integer i__, nincx; + + +/* -- Reference BLAS level1 routine -- */ +/* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. Parameters .. */ +/* .. */ + /* Parameter adjustments */ + --zx; + + /* Function Body */ + if (*n <= 0 || *incx <= 0 || za->r == 1. && za->i == 0.) { + return 0; + } + if (*incx == 1) { + +/* code for increment equal to 1 */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + i__3 = i__; + z__1.r = za->r * zx[i__3].r - za->i * zx[i__3].i, z__1.i = za->r * + zx[i__3].i + za->i * zx[i__3].r; + zx[i__2].r = z__1.r, zx[i__2].i = z__1.i; + } + } else { + +/* code for increment not equal to 1 */ + + nincx = *n * *incx; + i__1 = nincx; + i__2 = *incx; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { + i__3 = i__; + i__4 = i__; + z__1.r = za->r * zx[i__4].r - za->i * zx[i__4].i, z__1.i = za->r * + zx[i__4].i + za->i * zx[i__4].r; + zx[i__3].r = z__1.r, zx[i__3].i = z__1.i; + } + } + return 0; + +/* End of ZSCAL */ + +} /* zscal_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/zstedc.cpp b/lib/linalg/zstedc.cpp new file mode 100644 index 0000000000..fb52b344b1 --- /dev/null +++ b/lib/linalg/zstedc.cpp @@ -0,0 +1,590 @@ +/* fortran/zstedc.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static integer c__9 = 9; +static integer c__0 = 0; +static integer c__2 = 2; +static doublereal c_b17 = 0.; +static doublereal c_b18 = 1.; +static integer c__1 = 1; + +/* > \brief \b ZSTEDC */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZSTEDC + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZSTEDC( COMPZ, N, D, E, Z, LDZ, WORK, LWORK, RWORK, */ +/* LRWORK, IWORK, LIWORK, INFO ) */ + +/* .. Scalar Arguments .. */ +/* CHARACTER COMPZ */ +/* INTEGER INFO, LDZ, LIWORK, LRWORK, LWORK, N */ +/* .. */ +/* .. Array Arguments .. */ +/* INTEGER IWORK( * ) */ +/* DOUBLE PRECISION D( * ), E( * ), RWORK( * ) */ +/* COMPLEX*16 WORK( * ), Z( LDZ, * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZSTEDC computes all eigenvalues and, optionally, eigenvectors of a */ +/* > symmetric tridiagonal matrix using the divide and conquer method. */ +/* > The eigenvectors of a full or band complex Hermitian matrix can also */ +/* > be found if ZHETRD or ZHPTRD or ZHBTRD has been used to reduce this */ +/* > matrix to tridiagonal form. */ +/* > */ +/* > This code makes very mild assumptions about floating point */ +/* > arithmetic. It will work on machines with a guard digit in */ +/* > add/subtract, or on those binary machines without guard digits */ +/* > which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. */ +/* > It could conceivably fail on hexadecimal or decimal machines */ +/* > without guard digits, but we know of none. See DLAED3 for details. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] COMPZ */ +/* > \verbatim */ +/* > COMPZ is CHARACTER*1 */ +/* > = 'N': Compute eigenvalues only. */ +/* > = 'I': Compute eigenvectors of tridiagonal matrix also. */ +/* > = 'V': Compute eigenvectors of original Hermitian matrix */ +/* > also. On entry, Z contains the unitary matrix used */ +/* > to reduce the original matrix to tridiagonal form. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The dimension of the symmetric tridiagonal matrix. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] D */ +/* > \verbatim */ +/* > D is DOUBLE PRECISION array, dimension (N) */ +/* > On entry, the diagonal elements of the tridiagonal matrix. */ +/* > On exit, if INFO = 0, the eigenvalues in ascending order. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] E */ +/* > \verbatim */ +/* > E is DOUBLE PRECISION array, dimension (N-1) */ +/* > On entry, the subdiagonal elements of the tridiagonal matrix. */ +/* > On exit, E has been destroyed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] Z */ +/* > \verbatim */ +/* > Z is COMPLEX*16 array, dimension (LDZ,N) */ +/* > On entry, if COMPZ = 'V', then Z contains the unitary */ +/* > matrix used in the reduction to tridiagonal form. */ +/* > On exit, if INFO = 0, then if COMPZ = 'V', Z contains the */ +/* > orthonormal eigenvectors of the original Hermitian matrix, */ +/* > and if COMPZ = 'I', Z contains the orthonormal eigenvectors */ +/* > of the symmetric tridiagonal matrix. */ +/* > If COMPZ = 'N', then Z is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDZ */ +/* > \verbatim */ +/* > LDZ is INTEGER */ +/* > The leading dimension of the array Z. LDZ >= 1. */ +/* > If eigenvectors are desired, then LDZ >= max(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. */ +/* > If COMPZ = 'N' or 'I', or N <= 1, LWORK must be at least 1. */ +/* > If COMPZ = 'V' and N > 1, LWORK must be at least N*N. */ +/* > Note that for COMPZ = 'V', then if N is less than or */ +/* > equal to the minimum divide size, usually 25, then LWORK need */ +/* > only be 1. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal sizes of the WORK, RWORK and */ +/* > IWORK arrays, returns these values as the first entries of */ +/* > the WORK, RWORK and IWORK arrays, and no error message */ +/* > related to LWORK or LRWORK or LIWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] RWORK */ +/* > \verbatim */ +/* > RWORK is DOUBLE PRECISION array, dimension (MAX(1,LRWORK)) */ +/* > On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LRWORK */ +/* > \verbatim */ +/* > LRWORK is INTEGER */ +/* > The dimension of the array RWORK. */ +/* > If COMPZ = 'N' or N <= 1, LRWORK must be at least 1. */ +/* > If COMPZ = 'V' and N > 1, LRWORK must be at least */ +/* > 1 + 3*N + 2*N*lg N + 4*N**2 , */ +/* > where lg( N ) = smallest integer k such */ +/* > that 2**k >= N. */ +/* > If COMPZ = 'I' and N > 1, LRWORK must be at least */ +/* > 1 + 4*N + 2*N**2 . */ +/* > Note that for COMPZ = 'I' or 'V', then if N is less than or */ +/* > equal to the minimum divide size, usually 25, then LRWORK */ +/* > need only be max(1,2*(N-1)). */ +/* > */ +/* > If LRWORK = -1, then a workspace query is assumed; the */ +/* > routine only calculates the optimal sizes of the WORK, RWORK */ +/* > and IWORK arrays, returns these values as the first entries */ +/* > of the WORK, RWORK and IWORK arrays, and no error message */ +/* > related to LWORK or LRWORK or LIWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] IWORK */ +/* > \verbatim */ +/* > IWORK is INTEGER array, dimension (MAX(1,LIWORK)) */ +/* > On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LIWORK */ +/* > \verbatim */ +/* > LIWORK is INTEGER */ +/* > The dimension of the array IWORK. */ +/* > If COMPZ = 'N' or N <= 1, LIWORK must be at least 1. */ +/* > If COMPZ = 'V' or N > 1, LIWORK must be at least */ +/* > 6 + 6*N + 5*N*lg N. */ +/* > If COMPZ = 'I' or N > 1, LIWORK must be at least */ +/* > 3 + 5*N . */ +/* > Note that for COMPZ = 'I' or 'V', then if N is less than or */ +/* > equal to the minimum divide size, usually 25, then LIWORK */ +/* > need only be 1. */ +/* > */ +/* > If LIWORK = -1, then a workspace query is assumed; the */ +/* > routine only calculates the optimal sizes of the WORK, RWORK */ +/* > and IWORK arrays, returns these values as the first entries */ +/* > of the WORK, RWORK and IWORK arrays, and no error message */ +/* > related to LWORK or LRWORK or LIWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit. */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value. */ +/* > > 0: The algorithm failed to compute an eigenvalue while */ +/* > working on the submatrix lying in rows and columns */ +/* > INFO/(N+1) through mod(INFO,N+1). */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup complex16OTHERcomputational */ + +/* > \par Contributors: */ +/* ================== */ +/* > */ +/* > Jeff Rutter, Computer Science Division, University of California */ +/* > at Berkeley, USA */ + +/* ===================================================================== */ +/* Subroutine */ int zstedc_(char *compz, integer *n, doublereal *d__, + doublereal *e, doublecomplex *z__, integer *ldz, doublecomplex *work, + integer *lwork, doublereal *rwork, integer *lrwork, integer *iwork, + integer *liwork, integer *info, ftnlen compz_len) +{ + /* System generated locals */ + integer z_dim1, z_offset, i__1, i__2, i__3, i__4; + doublereal d__1, d__2; + + /* Builtin functions */ + double log(doublereal); + integer pow_ii(integer *, integer *); + double sqrt(doublereal); + + /* Local variables */ + integer i__, j, k, m; + doublereal p; + integer ii, ll, lgn; + doublereal eps, tiny; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + integer lwmin, start; + extern /* Subroutine */ int zswap_(integer *, doublecomplex *, integer *, + doublecomplex *, integer *), zlaed0_(integer *, integer *, + doublereal *, doublereal *, doublecomplex *, integer *, + doublecomplex *, integer *, doublereal *, integer *, integer *); + extern doublereal dlamch_(char *, ftnlen); + extern /* Subroutine */ int dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *, ftnlen), dstedc_(char *, integer *, + doublereal *, doublereal *, doublereal *, integer *, doublereal *, + integer *, integer *, integer *, integer *, ftnlen), dlaset_( + char *, integer *, integer *, doublereal *, doublereal *, + doublereal *, integer *, ftnlen), xerbla_(char *, integer *, + ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + integer finish; + extern doublereal dlanst_(char *, integer *, doublereal *, doublereal *, + ftnlen); + extern /* Subroutine */ int dsterf_(integer *, doublereal *, doublereal *, + integer *), zlacrm_(integer *, integer *, doublecomplex *, + integer *, doublereal *, integer *, doublecomplex *, integer *, + doublereal *); + integer liwmin, icompz; + extern /* Subroutine */ int dsteqr_(char *, integer *, doublereal *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + ftnlen), zlacpy_(char *, integer *, integer *, doublecomplex *, + integer *, doublecomplex *, integer *, ftnlen); + doublereal orgnrm; + integer lrwmin; + logical lquery; + integer smlsiz; + extern /* Subroutine */ int zsteqr_(char *, integer *, doublereal *, + doublereal *, doublecomplex *, integer *, doublereal *, integer *, + ftnlen); + + +/* -- LAPACK computational routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + --d__; + --e; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --work; + --rwork; + --iwork; + + /* Function Body */ + *info = 0; + lquery = *lwork == -1 || *lrwork == -1 || *liwork == -1; + + if (lsame_(compz, (char *)"N", (ftnlen)1, (ftnlen)1)) { + icompz = 0; + } else if (lsame_(compz, (char *)"V", (ftnlen)1, (ftnlen)1)) { + icompz = 1; + } else if (lsame_(compz, (char *)"I", (ftnlen)1, (ftnlen)1)) { + icompz = 2; + } else { + icompz = -1; + } + if (icompz < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*ldz < 1 || icompz > 0 && *ldz < max(1,*n)) { + *info = -6; + } + + if (*info == 0) { + +/* Compute the workspace requirements */ + + smlsiz = ilaenv_(&c__9, (char *)"ZSTEDC", (char *)" ", &c__0, &c__0, &c__0, &c__0, ( + ftnlen)6, (ftnlen)1); + if (*n <= 1 || icompz == 0) { + lwmin = 1; + liwmin = 1; + lrwmin = 1; + } else if (*n <= smlsiz) { + lwmin = 1; + liwmin = 1; + lrwmin = *n - 1 << 1; + } else if (icompz == 1) { + lgn = (integer) (log((doublereal) (*n)) / log(2.)); + if (pow_ii(&c__2, &lgn) < *n) { + ++lgn; + } + if (pow_ii(&c__2, &lgn) < *n) { + ++lgn; + } + lwmin = *n * *n; +/* Computing 2nd power */ + i__1 = *n; + lrwmin = *n * 3 + 1 + (*n << 1) * lgn + (i__1 * i__1 << 2); + liwmin = *n * 6 + 6 + *n * 5 * lgn; + } else if (icompz == 2) { + lwmin = 1; +/* Computing 2nd power */ + i__1 = *n; + lrwmin = (*n << 2) + 1 + (i__1 * i__1 << 1); + liwmin = *n * 5 + 3; + } + work[1].r = (doublereal) lwmin, work[1].i = 0.; + rwork[1] = (doublereal) lrwmin; + iwork[1] = liwmin; + + if (*lwork < lwmin && ! lquery) { + *info = -8; + } else if (*lrwork < lrwmin && ! lquery) { + *info = -10; + } else if (*liwork < liwmin && ! lquery) { + *info = -12; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"ZSTEDC", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + if (*n == 1) { + if (icompz != 0) { + i__1 = z_dim1 + 1; + z__[i__1].r = 1., z__[i__1].i = 0.; + } + return 0; + } + +/* If the following conditional clause is removed, then the routine */ +/* will use the Divide and Conquer routine to compute only the */ +/* eigenvalues, which requires (3N + 3N**2) real workspace and */ +/* (2 + 5N + 2N lg(N)) integer workspace. */ +/* Since on many architectures DSTERF is much faster than any other */ +/* algorithm for finding eigenvalues only, it is used here */ +/* as the default. If the conditional clause is removed, then */ +/* information on the size of workspace needs to be changed. */ + +/* If COMPZ = 'N', use DSTERF to compute the eigenvalues. */ + + if (icompz == 0) { + dsterf_(n, &d__[1], &e[1], info); + goto L70; + } + +/* If N is smaller than the minimum divide size (SMLSIZ+1), then */ +/* solve the problem with another solver. */ + + if (*n <= smlsiz) { + + zsteqr_(compz, n, &d__[1], &e[1], &z__[z_offset], ldz, &rwork[1], + info, (ftnlen)1); + + } else { + +/* If COMPZ = 'I', we simply call DSTEDC instead. */ + + if (icompz == 2) { + dlaset_((char *)"Full", n, n, &c_b17, &c_b18, &rwork[1], n, (ftnlen)4); + ll = *n * *n + 1; + i__1 = *lrwork - ll + 1; + dstedc_((char *)"I", n, &d__[1], &e[1], &rwork[1], n, &rwork[ll], &i__1, & + iwork[1], liwork, info, (ftnlen)1); + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * z_dim1; + i__4 = (j - 1) * *n + i__; + z__[i__3].r = rwork[i__4], z__[i__3].i = 0.; +/* L10: */ + } +/* L20: */ + } + goto L70; + } + +/* From now on, only option left to be handled is COMPZ = 'V', */ +/* i.e. ICOMPZ = 1. */ + +/* Scale. */ + + orgnrm = dlanst_((char *)"M", n, &d__[1], &e[1], (ftnlen)1); + if (orgnrm == 0.) { + goto L70; + } + + eps = dlamch_((char *)"Epsilon", (ftnlen)7); + + start = 1; + +/* while ( START <= N ) */ + +L30: + if (start <= *n) { + +/* Let FINISH be the position of the next subdiagonal entry */ +/* such that E( FINISH ) <= TINY or FINISH = N if no such */ +/* subdiagonal exists. The matrix identified by the elements */ +/* between START and FINISH constitutes an independent */ +/* sub-problem. */ + + finish = start; +L40: + if (finish < *n) { + tiny = eps * sqrt((d__1 = d__[finish], abs(d__1))) * sqrt(( + d__2 = d__[finish + 1], abs(d__2))); + if ((d__1 = e[finish], abs(d__1)) > tiny) { + ++finish; + goto L40; + } + } + +/* (Sub) Problem determined. Compute its size and solve it. */ + + m = finish - start + 1; + if (m > smlsiz) { + +/* Scale. */ + + orgnrm = dlanst_((char *)"M", &m, &d__[start], &e[start], (ftnlen)1); + dlascl_((char *)"G", &c__0, &c__0, &orgnrm, &c_b18, &m, &c__1, &d__[ + start], &m, info, (ftnlen)1); + i__1 = m - 1; + i__2 = m - 1; + dlascl_((char *)"G", &c__0, &c__0, &orgnrm, &c_b18, &i__1, &c__1, &e[ + start], &i__2, info, (ftnlen)1); + + zlaed0_(n, &m, &d__[start], &e[start], &z__[start * z_dim1 + + 1], ldz, &work[1], n, &rwork[1], &iwork[1], info); + if (*info > 0) { + *info = (*info / (m + 1) + start - 1) * (*n + 1) + *info % + (m + 1) + start - 1; + goto L70; + } + +/* Scale back. */ + + dlascl_((char *)"G", &c__0, &c__0, &c_b18, &orgnrm, &m, &c__1, &d__[ + start], &m, info, (ftnlen)1); + + } else { + dsteqr_((char *)"I", &m, &d__[start], &e[start], &rwork[1], &m, & + rwork[m * m + 1], info, (ftnlen)1); + zlacrm_(n, &m, &z__[start * z_dim1 + 1], ldz, &rwork[1], &m, & + work[1], n, &rwork[m * m + 1]); + zlacpy_((char *)"A", n, &m, &work[1], n, &z__[start * z_dim1 + 1], + ldz, (ftnlen)1); + if (*info > 0) { + *info = start * (*n + 1) + finish; + goto L70; + } + } + + start = finish + 1; + goto L30; + } + +/* endwhile */ + + +/* Use Selection Sort to minimize swaps of eigenvectors */ + + i__1 = *n; + for (ii = 2; ii <= i__1; ++ii) { + i__ = ii - 1; + k = i__; + p = d__[i__]; + i__2 = *n; + for (j = ii; j <= i__2; ++j) { + if (d__[j] < p) { + k = j; + p = d__[j]; + } +/* L50: */ + } + if (k != i__) { + d__[k] = d__[i__]; + d__[i__] = p; + zswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[k * z_dim1 + 1], + &c__1); + } +/* L60: */ + } + } + +L70: + work[1].r = (doublereal) lwmin, work[1].i = 0.; + rwork[1] = (doublereal) lrwmin; + iwork[1] = liwmin; + + return 0; + +/* End of ZSTEDC */ + +} /* zstedc_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/zsteqr.cpp b/lib/linalg/zsteqr.cpp new file mode 100644 index 0000000000..21cb31cc10 --- /dev/null +++ b/lib/linalg/zsteqr.cpp @@ -0,0 +1,706 @@ +/* fortran/zsteqr.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static doublecomplex c_b1 = {0.,0.}; +static doublecomplex c_b2 = {1.,0.}; +static integer c__0 = 0; +static integer c__1 = 1; +static integer c__2 = 2; +static doublereal c_b41 = 1.; + +/* > \brief \b ZSTEQR */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZSTEQR + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZSTEQR( COMPZ, N, D, E, Z, LDZ, WORK, INFO ) */ + +/* .. Scalar Arguments .. */ +/* CHARACTER COMPZ */ +/* INTEGER INFO, LDZ, N */ +/* .. */ +/* .. Array Arguments .. */ +/* DOUBLE PRECISION D( * ), E( * ), WORK( * ) */ +/* COMPLEX*16 Z( LDZ, * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZSTEQR computes all eigenvalues and, optionally, eigenvectors of a */ +/* > symmetric tridiagonal matrix using the implicit QL or QR method. */ +/* > The eigenvectors of a full or band complex Hermitian matrix can also */ +/* > be found if ZHETRD or ZHPTRD or ZHBTRD has been used to reduce this */ +/* > matrix to tridiagonal form. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] COMPZ */ +/* > \verbatim */ +/* > COMPZ is CHARACTER*1 */ +/* > = 'N': Compute eigenvalues only. */ +/* > = 'V': Compute eigenvalues and eigenvectors of the original */ +/* > Hermitian matrix. On entry, Z must contain the */ +/* > unitary matrix used to reduce the original matrix */ +/* > to tridiagonal form. */ +/* > = 'I': Compute eigenvalues and eigenvectors of the */ +/* > tridiagonal matrix. Z is initialized to the identity */ +/* > matrix. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] D */ +/* > \verbatim */ +/* > D is DOUBLE PRECISION array, dimension (N) */ +/* > On entry, the diagonal elements of the tridiagonal matrix. */ +/* > On exit, if INFO = 0, the eigenvalues in ascending order. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] E */ +/* > \verbatim */ +/* > E is DOUBLE PRECISION array, dimension (N-1) */ +/* > On entry, the (n-1) subdiagonal elements of the tridiagonal */ +/* > matrix. */ +/* > On exit, E has been destroyed. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] Z */ +/* > \verbatim */ +/* > Z is COMPLEX*16 array, dimension (LDZ, N) */ +/* > On entry, if COMPZ = 'V', then Z contains the unitary */ +/* > matrix used in the reduction to tridiagonal form. */ +/* > On exit, if INFO = 0, then if COMPZ = 'V', Z contains the */ +/* > orthonormal eigenvectors of the original Hermitian matrix, */ +/* > and if COMPZ = 'I', Z contains the orthonormal eigenvectors */ +/* > of the symmetric tridiagonal matrix. */ +/* > If COMPZ = 'N', then Z is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDZ */ +/* > \verbatim */ +/* > LDZ is INTEGER */ +/* > The leading dimension of the array Z. LDZ >= 1, and if */ +/* > eigenvectors are desired, then LDZ >= max(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is DOUBLE PRECISION array, dimension (max(1,2*N-2)) */ +/* > If COMPZ = 'N', then WORK is not referenced. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: the algorithm has failed to find all the eigenvalues in */ +/* > a total of 30*N iterations; if INFO = i, then i */ +/* > elements of E have not converged to zero; on exit, D */ +/* > and E contain the elements of a symmetric tridiagonal */ +/* > matrix which is unitarily similar to the original */ +/* > matrix. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup complex16OTHERcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int zsteqr_(char *compz, integer *n, doublereal *d__, + doublereal *e, doublecomplex *z__, integer *ldz, doublereal *work, + integer *info, ftnlen compz_len) +{ + /* System generated locals */ + integer z_dim1, z_offset, i__1, i__2; + doublereal d__1, d__2; + + /* Builtin functions */ + double sqrt(doublereal), d_sign(doublereal *, doublereal *); + + /* Local variables */ + doublereal b, c__, f, g; + integer i__, j, k, l, m; + doublereal p, r__, s; + integer l1, ii, mm, lm1, mm1, nm1; + doublereal rt1, rt2, eps; + integer lsv; + doublereal tst, eps2; + integer lend, jtot; + extern /* Subroutine */ int dlae2_(doublereal *, doublereal *, doublereal + *, doublereal *, doublereal *); + extern logical lsame_(char *, char *, ftnlen, ftnlen); + doublereal anorm; + extern /* Subroutine */ int zlasr_(char *, char *, char *, integer *, + integer *, doublereal *, doublereal *, doublecomplex *, integer *, + ftnlen, ftnlen, ftnlen), zswap_(integer *, doublecomplex *, + integer *, doublecomplex *, integer *), dlaev2_(doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *); + integer lendm1, lendp1; + extern doublereal dlapy2_(doublereal *, doublereal *), dlamch_(char *, + ftnlen); + integer iscale; + extern /* Subroutine */ int dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *, ftnlen); + doublereal safmin; + extern /* Subroutine */ int dlartg_(doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *); + doublereal safmax; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + extern doublereal dlanst_(char *, integer *, doublereal *, doublereal *, + ftnlen); + extern /* Subroutine */ int dlasrt_(char *, integer *, doublereal *, + integer *, ftnlen); + integer lendsv; + doublereal ssfmin; + integer nmaxit, icompz; + doublereal ssfmax; + extern /* Subroutine */ int zlaset_(char *, integer *, integer *, + doublecomplex *, doublecomplex *, doublecomplex *, integer *, + ftnlen); + + +/* -- LAPACK computational routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + --d__; + --e; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --work; + + /* Function Body */ + *info = 0; + + if (lsame_(compz, (char *)"N", (ftnlen)1, (ftnlen)1)) { + icompz = 0; + } else if (lsame_(compz, (char *)"V", (ftnlen)1, (ftnlen)1)) { + icompz = 1; + } else if (lsame_(compz, (char *)"I", (ftnlen)1, (ftnlen)1)) { + icompz = 2; + } else { + icompz = -1; + } + if (icompz < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*ldz < 1 || icompz > 0 && *ldz < max(1,*n)) { + *info = -6; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"ZSTEQR", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + if (*n == 1) { + if (icompz == 2) { + i__1 = z_dim1 + 1; + z__[i__1].r = 1., z__[i__1].i = 0.; + } + return 0; + } + +/* Determine the unit roundoff and over/underflow thresholds. */ + + eps = dlamch_((char *)"E", (ftnlen)1); +/* Computing 2nd power */ + d__1 = eps; + eps2 = d__1 * d__1; + safmin = dlamch_((char *)"S", (ftnlen)1); + safmax = 1. / safmin; + ssfmax = sqrt(safmax) / 3.; + ssfmin = sqrt(safmin) / eps2; + +/* Compute the eigenvalues and eigenvectors of the tridiagonal */ +/* matrix. */ + + if (icompz == 2) { + zlaset_((char *)"Full", n, n, &c_b1, &c_b2, &z__[z_offset], ldz, (ftnlen)4); + } + + nmaxit = *n * 30; + jtot = 0; + +/* Determine where the matrix splits and choose QL or QR iteration */ +/* for each block, according to whether top or bottom diagonal */ +/* element is smaller. */ + + l1 = 1; + nm1 = *n - 1; + +L10: + if (l1 > *n) { + goto L160; + } + if (l1 > 1) { + e[l1 - 1] = 0.; + } + if (l1 <= nm1) { + i__1 = nm1; + for (m = l1; m <= i__1; ++m) { + tst = (d__1 = e[m], abs(d__1)); + if (tst == 0.) { + goto L30; + } + if (tst <= sqrt((d__1 = d__[m], abs(d__1))) * sqrt((d__2 = d__[m + + 1], abs(d__2))) * eps) { + e[m] = 0.; + goto L30; + } +/* L20: */ + } + } + m = *n; + +L30: + l = l1; + lsv = l; + lend = m; + lendsv = lend; + l1 = m + 1; + if (lend == l) { + goto L10; + } + +/* Scale submatrix in rows and columns L to LEND */ + + i__1 = lend - l + 1; + anorm = dlanst_((char *)"I", &i__1, &d__[l], &e[l], (ftnlen)1); + iscale = 0; + if (anorm == 0.) { + goto L10; + } + if (anorm > ssfmax) { + iscale = 1; + i__1 = lend - l + 1; + dlascl_((char *)"G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &d__[l], n, + info, (ftnlen)1); + i__1 = lend - l; + dlascl_((char *)"G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &e[l], n, + info, (ftnlen)1); + } else if (anorm < ssfmin) { + iscale = 2; + i__1 = lend - l + 1; + dlascl_((char *)"G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &d__[l], n, + info, (ftnlen)1); + i__1 = lend - l; + dlascl_((char *)"G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &e[l], n, + info, (ftnlen)1); + } + +/* Choose between QL and QR iteration */ + + if ((d__1 = d__[lend], abs(d__1)) < (d__2 = d__[l], abs(d__2))) { + lend = lsv; + l = lendsv; + } + + if (lend > l) { + +/* QL Iteration */ + +/* Look for small subdiagonal element. */ + +L40: + if (l != lend) { + lendm1 = lend - 1; + i__1 = lendm1; + for (m = l; m <= i__1; ++m) { +/* Computing 2nd power */ + d__2 = (d__1 = e[m], abs(d__1)); + tst = d__2 * d__2; + if (tst <= eps2 * (d__1 = d__[m], abs(d__1)) * (d__2 = d__[m + + 1], abs(d__2)) + safmin) { + goto L60; + } +/* L50: */ + } + } + + m = lend; + +L60: + if (m < lend) { + e[m] = 0.; + } + p = d__[l]; + if (m == l) { + goto L80; + } + +/* If remaining matrix is 2-by-2, use DLAE2 or SLAEV2 */ +/* to compute its eigensystem. */ + + if (m == l + 1) { + if (icompz > 0) { + dlaev2_(&d__[l], &e[l], &d__[l + 1], &rt1, &rt2, &c__, &s); + work[l] = c__; + work[*n - 1 + l] = s; + zlasr_((char *)"R", (char *)"V", (char *)"B", n, &c__2, &work[l], &work[*n - 1 + l], & + z__[l * z_dim1 + 1], ldz, (ftnlen)1, (ftnlen)1, ( + ftnlen)1); + } else { + dlae2_(&d__[l], &e[l], &d__[l + 1], &rt1, &rt2); + } + d__[l] = rt1; + d__[l + 1] = rt2; + e[l] = 0.; + l += 2; + if (l <= lend) { + goto L40; + } + goto L140; + } + + if (jtot == nmaxit) { + goto L140; + } + ++jtot; + +/* Form shift. */ + + g = (d__[l + 1] - p) / (e[l] * 2.); + r__ = dlapy2_(&g, &c_b41); + g = d__[m] - p + e[l] / (g + d_sign(&r__, &g)); + + s = 1.; + c__ = 1.; + p = 0.; + +/* Inner loop */ + + mm1 = m - 1; + i__1 = l; + for (i__ = mm1; i__ >= i__1; --i__) { + f = s * e[i__]; + b = c__ * e[i__]; + dlartg_(&g, &f, &c__, &s, &r__); + if (i__ != m - 1) { + e[i__ + 1] = r__; + } + g = d__[i__ + 1] - p; + r__ = (d__[i__] - g) * s + c__ * 2. * b; + p = s * r__; + d__[i__ + 1] = g + p; + g = c__ * r__ - b; + +/* If eigenvectors are desired, then save rotations. */ + + if (icompz > 0) { + work[i__] = c__; + work[*n - 1 + i__] = -s; + } + +/* L70: */ + } + +/* If eigenvectors are desired, then apply saved rotations. */ + + if (icompz > 0) { + mm = m - l + 1; + zlasr_((char *)"R", (char *)"V", (char *)"B", n, &mm, &work[l], &work[*n - 1 + l], &z__[l + * z_dim1 + 1], ldz, (ftnlen)1, (ftnlen)1, (ftnlen)1); + } + + d__[l] -= p; + e[l] = g; + goto L40; + +/* Eigenvalue found. */ + +L80: + d__[l] = p; + + ++l; + if (l <= lend) { + goto L40; + } + goto L140; + + } else { + +/* QR Iteration */ + +/* Look for small superdiagonal element. */ + +L90: + if (l != lend) { + lendp1 = lend + 1; + i__1 = lendp1; + for (m = l; m >= i__1; --m) { +/* Computing 2nd power */ + d__2 = (d__1 = e[m - 1], abs(d__1)); + tst = d__2 * d__2; + if (tst <= eps2 * (d__1 = d__[m], abs(d__1)) * (d__2 = d__[m + - 1], abs(d__2)) + safmin) { + goto L110; + } +/* L100: */ + } + } + + m = lend; + +L110: + if (m > lend) { + e[m - 1] = 0.; + } + p = d__[l]; + if (m == l) { + goto L130; + } + +/* If remaining matrix is 2-by-2, use DLAE2 or SLAEV2 */ +/* to compute its eigensystem. */ + + if (m == l - 1) { + if (icompz > 0) { + dlaev2_(&d__[l - 1], &e[l - 1], &d__[l], &rt1, &rt2, &c__, &s) + ; + work[m] = c__; + work[*n - 1 + m] = s; + zlasr_((char *)"R", (char *)"V", (char *)"F", n, &c__2, &work[m], &work[*n - 1 + m], & + z__[(l - 1) * z_dim1 + 1], ldz, (ftnlen)1, (ftnlen)1, + (ftnlen)1); + } else { + dlae2_(&d__[l - 1], &e[l - 1], &d__[l], &rt1, &rt2); + } + d__[l - 1] = rt1; + d__[l] = rt2; + e[l - 1] = 0.; + l += -2; + if (l >= lend) { + goto L90; + } + goto L140; + } + + if (jtot == nmaxit) { + goto L140; + } + ++jtot; + +/* Form shift. */ + + g = (d__[l - 1] - p) / (e[l - 1] * 2.); + r__ = dlapy2_(&g, &c_b41); + g = d__[m] - p + e[l - 1] / (g + d_sign(&r__, &g)); + + s = 1.; + c__ = 1.; + p = 0.; + +/* Inner loop */ + + lm1 = l - 1; + i__1 = lm1; + for (i__ = m; i__ <= i__1; ++i__) { + f = s * e[i__]; + b = c__ * e[i__]; + dlartg_(&g, &f, &c__, &s, &r__); + if (i__ != m) { + e[i__ - 1] = r__; + } + g = d__[i__] - p; + r__ = (d__[i__ + 1] - g) * s + c__ * 2. * b; + p = s * r__; + d__[i__] = g + p; + g = c__ * r__ - b; + +/* If eigenvectors are desired, then save rotations. */ + + if (icompz > 0) { + work[i__] = c__; + work[*n - 1 + i__] = s; + } + +/* L120: */ + } + +/* If eigenvectors are desired, then apply saved rotations. */ + + if (icompz > 0) { + mm = l - m + 1; + zlasr_((char *)"R", (char *)"V", (char *)"F", n, &mm, &work[m], &work[*n - 1 + m], &z__[m + * z_dim1 + 1], ldz, (ftnlen)1, (ftnlen)1, (ftnlen)1); + } + + d__[l] -= p; + e[lm1] = g; + goto L90; + +/* Eigenvalue found. */ + +L130: + d__[l] = p; + + --l; + if (l >= lend) { + goto L90; + } + goto L140; + + } + +/* Undo scaling if necessary */ + +L140: + if (iscale == 1) { + i__1 = lendsv - lsv + 1; + dlascl_((char *)"G", &c__0, &c__0, &ssfmax, &anorm, &i__1, &c__1, &d__[lsv], + n, info, (ftnlen)1); + i__1 = lendsv - lsv; + dlascl_((char *)"G", &c__0, &c__0, &ssfmax, &anorm, &i__1, &c__1, &e[lsv], n, + info, (ftnlen)1); + } else if (iscale == 2) { + i__1 = lendsv - lsv + 1; + dlascl_((char *)"G", &c__0, &c__0, &ssfmin, &anorm, &i__1, &c__1, &d__[lsv], + n, info, (ftnlen)1); + i__1 = lendsv - lsv; + dlascl_((char *)"G", &c__0, &c__0, &ssfmin, &anorm, &i__1, &c__1, &e[lsv], n, + info, (ftnlen)1); + } + +/* Check for no convergence to an eigenvalue after a total */ +/* of N*MAXIT iterations. */ + + if (jtot == nmaxit) { + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + if (e[i__] != 0.) { + ++(*info); + } +/* L150: */ + } + return 0; + } + goto L10; + +/* Order eigenvalues and eigenvectors. */ + +L160: + if (icompz == 0) { + +/* Use Quick Sort */ + + dlasrt_((char *)"I", n, &d__[1], info, (ftnlen)1); + + } else { + +/* Use Selection Sort to minimize swaps of eigenvectors */ + + i__1 = *n; + for (ii = 2; ii <= i__1; ++ii) { + i__ = ii - 1; + k = i__; + p = d__[i__]; + i__2 = *n; + for (j = ii; j <= i__2; ++j) { + if (d__[j] < p) { + k = j; + p = d__[j]; + } +/* L170: */ + } + if (k != i__) { + d__[k] = d__[i__]; + d__[i__] = p; + zswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[k * z_dim1 + 1], + &c__1); + } +/* L180: */ + } + } + return 0; + +/* End of ZSTEQR */ + +} /* zsteqr_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/zswap.cpp b/lib/linalg/zswap.cpp new file mode 100644 index 0000000000..d00bb044fd --- /dev/null +++ b/lib/linalg/zswap.cpp @@ -0,0 +1,176 @@ +/* fortran/zswap.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* > \brief \b ZSWAP */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZSWAP(N,ZX,INCX,ZY,INCY) */ + +/* .. Scalar Arguments .. */ +/* INTEGER INCX,INCY,N */ +/* .. */ +/* .. Array Arguments .. */ +/* COMPLEX*16 ZX(*),ZY(*) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZSWAP interchanges two vectors. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > number of elements in input vector(s) */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] ZX */ +/* > \verbatim */ +/* > ZX is COMPLEX*16 array, dimension ( 1 + ( N - 1 )*abs( INCX ) ) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] INCX */ +/* > \verbatim */ +/* > INCX is INTEGER */ +/* > storage spacing between elements of ZX */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] ZY */ +/* > \verbatim */ +/* > ZY is COMPLEX*16 array, dimension ( 1 + ( N - 1 )*abs( INCY ) ) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] INCY */ +/* > \verbatim */ +/* > INCY is INTEGER */ +/* > storage spacing between elements of ZY */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup complex16_blas_level1 */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > jack dongarra, 3/11/78. */ +/* > modified 12/3/93, array(1) declarations changed to array(*) */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int zswap_(integer *n, doublecomplex *zx, integer *incx, + doublecomplex *zy, integer *incy) +{ + /* System generated locals */ + integer i__1, i__2, i__3; + + /* Local variables */ + integer i__, ix, iy; + doublecomplex ztemp; + + +/* -- Reference BLAS level1 routine -- */ +/* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ + /* Parameter adjustments */ + --zy; + --zx; + + /* Function Body */ + if (*n <= 0) { + return 0; + } + if (*incx == 1 && *incy == 1) { + +/* code for both increments equal to 1 */ + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + ztemp.r = zx[i__2].r, ztemp.i = zx[i__2].i; + i__2 = i__; + i__3 = i__; + zx[i__2].r = zy[i__3].r, zx[i__2].i = zy[i__3].i; + i__2 = i__; + zy[i__2].r = ztemp.r, zy[i__2].i = ztemp.i; + } + } else { + +/* code for unequal increments or equal increments not equal */ +/* to 1 */ + + ix = 1; + iy = 1; + if (*incx < 0) { + ix = (-(*n) + 1) * *incx + 1; + } + if (*incy < 0) { + iy = (-(*n) + 1) * *incy + 1; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = ix; + ztemp.r = zx[i__2].r, ztemp.i = zx[i__2].i; + i__2 = ix; + i__3 = iy; + zx[i__2].r = zy[i__3].r, zx[i__2].i = zy[i__3].i; + i__2 = iy; + zy[i__2].r = ztemp.r, zy[i__2].i = ztemp.i; + ix += *incx; + iy += *incy; + } + } + return 0; + +/* End of ZSWAP */ + +} /* zswap_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/ztpmv.cpp b/lib/linalg/ztpmv.cpp new file mode 100644 index 0000000000..a29145d6bd --- /dev/null +++ b/lib/linalg/ztpmv.cpp @@ -0,0 +1,639 @@ +/* fortran/ztpmv.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* > \brief \b ZTPMV */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZTPMV(UPLO,TRANS,DIAG,N,AP,X,INCX) */ + +/* .. Scalar Arguments .. */ +/* INTEGER INCX,N */ +/* CHARACTER DIAG,TRANS,UPLO */ +/* .. */ +/* .. Array Arguments .. */ +/* COMPLEX*16 AP(*),X(*) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZTPMV performs one of the matrix-vector operations */ +/* > */ +/* > x := A*x, or x := A**T*x, or x := A**H*x, */ +/* > */ +/* > where x is an n element vector and A is an n by n unit, or non-unit, */ +/* > upper or lower triangular matrix, supplied in packed form. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > On entry, UPLO specifies whether the matrix is an upper or */ +/* > lower triangular matrix as follows: */ +/* > */ +/* > UPLO = 'U' or 'u' A is an upper triangular matrix. */ +/* > */ +/* > UPLO = 'L' or 'l' A is a lower triangular matrix. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TRANS */ +/* > \verbatim */ +/* > TRANS is CHARACTER*1 */ +/* > On entry, TRANS specifies the operation to be performed as */ +/* > follows: */ +/* > */ +/* > TRANS = 'N' or 'n' x := A*x. */ +/* > */ +/* > TRANS = 'T' or 't' x := A**T*x. */ +/* > */ +/* > TRANS = 'C' or 'c' x := A**H*x. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DIAG */ +/* > \verbatim */ +/* > DIAG is CHARACTER*1 */ +/* > On entry, DIAG specifies whether or not A is unit */ +/* > triangular as follows: */ +/* > */ +/* > DIAG = 'U' or 'u' A is assumed to be unit triangular. */ +/* > */ +/* > DIAG = 'N' or 'n' A is not assumed to be unit */ +/* > triangular. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > On entry, N specifies the order of the matrix A. */ +/* > N must be at least zero. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] AP */ +/* > \verbatim */ +/* > AP is COMPLEX*16 array, dimension at least */ +/* > ( ( n*( n + 1 ) )/2 ). */ +/* > Before entry with UPLO = 'U' or 'u', the array AP must */ +/* > contain the upper triangular matrix packed sequentially, */ +/* > column by column, so that AP( 1 ) contains a( 1, 1 ), */ +/* > AP( 2 ) and AP( 3 ) contain a( 1, 2 ) and a( 2, 2 ) */ +/* > respectively, and so on. */ +/* > Before entry with UPLO = 'L' or 'l', the array AP must */ +/* > contain the lower triangular matrix packed sequentially, */ +/* > column by column, so that AP( 1 ) contains a( 1, 1 ), */ +/* > AP( 2 ) and AP( 3 ) contain a( 2, 1 ) and a( 3, 1 ) */ +/* > respectively, and so on. */ +/* > Note that when DIAG = 'U' or 'u', the diagonal elements of */ +/* > A are not referenced, but are assumed to be unity. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] X */ +/* > \verbatim */ +/* > X is COMPLEX*16 array, dimension at least */ +/* > ( 1 + ( n - 1 )*abs( INCX ) ). */ +/* > Before entry, the incremented array X must contain the n */ +/* > element vector x. On exit, X is overwritten with the */ +/* > transformed vector x. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] INCX */ +/* > \verbatim */ +/* > INCX is INTEGER */ +/* > On entry, INCX specifies the increment for the elements of */ +/* > X. INCX must not be zero. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup complex16_blas_level2 */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > Level 2 Blas routine. */ +/* > The vector and matrix arguments are not referenced when N = 0, or M = 0 */ +/* > */ +/* > -- Written on 22-October-1986. */ +/* > Jack Dongarra, Argonne National Lab. */ +/* > Jeremy Du Croz, Nag Central Office. */ +/* > Sven Hammarling, Nag Central Office. */ +/* > Richard Hanson, Sandia National Labs. */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int ztpmv_(char *uplo, char *trans, char *diag, integer *n, + doublecomplex *ap, doublecomplex *x, integer *incx, ftnlen uplo_len, + ftnlen trans_len, ftnlen diag_len) +{ + /* System generated locals */ + integer i__1, i__2, i__3, i__4, i__5; + doublecomplex z__1, z__2, z__3; + + /* Builtin functions */ + void d_cnjg(doublecomplex *, doublecomplex *); + + /* Local variables */ + integer i__, j, k, kk, ix, jx, kx, info; + doublecomplex temp; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + logical noconj, nounit; + + +/* -- Reference BLAS level2 routine -- */ +/* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + --x; + --ap; + + /* Function Body */ + info = 0; + if (! lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, (char *)"L", ( + ftnlen)1, (ftnlen)1)) { + info = 1; + } else if (! lsame_(trans, (char *)"N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, + (char *)"T", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, (char *)"C", (ftnlen)1, ( + ftnlen)1)) { + info = 2; + } else if (! lsame_(diag, (char *)"U", (ftnlen)1, (ftnlen)1) && ! lsame_(diag, + (char *)"N", (ftnlen)1, (ftnlen)1)) { + info = 3; + } else if (*n < 0) { + info = 4; + } else if (*incx == 0) { + info = 7; + } + if (info != 0) { + xerbla_((char *)"ZTPMV ", &info, (ftnlen)6); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0) { + return 0; + } + + noconj = lsame_(trans, (char *)"T", (ftnlen)1, (ftnlen)1); + nounit = lsame_(diag, (char *)"N", (ftnlen)1, (ftnlen)1); + +/* Set up the start point in X if the increment is not unity. This */ +/* will be ( N - 1 )*INCX too small for descending loops. */ + + if (*incx <= 0) { + kx = 1 - (*n - 1) * *incx; + } else if (*incx != 1) { + kx = 1; + } + +/* Start the operations. In this version the elements of AP are */ +/* accessed sequentially with one pass through AP. */ + + if (lsame_(trans, (char *)"N", (ftnlen)1, (ftnlen)1)) { + +/* Form x:= A*x. */ + + if (lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1)) { + kk = 1; + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + if (x[i__2].r != 0. || x[i__2].i != 0.) { + i__2 = j; + temp.r = x[i__2].r, temp.i = x[i__2].i; + k = kk; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__; + i__4 = i__; + i__5 = k; + z__2.r = temp.r * ap[i__5].r - temp.i * ap[i__5] + .i, z__2.i = temp.r * ap[i__5].i + temp.i + * ap[i__5].r; + z__1.r = x[i__4].r + z__2.r, z__1.i = x[i__4].i + + z__2.i; + x[i__3].r = z__1.r, x[i__3].i = z__1.i; + ++k; +/* L10: */ + } + if (nounit) { + i__2 = j; + i__3 = j; + i__4 = kk + j - 1; + z__1.r = x[i__3].r * ap[i__4].r - x[i__3].i * ap[ + i__4].i, z__1.i = x[i__3].r * ap[i__4].i + + x[i__3].i * ap[i__4].r; + x[i__2].r = z__1.r, x[i__2].i = z__1.i; + } + } + kk += j; +/* L20: */ + } + } else { + jx = kx; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jx; + if (x[i__2].r != 0. || x[i__2].i != 0.) { + i__2 = jx; + temp.r = x[i__2].r, temp.i = x[i__2].i; + ix = kx; + i__2 = kk + j - 2; + for (k = kk; k <= i__2; ++k) { + i__3 = ix; + i__4 = ix; + i__5 = k; + z__2.r = temp.r * ap[i__5].r - temp.i * ap[i__5] + .i, z__2.i = temp.r * ap[i__5].i + temp.i + * ap[i__5].r; + z__1.r = x[i__4].r + z__2.r, z__1.i = x[i__4].i + + z__2.i; + x[i__3].r = z__1.r, x[i__3].i = z__1.i; + ix += *incx; +/* L30: */ + } + if (nounit) { + i__2 = jx; + i__3 = jx; + i__4 = kk + j - 1; + z__1.r = x[i__3].r * ap[i__4].r - x[i__3].i * ap[ + i__4].i, z__1.i = x[i__3].r * ap[i__4].i + + x[i__3].i * ap[i__4].r; + x[i__2].r = z__1.r, x[i__2].i = z__1.i; + } + } + jx += *incx; + kk += j; +/* L40: */ + } + } + } else { + kk = *n * (*n + 1) / 2; + if (*incx == 1) { + for (j = *n; j >= 1; --j) { + i__1 = j; + if (x[i__1].r != 0. || x[i__1].i != 0.) { + i__1 = j; + temp.r = x[i__1].r, temp.i = x[i__1].i; + k = kk; + i__1 = j + 1; + for (i__ = *n; i__ >= i__1; --i__) { + i__2 = i__; + i__3 = i__; + i__4 = k; + z__2.r = temp.r * ap[i__4].r - temp.i * ap[i__4] + .i, z__2.i = temp.r * ap[i__4].i + temp.i + * ap[i__4].r; + z__1.r = x[i__3].r + z__2.r, z__1.i = x[i__3].i + + z__2.i; + x[i__2].r = z__1.r, x[i__2].i = z__1.i; + --k; +/* L50: */ + } + if (nounit) { + i__1 = j; + i__2 = j; + i__3 = kk - *n + j; + z__1.r = x[i__2].r * ap[i__3].r - x[i__2].i * ap[ + i__3].i, z__1.i = x[i__2].r * ap[i__3].i + + x[i__2].i * ap[i__3].r; + x[i__1].r = z__1.r, x[i__1].i = z__1.i; + } + } + kk -= *n - j + 1; +/* L60: */ + } + } else { + kx += (*n - 1) * *incx; + jx = kx; + for (j = *n; j >= 1; --j) { + i__1 = jx; + if (x[i__1].r != 0. || x[i__1].i != 0.) { + i__1 = jx; + temp.r = x[i__1].r, temp.i = x[i__1].i; + ix = kx; + i__1 = kk - (*n - (j + 1)); + for (k = kk; k >= i__1; --k) { + i__2 = ix; + i__3 = ix; + i__4 = k; + z__2.r = temp.r * ap[i__4].r - temp.i * ap[i__4] + .i, z__2.i = temp.r * ap[i__4].i + temp.i + * ap[i__4].r; + z__1.r = x[i__3].r + z__2.r, z__1.i = x[i__3].i + + z__2.i; + x[i__2].r = z__1.r, x[i__2].i = z__1.i; + ix -= *incx; +/* L70: */ + } + if (nounit) { + i__1 = jx; + i__2 = jx; + i__3 = kk - *n + j; + z__1.r = x[i__2].r * ap[i__3].r - x[i__2].i * ap[ + i__3].i, z__1.i = x[i__2].r * ap[i__3].i + + x[i__2].i * ap[i__3].r; + x[i__1].r = z__1.r, x[i__1].i = z__1.i; + } + } + jx -= *incx; + kk -= *n - j + 1; +/* L80: */ + } + } + } + } else { + +/* Form x := A**T*x or x := A**H*x. */ + + if (lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1)) { + kk = *n * (*n + 1) / 2; + if (*incx == 1) { + for (j = *n; j >= 1; --j) { + i__1 = j; + temp.r = x[i__1].r, temp.i = x[i__1].i; + k = kk - 1; + if (noconj) { + if (nounit) { + i__1 = kk; + z__1.r = temp.r * ap[i__1].r - temp.i * ap[i__1] + .i, z__1.i = temp.r * ap[i__1].i + temp.i + * ap[i__1].r; + temp.r = z__1.r, temp.i = z__1.i; + } + for (i__ = j - 1; i__ >= 1; --i__) { + i__1 = k; + i__2 = i__; + z__2.r = ap[i__1].r * x[i__2].r - ap[i__1].i * x[ + i__2].i, z__2.i = ap[i__1].r * x[i__2].i + + ap[i__1].i * x[i__2].r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + --k; +/* L90: */ + } + } else { + if (nounit) { + d_cnjg(&z__2, &ap[kk]); + z__1.r = temp.r * z__2.r - temp.i * z__2.i, + z__1.i = temp.r * z__2.i + temp.i * + z__2.r; + temp.r = z__1.r, temp.i = z__1.i; + } + for (i__ = j - 1; i__ >= 1; --i__) { + d_cnjg(&z__3, &ap[k]); + i__1 = i__; + z__2.r = z__3.r * x[i__1].r - z__3.i * x[i__1].i, + z__2.i = z__3.r * x[i__1].i + z__3.i * x[ + i__1].r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + --k; +/* L100: */ + } + } + i__1 = j; + x[i__1].r = temp.r, x[i__1].i = temp.i; + kk -= j; +/* L110: */ + } + } else { + jx = kx + (*n - 1) * *incx; + for (j = *n; j >= 1; --j) { + i__1 = jx; + temp.r = x[i__1].r, temp.i = x[i__1].i; + ix = jx; + if (noconj) { + if (nounit) { + i__1 = kk; + z__1.r = temp.r * ap[i__1].r - temp.i * ap[i__1] + .i, z__1.i = temp.r * ap[i__1].i + temp.i + * ap[i__1].r; + temp.r = z__1.r, temp.i = z__1.i; + } + i__1 = kk - j + 1; + for (k = kk - 1; k >= i__1; --k) { + ix -= *incx; + i__2 = k; + i__3 = ix; + z__2.r = ap[i__2].r * x[i__3].r - ap[i__2].i * x[ + i__3].i, z__2.i = ap[i__2].r * x[i__3].i + + ap[i__2].i * x[i__3].r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; +/* L120: */ + } + } else { + if (nounit) { + d_cnjg(&z__2, &ap[kk]); + z__1.r = temp.r * z__2.r - temp.i * z__2.i, + z__1.i = temp.r * z__2.i + temp.i * + z__2.r; + temp.r = z__1.r, temp.i = z__1.i; + } + i__1 = kk - j + 1; + for (k = kk - 1; k >= i__1; --k) { + ix -= *incx; + d_cnjg(&z__3, &ap[k]); + i__2 = ix; + z__2.r = z__3.r * x[i__2].r - z__3.i * x[i__2].i, + z__2.i = z__3.r * x[i__2].i + z__3.i * x[ + i__2].r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; +/* L130: */ + } + } + i__1 = jx; + x[i__1].r = temp.r, x[i__1].i = temp.i; + jx -= *incx; + kk -= j; +/* L140: */ + } + } + } else { + kk = 1; + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + temp.r = x[i__2].r, temp.i = x[i__2].i; + k = kk + 1; + if (noconj) { + if (nounit) { + i__2 = kk; + z__1.r = temp.r * ap[i__2].r - temp.i * ap[i__2] + .i, z__1.i = temp.r * ap[i__2].i + temp.i + * ap[i__2].r; + temp.r = z__1.r, temp.i = z__1.i; + } + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + i__3 = k; + i__4 = i__; + z__2.r = ap[i__3].r * x[i__4].r - ap[i__3].i * x[ + i__4].i, z__2.i = ap[i__3].r * x[i__4].i + + ap[i__3].i * x[i__4].r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + ++k; +/* L150: */ + } + } else { + if (nounit) { + d_cnjg(&z__2, &ap[kk]); + z__1.r = temp.r * z__2.r - temp.i * z__2.i, + z__1.i = temp.r * z__2.i + temp.i * + z__2.r; + temp.r = z__1.r, temp.i = z__1.i; + } + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + d_cnjg(&z__3, &ap[k]); + i__3 = i__; + z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, + z__2.i = z__3.r * x[i__3].i + z__3.i * x[ + i__3].r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + ++k; +/* L160: */ + } + } + i__2 = j; + x[i__2].r = temp.r, x[i__2].i = temp.i; + kk += *n - j + 1; +/* L170: */ + } + } else { + jx = kx; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jx; + temp.r = x[i__2].r, temp.i = x[i__2].i; + ix = jx; + if (noconj) { + if (nounit) { + i__2 = kk; + z__1.r = temp.r * ap[i__2].r - temp.i * ap[i__2] + .i, z__1.i = temp.r * ap[i__2].i + temp.i + * ap[i__2].r; + temp.r = z__1.r, temp.i = z__1.i; + } + i__2 = kk + *n - j; + for (k = kk + 1; k <= i__2; ++k) { + ix += *incx; + i__3 = k; + i__4 = ix; + z__2.r = ap[i__3].r * x[i__4].r - ap[i__3].i * x[ + i__4].i, z__2.i = ap[i__3].r * x[i__4].i + + ap[i__3].i * x[i__4].r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; +/* L180: */ + } + } else { + if (nounit) { + d_cnjg(&z__2, &ap[kk]); + z__1.r = temp.r * z__2.r - temp.i * z__2.i, + z__1.i = temp.r * z__2.i + temp.i * + z__2.r; + temp.r = z__1.r, temp.i = z__1.i; + } + i__2 = kk + *n - j; + for (k = kk + 1; k <= i__2; ++k) { + ix += *incx; + d_cnjg(&z__3, &ap[k]); + i__3 = ix; + z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, + z__2.i = z__3.r * x[i__3].i + z__3.i * x[ + i__3].r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; +/* L190: */ + } + } + i__2 = jx; + x[i__2].r = temp.r, x[i__2].i = temp.i; + jx += *incx; + kk += *n - j + 1; +/* L200: */ + } + } + } + } + + return 0; + +/* End of ZTPMV */ + +} /* ztpmv_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/ztpsv.cpp b/lib/linalg/ztpsv.cpp new file mode 100644 index 0000000000..7e7e64d721 --- /dev/null +++ b/lib/linalg/ztpsv.cpp @@ -0,0 +1,607 @@ +/* fortran/ztpsv.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* > \brief \b ZTPSV */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZTPSV(UPLO,TRANS,DIAG,N,AP,X,INCX) */ + +/* .. Scalar Arguments .. */ +/* INTEGER INCX,N */ +/* CHARACTER DIAG,TRANS,UPLO */ +/* .. */ +/* .. Array Arguments .. */ +/* COMPLEX*16 AP(*),X(*) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZTPSV solves one of the systems of equations */ +/* > */ +/* > A*x = b, or A**T*x = b, or A**H*x = b, */ +/* > */ +/* > where b and x are n element vectors and A is an n by n unit, or */ +/* > non-unit, upper or lower triangular matrix, supplied in packed form. */ +/* > */ +/* > No test for singularity or near-singularity is included in this */ +/* > routine. Such tests must be performed before calling this routine. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > On entry, UPLO specifies whether the matrix is an upper or */ +/* > lower triangular matrix as follows: */ +/* > */ +/* > UPLO = 'U' or 'u' A is an upper triangular matrix. */ +/* > */ +/* > UPLO = 'L' or 'l' A is a lower triangular matrix. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TRANS */ +/* > \verbatim */ +/* > TRANS is CHARACTER*1 */ +/* > On entry, TRANS specifies the equations to be solved as */ +/* > follows: */ +/* > */ +/* > TRANS = 'N' or 'n' A*x = b. */ +/* > */ +/* > TRANS = 'T' or 't' A**T*x = b. */ +/* > */ +/* > TRANS = 'C' or 'c' A**H*x = b. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DIAG */ +/* > \verbatim */ +/* > DIAG is CHARACTER*1 */ +/* > On entry, DIAG specifies whether or not A is unit */ +/* > triangular as follows: */ +/* > */ +/* > DIAG = 'U' or 'u' A is assumed to be unit triangular. */ +/* > */ +/* > DIAG = 'N' or 'n' A is not assumed to be unit */ +/* > triangular. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > On entry, N specifies the order of the matrix A. */ +/* > N must be at least zero. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] AP */ +/* > \verbatim */ +/* > AP is COMPLEX*16 array, dimension at least */ +/* > ( ( n*( n + 1 ) )/2 ). */ +/* > Before entry with UPLO = 'U' or 'u', the array AP must */ +/* > contain the upper triangular matrix packed sequentially, */ +/* > column by column, so that AP( 1 ) contains a( 1, 1 ), */ +/* > AP( 2 ) and AP( 3 ) contain a( 1, 2 ) and a( 2, 2 ) */ +/* > respectively, and so on. */ +/* > Before entry with UPLO = 'L' or 'l', the array AP must */ +/* > contain the lower triangular matrix packed sequentially, */ +/* > column by column, so that AP( 1 ) contains a( 1, 1 ), */ +/* > AP( 2 ) and AP( 3 ) contain a( 2, 1 ) and a( 3, 1 ) */ +/* > respectively, and so on. */ +/* > Note that when DIAG = 'U' or 'u', the diagonal elements of */ +/* > A are not referenced, but are assumed to be unity. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] X */ +/* > \verbatim */ +/* > X is COMPLEX*16 array, dimension at least */ +/* > ( 1 + ( n - 1 )*abs( INCX ) ). */ +/* > Before entry, the incremented array X must contain the n */ +/* > element right-hand side vector b. On exit, X is overwritten */ +/* > with the solution vector x. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] INCX */ +/* > \verbatim */ +/* > INCX is INTEGER */ +/* > On entry, INCX specifies the increment for the elements of */ +/* > X. INCX must not be zero. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup complex16_blas_level2 */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > Level 2 Blas routine. */ +/* > */ +/* > -- Written on 22-October-1986. */ +/* > Jack Dongarra, Argonne National Lab. */ +/* > Jeremy Du Croz, Nag Central Office. */ +/* > Sven Hammarling, Nag Central Office. */ +/* > Richard Hanson, Sandia National Labs. */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int ztpsv_(char *uplo, char *trans, char *diag, integer *n, + doublecomplex *ap, doublecomplex *x, integer *incx, ftnlen uplo_len, + ftnlen trans_len, ftnlen diag_len) +{ + /* System generated locals */ + integer i__1, i__2, i__3, i__4, i__5; + doublecomplex z__1, z__2, z__3; + + /* Builtin functions */ + void z_div(doublecomplex *, doublecomplex *, doublecomplex *), d_cnjg( + doublecomplex *, doublecomplex *); + + /* Local variables */ + integer i__, j, k, kk, ix, jx, kx, info; + doublecomplex temp; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + logical noconj, nounit; + + +/* -- Reference BLAS level2 routine -- */ +/* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + --x; + --ap; + + /* Function Body */ + info = 0; + if (! lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, (char *)"L", ( + ftnlen)1, (ftnlen)1)) { + info = 1; + } else if (! lsame_(trans, (char *)"N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, + (char *)"T", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, (char *)"C", (ftnlen)1, ( + ftnlen)1)) { + info = 2; + } else if (! lsame_(diag, (char *)"U", (ftnlen)1, (ftnlen)1) && ! lsame_(diag, + (char *)"N", (ftnlen)1, (ftnlen)1)) { + info = 3; + } else if (*n < 0) { + info = 4; + } else if (*incx == 0) { + info = 7; + } + if (info != 0) { + xerbla_((char *)"ZTPSV ", &info, (ftnlen)6); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0) { + return 0; + } + + noconj = lsame_(trans, (char *)"T", (ftnlen)1, (ftnlen)1); + nounit = lsame_(diag, (char *)"N", (ftnlen)1, (ftnlen)1); + +/* Set up the start point in X if the increment is not unity. This */ +/* will be ( N - 1 )*INCX too small for descending loops. */ + + if (*incx <= 0) { + kx = 1 - (*n - 1) * *incx; + } else if (*incx != 1) { + kx = 1; + } + +/* Start the operations. In this version the elements of AP are */ +/* accessed sequentially with one pass through AP. */ + + if (lsame_(trans, (char *)"N", (ftnlen)1, (ftnlen)1)) { + +/* Form x := inv( A )*x. */ + + if (lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1)) { + kk = *n * (*n + 1) / 2; + if (*incx == 1) { + for (j = *n; j >= 1; --j) { + i__1 = j; + if (x[i__1].r != 0. || x[i__1].i != 0.) { + if (nounit) { + i__1 = j; + z_div(&z__1, &x[j], &ap[kk]); + x[i__1].r = z__1.r, x[i__1].i = z__1.i; + } + i__1 = j; + temp.r = x[i__1].r, temp.i = x[i__1].i; + k = kk - 1; + for (i__ = j - 1; i__ >= 1; --i__) { + i__1 = i__; + i__2 = i__; + i__3 = k; + z__2.r = temp.r * ap[i__3].r - temp.i * ap[i__3] + .i, z__2.i = temp.r * ap[i__3].i + temp.i + * ap[i__3].r; + z__1.r = x[i__2].r - z__2.r, z__1.i = x[i__2].i - + z__2.i; + x[i__1].r = z__1.r, x[i__1].i = z__1.i; + --k; +/* L10: */ + } + } + kk -= j; +/* L20: */ + } + } else { + jx = kx + (*n - 1) * *incx; + for (j = *n; j >= 1; --j) { + i__1 = jx; + if (x[i__1].r != 0. || x[i__1].i != 0.) { + if (nounit) { + i__1 = jx; + z_div(&z__1, &x[jx], &ap[kk]); + x[i__1].r = z__1.r, x[i__1].i = z__1.i; + } + i__1 = jx; + temp.r = x[i__1].r, temp.i = x[i__1].i; + ix = jx; + i__1 = kk - j + 1; + for (k = kk - 1; k >= i__1; --k) { + ix -= *incx; + i__2 = ix; + i__3 = ix; + i__4 = k; + z__2.r = temp.r * ap[i__4].r - temp.i * ap[i__4] + .i, z__2.i = temp.r * ap[i__4].i + temp.i + * ap[i__4].r; + z__1.r = x[i__3].r - z__2.r, z__1.i = x[i__3].i - + z__2.i; + x[i__2].r = z__1.r, x[i__2].i = z__1.i; +/* L30: */ + } + } + jx -= *incx; + kk -= j; +/* L40: */ + } + } + } else { + kk = 1; + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + if (x[i__2].r != 0. || x[i__2].i != 0.) { + if (nounit) { + i__2 = j; + z_div(&z__1, &x[j], &ap[kk]); + x[i__2].r = z__1.r, x[i__2].i = z__1.i; + } + i__2 = j; + temp.r = x[i__2].r, temp.i = x[i__2].i; + k = kk + 1; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + i__3 = i__; + i__4 = i__; + i__5 = k; + z__2.r = temp.r * ap[i__5].r - temp.i * ap[i__5] + .i, z__2.i = temp.r * ap[i__5].i + temp.i + * ap[i__5].r; + z__1.r = x[i__4].r - z__2.r, z__1.i = x[i__4].i - + z__2.i; + x[i__3].r = z__1.r, x[i__3].i = z__1.i; + ++k; +/* L50: */ + } + } + kk += *n - j + 1; +/* L60: */ + } + } else { + jx = kx; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jx; + if (x[i__2].r != 0. || x[i__2].i != 0.) { + if (nounit) { + i__2 = jx; + z_div(&z__1, &x[jx], &ap[kk]); + x[i__2].r = z__1.r, x[i__2].i = z__1.i; + } + i__2 = jx; + temp.r = x[i__2].r, temp.i = x[i__2].i; + ix = jx; + i__2 = kk + *n - j; + for (k = kk + 1; k <= i__2; ++k) { + ix += *incx; + i__3 = ix; + i__4 = ix; + i__5 = k; + z__2.r = temp.r * ap[i__5].r - temp.i * ap[i__5] + .i, z__2.i = temp.r * ap[i__5].i + temp.i + * ap[i__5].r; + z__1.r = x[i__4].r - z__2.r, z__1.i = x[i__4].i - + z__2.i; + x[i__3].r = z__1.r, x[i__3].i = z__1.i; +/* L70: */ + } + } + jx += *incx; + kk += *n - j + 1; +/* L80: */ + } + } + } + } else { + +/* Form x := inv( A**T )*x or x := inv( A**H )*x. */ + + if (lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1)) { + kk = 1; + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + temp.r = x[i__2].r, temp.i = x[i__2].i; + k = kk; + if (noconj) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = k; + i__4 = i__; + z__2.r = ap[i__3].r * x[i__4].r - ap[i__3].i * x[ + i__4].i, z__2.i = ap[i__3].r * x[i__4].i + + ap[i__3].i * x[i__4].r; + z__1.r = temp.r - z__2.r, z__1.i = temp.i - + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + ++k; +/* L90: */ + } + if (nounit) { + z_div(&z__1, &temp, &ap[kk + j - 1]); + temp.r = z__1.r, temp.i = z__1.i; + } + } else { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + d_cnjg(&z__3, &ap[k]); + i__3 = i__; + z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, + z__2.i = z__3.r * x[i__3].i + z__3.i * x[ + i__3].r; + z__1.r = temp.r - z__2.r, z__1.i = temp.i - + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + ++k; +/* L100: */ + } + if (nounit) { + d_cnjg(&z__2, &ap[kk + j - 1]); + z_div(&z__1, &temp, &z__2); + temp.r = z__1.r, temp.i = z__1.i; + } + } + i__2 = j; + x[i__2].r = temp.r, x[i__2].i = temp.i; + kk += j; +/* L110: */ + } + } else { + jx = kx; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jx; + temp.r = x[i__2].r, temp.i = x[i__2].i; + ix = kx; + if (noconj) { + i__2 = kk + j - 2; + for (k = kk; k <= i__2; ++k) { + i__3 = k; + i__4 = ix; + z__2.r = ap[i__3].r * x[i__4].r - ap[i__3].i * x[ + i__4].i, z__2.i = ap[i__3].r * x[i__4].i + + ap[i__3].i * x[i__4].r; + z__1.r = temp.r - z__2.r, z__1.i = temp.i - + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + ix += *incx; +/* L120: */ + } + if (nounit) { + z_div(&z__1, &temp, &ap[kk + j - 1]); + temp.r = z__1.r, temp.i = z__1.i; + } + } else { + i__2 = kk + j - 2; + for (k = kk; k <= i__2; ++k) { + d_cnjg(&z__3, &ap[k]); + i__3 = ix; + z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, + z__2.i = z__3.r * x[i__3].i + z__3.i * x[ + i__3].r; + z__1.r = temp.r - z__2.r, z__1.i = temp.i - + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + ix += *incx; +/* L130: */ + } + if (nounit) { + d_cnjg(&z__2, &ap[kk + j - 1]); + z_div(&z__1, &temp, &z__2); + temp.r = z__1.r, temp.i = z__1.i; + } + } + i__2 = jx; + x[i__2].r = temp.r, x[i__2].i = temp.i; + jx += *incx; + kk += j; +/* L140: */ + } + } + } else { + kk = *n * (*n + 1) / 2; + if (*incx == 1) { + for (j = *n; j >= 1; --j) { + i__1 = j; + temp.r = x[i__1].r, temp.i = x[i__1].i; + k = kk; + if (noconj) { + i__1 = j + 1; + for (i__ = *n; i__ >= i__1; --i__) { + i__2 = k; + i__3 = i__; + z__2.r = ap[i__2].r * x[i__3].r - ap[i__2].i * x[ + i__3].i, z__2.i = ap[i__2].r * x[i__3].i + + ap[i__2].i * x[i__3].r; + z__1.r = temp.r - z__2.r, z__1.i = temp.i - + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + --k; +/* L150: */ + } + if (nounit) { + z_div(&z__1, &temp, &ap[kk - *n + j]); + temp.r = z__1.r, temp.i = z__1.i; + } + } else { + i__1 = j + 1; + for (i__ = *n; i__ >= i__1; --i__) { + d_cnjg(&z__3, &ap[k]); + i__2 = i__; + z__2.r = z__3.r * x[i__2].r - z__3.i * x[i__2].i, + z__2.i = z__3.r * x[i__2].i + z__3.i * x[ + i__2].r; + z__1.r = temp.r - z__2.r, z__1.i = temp.i - + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + --k; +/* L160: */ + } + if (nounit) { + d_cnjg(&z__2, &ap[kk - *n + j]); + z_div(&z__1, &temp, &z__2); + temp.r = z__1.r, temp.i = z__1.i; + } + } + i__1 = j; + x[i__1].r = temp.r, x[i__1].i = temp.i; + kk -= *n - j + 1; +/* L170: */ + } + } else { + kx += (*n - 1) * *incx; + jx = kx; + for (j = *n; j >= 1; --j) { + i__1 = jx; + temp.r = x[i__1].r, temp.i = x[i__1].i; + ix = kx; + if (noconj) { + i__1 = kk - (*n - (j + 1)); + for (k = kk; k >= i__1; --k) { + i__2 = k; + i__3 = ix; + z__2.r = ap[i__2].r * x[i__3].r - ap[i__2].i * x[ + i__3].i, z__2.i = ap[i__2].r * x[i__3].i + + ap[i__2].i * x[i__3].r; + z__1.r = temp.r - z__2.r, z__1.i = temp.i - + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + ix -= *incx; +/* L180: */ + } + if (nounit) { + z_div(&z__1, &temp, &ap[kk - *n + j]); + temp.r = z__1.r, temp.i = z__1.i; + } + } else { + i__1 = kk - (*n - (j + 1)); + for (k = kk; k >= i__1; --k) { + d_cnjg(&z__3, &ap[k]); + i__2 = ix; + z__2.r = z__3.r * x[i__2].r - z__3.i * x[i__2].i, + z__2.i = z__3.r * x[i__2].i + z__3.i * x[ + i__2].r; + z__1.r = temp.r - z__2.r, z__1.i = temp.i - + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + ix -= *incx; +/* L190: */ + } + if (nounit) { + d_cnjg(&z__2, &ap[kk - *n + j]); + z_div(&z__1, &temp, &z__2); + temp.r = z__1.r, temp.i = z__1.i; + } + } + i__1 = jx; + x[i__1].r = temp.r, x[i__1].i = temp.i; + jx -= *incx; + kk -= *n - j + 1; +/* L200: */ + } + } + } + } + + return 0; + +/* End of ZTPSV */ + +} /* ztpsv_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/ztptri.cpp b/lib/linalg/ztptri.cpp new file mode 100644 index 0000000000..c781cb964f --- /dev/null +++ b/lib/linalg/ztptri.cpp @@ -0,0 +1,308 @@ +/* fortran/ztptri.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static doublecomplex c_b1 = {1.,0.}; +static integer c__1 = 1; + +/* > \brief \b ZTPTRI */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZTPTRI + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZTPTRI( UPLO, DIAG, N, AP, INFO ) */ + +/* .. Scalar Arguments .. */ +/* CHARACTER DIAG, UPLO */ +/* INTEGER INFO, N */ +/* .. */ +/* .. Array Arguments .. */ +/* COMPLEX*16 AP( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZTPTRI computes the inverse of a complex upper or lower triangular */ +/* > matrix A stored in packed format. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': A is upper triangular; */ +/* > = 'L': A is lower triangular. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DIAG */ +/* > \verbatim */ +/* > DIAG is CHARACTER*1 */ +/* > = 'N': A is non-unit triangular; */ +/* > = 'U': A is unit triangular. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix A. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] AP */ +/* > \verbatim */ +/* > AP is COMPLEX*16 array, dimension (N*(N+1)/2) */ +/* > On entry, the upper or lower triangular matrix A, stored */ +/* > columnwise in a linear array. The j-th column of A is stored */ +/* > in the array AP as follows: */ +/* > if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; */ +/* > if UPLO = 'L', AP(i + (j-1)*((2*n-j)/2) = A(i,j) for j<=i<=n. */ +/* > See below for further details. */ +/* > On exit, the (triangular) inverse of the original matrix, in */ +/* > the same packed storage format. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > > 0: if INFO = i, A(i,i) is exactly zero. The triangular */ +/* > matrix is singular and its inverse can not be computed. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup complex16OTHERcomputational */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > A triangular matrix A can be transferred to packed storage using one */ +/* > of the following program segments: */ +/* > */ +/* > UPLO = 'U': UPLO = 'L': */ +/* > */ +/* > JC = 1 JC = 1 */ +/* > DO 2 J = 1, N DO 2 J = 1, N */ +/* > DO 1 I = 1, J DO 1 I = J, N */ +/* > AP(JC+I-1) = A(I,J) AP(JC+I-J) = A(I,J) */ +/* > 1 CONTINUE 1 CONTINUE */ +/* > JC = JC + J JC = JC + N - J + 1 */ +/* > 2 CONTINUE 2 CONTINUE */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int ztptri_(char *uplo, char *diag, integer *n, + doublecomplex *ap, integer *info, ftnlen uplo_len, ftnlen diag_len) +{ + /* System generated locals */ + integer i__1, i__2; + doublecomplex z__1; + + /* Builtin functions */ + void z_div(doublecomplex *, doublecomplex *, doublecomplex *); + + /* Local variables */ + integer j, jc, jj; + doublecomplex ajj; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern /* Subroutine */ int zscal_(integer *, doublecomplex *, + doublecomplex *, integer *); + logical upper; + extern /* Subroutine */ int ztpmv_(char *, char *, char *, integer *, + doublecomplex *, doublecomplex *, integer *, ftnlen, ftnlen, + ftnlen), xerbla_(char *, integer *, ftnlen); + integer jclast; + logical nounit; + + +/* -- LAPACK computational routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + --ap; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + nounit = lsame_(diag, (char *)"N", (ftnlen)1, (ftnlen)1); + if (! upper && ! lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (! nounit && ! lsame_(diag, (char *)"U", (ftnlen)1, (ftnlen)1)) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"ZTPTRI", &i__1, (ftnlen)6); + return 0; + } + +/* Check for singularity if non-unit. */ + + if (nounit) { + if (upper) { + jj = 0; + i__1 = *n; + for (*info = 1; *info <= i__1; ++(*info)) { + jj += *info; + i__2 = jj; + if (ap[i__2].r == 0. && ap[i__2].i == 0.) { + return 0; + } +/* L10: */ + } + } else { + jj = 1; + i__1 = *n; + for (*info = 1; *info <= i__1; ++(*info)) { + i__2 = jj; + if (ap[i__2].r == 0. && ap[i__2].i == 0.) { + return 0; + } + jj = jj + *n - *info + 1; +/* L20: */ + } + } + *info = 0; + } + + if (upper) { + +/* Compute inverse of upper triangular matrix. */ + + jc = 1; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (nounit) { + i__2 = jc + j - 1; + z_div(&z__1, &c_b1, &ap[jc + j - 1]); + ap[i__2].r = z__1.r, ap[i__2].i = z__1.i; + i__2 = jc + j - 1; + z__1.r = -ap[i__2].r, z__1.i = -ap[i__2].i; + ajj.r = z__1.r, ajj.i = z__1.i; + } else { + z__1.r = -1., z__1.i = -0.; + ajj.r = z__1.r, ajj.i = z__1.i; + } + +/* Compute elements 1:j-1 of j-th column. */ + + i__2 = j - 1; + ztpmv_((char *)"Upper", (char *)"No transpose", diag, &i__2, &ap[1], &ap[jc], & + c__1, (ftnlen)5, (ftnlen)12, (ftnlen)1); + i__2 = j - 1; + zscal_(&i__2, &ajj, &ap[jc], &c__1); + jc += j; +/* L30: */ + } + + } else { + +/* Compute inverse of lower triangular matrix. */ + + jc = *n * (*n + 1) / 2; + for (j = *n; j >= 1; --j) { + if (nounit) { + i__1 = jc; + z_div(&z__1, &c_b1, &ap[jc]); + ap[i__1].r = z__1.r, ap[i__1].i = z__1.i; + i__1 = jc; + z__1.r = -ap[i__1].r, z__1.i = -ap[i__1].i; + ajj.r = z__1.r, ajj.i = z__1.i; + } else { + z__1.r = -1., z__1.i = -0.; + ajj.r = z__1.r, ajj.i = z__1.i; + } + if (j < *n) { + +/* Compute elements j+1:n of j-th column. */ + + i__1 = *n - j; + ztpmv_((char *)"Lower", (char *)"No transpose", diag, &i__1, &ap[jclast], &ap[ + jc + 1], &c__1, (ftnlen)5, (ftnlen)12, (ftnlen)1); + i__1 = *n - j; + zscal_(&i__1, &ajj, &ap[jc + 1], &c__1); + } + jclast = jc; + jc = jc - *n + j - 2; +/* L40: */ + } + } + + return 0; + +/* End of ZTPTRI */ + +} /* ztptri_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/ztrmm.cpp b/lib/linalg/ztrmm.cpp new file mode 100644 index 0000000000..cd71371d5b --- /dev/null +++ b/lib/linalg/ztrmm.cpp @@ -0,0 +1,762 @@ +/* fortran/ztrmm.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* > \brief \b ZTRMM */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZTRMM(SIDE,UPLO,TRANSA,DIAG,M,N,ALPHA,A,LDA,B,LDB) */ + +/* .. Scalar Arguments .. */ +/* COMPLEX*16 ALPHA */ +/* INTEGER LDA,LDB,M,N */ +/* CHARACTER DIAG,SIDE,TRANSA,UPLO */ +/* .. */ +/* .. Array Arguments .. */ +/* COMPLEX*16 A(LDA,*),B(LDB,*) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZTRMM performs one of the matrix-matrix operations */ +/* > */ +/* > B := alpha*op( A )*B, or B := alpha*B*op( A ) */ +/* > */ +/* > where alpha is a scalar, B is an m by n matrix, A is a unit, or */ +/* > non-unit, upper or lower triangular matrix and op( A ) is one of */ +/* > */ +/* > op( A ) = A or op( A ) = A**T or op( A ) = A**H. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] SIDE */ +/* > \verbatim */ +/* > SIDE is CHARACTER*1 */ +/* > On entry, SIDE specifies whether op( A ) multiplies B from */ +/* > the left or right as follows: */ +/* > */ +/* > SIDE = 'L' or 'l' B := alpha*op( A )*B. */ +/* > */ +/* > SIDE = 'R' or 'r' B := alpha*B*op( A ). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > On entry, UPLO specifies whether the matrix A is an upper or */ +/* > lower triangular matrix as follows: */ +/* > */ +/* > UPLO = 'U' or 'u' A is an upper triangular matrix. */ +/* > */ +/* > UPLO = 'L' or 'l' A is a lower triangular matrix. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TRANSA */ +/* > \verbatim */ +/* > TRANSA is CHARACTER*1 */ +/* > On entry, TRANSA specifies the form of op( A ) to be used in */ +/* > the matrix multiplication as follows: */ +/* > */ +/* > TRANSA = 'N' or 'n' op( A ) = A. */ +/* > */ +/* > TRANSA = 'T' or 't' op( A ) = A**T. */ +/* > */ +/* > TRANSA = 'C' or 'c' op( A ) = A**H. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DIAG */ +/* > \verbatim */ +/* > DIAG is CHARACTER*1 */ +/* > On entry, DIAG specifies whether or not A is unit triangular */ +/* > as follows: */ +/* > */ +/* > DIAG = 'U' or 'u' A is assumed to be unit triangular. */ +/* > */ +/* > DIAG = 'N' or 'n' A is not assumed to be unit */ +/* > triangular. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > On entry, M specifies the number of rows of B. M must be at */ +/* > least zero. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > On entry, N specifies the number of columns of B. N must be */ +/* > at least zero. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] ALPHA */ +/* > \verbatim */ +/* > ALPHA is COMPLEX*16 */ +/* > On entry, ALPHA specifies the scalar alpha. When alpha is */ +/* > zero then A is not referenced and B need not be set before */ +/* > entry. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension ( LDA, k ), where k is m */ +/* > when SIDE = 'L' or 'l' and is n when SIDE = 'R' or 'r'. */ +/* > Before entry with UPLO = 'U' or 'u', the leading k by k */ +/* > upper triangular part of the array A must contain the upper */ +/* > triangular matrix and the strictly lower triangular part of */ +/* > A is not referenced. */ +/* > Before entry with UPLO = 'L' or 'l', the leading k by k */ +/* > lower triangular part of the array A must contain the lower */ +/* > triangular matrix and the strictly upper triangular part of */ +/* > A is not referenced. */ +/* > Note that when DIAG = 'U' or 'u', the diagonal elements of */ +/* > A are not referenced either, but are assumed to be unity. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > On entry, LDA specifies the first dimension of A as declared */ +/* > in the calling (sub) program. When SIDE = 'L' or 'l' then */ +/* > LDA must be at least max( 1, m ), when SIDE = 'R' or 'r' */ +/* > then LDA must be at least max( 1, n ). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] B */ +/* > \verbatim */ +/* > B is COMPLEX*16 array, dimension ( LDB, N ). */ +/* > Before entry, the leading m by n part of the array B must */ +/* > contain the matrix B, and on exit is overwritten by the */ +/* > transformed matrix. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDB */ +/* > \verbatim */ +/* > LDB is INTEGER */ +/* > On entry, LDB specifies the first dimension of B as declared */ +/* > in the calling (sub) program. LDB must be at least */ +/* > max( 1, m ). */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup complex16_blas_level3 */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > Level 3 Blas routine. */ +/* > */ +/* > -- Written on 8-February-1989. */ +/* > Jack Dongarra, Argonne National Laboratory. */ +/* > Iain Duff, AERE Harwell. */ +/* > Jeremy Du Croz, Numerical Algorithms Group Ltd. */ +/* > Sven Hammarling, Numerical Algorithms Group Ltd. */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int ztrmm_(char *side, char *uplo, char *transa, char *diag, + integer *m, integer *n, doublecomplex *alpha, doublecomplex *a, + integer *lda, doublecomplex *b, integer *ldb, ftnlen side_len, ftnlen + uplo_len, ftnlen transa_len, ftnlen diag_len) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4, i__5, + i__6; + doublecomplex z__1, z__2, z__3; + + /* Builtin functions */ + void d_cnjg(doublecomplex *, doublecomplex *); + + /* Local variables */ + integer i__, j, k, info; + doublecomplex temp; + logical lside; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + integer nrowa; + logical upper; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + logical noconj, nounit; + + +/* -- Reference BLAS level3 routine -- */ +/* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Parameters .. */ +/* .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + lside = lsame_(side, (char *)"L", (ftnlen)1, (ftnlen)1); + if (lside) { + nrowa = *m; + } else { + nrowa = *n; + } + noconj = lsame_(transa, (char *)"T", (ftnlen)1, (ftnlen)1); + nounit = lsame_(diag, (char *)"N", (ftnlen)1, (ftnlen)1); + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + + info = 0; + if (! lside && ! lsame_(side, (char *)"R", (ftnlen)1, (ftnlen)1)) { + info = 1; + } else if (! upper && ! lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + info = 2; + } else if (! lsame_(transa, (char *)"N", (ftnlen)1, (ftnlen)1) && ! lsame_(transa, + (char *)"T", (ftnlen)1, (ftnlen)1) && ! lsame_(transa, (char *)"C", (ftnlen)1, ( + ftnlen)1)) { + info = 3; + } else if (! lsame_(diag, (char *)"U", (ftnlen)1, (ftnlen)1) && ! lsame_(diag, + (char *)"N", (ftnlen)1, (ftnlen)1)) { + info = 4; + } else if (*m < 0) { + info = 5; + } else if (*n < 0) { + info = 6; + } else if (*lda < max(1,nrowa)) { + info = 9; + } else if (*ldb < max(1,*m)) { + info = 11; + } + if (info != 0) { + xerbla_((char *)"ZTRMM ", &info, (ftnlen)6); + return 0; + } + +/* Quick return if possible. */ + + if (*m == 0 || *n == 0) { + return 0; + } + +/* And when alpha.eq.zero. */ + + if (alpha->r == 0. && alpha->i == 0.) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + b[i__3].r = 0., b[i__3].i = 0.; +/* L10: */ + } +/* L20: */ + } + return 0; + } + +/* Start the operations. */ + + if (lside) { + if (lsame_(transa, (char *)"N", (ftnlen)1, (ftnlen)1)) { + +/* Form B := alpha*A*B. */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (k = 1; k <= i__2; ++k) { + i__3 = k + j * b_dim1; + if (b[i__3].r != 0. || b[i__3].i != 0.) { + i__3 = k + j * b_dim1; + z__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3] + .i, z__1.i = alpha->r * b[i__3].i + + alpha->i * b[i__3].r; + temp.r = z__1.r, temp.i = z__1.i; + i__3 = k - 1; + for (i__ = 1; i__ <= i__3; ++i__) { + i__4 = i__ + j * b_dim1; + i__5 = i__ + j * b_dim1; + i__6 = i__ + k * a_dim1; + z__2.r = temp.r * a[i__6].r - temp.i * a[i__6] + .i, z__2.i = temp.r * a[i__6].i + + temp.i * a[i__6].r; + z__1.r = b[i__5].r + z__2.r, z__1.i = b[i__5] + .i + z__2.i; + b[i__4].r = z__1.r, b[i__4].i = z__1.i; +/* L30: */ + } + if (nounit) { + i__3 = k + k * a_dim1; + z__1.r = temp.r * a[i__3].r - temp.i * a[i__3] + .i, z__1.i = temp.r * a[i__3].i + + temp.i * a[i__3].r; + temp.r = z__1.r, temp.i = z__1.i; + } + i__3 = k + j * b_dim1; + b[i__3].r = temp.r, b[i__3].i = temp.i; + } +/* L40: */ + } +/* L50: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + for (k = *m; k >= 1; --k) { + i__2 = k + j * b_dim1; + if (b[i__2].r != 0. || b[i__2].i != 0.) { + i__2 = k + j * b_dim1; + z__1.r = alpha->r * b[i__2].r - alpha->i * b[i__2] + .i, z__1.i = alpha->r * b[i__2].i + + alpha->i * b[i__2].r; + temp.r = z__1.r, temp.i = z__1.i; + i__2 = k + j * b_dim1; + b[i__2].r = temp.r, b[i__2].i = temp.i; + if (nounit) { + i__2 = k + j * b_dim1; + i__3 = k + j * b_dim1; + i__4 = k + k * a_dim1; + z__1.r = b[i__3].r * a[i__4].r - b[i__3].i * + a[i__4].i, z__1.i = b[i__3].r * a[ + i__4].i + b[i__3].i * a[i__4].r; + b[i__2].r = z__1.r, b[i__2].i = z__1.i; + } + i__2 = *m; + for (i__ = k + 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * b_dim1; + i__5 = i__ + k * a_dim1; + z__2.r = temp.r * a[i__5].r - temp.i * a[i__5] + .i, z__2.i = temp.r * a[i__5].i + + temp.i * a[i__5].r; + z__1.r = b[i__4].r + z__2.r, z__1.i = b[i__4] + .i + z__2.i; + b[i__3].r = z__1.r, b[i__3].i = z__1.i; +/* L60: */ + } + } +/* L70: */ + } +/* L80: */ + } + } + } else { + +/* Form B := alpha*A**T*B or B := alpha*A**H*B. */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + for (i__ = *m; i__ >= 1; --i__) { + i__2 = i__ + j * b_dim1; + temp.r = b[i__2].r, temp.i = b[i__2].i; + if (noconj) { + if (nounit) { + i__2 = i__ + i__ * a_dim1; + z__1.r = temp.r * a[i__2].r - temp.i * a[i__2] + .i, z__1.i = temp.r * a[i__2].i + + temp.i * a[i__2].r; + temp.r = z__1.r, temp.i = z__1.i; + } + i__2 = i__ - 1; + for (k = 1; k <= i__2; ++k) { + i__3 = k + i__ * a_dim1; + i__4 = k + j * b_dim1; + z__2.r = a[i__3].r * b[i__4].r - a[i__3].i * + b[i__4].i, z__2.i = a[i__3].r * b[ + i__4].i + a[i__3].i * b[i__4].r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; +/* L90: */ + } + } else { + if (nounit) { + d_cnjg(&z__2, &a[i__ + i__ * a_dim1]); + z__1.r = temp.r * z__2.r - temp.i * z__2.i, + z__1.i = temp.r * z__2.i + temp.i * + z__2.r; + temp.r = z__1.r, temp.i = z__1.i; + } + i__2 = i__ - 1; + for (k = 1; k <= i__2; ++k) { + d_cnjg(&z__3, &a[k + i__ * a_dim1]); + i__3 = k + j * b_dim1; + z__2.r = z__3.r * b[i__3].r - z__3.i * b[i__3] + .i, z__2.i = z__3.r * b[i__3].i + + z__3.i * b[i__3].r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; +/* L100: */ + } + } + i__2 = i__ + j * b_dim1; + z__1.r = alpha->r * temp.r - alpha->i * temp.i, + z__1.i = alpha->r * temp.i + alpha->i * + temp.r; + b[i__2].r = z__1.r, b[i__2].i = z__1.i; +/* L110: */ + } +/* L120: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + temp.r = b[i__3].r, temp.i = b[i__3].i; + if (noconj) { + if (nounit) { + i__3 = i__ + i__ * a_dim1; + z__1.r = temp.r * a[i__3].r - temp.i * a[i__3] + .i, z__1.i = temp.r * a[i__3].i + + temp.i * a[i__3].r; + temp.r = z__1.r, temp.i = z__1.i; + } + i__3 = *m; + for (k = i__ + 1; k <= i__3; ++k) { + i__4 = k + i__ * a_dim1; + i__5 = k + j * b_dim1; + z__2.r = a[i__4].r * b[i__5].r - a[i__4].i * + b[i__5].i, z__2.i = a[i__4].r * b[ + i__5].i + a[i__4].i * b[i__5].r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; +/* L130: */ + } + } else { + if (nounit) { + d_cnjg(&z__2, &a[i__ + i__ * a_dim1]); + z__1.r = temp.r * z__2.r - temp.i * z__2.i, + z__1.i = temp.r * z__2.i + temp.i * + z__2.r; + temp.r = z__1.r, temp.i = z__1.i; + } + i__3 = *m; + for (k = i__ + 1; k <= i__3; ++k) { + d_cnjg(&z__3, &a[k + i__ * a_dim1]); + i__4 = k + j * b_dim1; + z__2.r = z__3.r * b[i__4].r - z__3.i * b[i__4] + .i, z__2.i = z__3.r * b[i__4].i + + z__3.i * b[i__4].r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; +/* L140: */ + } + } + i__3 = i__ + j * b_dim1; + z__1.r = alpha->r * temp.r - alpha->i * temp.i, + z__1.i = alpha->r * temp.i + alpha->i * + temp.r; + b[i__3].r = z__1.r, b[i__3].i = z__1.i; +/* L150: */ + } +/* L160: */ + } + } + } + } else { + if (lsame_(transa, (char *)"N", (ftnlen)1, (ftnlen)1)) { + +/* Form B := alpha*B*A. */ + + if (upper) { + for (j = *n; j >= 1; --j) { + temp.r = alpha->r, temp.i = alpha->i; + if (nounit) { + i__1 = j + j * a_dim1; + z__1.r = temp.r * a[i__1].r - temp.i * a[i__1].i, + z__1.i = temp.r * a[i__1].i + temp.i * a[i__1] + .r; + temp.r = z__1.r, temp.i = z__1.i; + } + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + j * b_dim1; + i__3 = i__ + j * b_dim1; + z__1.r = temp.r * b[i__3].r - temp.i * b[i__3].i, + z__1.i = temp.r * b[i__3].i + temp.i * b[i__3] + .r; + b[i__2].r = z__1.r, b[i__2].i = z__1.i; +/* L170: */ + } + i__1 = j - 1; + for (k = 1; k <= i__1; ++k) { + i__2 = k + j * a_dim1; + if (a[i__2].r != 0. || a[i__2].i != 0.) { + i__2 = k + j * a_dim1; + z__1.r = alpha->r * a[i__2].r - alpha->i * a[i__2] + .i, z__1.i = alpha->r * a[i__2].i + + alpha->i * a[i__2].r; + temp.r = z__1.r, temp.i = z__1.i; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * b_dim1; + i__5 = i__ + k * b_dim1; + z__2.r = temp.r * b[i__5].r - temp.i * b[i__5] + .i, z__2.i = temp.r * b[i__5].i + + temp.i * b[i__5].r; + z__1.r = b[i__4].r + z__2.r, z__1.i = b[i__4] + .i + z__2.i; + b[i__3].r = z__1.r, b[i__3].i = z__1.i; +/* L180: */ + } + } +/* L190: */ + } +/* L200: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp.r = alpha->r, temp.i = alpha->i; + if (nounit) { + i__2 = j + j * a_dim1; + z__1.r = temp.r * a[i__2].r - temp.i * a[i__2].i, + z__1.i = temp.r * a[i__2].i + temp.i * a[i__2] + .r; + temp.r = z__1.r, temp.i = z__1.i; + } + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * b_dim1; + z__1.r = temp.r * b[i__4].r - temp.i * b[i__4].i, + z__1.i = temp.r * b[i__4].i + temp.i * b[i__4] + .r; + b[i__3].r = z__1.r, b[i__3].i = z__1.i; +/* L210: */ + } + i__2 = *n; + for (k = j + 1; k <= i__2; ++k) { + i__3 = k + j * a_dim1; + if (a[i__3].r != 0. || a[i__3].i != 0.) { + i__3 = k + j * a_dim1; + z__1.r = alpha->r * a[i__3].r - alpha->i * a[i__3] + .i, z__1.i = alpha->r * a[i__3].i + + alpha->i * a[i__3].r; + temp.r = z__1.r, temp.i = z__1.i; + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + i__4 = i__ + j * b_dim1; + i__5 = i__ + j * b_dim1; + i__6 = i__ + k * b_dim1; + z__2.r = temp.r * b[i__6].r - temp.i * b[i__6] + .i, z__2.i = temp.r * b[i__6].i + + temp.i * b[i__6].r; + z__1.r = b[i__5].r + z__2.r, z__1.i = b[i__5] + .i + z__2.i; + b[i__4].r = z__1.r, b[i__4].i = z__1.i; +/* L220: */ + } + } +/* L230: */ + } +/* L240: */ + } + } + } else { + +/* Form B := alpha*B*A**T or B := alpha*B*A**H. */ + + if (upper) { + i__1 = *n; + for (k = 1; k <= i__1; ++k) { + i__2 = k - 1; + for (j = 1; j <= i__2; ++j) { + i__3 = j + k * a_dim1; + if (a[i__3].r != 0. || a[i__3].i != 0.) { + if (noconj) { + i__3 = j + k * a_dim1; + z__1.r = alpha->r * a[i__3].r - alpha->i * a[ + i__3].i, z__1.i = alpha->r * a[i__3] + .i + alpha->i * a[i__3].r; + temp.r = z__1.r, temp.i = z__1.i; + } else { + d_cnjg(&z__2, &a[j + k * a_dim1]); + z__1.r = alpha->r * z__2.r - alpha->i * + z__2.i, z__1.i = alpha->r * z__2.i + + alpha->i * z__2.r; + temp.r = z__1.r, temp.i = z__1.i; + } + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + i__4 = i__ + j * b_dim1; + i__5 = i__ + j * b_dim1; + i__6 = i__ + k * b_dim1; + z__2.r = temp.r * b[i__6].r - temp.i * b[i__6] + .i, z__2.i = temp.r * b[i__6].i + + temp.i * b[i__6].r; + z__1.r = b[i__5].r + z__2.r, z__1.i = b[i__5] + .i + z__2.i; + b[i__4].r = z__1.r, b[i__4].i = z__1.i; +/* L250: */ + } + } +/* L260: */ + } + temp.r = alpha->r, temp.i = alpha->i; + if (nounit) { + if (noconj) { + i__2 = k + k * a_dim1; + z__1.r = temp.r * a[i__2].r - temp.i * a[i__2].i, + z__1.i = temp.r * a[i__2].i + temp.i * a[ + i__2].r; + temp.r = z__1.r, temp.i = z__1.i; + } else { + d_cnjg(&z__2, &a[k + k * a_dim1]); + z__1.r = temp.r * z__2.r - temp.i * z__2.i, + z__1.i = temp.r * z__2.i + temp.i * + z__2.r; + temp.r = z__1.r, temp.i = z__1.i; + } + } + if (temp.r != 1. || temp.i != 0.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + k * b_dim1; + i__4 = i__ + k * b_dim1; + z__1.r = temp.r * b[i__4].r - temp.i * b[i__4].i, + z__1.i = temp.r * b[i__4].i + temp.i * b[ + i__4].r; + b[i__3].r = z__1.r, b[i__3].i = z__1.i; +/* L270: */ + } + } +/* L280: */ + } + } else { + for (k = *n; k >= 1; --k) { + i__1 = *n; + for (j = k + 1; j <= i__1; ++j) { + i__2 = j + k * a_dim1; + if (a[i__2].r != 0. || a[i__2].i != 0.) { + if (noconj) { + i__2 = j + k * a_dim1; + z__1.r = alpha->r * a[i__2].r - alpha->i * a[ + i__2].i, z__1.i = alpha->r * a[i__2] + .i + alpha->i * a[i__2].r; + temp.r = z__1.r, temp.i = z__1.i; + } else { + d_cnjg(&z__2, &a[j + k * a_dim1]); + z__1.r = alpha->r * z__2.r - alpha->i * + z__2.i, z__1.i = alpha->r * z__2.i + + alpha->i * z__2.r; + temp.r = z__1.r, temp.i = z__1.i; + } + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * b_dim1; + i__5 = i__ + k * b_dim1; + z__2.r = temp.r * b[i__5].r - temp.i * b[i__5] + .i, z__2.i = temp.r * b[i__5].i + + temp.i * b[i__5].r; + z__1.r = b[i__4].r + z__2.r, z__1.i = b[i__4] + .i + z__2.i; + b[i__3].r = z__1.r, b[i__3].i = z__1.i; +/* L290: */ + } + } +/* L300: */ + } + temp.r = alpha->r, temp.i = alpha->i; + if (nounit) { + if (noconj) { + i__1 = k + k * a_dim1; + z__1.r = temp.r * a[i__1].r - temp.i * a[i__1].i, + z__1.i = temp.r * a[i__1].i + temp.i * a[ + i__1].r; + temp.r = z__1.r, temp.i = z__1.i; + } else { + d_cnjg(&z__2, &a[k + k * a_dim1]); + z__1.r = temp.r * z__2.r - temp.i * z__2.i, + z__1.i = temp.r * z__2.i + temp.i * + z__2.r; + temp.r = z__1.r, temp.i = z__1.i; + } + } + if (temp.r != 1. || temp.i != 0.) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + k * b_dim1; + i__3 = i__ + k * b_dim1; + z__1.r = temp.r * b[i__3].r - temp.i * b[i__3].i, + z__1.i = temp.r * b[i__3].i + temp.i * b[ + i__3].r; + b[i__2].r = z__1.r, b[i__2].i = z__1.i; +/* L310: */ + } + } +/* L320: */ + } + } + } + } + + return 0; + +/* End of ZTRMM */ + +} /* ztrmm_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/ztrmv.cpp b/lib/linalg/ztrmv.cpp new file mode 100644 index 0000000000..ffc55b16fe --- /dev/null +++ b/lib/linalg/ztrmv.cpp @@ -0,0 +1,624 @@ +/* fortran/ztrmv.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* > \brief \b ZTRMV */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZTRMV(UPLO,TRANS,DIAG,N,A,LDA,X,INCX) */ + +/* .. Scalar Arguments .. */ +/* INTEGER INCX,LDA,N */ +/* CHARACTER DIAG,TRANS,UPLO */ +/* .. */ +/* .. Array Arguments .. */ +/* COMPLEX*16 A(LDA,*),X(*) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZTRMV performs one of the matrix-vector operations */ +/* > */ +/* > x := A*x, or x := A**T*x, or x := A**H*x, */ +/* > */ +/* > where x is an n element vector and A is an n by n unit, or non-unit, */ +/* > upper or lower triangular matrix. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > On entry, UPLO specifies whether the matrix is an upper or */ +/* > lower triangular matrix as follows: */ +/* > */ +/* > UPLO = 'U' or 'u' A is an upper triangular matrix. */ +/* > */ +/* > UPLO = 'L' or 'l' A is a lower triangular matrix. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TRANS */ +/* > \verbatim */ +/* > TRANS is CHARACTER*1 */ +/* > On entry, TRANS specifies the operation to be performed as */ +/* > follows: */ +/* > */ +/* > TRANS = 'N' or 'n' x := A*x. */ +/* > */ +/* > TRANS = 'T' or 't' x := A**T*x. */ +/* > */ +/* > TRANS = 'C' or 'c' x := A**H*x. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] DIAG */ +/* > \verbatim */ +/* > DIAG is CHARACTER*1 */ +/* > On entry, DIAG specifies whether or not A is unit */ +/* > triangular as follows: */ +/* > */ +/* > DIAG = 'U' or 'u' A is assumed to be unit triangular. */ +/* > */ +/* > DIAG = 'N' or 'n' A is not assumed to be unit */ +/* > triangular. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > On entry, N specifies the order of the matrix A. */ +/* > N must be at least zero. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension ( LDA, N ). */ +/* > Before entry with UPLO = 'U' or 'u', the leading n by n */ +/* > upper triangular part of the array A must contain the upper */ +/* > triangular matrix and the strictly lower triangular part of */ +/* > A is not referenced. */ +/* > Before entry with UPLO = 'L' or 'l', the leading n by n */ +/* > lower triangular part of the array A must contain the lower */ +/* > triangular matrix and the strictly upper triangular part of */ +/* > A is not referenced. */ +/* > Note that when DIAG = 'U' or 'u', the diagonal elements of */ +/* > A are not referenced either, but are assumed to be unity. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > On entry, LDA specifies the first dimension of A as declared */ +/* > in the calling (sub) program. LDA must be at least */ +/* > max( 1, n ). */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] X */ +/* > \verbatim */ +/* > X is COMPLEX*16 array, dimension at least */ +/* > ( 1 + ( n - 1 )*abs( INCX ) ). */ +/* > Before entry, the incremented array X must contain the n */ +/* > element vector x. On exit, X is overwritten with the */ +/* > transformed vector x. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] INCX */ +/* > \verbatim */ +/* > INCX is INTEGER */ +/* > On entry, INCX specifies the increment for the elements of */ +/* > X. INCX must not be zero. */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup complex16_blas_level2 */ + +/* > \par Further Details: */ +/* ===================== */ +/* > */ +/* > \verbatim */ +/* > */ +/* > Level 2 Blas routine. */ +/* > The vector and matrix arguments are not referenced when N = 0, or M = 0 */ +/* > */ +/* > -- Written on 22-October-1986. */ +/* > Jack Dongarra, Argonne National Lab. */ +/* > Jeremy Du Croz, Nag Central Office. */ +/* > Sven Hammarling, Nag Central Office. */ +/* > Richard Hanson, Sandia National Labs. */ +/* > \endverbatim */ +/* > */ +/* ===================================================================== */ +/* Subroutine */ int ztrmv_(char *uplo, char *trans, char *diag, integer *n, + doublecomplex *a, integer *lda, doublecomplex *x, integer *incx, + ftnlen uplo_len, ftnlen trans_len, ftnlen diag_len) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + doublecomplex z__1, z__2, z__3; + + /* Builtin functions */ + void d_cnjg(doublecomplex *, doublecomplex *); + + /* Local variables */ + integer i__, j, ix, jx, kx, info; + doublecomplex temp; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + logical noconj, nounit; + + +/* -- Reference BLAS level2 routine -- */ +/* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --x; + + /* Function Body */ + info = 0; + if (! lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, (char *)"L", ( + ftnlen)1, (ftnlen)1)) { + info = 1; + } else if (! lsame_(trans, (char *)"N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, + (char *)"T", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, (char *)"C", (ftnlen)1, ( + ftnlen)1)) { + info = 2; + } else if (! lsame_(diag, (char *)"U", (ftnlen)1, (ftnlen)1) && ! lsame_(diag, + (char *)"N", (ftnlen)1, (ftnlen)1)) { + info = 3; + } else if (*n < 0) { + info = 4; + } else if (*lda < max(1,*n)) { + info = 6; + } else if (*incx == 0) { + info = 8; + } + if (info != 0) { + xerbla_((char *)"ZTRMV ", &info, (ftnlen)6); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0) { + return 0; + } + + noconj = lsame_(trans, (char *)"T", (ftnlen)1, (ftnlen)1); + nounit = lsame_(diag, (char *)"N", (ftnlen)1, (ftnlen)1); + +/* Set up the start point in X if the increment is not unity. This */ +/* will be ( N - 1 )*INCX too small for descending loops. */ + + if (*incx <= 0) { + kx = 1 - (*n - 1) * *incx; + } else if (*incx != 1) { + kx = 1; + } + +/* Start the operations. In this version the elements of A are */ +/* accessed sequentially with one pass through A. */ + + if (lsame_(trans, (char *)"N", (ftnlen)1, (ftnlen)1)) { + +/* Form x := A*x. */ + + if (lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1)) { + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + if (x[i__2].r != 0. || x[i__2].i != 0.) { + i__2 = j; + temp.r = x[i__2].r, temp.i = x[i__2].i; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__; + i__4 = i__; + i__5 = i__ + j * a_dim1; + z__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, + z__2.i = temp.r * a[i__5].i + temp.i * a[ + i__5].r; + z__1.r = x[i__4].r + z__2.r, z__1.i = x[i__4].i + + z__2.i; + x[i__3].r = z__1.r, x[i__3].i = z__1.i; +/* L10: */ + } + if (nounit) { + i__2 = j; + i__3 = j; + i__4 = j + j * a_dim1; + z__1.r = x[i__3].r * a[i__4].r - x[i__3].i * a[ + i__4].i, z__1.i = x[i__3].r * a[i__4].i + + x[i__3].i * a[i__4].r; + x[i__2].r = z__1.r, x[i__2].i = z__1.i; + } + } +/* L20: */ + } + } else { + jx = kx; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jx; + if (x[i__2].r != 0. || x[i__2].i != 0.) { + i__2 = jx; + temp.r = x[i__2].r, temp.i = x[i__2].i; + ix = kx; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = ix; + i__4 = ix; + i__5 = i__ + j * a_dim1; + z__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, + z__2.i = temp.r * a[i__5].i + temp.i * a[ + i__5].r; + z__1.r = x[i__4].r + z__2.r, z__1.i = x[i__4].i + + z__2.i; + x[i__3].r = z__1.r, x[i__3].i = z__1.i; + ix += *incx; +/* L30: */ + } + if (nounit) { + i__2 = jx; + i__3 = jx; + i__4 = j + j * a_dim1; + z__1.r = x[i__3].r * a[i__4].r - x[i__3].i * a[ + i__4].i, z__1.i = x[i__3].r * a[i__4].i + + x[i__3].i * a[i__4].r; + x[i__2].r = z__1.r, x[i__2].i = z__1.i; + } + } + jx += *incx; +/* L40: */ + } + } + } else { + if (*incx == 1) { + for (j = *n; j >= 1; --j) { + i__1 = j; + if (x[i__1].r != 0. || x[i__1].i != 0.) { + i__1 = j; + temp.r = x[i__1].r, temp.i = x[i__1].i; + i__1 = j + 1; + for (i__ = *n; i__ >= i__1; --i__) { + i__2 = i__; + i__3 = i__; + i__4 = i__ + j * a_dim1; + z__2.r = temp.r * a[i__4].r - temp.i * a[i__4].i, + z__2.i = temp.r * a[i__4].i + temp.i * a[ + i__4].r; + z__1.r = x[i__3].r + z__2.r, z__1.i = x[i__3].i + + z__2.i; + x[i__2].r = z__1.r, x[i__2].i = z__1.i; +/* L50: */ + } + if (nounit) { + i__1 = j; + i__2 = j; + i__3 = j + j * a_dim1; + z__1.r = x[i__2].r * a[i__3].r - x[i__2].i * a[ + i__3].i, z__1.i = x[i__2].r * a[i__3].i + + x[i__2].i * a[i__3].r; + x[i__1].r = z__1.r, x[i__1].i = z__1.i; + } + } +/* L60: */ + } + } else { + kx += (*n - 1) * *incx; + jx = kx; + for (j = *n; j >= 1; --j) { + i__1 = jx; + if (x[i__1].r != 0. || x[i__1].i != 0.) { + i__1 = jx; + temp.r = x[i__1].r, temp.i = x[i__1].i; + ix = kx; + i__1 = j + 1; + for (i__ = *n; i__ >= i__1; --i__) { + i__2 = ix; + i__3 = ix; + i__4 = i__ + j * a_dim1; + z__2.r = temp.r * a[i__4].r - temp.i * a[i__4].i, + z__2.i = temp.r * a[i__4].i + temp.i * a[ + i__4].r; + z__1.r = x[i__3].r + z__2.r, z__1.i = x[i__3].i + + z__2.i; + x[i__2].r = z__1.r, x[i__2].i = z__1.i; + ix -= *incx; +/* L70: */ + } + if (nounit) { + i__1 = jx; + i__2 = jx; + i__3 = j + j * a_dim1; + z__1.r = x[i__2].r * a[i__3].r - x[i__2].i * a[ + i__3].i, z__1.i = x[i__2].r * a[i__3].i + + x[i__2].i * a[i__3].r; + x[i__1].r = z__1.r, x[i__1].i = z__1.i; + } + } + jx -= *incx; +/* L80: */ + } + } + } + } else { + +/* Form x := A**T*x or x := A**H*x. */ + + if (lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1)) { + if (*incx == 1) { + for (j = *n; j >= 1; --j) { + i__1 = j; + temp.r = x[i__1].r, temp.i = x[i__1].i; + if (noconj) { + if (nounit) { + i__1 = j + j * a_dim1; + z__1.r = temp.r * a[i__1].r - temp.i * a[i__1].i, + z__1.i = temp.r * a[i__1].i + temp.i * a[ + i__1].r; + temp.r = z__1.r, temp.i = z__1.i; + } + for (i__ = j - 1; i__ >= 1; --i__) { + i__1 = i__ + j * a_dim1; + i__2 = i__; + z__2.r = a[i__1].r * x[i__2].r - a[i__1].i * x[ + i__2].i, z__2.i = a[i__1].r * x[i__2].i + + a[i__1].i * x[i__2].r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; +/* L90: */ + } + } else { + if (nounit) { + d_cnjg(&z__2, &a[j + j * a_dim1]); + z__1.r = temp.r * z__2.r - temp.i * z__2.i, + z__1.i = temp.r * z__2.i + temp.i * + z__2.r; + temp.r = z__1.r, temp.i = z__1.i; + } + for (i__ = j - 1; i__ >= 1; --i__) { + d_cnjg(&z__3, &a[i__ + j * a_dim1]); + i__1 = i__; + z__2.r = z__3.r * x[i__1].r - z__3.i * x[i__1].i, + z__2.i = z__3.r * x[i__1].i + z__3.i * x[ + i__1].r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; +/* L100: */ + } + } + i__1 = j; + x[i__1].r = temp.r, x[i__1].i = temp.i; +/* L110: */ + } + } else { + jx = kx + (*n - 1) * *incx; + for (j = *n; j >= 1; --j) { + i__1 = jx; + temp.r = x[i__1].r, temp.i = x[i__1].i; + ix = jx; + if (noconj) { + if (nounit) { + i__1 = j + j * a_dim1; + z__1.r = temp.r * a[i__1].r - temp.i * a[i__1].i, + z__1.i = temp.r * a[i__1].i + temp.i * a[ + i__1].r; + temp.r = z__1.r, temp.i = z__1.i; + } + for (i__ = j - 1; i__ >= 1; --i__) { + ix -= *incx; + i__1 = i__ + j * a_dim1; + i__2 = ix; + z__2.r = a[i__1].r * x[i__2].r - a[i__1].i * x[ + i__2].i, z__2.i = a[i__1].r * x[i__2].i + + a[i__1].i * x[i__2].r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; +/* L120: */ + } + } else { + if (nounit) { + d_cnjg(&z__2, &a[j + j * a_dim1]); + z__1.r = temp.r * z__2.r - temp.i * z__2.i, + z__1.i = temp.r * z__2.i + temp.i * + z__2.r; + temp.r = z__1.r, temp.i = z__1.i; + } + for (i__ = j - 1; i__ >= 1; --i__) { + ix -= *incx; + d_cnjg(&z__3, &a[i__ + j * a_dim1]); + i__1 = ix; + z__2.r = z__3.r * x[i__1].r - z__3.i * x[i__1].i, + z__2.i = z__3.r * x[i__1].i + z__3.i * x[ + i__1].r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; +/* L130: */ + } + } + i__1 = jx; + x[i__1].r = temp.r, x[i__1].i = temp.i; + jx -= *incx; +/* L140: */ + } + } + } else { + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + temp.r = x[i__2].r, temp.i = x[i__2].i; + if (noconj) { + if (nounit) { + i__2 = j + j * a_dim1; + z__1.r = temp.r * a[i__2].r - temp.i * a[i__2].i, + z__1.i = temp.r * a[i__2].i + temp.i * a[ + i__2].r; + temp.r = z__1.r, temp.i = z__1.i; + } + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__; + z__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[ + i__4].i, z__2.i = a[i__3].r * x[i__4].i + + a[i__3].i * x[i__4].r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; +/* L150: */ + } + } else { + if (nounit) { + d_cnjg(&z__2, &a[j + j * a_dim1]); + z__1.r = temp.r * z__2.r - temp.i * z__2.i, + z__1.i = temp.r * z__2.i + temp.i * + z__2.r; + temp.r = z__1.r, temp.i = z__1.i; + } + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + d_cnjg(&z__3, &a[i__ + j * a_dim1]); + i__3 = i__; + z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, + z__2.i = z__3.r * x[i__3].i + z__3.i * x[ + i__3].r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; +/* L160: */ + } + } + i__2 = j; + x[i__2].r = temp.r, x[i__2].i = temp.i; +/* L170: */ + } + } else { + jx = kx; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jx; + temp.r = x[i__2].r, temp.i = x[i__2].i; + ix = jx; + if (noconj) { + if (nounit) { + i__2 = j + j * a_dim1; + z__1.r = temp.r * a[i__2].r - temp.i * a[i__2].i, + z__1.i = temp.r * a[i__2].i + temp.i * a[ + i__2].r; + temp.r = z__1.r, temp.i = z__1.i; + } + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + ix += *incx; + i__3 = i__ + j * a_dim1; + i__4 = ix; + z__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[ + i__4].i, z__2.i = a[i__3].r * x[i__4].i + + a[i__3].i * x[i__4].r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; +/* L180: */ + } + } else { + if (nounit) { + d_cnjg(&z__2, &a[j + j * a_dim1]); + z__1.r = temp.r * z__2.r - temp.i * z__2.i, + z__1.i = temp.r * z__2.i + temp.i * + z__2.r; + temp.r = z__1.r, temp.i = z__1.i; + } + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + ix += *incx; + d_cnjg(&z__3, &a[i__ + j * a_dim1]); + i__3 = ix; + z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, + z__2.i = z__3.r * x[i__3].i + z__3.i * x[ + i__3].r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; +/* L190: */ + } + } + i__2 = jx; + x[i__2].r = temp.r, x[i__2].i = temp.i; + jx += *incx; +/* L200: */ + } + } + } + } + + return 0; + +/* End of ZTRMV */ + +} /* ztrmv_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/zung2l.cpp b/lib/linalg/zung2l.cpp new file mode 100644 index 0000000000..45c03e5c78 --- /dev/null +++ b/lib/linalg/zung2l.cpp @@ -0,0 +1,262 @@ +/* fortran/zung2l.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +/* > \brief \b ZUNG2L generates all or part of the unitary matrix Q from a QL factorization determined by cgeq +lf (unblocked algorithm). */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZUNG2L + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZUNG2L( M, N, K, A, LDA, TAU, WORK, INFO ) */ + +/* .. Scalar Arguments .. */ +/* INTEGER INFO, K, LDA, M, N */ +/* .. */ +/* .. Array Arguments .. */ +/* COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZUNG2L generates an m by n complex matrix Q with orthonormal columns, */ +/* > which is defined as the last n columns of a product of k elementary */ +/* > reflectors of order m */ +/* > */ +/* > Q = H(k) . . . H(2) H(1) */ +/* > */ +/* > as returned by ZGEQLF. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix Q. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix Q. M >= N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] K */ +/* > \verbatim */ +/* > K is INTEGER */ +/* > The number of elementary reflectors whose product defines the */ +/* > matrix Q. N >= K >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA,N) */ +/* > On entry, the (n-k+i)-th column must contain the vector which */ +/* > defines the elementary reflector H(i), for i = 1,2,...,k, as */ +/* > returned by ZGEQLF in the last k columns of its array */ +/* > argument A. */ +/* > On exit, the m-by-n matrix Q. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The first dimension of the array A. LDA >= max(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TAU */ +/* > \verbatim */ +/* > TAU is COMPLEX*16 array, dimension (K) */ +/* > TAU(i) must contain the scalar factor of the elementary */ +/* > reflector H(i), as returned by ZGEQLF. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX*16 array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument has an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup complex16OTHERcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int zung2l_(integer *m, integer *n, integer *k, + doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex * + work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + doublecomplex z__1; + + /* Local variables */ + integer i__, j, l, ii; + extern /* Subroutine */ int zscal_(integer *, doublecomplex *, + doublecomplex *, integer *), zlarf_(char *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *, doublecomplex *, ftnlen), xerbla_(char *, integer *, + ftnlen); + + +/* -- LAPACK computational routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0 || *n > *m) { + *info = -2; + } else if (*k < 0 || *k > *n) { + *info = -3; + } else if (*lda < max(1,*m)) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"ZUNG2L", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + if (*n <= 0) { + return 0; + } + +/* Initialise columns 1:n-k to columns of the unit matrix */ + + i__1 = *n - *k; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (l = 1; l <= i__2; ++l) { + i__3 = l + j * a_dim1; + a[i__3].r = 0., a[i__3].i = 0.; +/* L10: */ + } + i__2 = *m - *n + j + j * a_dim1; + a[i__2].r = 1., a[i__2].i = 0.; +/* L20: */ + } + + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + ii = *n - *k + i__; + +/* Apply H(i) to A(1:m-k+i,1:n-k+i) from the left */ + + i__2 = *m - *n + ii + ii * a_dim1; + a[i__2].r = 1., a[i__2].i = 0.; + i__2 = *m - *n + ii; + i__3 = ii - 1; + zlarf_((char *)"Left", &i__2, &i__3, &a[ii * a_dim1 + 1], &c__1, &tau[i__], & + a[a_offset], lda, &work[1], (ftnlen)4); + i__2 = *m - *n + ii - 1; + i__3 = i__; + z__1.r = -tau[i__3].r, z__1.i = -tau[i__3].i; + zscal_(&i__2, &z__1, &a[ii * a_dim1 + 1], &c__1); + i__2 = *m - *n + ii + ii * a_dim1; + i__3 = i__; + z__1.r = 1. - tau[i__3].r, z__1.i = 0. - tau[i__3].i; + a[i__2].r = z__1.r, a[i__2].i = z__1.i; + +/* Set A(m-k+i+1:m,n-k+i) to zero */ + + i__2 = *m; + for (l = *m - *n + ii + 1; l <= i__2; ++l) { + i__3 = l + ii * a_dim1; + a[i__3].r = 0., a[i__3].i = 0.; +/* L30: */ + } +/* L40: */ + } + return 0; + +/* End of ZUNG2L */ + +} /* zung2l_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/zung2r.cpp b/lib/linalg/zung2r.cpp new file mode 100644 index 0000000000..4bd9202caa --- /dev/null +++ b/lib/linalg/zung2r.cpp @@ -0,0 +1,264 @@ +/* fortran/zung2r.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +/* > \brief \b ZUNG2R */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZUNG2R + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZUNG2R( M, N, K, A, LDA, TAU, WORK, INFO ) */ + +/* .. Scalar Arguments .. */ +/* INTEGER INFO, K, LDA, M, N */ +/* .. */ +/* .. Array Arguments .. */ +/* COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZUNG2R generates an m by n complex matrix Q with orthonormal columns, */ +/* > which is defined as the first n columns of a product of k elementary */ +/* > reflectors of order m */ +/* > */ +/* > Q = H(1) H(2) . . . H(k) */ +/* > */ +/* > as returned by ZGEQRF. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix Q. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix Q. M >= N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] K */ +/* > \verbatim */ +/* > K is INTEGER */ +/* > The number of elementary reflectors whose product defines the */ +/* > matrix Q. N >= K >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA,N) */ +/* > On entry, the i-th column must contain the vector which */ +/* > defines the elementary reflector H(i), for i = 1,2,...,k, as */ +/* > returned by ZGEQRF in the first k columns of its array */ +/* > argument A. */ +/* > On exit, the m by n matrix Q. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The first dimension of the array A. LDA >= max(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TAU */ +/* > \verbatim */ +/* > TAU is COMPLEX*16 array, dimension (K) */ +/* > TAU(i) must contain the scalar factor of the elementary */ +/* > reflector H(i), as returned by ZGEQRF. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX*16 array, dimension (N) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument has an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup complex16OTHERcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int zung2r_(integer *m, integer *n, integer *k, + doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex * + work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + doublecomplex z__1; + + /* Local variables */ + integer i__, j, l; + extern /* Subroutine */ int zscal_(integer *, doublecomplex *, + doublecomplex *, integer *), zlarf_(char *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *, doublecomplex *, ftnlen), xerbla_(char *, integer *, + ftnlen); + + +/* -- LAPACK computational routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0 || *n > *m) { + *info = -2; + } else if (*k < 0 || *k > *n) { + *info = -3; + } else if (*lda < max(1,*m)) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"ZUNG2R", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + if (*n <= 0) { + return 0; + } + +/* Initialise columns k+1:n to columns of the unit matrix */ + + i__1 = *n; + for (j = *k + 1; j <= i__1; ++j) { + i__2 = *m; + for (l = 1; l <= i__2; ++l) { + i__3 = l + j * a_dim1; + a[i__3].r = 0., a[i__3].i = 0.; +/* L10: */ + } + i__2 = j + j * a_dim1; + a[i__2].r = 1., a[i__2].i = 0.; +/* L20: */ + } + + for (i__ = *k; i__ >= 1; --i__) { + +/* Apply H(i) to A(i:m,i:n) from the left */ + + if (i__ < *n) { + i__1 = i__ + i__ * a_dim1; + a[i__1].r = 1., a[i__1].i = 0.; + i__1 = *m - i__ + 1; + i__2 = *n - i__; + zlarf_((char *)"Left", &i__1, &i__2, &a[i__ + i__ * a_dim1], &c__1, &tau[ + i__], &a[i__ + (i__ + 1) * a_dim1], lda, &work[1], ( + ftnlen)4); + } + if (i__ < *m) { + i__1 = *m - i__; + i__2 = i__; + z__1.r = -tau[i__2].r, z__1.i = -tau[i__2].i; + zscal_(&i__1, &z__1, &a[i__ + 1 + i__ * a_dim1], &c__1); + } + i__1 = i__ + i__ * a_dim1; + i__2 = i__; + z__1.r = 1. - tau[i__2].r, z__1.i = 0. - tau[i__2].i; + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + +/* Set A(1:i-1,i) to zero */ + + i__1 = i__ - 1; + for (l = 1; l <= i__1; ++l) { + i__2 = l + i__ * a_dim1; + a[i__2].r = 0., a[i__2].i = 0.; +/* L30: */ + } +/* L40: */ + } + return 0; + +/* End of ZUNG2R */ + +} /* zung2r_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/zungl2.cpp b/lib/linalg/zungl2.cpp new file mode 100644 index 0000000000..bd40069fd6 --- /dev/null +++ b/lib/linalg/zungl2.cpp @@ -0,0 +1,273 @@ +/* fortran/zungl2.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* > \brief \b ZUNGL2 generates all or part of the unitary matrix Q from an LQ factorization determined by cge +lqf (unblocked algorithm). */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZUNGL2 + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZUNGL2( M, N, K, A, LDA, TAU, WORK, INFO ) */ + +/* .. Scalar Arguments .. */ +/* INTEGER INFO, K, LDA, M, N */ +/* .. */ +/* .. Array Arguments .. */ +/* COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZUNGL2 generates an m-by-n complex matrix Q with orthonormal rows, */ +/* > which is defined as the first m rows of a product of k elementary */ +/* > reflectors of order n */ +/* > */ +/* > Q = H(k)**H . . . H(2)**H H(1)**H */ +/* > */ +/* > as returned by ZGELQF. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix Q. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix Q. N >= M. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] K */ +/* > \verbatim */ +/* > K is INTEGER */ +/* > The number of elementary reflectors whose product defines the */ +/* > matrix Q. M >= K >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA,N) */ +/* > On entry, the i-th row must contain the vector which defines */ +/* > the elementary reflector H(i), for i = 1,2,...,k, as returned */ +/* > by ZGELQF in the first k rows of its array argument A. */ +/* > On exit, the m by n matrix Q. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The first dimension of the array A. LDA >= max(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TAU */ +/* > \verbatim */ +/* > TAU is COMPLEX*16 array, dimension (K) */ +/* > TAU(i) must contain the scalar factor of the elementary */ +/* > reflector H(i), as returned by ZGELQF. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX*16 array, dimension (M) */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument has an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup complex16OTHERcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int zungl2_(integer *m, integer *n, integer *k, + doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex * + work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + doublecomplex z__1, z__2; + + /* Builtin functions */ + void d_cnjg(doublecomplex *, doublecomplex *); + + /* Local variables */ + integer i__, j, l; + extern /* Subroutine */ int zscal_(integer *, doublecomplex *, + doublecomplex *, integer *), zlarf_(char *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *, doublecomplex *, ftnlen), xerbla_(char *, integer *, + ftnlen), zlacgv_(integer *, doublecomplex *, integer *); + + +/* -- LAPACK computational routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < *m) { + *info = -2; + } else if (*k < 0 || *k > *m) { + *info = -3; + } else if (*lda < max(1,*m)) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"ZUNGL2", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + if (*m <= 0) { + return 0; + } + + if (*k < *m) { + +/* Initialise rows k+1:m to rows of the unit matrix */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (l = *k + 1; l <= i__2; ++l) { + i__3 = l + j * a_dim1; + a[i__3].r = 0., a[i__3].i = 0.; +/* L10: */ + } + if (j > *k && j <= *m) { + i__2 = j + j * a_dim1; + a[i__2].r = 1., a[i__2].i = 0.; + } +/* L20: */ + } + } + + for (i__ = *k; i__ >= 1; --i__) { + +/* Apply H(i)**H to A(i:m,i:n) from the right */ + + if (i__ < *n) { + i__1 = *n - i__; + zlacgv_(&i__1, &a[i__ + (i__ + 1) * a_dim1], lda); + if (i__ < *m) { + i__1 = i__ + i__ * a_dim1; + a[i__1].r = 1., a[i__1].i = 0.; + i__1 = *m - i__; + i__2 = *n - i__ + 1; + d_cnjg(&z__1, &tau[i__]); + zlarf_((char *)"Right", &i__1, &i__2, &a[i__ + i__ * a_dim1], lda, & + z__1, &a[i__ + 1 + i__ * a_dim1], lda, &work[1], ( + ftnlen)5); + } + i__1 = *n - i__; + i__2 = i__; + z__1.r = -tau[i__2].r, z__1.i = -tau[i__2].i; + zscal_(&i__1, &z__1, &a[i__ + (i__ + 1) * a_dim1], lda); + i__1 = *n - i__; + zlacgv_(&i__1, &a[i__ + (i__ + 1) * a_dim1], lda); + } + i__1 = i__ + i__ * a_dim1; + d_cnjg(&z__2, &tau[i__]); + z__1.r = 1. - z__2.r, z__1.i = 0. - z__2.i; + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + +/* Set A(i,1:i-1) to zero */ + + i__1 = i__ - 1; + for (l = 1; l <= i__1; ++l) { + i__2 = i__ + l * a_dim1; + a[i__2].r = 0., a[i__2].i = 0.; +/* L30: */ + } +/* L40: */ + } + return 0; + +/* End of ZUNGL2 */ + +} /* zungl2_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/zungql.cpp b/lib/linalg/zungql.cpp new file mode 100644 index 0000000000..006c4859eb --- /dev/null +++ b/lib/linalg/zungql.cpp @@ -0,0 +1,381 @@ +/* fortran/zungql.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c_n1 = -1; +static integer c__3 = 3; +static integer c__2 = 2; + +/* > \brief \b ZUNGQL */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZUNGQL + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZUNGQL( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) */ + +/* .. Scalar Arguments .. */ +/* INTEGER INFO, K, LDA, LWORK, M, N */ +/* .. */ +/* .. Array Arguments .. */ +/* COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZUNGQL generates an M-by-N complex matrix Q with orthonormal columns, */ +/* > which is defined as the last N columns of a product of K elementary */ +/* > reflectors of order M */ +/* > */ +/* > Q = H(k) . . . H(2) H(1) */ +/* > */ +/* > as returned by ZGEQLF. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix Q. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix Q. M >= N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] K */ +/* > \verbatim */ +/* > K is INTEGER */ +/* > The number of elementary reflectors whose product defines the */ +/* > matrix Q. N >= K >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA,N) */ +/* > On entry, the (n-k+i)-th column must contain the vector which */ +/* > defines the elementary reflector H(i), for i = 1,2,...,k, as */ +/* > returned by ZGEQLF in the last k columns of its array */ +/* > argument A. */ +/* > On exit, the M-by-N matrix Q. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The first dimension of the array A. LDA >= max(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TAU */ +/* > \verbatim */ +/* > TAU is COMPLEX*16 array, dimension (K) */ +/* > TAU(i) must contain the scalar factor of the elementary */ +/* > reflector H(i), as returned by ZGEQLF. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. LWORK >= max(1,N). */ +/* > For optimum performance LWORK >= N*NB, where NB is the */ +/* > optimal blocksize. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument has an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup complex16OTHERcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int zungql_(integer *m, integer *n, integer *k, + doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex * + work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + + /* Local variables */ + integer i__, j, l, ib, nb, kk, nx, iws, nbmin, iinfo; + extern /* Subroutine */ int zung2l_(integer *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *), xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int zlarfb_(char *, char *, char *, char *, + integer *, integer *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *, ftnlen, ftnlen, ftnlen, ftnlen); + integer ldwork; + extern /* Subroutine */ int zlarft_(char *, char *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *, ftnlen, ftnlen); + logical lquery; + integer lwkopt; + + +/* -- LAPACK computational routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0 || *n > *m) { + *info = -2; + } else if (*k < 0 || *k > *n) { + *info = -3; + } else if (*lda < max(1,*m)) { + *info = -5; + } + + if (*info == 0) { + if (*n == 0) { + lwkopt = 1; + } else { + nb = ilaenv_(&c__1, (char *)"ZUNGQL", (char *)" ", m, n, k, &c_n1, (ftnlen)6, ( + ftnlen)1); + lwkopt = *n * nb; + } + work[1].r = (doublereal) lwkopt, work[1].i = 0.; + + if (*lwork < max(1,*n) && ! lquery) { + *info = -8; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"ZUNGQL", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n <= 0) { + return 0; + } + + nbmin = 2; + nx = 0; + iws = *n; + if (nb > 1 && nb < *k) { + +/* Determine when to cross over from blocked to unblocked code. */ + +/* Computing MAX */ + i__1 = 0, i__2 = ilaenv_(&c__3, (char *)"ZUNGQL", (char *)" ", m, n, k, &c_n1, ( + ftnlen)6, (ftnlen)1); + nx = max(i__1,i__2); + if (nx < *k) { + +/* Determine if workspace is large enough for blocked code. */ + + ldwork = *n; + iws = ldwork * nb; + if (*lwork < iws) { + +/* Not enough workspace to use optimal NB: reduce NB and */ +/* determine the minimum value of NB. */ + + nb = *lwork / ldwork; +/* Computing MAX */ + i__1 = 2, i__2 = ilaenv_(&c__2, (char *)"ZUNGQL", (char *)" ", m, n, k, &c_n1, + (ftnlen)6, (ftnlen)1); + nbmin = max(i__1,i__2); + } + } + } + + if (nb >= nbmin && nb < *k && nx < *k) { + +/* Use blocked code after the first block. */ +/* The last kk columns are handled by the block method. */ + +/* Computing MIN */ + i__1 = *k, i__2 = (*k - nx + nb - 1) / nb * nb; + kk = min(i__1,i__2); + +/* Set A(m-kk+1:m,1:n-kk) to zero. */ + + i__1 = *n - kk; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = *m - kk + 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + a[i__3].r = 0., a[i__3].i = 0.; +/* L10: */ + } +/* L20: */ + } + } else { + kk = 0; + } + +/* Use unblocked code for the first or only block. */ + + i__1 = *m - kk; + i__2 = *n - kk; + i__3 = *k - kk; + zung2l_(&i__1, &i__2, &i__3, &a[a_offset], lda, &tau[1], &work[1], &iinfo) + ; + + if (kk > 0) { + +/* Use blocked code */ + + i__1 = *k; + i__2 = nb; + for (i__ = *k - kk + 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += + i__2) { +/* Computing MIN */ + i__3 = nb, i__4 = *k - i__ + 1; + ib = min(i__3,i__4); + if (*n - *k + i__ > 1) { + +/* Form the triangular factor of the block reflector */ +/* H = H(i+ib-1) . . . H(i+1) H(i) */ + + i__3 = *m - *k + i__ + ib - 1; + zlarft_((char *)"Backward", (char *)"Columnwise", &i__3, &ib, &a[(*n - *k + + i__) * a_dim1 + 1], lda, &tau[i__], &work[1], &ldwork, + (ftnlen)8, (ftnlen)10); + +/* Apply H to A(1:m-k+i+ib-1,1:n-k+i-1) from the left */ + + i__3 = *m - *k + i__ + ib - 1; + i__4 = *n - *k + i__ - 1; + zlarfb_((char *)"Left", (char *)"No transpose", (char *)"Backward", (char *)"Columnwise", & + i__3, &i__4, &ib, &a[(*n - *k + i__) * a_dim1 + 1], + lda, &work[1], &ldwork, &a[a_offset], lda, &work[ib + + 1], &ldwork, (ftnlen)4, (ftnlen)12, (ftnlen)8, ( + ftnlen)10); + } + +/* Apply H to rows 1:m-k+i+ib-1 of current block */ + + i__3 = *m - *k + i__ + ib - 1; + zung2l_(&i__3, &ib, &ib, &a[(*n - *k + i__) * a_dim1 + 1], lda, & + tau[i__], &work[1], &iinfo); + +/* Set rows m-k+i+ib:m of current block to zero */ + + i__3 = *n - *k + i__ + ib - 1; + for (j = *n - *k + i__; j <= i__3; ++j) { + i__4 = *m; + for (l = *m - *k + i__ + ib; l <= i__4; ++l) { + i__5 = l + j * a_dim1; + a[i__5].r = 0., a[i__5].i = 0.; +/* L30: */ + } +/* L40: */ + } +/* L50: */ + } + } + + work[1].r = (doublereal) iws, work[1].i = 0.; + return 0; + +/* End of ZUNGQL */ + +} /* zungql_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/zungqr.cpp b/lib/linalg/zungqr.cpp new file mode 100644 index 0000000000..7ce6a2a40d --- /dev/null +++ b/lib/linalg/zungqr.cpp @@ -0,0 +1,372 @@ +/* fortran/zungqr.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c_n1 = -1; +static integer c__3 = 3; +static integer c__2 = 2; + +/* > \brief \b ZUNGQR */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZUNGQR + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZUNGQR( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) */ + +/* .. Scalar Arguments .. */ +/* INTEGER INFO, K, LDA, LWORK, M, N */ +/* .. */ +/* .. Array Arguments .. */ +/* COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZUNGQR generates an M-by-N complex matrix Q with orthonormal columns, */ +/* > which is defined as the first N columns of a product of K elementary */ +/* > reflectors of order M */ +/* > */ +/* > Q = H(1) H(2) . . . H(k) */ +/* > */ +/* > as returned by ZGEQRF. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix Q. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix Q. M >= N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] K */ +/* > \verbatim */ +/* > K is INTEGER */ +/* > The number of elementary reflectors whose product defines the */ +/* > matrix Q. N >= K >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA,N) */ +/* > On entry, the i-th column must contain the vector which */ +/* > defines the elementary reflector H(i), for i = 1,2,...,k, as */ +/* > returned by ZGEQRF in the first k columns of its array */ +/* > argument A. */ +/* > On exit, the M-by-N matrix Q. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The first dimension of the array A. LDA >= max(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TAU */ +/* > \verbatim */ +/* > TAU is COMPLEX*16 array, dimension (K) */ +/* > TAU(i) must contain the scalar factor of the elementary */ +/* > reflector H(i), as returned by ZGEQRF. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. LWORK >= max(1,N). */ +/* > For optimum performance LWORK >= N*NB, where NB is the */ +/* > optimal blocksize. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument has an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup complex16OTHERcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int zungqr_(integer *m, integer *n, integer *k, + doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex * + work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + integer i__, j, l, ib, nb, ki, kk, nx, iws, nbmin, iinfo; + extern /* Subroutine */ int zung2r_(integer *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *), xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int zlarfb_(char *, char *, char *, char *, + integer *, integer *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *, ftnlen, ftnlen, ftnlen, ftnlen); + integer ldwork; + extern /* Subroutine */ int zlarft_(char *, char *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *, ftnlen, ftnlen); + integer lwkopt; + logical lquery; + + +/* -- LAPACK computational routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + nb = ilaenv_(&c__1, (char *)"ZUNGQR", (char *)" ", m, n, k, &c_n1, (ftnlen)6, (ftnlen)1); + lwkopt = max(1,*n) * nb; + work[1].r = (doublereal) lwkopt, work[1].i = 0.; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0 || *n > *m) { + *info = -2; + } else if (*k < 0 || *k > *n) { + *info = -3; + } else if (*lda < max(1,*m)) { + *info = -5; + } else if (*lwork < max(1,*n) && ! lquery) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"ZUNGQR", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n <= 0) { + work[1].r = 1., work[1].i = 0.; + return 0; + } + + nbmin = 2; + nx = 0; + iws = *n; + if (nb > 1 && nb < *k) { + +/* Determine when to cross over from blocked to unblocked code. */ + +/* Computing MAX */ + i__1 = 0, i__2 = ilaenv_(&c__3, (char *)"ZUNGQR", (char *)" ", m, n, k, &c_n1, ( + ftnlen)6, (ftnlen)1); + nx = max(i__1,i__2); + if (nx < *k) { + +/* Determine if workspace is large enough for blocked code. */ + + ldwork = *n; + iws = ldwork * nb; + if (*lwork < iws) { + +/* Not enough workspace to use optimal NB: reduce NB and */ +/* determine the minimum value of NB. */ + + nb = *lwork / ldwork; +/* Computing MAX */ + i__1 = 2, i__2 = ilaenv_(&c__2, (char *)"ZUNGQR", (char *)" ", m, n, k, &c_n1, + (ftnlen)6, (ftnlen)1); + nbmin = max(i__1,i__2); + } + } + } + + if (nb >= nbmin && nb < *k && nx < *k) { + +/* Use blocked code after the last block. */ +/* The first kk columns are handled by the block method. */ + + ki = (*k - nx - 1) / nb * nb; +/* Computing MIN */ + i__1 = *k, i__2 = ki + nb; + kk = min(i__1,i__2); + +/* Set A(1:kk,kk+1:n) to zero. */ + + i__1 = *n; + for (j = kk + 1; j <= i__1; ++j) { + i__2 = kk; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + a[i__3].r = 0., a[i__3].i = 0.; +/* L10: */ + } +/* L20: */ + } + } else { + kk = 0; + } + +/* Use unblocked code for the last or only block. */ + + if (kk < *n) { + i__1 = *m - kk; + i__2 = *n - kk; + i__3 = *k - kk; + zung2r_(&i__1, &i__2, &i__3, &a[kk + 1 + (kk + 1) * a_dim1], lda, & + tau[kk + 1], &work[1], &iinfo); + } + + if (kk > 0) { + +/* Use blocked code */ + + i__1 = -nb; + for (i__ = ki + 1; i__1 < 0 ? i__ >= 1 : i__ <= 1; i__ += i__1) { +/* Computing MIN */ + i__2 = nb, i__3 = *k - i__ + 1; + ib = min(i__2,i__3); + if (i__ + ib <= *n) { + +/* Form the triangular factor of the block reflector */ +/* H = H(i) H(i+1) . . . H(i+ib-1) */ + + i__2 = *m - i__ + 1; + zlarft_((char *)"Forward", (char *)"Columnwise", &i__2, &ib, &a[i__ + i__ * + a_dim1], lda, &tau[i__], &work[1], &ldwork, (ftnlen)7, + (ftnlen)10); + +/* Apply H to A(i:m,i+ib:n) from the left */ + + i__2 = *m - i__ + 1; + i__3 = *n - i__ - ib + 1; + zlarfb_((char *)"Left", (char *)"No transpose", (char *)"Forward", (char *)"Columnwise", & + i__2, &i__3, &ib, &a[i__ + i__ * a_dim1], lda, &work[ + 1], &ldwork, &a[i__ + (i__ + ib) * a_dim1], lda, & + work[ib + 1], &ldwork, (ftnlen)4, (ftnlen)12, (ftnlen) + 7, (ftnlen)10); + } + +/* Apply H to rows i:m of current block */ + + i__2 = *m - i__ + 1; + zung2r_(&i__2, &ib, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], & + work[1], &iinfo); + +/* Set rows 1:i-1 of current block to zero */ + + i__2 = i__ + ib - 1; + for (j = i__; j <= i__2; ++j) { + i__3 = i__ - 1; + for (l = 1; l <= i__3; ++l) { + i__4 = l + j * a_dim1; + a[i__4].r = 0., a[i__4].i = 0.; +/* L30: */ + } +/* L40: */ + } +/* L50: */ + } + } + + work[1].r = (doublereal) iws, work[1].i = 0.; + return 0; + +/* End of ZUNGQR */ + +} /* zungqr_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/zungtr.cpp b/lib/linalg/zungtr.cpp new file mode 100644 index 0000000000..c8f42b6e21 --- /dev/null +++ b/lib/linalg/zungtr.cpp @@ -0,0 +1,342 @@ +/* fortran/zungtr.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c_n1 = -1; + +/* > \brief \b ZUNGTR */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZUNGTR + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZUNGTR( UPLO, N, A, LDA, TAU, WORK, LWORK, INFO ) */ + +/* .. Scalar Arguments .. */ +/* CHARACTER UPLO */ +/* INTEGER INFO, LDA, LWORK, N */ +/* .. */ +/* .. Array Arguments .. */ +/* COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZUNGTR generates a complex unitary matrix Q which is defined as the */ +/* > product of n-1 elementary reflectors of order N, as returned by */ +/* > ZHETRD: */ +/* > */ +/* > if UPLO = 'U', Q = H(n-1) . . . H(2) H(1), */ +/* > */ +/* > if UPLO = 'L', Q = H(1) H(2) . . . H(n-1). */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': Upper triangle of A contains elementary reflectors */ +/* > from ZHETRD; */ +/* > = 'L': Lower triangle of A contains elementary reflectors */ +/* > from ZHETRD. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The order of the matrix Q. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA,N) */ +/* > On entry, the vectors which define the elementary reflectors, */ +/* > as returned by ZHETRD. */ +/* > On exit, the N-by-N unitary matrix Q. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. LDA >= N. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TAU */ +/* > \verbatim */ +/* > TAU is COMPLEX*16 array, dimension (N-1) */ +/* > TAU(i) must contain the scalar factor of the elementary */ +/* > reflector H(i), as returned by ZHETRD. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. LWORK >= N-1. */ +/* > For optimum performance LWORK >= (N-1)*NB, where NB is */ +/* > the optimal blocksize. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup complex16OTHERcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int zungtr_(char *uplo, integer *n, doublecomplex *a, + integer *lda, doublecomplex *tau, doublecomplex *work, integer *lwork, + integer *info, ftnlen uplo_len) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + integer i__, j, nb; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + integer iinfo; + logical upper; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + integer lwkopt; + logical lquery; + extern /* Subroutine */ int zungql_(integer *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *, integer *), zungqr_(integer *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *, integer *); + + +/* -- LAPACK computational routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + lquery = *lwork == -1; + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + if (! upper && ! lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = 1, i__2 = *n - 1; + if (*lwork < max(i__1,i__2) && ! lquery) { + *info = -7; + } + } + + if (*info == 0) { + if (upper) { + i__1 = *n - 1; + i__2 = *n - 1; + i__3 = *n - 1; + nb = ilaenv_(&c__1, (char *)"ZUNGQL", (char *)" ", &i__1, &i__2, &i__3, &c_n1, ( + ftnlen)6, (ftnlen)1); + } else { + i__1 = *n - 1; + i__2 = *n - 1; + i__3 = *n - 1; + nb = ilaenv_(&c__1, (char *)"ZUNGQR", (char *)" ", &i__1, &i__2, &i__3, &c_n1, ( + ftnlen)6, (ftnlen)1); + } +/* Computing MAX */ + i__1 = 1, i__2 = *n - 1; + lwkopt = max(i__1,i__2) * nb; + work[1].r = (doublereal) lwkopt, work[1].i = 0.; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"ZUNGTR", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + work[1].r = 1., work[1].i = 0.; + return 0; + } + + if (upper) { + +/* Q was determined by a call to ZHETRD with UPLO = 'U' */ + +/* Shift the vectors which define the elementary reflectors one */ +/* column to the left, and set the last row and column of Q to */ +/* those of the unit matrix */ + + i__1 = *n - 1; + for (j = 1; j <= i__1; ++j) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + (j + 1) * a_dim1; + a[i__3].r = a[i__4].r, a[i__3].i = a[i__4].i; +/* L10: */ + } + i__2 = *n + j * a_dim1; + a[i__2].r = 0., a[i__2].i = 0.; +/* L20: */ + } + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + *n * a_dim1; + a[i__2].r = 0., a[i__2].i = 0.; +/* L30: */ + } + i__1 = *n + *n * a_dim1; + a[i__1].r = 1., a[i__1].i = 0.; + +/* Generate Q(1:n-1,1:n-1) */ + + i__1 = *n - 1; + i__2 = *n - 1; + i__3 = *n - 1; + zungql_(&i__1, &i__2, &i__3, &a[a_offset], lda, &tau[1], &work[1], + lwork, &iinfo); + + } else { + +/* Q was determined by a call to ZHETRD with UPLO = 'L'. */ + +/* Shift the vectors which define the elementary reflectors one */ +/* column to the right, and set the first row and column of Q to */ +/* those of the unit matrix */ + + for (j = *n; j >= 2; --j) { + i__1 = j * a_dim1 + 1; + a[i__1].r = 0., a[i__1].i = 0.; + i__1 = *n; + for (i__ = j + 1; i__ <= i__1; ++i__) { + i__2 = i__ + j * a_dim1; + i__3 = i__ + (j - 1) * a_dim1; + a[i__2].r = a[i__3].r, a[i__2].i = a[i__3].i; +/* L40: */ + } +/* L50: */ + } + i__1 = a_dim1 + 1; + a[i__1].r = 1., a[i__1].i = 0.; + i__1 = *n; + for (i__ = 2; i__ <= i__1; ++i__) { + i__2 = i__ + a_dim1; + a[i__2].r = 0., a[i__2].i = 0.; +/* L60: */ + } + if (*n > 1) { + +/* Generate Q(2:n,2:n) */ + + i__1 = *n - 1; + i__2 = *n - 1; + i__3 = *n - 1; + zungqr_(&i__1, &i__2, &i__3, &a[(a_dim1 << 1) + 2], lda, &tau[1], + &work[1], lwork, &iinfo); + } + } + work[1].r = (doublereal) lwkopt, work[1].i = 0.; + return 0; + +/* End of ZUNGTR */ + +} /* zungtr_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/zunm2l.cpp b/lib/linalg/zunm2l.cpp new file mode 100644 index 0000000000..d7fc940517 --- /dev/null +++ b/lib/linalg/zunm2l.cpp @@ -0,0 +1,339 @@ +/* fortran/zunm2l.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +/* > \brief \b ZUNM2L multiplies a general matrix by the unitary matrix from a QL factorization determined by +cgeqlf (unblocked algorithm). */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZUNM2L + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZUNM2L( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, */ +/* WORK, INFO ) */ + +/* .. Scalar Arguments .. */ +/* CHARACTER SIDE, TRANS */ +/* INTEGER INFO, K, LDA, LDC, M, N */ +/* .. */ +/* .. Array Arguments .. */ +/* COMPLEX*16 A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZUNM2L overwrites the general complex m-by-n matrix C with */ +/* > */ +/* > Q * C if SIDE = 'L' and TRANS = 'N', or */ +/* > */ +/* > Q**H* C if SIDE = 'L' and TRANS = 'C', or */ +/* > */ +/* > C * Q if SIDE = 'R' and TRANS = 'N', or */ +/* > */ +/* > C * Q**H if SIDE = 'R' and TRANS = 'C', */ +/* > */ +/* > where Q is a complex unitary matrix defined as the product of k */ +/* > elementary reflectors */ +/* > */ +/* > Q = H(k) . . . H(2) H(1) */ +/* > */ +/* > as returned by ZGEQLF. Q is of order m if SIDE = 'L' and of order n */ +/* > if SIDE = 'R'. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] SIDE */ +/* > \verbatim */ +/* > SIDE is CHARACTER*1 */ +/* > = 'L': apply Q or Q**H from the Left */ +/* > = 'R': apply Q or Q**H from the Right */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TRANS */ +/* > \verbatim */ +/* > TRANS is CHARACTER*1 */ +/* > = 'N': apply Q (No transpose) */ +/* > = 'C': apply Q**H (Conjugate transpose) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix C. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix C. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] K */ +/* > \verbatim */ +/* > K is INTEGER */ +/* > The number of elementary reflectors whose product defines */ +/* > the matrix Q. */ +/* > If SIDE = 'L', M >= K >= 0; */ +/* > if SIDE = 'R', N >= K >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA,K) */ +/* > The i-th column must contain the vector which defines the */ +/* > elementary reflector H(i), for i = 1,2,...,k, as returned by */ +/* > ZGEQLF in the last k columns of its array argument A. */ +/* > A is modified by the routine but restored on exit. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. */ +/* > If SIDE = 'L', LDA >= max(1,M); */ +/* > if SIDE = 'R', LDA >= max(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TAU */ +/* > \verbatim */ +/* > TAU is COMPLEX*16 array, dimension (K) */ +/* > TAU(i) must contain the scalar factor of the elementary */ +/* > reflector H(i), as returned by ZGEQLF. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] C */ +/* > \verbatim */ +/* > C is COMPLEX*16 array, dimension (LDC,N) */ +/* > On entry, the m-by-n matrix C. */ +/* > On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDC */ +/* > \verbatim */ +/* > LDC is INTEGER */ +/* > The leading dimension of the array C. LDC >= max(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX*16 array, dimension */ +/* > (N) if SIDE = 'L', */ +/* > (M) if SIDE = 'R' */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup complex16OTHERcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int zunm2l_(char *side, char *trans, integer *m, integer *n, + integer *k, doublecomplex *a, integer *lda, doublecomplex *tau, + doublecomplex *c__, integer *ldc, doublecomplex *work, integer *info, + ftnlen side_len, ftnlen trans_len) +{ + /* System generated locals */ + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3; + doublecomplex z__1; + + /* Builtin functions */ + void d_cnjg(doublecomplex *, doublecomplex *); + + /* Local variables */ + integer i__, i1, i2, i3, mi, ni, nq; + doublecomplex aii; + logical left; + doublecomplex taui; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern /* Subroutine */ int zlarf_(char *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *, doublecomplex *, ftnlen), xerbla_(char *, integer *, + ftnlen); + logical notran; + + +/* -- LAPACK computational routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = lsame_(side, (char *)"L", (ftnlen)1, (ftnlen)1); + notran = lsame_(trans, (char *)"N", (ftnlen)1, (ftnlen)1); + +/* NQ is the order of Q */ + + if (left) { + nq = *m; + } else { + nq = *n; + } + if (! left && ! lsame_(side, (char *)"R", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (! notran && ! lsame_(trans, (char *)"C", (ftnlen)1, (ftnlen)1)) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*k < 0 || *k > nq) { + *info = -5; + } else if (*lda < max(1,nq)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"ZUNM2L", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || *k == 0) { + return 0; + } + + if (left && notran || ! left && ! notran) { + i1 = 1; + i2 = *k; + i3 = 1; + } else { + i1 = *k; + i2 = 1; + i3 = -1; + } + + if (left) { + ni = *n; + } else { + mi = *m; + } + + i__1 = i2; + i__2 = i3; + for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { + if (left) { + +/* H(i) or H(i)**H is applied to C(1:m-k+i,1:n) */ + + mi = *m - *k + i__; + } else { + +/* H(i) or H(i)**H is applied to C(1:m,1:n-k+i) */ + + ni = *n - *k + i__; + } + +/* Apply H(i) or H(i)**H */ + + if (notran) { + i__3 = i__; + taui.r = tau[i__3].r, taui.i = tau[i__3].i; + } else { + d_cnjg(&z__1, &tau[i__]); + taui.r = z__1.r, taui.i = z__1.i; + } + i__3 = nq - *k + i__ + i__ * a_dim1; + aii.r = a[i__3].r, aii.i = a[i__3].i; + i__3 = nq - *k + i__ + i__ * a_dim1; + a[i__3].r = 1., a[i__3].i = 0.; + zlarf_(side, &mi, &ni, &a[i__ * a_dim1 + 1], &c__1, &taui, &c__[ + c_offset], ldc, &work[1], (ftnlen)1); + i__3 = nq - *k + i__ + i__ * a_dim1; + a[i__3].r = aii.r, a[i__3].i = aii.i; +/* L10: */ + } + return 0; + +/* End of ZUNM2L */ + +} /* zunm2l_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/zunm2r.cpp b/lib/linalg/zunm2r.cpp new file mode 100644 index 0000000000..56ad0d944a --- /dev/null +++ b/lib/linalg/zunm2r.cpp @@ -0,0 +1,343 @@ +/* fortran/zunm2r.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static integer c__1 = 1; + +/* > \brief \b ZUNM2R multiplies a general matrix by the unitary matrix from a QR factorization determined by +cgeqrf (unblocked algorithm). */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZUNM2R + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZUNM2R( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, */ +/* WORK, INFO ) */ + +/* .. Scalar Arguments .. */ +/* CHARACTER SIDE, TRANS */ +/* INTEGER INFO, K, LDA, LDC, M, N */ +/* .. */ +/* .. Array Arguments .. */ +/* COMPLEX*16 A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZUNM2R overwrites the general complex m-by-n matrix C with */ +/* > */ +/* > Q * C if SIDE = 'L' and TRANS = 'N', or */ +/* > */ +/* > Q**H* C if SIDE = 'L' and TRANS = 'C', or */ +/* > */ +/* > C * Q if SIDE = 'R' and TRANS = 'N', or */ +/* > */ +/* > C * Q**H if SIDE = 'R' and TRANS = 'C', */ +/* > */ +/* > where Q is a complex unitary matrix defined as the product of k */ +/* > elementary reflectors */ +/* > */ +/* > Q = H(1) H(2) . . . H(k) */ +/* > */ +/* > as returned by ZGEQRF. Q is of order m if SIDE = 'L' and of order n */ +/* > if SIDE = 'R'. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] SIDE */ +/* > \verbatim */ +/* > SIDE is CHARACTER*1 */ +/* > = 'L': apply Q or Q**H from the Left */ +/* > = 'R': apply Q or Q**H from the Right */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TRANS */ +/* > \verbatim */ +/* > TRANS is CHARACTER*1 */ +/* > = 'N': apply Q (No transpose) */ +/* > = 'C': apply Q**H (Conjugate transpose) */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix C. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix C. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] K */ +/* > \verbatim */ +/* > K is INTEGER */ +/* > The number of elementary reflectors whose product defines */ +/* > the matrix Q. */ +/* > If SIDE = 'L', M >= K >= 0; */ +/* > if SIDE = 'R', N >= K >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA,K) */ +/* > The i-th column must contain the vector which defines the */ +/* > elementary reflector H(i), for i = 1,2,...,k, as returned by */ +/* > ZGEQRF in the first k columns of its array argument A. */ +/* > A is modified by the routine but restored on exit. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. */ +/* > If SIDE = 'L', LDA >= max(1,M); */ +/* > if SIDE = 'R', LDA >= max(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TAU */ +/* > \verbatim */ +/* > TAU is COMPLEX*16 array, dimension (K) */ +/* > TAU(i) must contain the scalar factor of the elementary */ +/* > reflector H(i), as returned by ZGEQRF. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] C */ +/* > \verbatim */ +/* > C is COMPLEX*16 array, dimension (LDC,N) */ +/* > On entry, the m-by-n matrix C. */ +/* > On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDC */ +/* > \verbatim */ +/* > LDC is INTEGER */ +/* > The leading dimension of the array C. LDC >= max(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX*16 array, dimension */ +/* > (N) if SIDE = 'L', */ +/* > (M) if SIDE = 'R' */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup complex16OTHERcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int zunm2r_(char *side, char *trans, integer *m, integer *n, + integer *k, doublecomplex *a, integer *lda, doublecomplex *tau, + doublecomplex *c__, integer *ldc, doublecomplex *work, integer *info, + ftnlen side_len, ftnlen trans_len) +{ + /* System generated locals */ + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3; + doublecomplex z__1; + + /* Builtin functions */ + void d_cnjg(doublecomplex *, doublecomplex *); + + /* Local variables */ + integer i__, i1, i2, i3, ic, jc, mi, ni, nq; + doublecomplex aii; + logical left; + doublecomplex taui; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern /* Subroutine */ int zlarf_(char *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *, doublecomplex *, ftnlen), xerbla_(char *, integer *, + ftnlen); + logical notran; + + +/* -- LAPACK computational routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = lsame_(side, (char *)"L", (ftnlen)1, (ftnlen)1); + notran = lsame_(trans, (char *)"N", (ftnlen)1, (ftnlen)1); + +/* NQ is the order of Q */ + + if (left) { + nq = *m; + } else { + nq = *n; + } + if (! left && ! lsame_(side, (char *)"R", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (! notran && ! lsame_(trans, (char *)"C", (ftnlen)1, (ftnlen)1)) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*k < 0 || *k > nq) { + *info = -5; + } else if (*lda < max(1,nq)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"ZUNM2R", &i__1, (ftnlen)6); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || *k == 0) { + return 0; + } + + if (left && ! notran || ! left && notran) { + i1 = 1; + i2 = *k; + i3 = 1; + } else { + i1 = *k; + i2 = 1; + i3 = -1; + } + + if (left) { + ni = *n; + jc = 1; + } else { + mi = *m; + ic = 1; + } + + i__1 = i2; + i__2 = i3; + for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { + if (left) { + +/* H(i) or H(i)**H is applied to C(i:m,1:n) */ + + mi = *m - i__ + 1; + ic = i__; + } else { + +/* H(i) or H(i)**H is applied to C(1:m,i:n) */ + + ni = *n - i__ + 1; + jc = i__; + } + +/* Apply H(i) or H(i)**H */ + + if (notran) { + i__3 = i__; + taui.r = tau[i__3].r, taui.i = tau[i__3].i; + } else { + d_cnjg(&z__1, &tau[i__]); + taui.r = z__1.r, taui.i = z__1.i; + } + i__3 = i__ + i__ * a_dim1; + aii.r = a[i__3].r, aii.i = a[i__3].i; + i__3 = i__ + i__ * a_dim1; + a[i__3].r = 1., a[i__3].i = 0.; + zlarf_(side, &mi, &ni, &a[i__ + i__ * a_dim1], &c__1, &taui, &c__[ic + + jc * c_dim1], ldc, &work[1], (ftnlen)1); + i__3 = i__ + i__ * a_dim1; + a[i__3].r = aii.r, a[i__3].i = aii.i; +/* L10: */ + } + return 0; + +/* End of ZUNM2R */ + +} /* zunm2r_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/zunmql.cpp b/lib/linalg/zunmql.cpp new file mode 100644 index 0000000000..c7e6134831 --- /dev/null +++ b/lib/linalg/zunmql.cpp @@ -0,0 +1,419 @@ +/* fortran/zunmql.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c_n1 = -1; +static integer c__2 = 2; +static integer c__65 = 65; + +/* > \brief \b ZUNMQL */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZUNMQL + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZUNMQL( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, */ +/* WORK, LWORK, INFO ) */ + +/* .. Scalar Arguments .. */ +/* CHARACTER SIDE, TRANS */ +/* INTEGER INFO, K, LDA, LDC, LWORK, M, N */ +/* .. */ +/* .. Array Arguments .. */ +/* COMPLEX*16 A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZUNMQL overwrites the general complex M-by-N matrix C with */ +/* > */ +/* > SIDE = 'L' SIDE = 'R' */ +/* > TRANS = 'N': Q * C C * Q */ +/* > TRANS = 'C': Q**H * C C * Q**H */ +/* > */ +/* > where Q is a complex unitary matrix defined as the product of k */ +/* > elementary reflectors */ +/* > */ +/* > Q = H(k) . . . H(2) H(1) */ +/* > */ +/* > as returned by ZGEQLF. Q is of order M if SIDE = 'L' and of order N */ +/* > if SIDE = 'R'. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] SIDE */ +/* > \verbatim */ +/* > SIDE is CHARACTER*1 */ +/* > = 'L': apply Q or Q**H from the Left; */ +/* > = 'R': apply Q or Q**H from the Right. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TRANS */ +/* > \verbatim */ +/* > TRANS is CHARACTER*1 */ +/* > = 'N': No transpose, apply Q; */ +/* > = 'C': Conjugate transpose, apply Q**H. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix C. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix C. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] K */ +/* > \verbatim */ +/* > K is INTEGER */ +/* > The number of elementary reflectors whose product defines */ +/* > the matrix Q. */ +/* > If SIDE = 'L', M >= K >= 0; */ +/* > if SIDE = 'R', N >= K >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA,K) */ +/* > The i-th column must contain the vector which defines the */ +/* > elementary reflector H(i), for i = 1,2,...,k, as returned by */ +/* > ZGEQLF in the last k columns of its array argument A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. */ +/* > If SIDE = 'L', LDA >= max(1,M); */ +/* > if SIDE = 'R', LDA >= max(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TAU */ +/* > \verbatim */ +/* > TAU is COMPLEX*16 array, dimension (K) */ +/* > TAU(i) must contain the scalar factor of the elementary */ +/* > reflector H(i), as returned by ZGEQLF. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] C */ +/* > \verbatim */ +/* > C is COMPLEX*16 array, dimension (LDC,N) */ +/* > On entry, the M-by-N matrix C. */ +/* > On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDC */ +/* > \verbatim */ +/* > LDC is INTEGER */ +/* > The leading dimension of the array C. LDC >= max(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. */ +/* > If SIDE = 'L', LWORK >= max(1,N); */ +/* > if SIDE = 'R', LWORK >= max(1,M). */ +/* > For good performance, LWORK should generally be larger. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup complex16OTHERcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int zunmql_(char *side, char *trans, integer *m, integer *n, + integer *k, doublecomplex *a, integer *lda, doublecomplex *tau, + doublecomplex *c__, integer *ldc, doublecomplex *work, integer *lwork, + integer *info, ftnlen side_len, ftnlen trans_len) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2], i__4, + i__5; + char ch__1[2]; + + /* Builtin functions */ + /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); + + /* Local variables */ + integer i__, i1, i2, i3, ib, nb, mi, ni, nq, nw, iwt; + logical left; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + integer nbmin, iinfo; + extern /* Subroutine */ int zunm2l_(char *, char *, integer *, integer *, + integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, ftnlen, + ftnlen), xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int zlarfb_(char *, char *, char *, char *, + integer *, integer *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *, ftnlen, ftnlen, ftnlen, ftnlen); + logical notran; + integer ldwork; + extern /* Subroutine */ int zlarft_(char *, char *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *, ftnlen, ftnlen); + integer lwkopt; + logical lquery; + + +/* -- LAPACK computational routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = lsame_(side, (char *)"L", (ftnlen)1, (ftnlen)1); + notran = lsame_(trans, (char *)"N", (ftnlen)1, (ftnlen)1); + lquery = *lwork == -1; + +/* NQ is the order of Q and NW is the minimum dimension of WORK */ + + if (left) { + nq = *m; + nw = max(1,*n); + } else { + nq = *n; + nw = max(1,*m); + } + if (! left && ! lsame_(side, (char *)"R", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (! notran && ! lsame_(trans, (char *)"C", (ftnlen)1, (ftnlen)1)) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*k < 0 || *k > nq) { + *info = -5; + } else if (*lda < max(1,nq)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } else if (*lwork < nw && ! lquery) { + *info = -12; + } + + if (*info == 0) { + +/* Compute the workspace requirements */ + + if (*m == 0 || *n == 0) { + lwkopt = 1; + } else { +/* Computing MIN */ +/* Writing concatenation */ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = 64, i__2 = ilaenv_(&c__1, (char *)"ZUNMQL", ch__1, m, n, k, &c_n1, + (ftnlen)6, (ftnlen)2); + nb = min(i__1,i__2); + lwkopt = nw * nb + 4160; + } + work[1].r = (doublereal) lwkopt, work[1].i = 0.; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"ZUNMQL", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + return 0; + } + + nbmin = 2; + ldwork = nw; + if (nb > 1 && nb < *k) { + if (*lwork < lwkopt) { + nb = (*lwork - 4160) / ldwork; +/* Computing MAX */ +/* Writing concatenation */ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = 2, i__2 = ilaenv_(&c__2, (char *)"ZUNMQL", ch__1, m, n, k, &c_n1, ( + ftnlen)6, (ftnlen)2); + nbmin = max(i__1,i__2); + } + } + + if (nb < nbmin || nb >= *k) { + +/* Use unblocked code */ + + zunm2l_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ + c_offset], ldc, &work[1], &iinfo, (ftnlen)1, (ftnlen)1); + } else { + +/* Use blocked code */ + + iwt = nw * nb + 1; + if (left && notran || ! left && ! notran) { + i1 = 1; + i2 = *k; + i3 = nb; + } else { + i1 = (*k - 1) / nb * nb + 1; + i2 = 1; + i3 = -nb; + } + + if (left) { + ni = *n; + } else { + mi = *m; + } + + i__1 = i2; + i__2 = i3; + for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__4 = nb, i__5 = *k - i__ + 1; + ib = min(i__4,i__5); + +/* Form the triangular factor of the block reflector */ +/* H = H(i+ib-1) . . . H(i+1) H(i) */ + + i__4 = nq - *k + i__ + ib - 1; + zlarft_((char *)"Backward", (char *)"Columnwise", &i__4, &ib, &a[i__ * a_dim1 + 1] + , lda, &tau[i__], &work[iwt], &c__65, (ftnlen)8, (ftnlen) + 10); + if (left) { + +/* H or H**H is applied to C(1:m-k+i+ib-1,1:n) */ + + mi = *m - *k + i__ + ib - 1; + } else { + +/* H or H**H is applied to C(1:m,1:n-k+i+ib-1) */ + + ni = *n - *k + i__ + ib - 1; + } + +/* Apply H or H**H */ + + zlarfb_(side, trans, (char *)"Backward", (char *)"Columnwise", &mi, &ni, &ib, &a[ + i__ * a_dim1 + 1], lda, &work[iwt], &c__65, &c__[c_offset] + , ldc, &work[1], &ldwork, (ftnlen)1, (ftnlen)1, (ftnlen)8, + (ftnlen)10); +/* L10: */ + } + } + work[1].r = (doublereal) lwkopt, work[1].i = 0.; + return 0; + +/* End of ZUNMQL */ + +} /* zunmql_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/zunmqr.cpp b/lib/linalg/zunmqr.cpp new file mode 100644 index 0000000000..dc127fe1aa --- /dev/null +++ b/lib/linalg/zunmqr.cpp @@ -0,0 +1,420 @@ +/* fortran/zunmqr.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c_n1 = -1; +static integer c__2 = 2; +static integer c__65 = 65; + +/* > \brief \b ZUNMQR */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZUNMQR + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZUNMQR( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, */ +/* WORK, LWORK, INFO ) */ + +/* .. Scalar Arguments .. */ +/* CHARACTER SIDE, TRANS */ +/* INTEGER INFO, K, LDA, LDC, LWORK, M, N */ +/* .. */ +/* .. Array Arguments .. */ +/* COMPLEX*16 A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZUNMQR overwrites the general complex M-by-N matrix C with */ +/* > */ +/* > SIDE = 'L' SIDE = 'R' */ +/* > TRANS = 'N': Q * C C * Q */ +/* > TRANS = 'C': Q**H * C C * Q**H */ +/* > */ +/* > where Q is a complex unitary matrix defined as the product of k */ +/* > elementary reflectors */ +/* > */ +/* > Q = H(1) H(2) . . . H(k) */ +/* > */ +/* > as returned by ZGEQRF. Q is of order M if SIDE = 'L' and of order N */ +/* > if SIDE = 'R'. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] SIDE */ +/* > \verbatim */ +/* > SIDE is CHARACTER*1 */ +/* > = 'L': apply Q or Q**H from the Left; */ +/* > = 'R': apply Q or Q**H from the Right. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TRANS */ +/* > \verbatim */ +/* > TRANS is CHARACTER*1 */ +/* > = 'N': No transpose, apply Q; */ +/* > = 'C': Conjugate transpose, apply Q**H. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix C. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix C. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] K */ +/* > \verbatim */ +/* > K is INTEGER */ +/* > The number of elementary reflectors whose product defines */ +/* > the matrix Q. */ +/* > If SIDE = 'L', M >= K >= 0; */ +/* > if SIDE = 'R', N >= K >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension (LDA,K) */ +/* > The i-th column must contain the vector which defines the */ +/* > elementary reflector H(i), for i = 1,2,...,k, as returned by */ +/* > ZGEQRF in the first k columns of its array argument A. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. */ +/* > If SIDE = 'L', LDA >= max(1,M); */ +/* > if SIDE = 'R', LDA >= max(1,N). */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TAU */ +/* > \verbatim */ +/* > TAU is COMPLEX*16 array, dimension (K) */ +/* > TAU(i) must contain the scalar factor of the elementary */ +/* > reflector H(i), as returned by ZGEQRF. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] C */ +/* > \verbatim */ +/* > C is COMPLEX*16 array, dimension (LDC,N) */ +/* > On entry, the M-by-N matrix C. */ +/* > On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDC */ +/* > \verbatim */ +/* > LDC is INTEGER */ +/* > The leading dimension of the array C. LDC >= max(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. */ +/* > If SIDE = 'L', LWORK >= max(1,N); */ +/* > if SIDE = 'R', LWORK >= max(1,M). */ +/* > For good performance, LWORK should generally be larger. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup complex16OTHERcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int zunmqr_(char *side, char *trans, integer *m, integer *n, + integer *k, doublecomplex *a, integer *lda, doublecomplex *tau, + doublecomplex *c__, integer *ldc, doublecomplex *work, integer *lwork, + integer *info, ftnlen side_len, ftnlen trans_len) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2], i__4, + i__5; + char ch__1[2]; + + /* Builtin functions */ + /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); + + /* Local variables */ + integer i__, i1, i2, i3, ib, ic, jc, nb, mi, ni, nq, nw, iwt; + logical left; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + integer nbmin, iinfo; + extern /* Subroutine */ int zunm2r_(char *, char *, integer *, integer *, + integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, ftnlen, + ftnlen), xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int zlarfb_(char *, char *, char *, char *, + integer *, integer *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *, ftnlen, ftnlen, ftnlen, ftnlen); + logical notran; + integer ldwork; + extern /* Subroutine */ int zlarft_(char *, char *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *, ftnlen, ftnlen); + integer lwkopt; + logical lquery; + + +/* -- LAPACK computational routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = lsame_(side, (char *)"L", (ftnlen)1, (ftnlen)1); + notran = lsame_(trans, (char *)"N", (ftnlen)1, (ftnlen)1); + lquery = *lwork == -1; + +/* NQ is the order of Q and NW is the minimum dimension of WORK */ + + if (left) { + nq = *m; + nw = max(1,*n); + } else { + nq = *n; + nw = max(1,*m); + } + if (! left && ! lsame_(side, (char *)"R", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (! notran && ! lsame_(trans, (char *)"C", (ftnlen)1, (ftnlen)1)) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*k < 0 || *k > nq) { + *info = -5; + } else if (*lda < max(1,nq)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } else if (*lwork < nw && ! lquery) { + *info = -12; + } + + if (*info == 0) { + +/* Compute the workspace requirements */ + +/* Computing MIN */ +/* Writing concatenation */ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = 64, i__2 = ilaenv_(&c__1, (char *)"ZUNMQR", ch__1, m, n, k, &c_n1, ( + ftnlen)6, (ftnlen)2); + nb = min(i__1,i__2); + lwkopt = nw * nb + 4160; + work[1].r = (doublereal) lwkopt, work[1].i = 0.; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"ZUNMQR", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || *k == 0) { + work[1].r = 1., work[1].i = 0.; + return 0; + } + + nbmin = 2; + ldwork = nw; + if (nb > 1 && nb < *k) { + if (*lwork < lwkopt) { + nb = (*lwork - 4160) / ldwork; +/* Computing MAX */ +/* Writing concatenation */ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = 2, i__2 = ilaenv_(&c__2, (char *)"ZUNMQR", ch__1, m, n, k, &c_n1, ( + ftnlen)6, (ftnlen)2); + nbmin = max(i__1,i__2); + } + } + + if (nb < nbmin || nb >= *k) { + +/* Use unblocked code */ + + zunm2r_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ + c_offset], ldc, &work[1], &iinfo, (ftnlen)1, (ftnlen)1); + } else { + +/* Use blocked code */ + + iwt = nw * nb + 1; + if (left && ! notran || ! left && notran) { + i1 = 1; + i2 = *k; + i3 = nb; + } else { + i1 = (*k - 1) / nb * nb + 1; + i2 = 1; + i3 = -nb; + } + + if (left) { + ni = *n; + jc = 1; + } else { + mi = *m; + ic = 1; + } + + i__1 = i2; + i__2 = i3; + for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__4 = nb, i__5 = *k - i__ + 1; + ib = min(i__4,i__5); + +/* Form the triangular factor of the block reflector */ +/* H = H(i) H(i+1) . . . H(i+ib-1) */ + + i__4 = nq - i__ + 1; + zlarft_((char *)"Forward", (char *)"Columnwise", &i__4, &ib, &a[i__ + i__ * + a_dim1], lda, &tau[i__], &work[iwt], &c__65, (ftnlen)7, ( + ftnlen)10); + if (left) { + +/* H or H**H is applied to C(i:m,1:n) */ + + mi = *m - i__ + 1; + ic = i__; + } else { + +/* H or H**H is applied to C(1:m,i:n) */ + + ni = *n - i__ + 1; + jc = i__; + } + +/* Apply H or H**H */ + + zlarfb_(side, trans, (char *)"Forward", (char *)"Columnwise", &mi, &ni, &ib, &a[ + i__ + i__ * a_dim1], lda, &work[iwt], &c__65, &c__[ic + + jc * c_dim1], ldc, &work[1], &ldwork, (ftnlen)1, (ftnlen) + 1, (ftnlen)7, (ftnlen)10); +/* L10: */ + } + } + work[1].r = (doublereal) lwkopt, work[1].i = 0.; + return 0; + +/* End of ZUNMQR */ + +} /* zunmqr_ */ + +#ifdef __cplusplus + } +#endif diff --git a/lib/linalg/zunmtr.cpp b/lib/linalg/zunmtr.cpp new file mode 100644 index 0000000000..ab5e7eacb8 --- /dev/null +++ b/lib/linalg/zunmtr.cpp @@ -0,0 +1,397 @@ +/* fortran/zunmtr.f -- translated by f2c (version 20200916). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" + +/* Table of constant values */ + +static integer c__1 = 1; +static integer c_n1 = -1; +static integer c__2 = 2; + +/* > \brief \b ZUNMTR */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZUNMTR + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZUNMTR( SIDE, UPLO, TRANS, M, N, A, LDA, TAU, C, LDC, */ +/* WORK, LWORK, INFO ) */ + +/* .. Scalar Arguments .. */ +/* CHARACTER SIDE, TRANS, UPLO */ +/* INTEGER INFO, LDA, LDC, LWORK, M, N */ +/* .. */ +/* .. Array Arguments .. */ +/* COMPLEX*16 A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) */ +/* .. */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZUNMTR overwrites the general complex M-by-N matrix C with */ +/* > */ +/* > SIDE = 'L' SIDE = 'R' */ +/* > TRANS = 'N': Q * C C * Q */ +/* > TRANS = 'C': Q**H * C C * Q**H */ +/* > */ +/* > where Q is a complex unitary matrix of order nq, with nq = m if */ +/* > SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of */ +/* > nq-1 elementary reflectors, as returned by ZHETRD: */ +/* > */ +/* > if UPLO = 'U', Q = H(nq-1) . . . H(2) H(1); */ +/* > */ +/* > if UPLO = 'L', Q = H(1) H(2) . . . H(nq-1). */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] SIDE */ +/* > \verbatim */ +/* > SIDE is CHARACTER*1 */ +/* > = 'L': apply Q or Q**H from the Left; */ +/* > = 'R': apply Q or Q**H from the Right. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] UPLO */ +/* > \verbatim */ +/* > UPLO is CHARACTER*1 */ +/* > = 'U': Upper triangle of A contains elementary reflectors */ +/* > from ZHETRD; */ +/* > = 'L': Lower triangle of A contains elementary reflectors */ +/* > from ZHETRD. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TRANS */ +/* > \verbatim */ +/* > TRANS is CHARACTER*1 */ +/* > = 'N': No transpose, apply Q; */ +/* > = 'C': Conjugate transpose, apply Q**H. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] M */ +/* > \verbatim */ +/* > M is INTEGER */ +/* > The number of rows of the matrix C. M >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of columns of the matrix C. N >= 0. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 array, dimension */ +/* > (LDA,M) if SIDE = 'L' */ +/* > (LDA,N) if SIDE = 'R' */ +/* > The vectors which define the elementary reflectors, as */ +/* > returned by ZHETRD. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDA */ +/* > \verbatim */ +/* > LDA is INTEGER */ +/* > The leading dimension of the array A. */ +/* > LDA >= max(1,M) if SIDE = 'L'; LDA >= max(1,N) if SIDE = 'R'. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] TAU */ +/* > \verbatim */ +/* > TAU is COMPLEX*16 array, dimension */ +/* > (M-1) if SIDE = 'L' */ +/* > (N-1) if SIDE = 'R' */ +/* > TAU(i) must contain the scalar factor of the elementary */ +/* > reflector H(i), as returned by ZHETRD. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] C */ +/* > \verbatim */ +/* > C is COMPLEX*16 array, dimension (LDC,N) */ +/* > On entry, the M-by-N matrix C. */ +/* > On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LDC */ +/* > \verbatim */ +/* > LDC is INTEGER */ +/* > The leading dimension of the array C. LDC >= max(1,M). */ +/* > \endverbatim */ +/* > */ +/* > \param[out] WORK */ +/* > \verbatim */ +/* > WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) */ +/* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] LWORK */ +/* > \verbatim */ +/* > LWORK is INTEGER */ +/* > The dimension of the array WORK. */ +/* > If SIDE = 'L', LWORK >= max(1,N); */ +/* > if SIDE = 'R', LWORK >= max(1,M). */ +/* > For optimum performance LWORK >= N*NB if SIDE = 'L', and */ +/* > LWORK >=M*NB if SIDE = 'R', where NB is the optimal */ +/* > blocksize. */ +/* > */ +/* > If LWORK = -1, then a workspace query is assumed; the routine */ +/* > only calculates the optimal size of the WORK array, returns */ +/* > this value as the first entry of the WORK array, and no error */ +/* > message related to LWORK is issued by XERBLA. */ +/* > \endverbatim */ +/* > */ +/* > \param[out] INFO */ +/* > \verbatim */ +/* > INFO is INTEGER */ +/* > = 0: successful exit */ +/* > < 0: if INFO = -i, the i-th argument had an illegal value */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. */ + +/* > \ingroup complex16OTHERcomputational */ + +/* ===================================================================== */ +/* Subroutine */ int zunmtr_(char *side, char *uplo, char *trans, integer *m, + integer *n, doublecomplex *a, integer *lda, doublecomplex *tau, + doublecomplex *c__, integer *ldc, doublecomplex *work, integer *lwork, + integer *info, ftnlen side_len, ftnlen uplo_len, ftnlen trans_len) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, c_dim1, c_offset, i__1[2], i__2, i__3; + char ch__1[2]; + + /* Builtin functions */ + /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); + + /* Local variables */ + integer i1, i2, nb, mi, ni, nq, nw; + logical left; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + integer iinfo; + logical upper; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + integer lwkopt; + logical lquery; + extern /* Subroutine */ int zunmql_(char *, char *, integer *, integer *, + integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, integer *, + ftnlen, ftnlen), zunmqr_(char *, char *, integer *, integer *, + integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, integer *, + ftnlen, ftnlen); + + +/* -- LAPACK computational routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Test the input arguments */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = lsame_(side, (char *)"L", (ftnlen)1, (ftnlen)1); + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + lquery = *lwork == -1; + +/* NQ is the order of Q and NW is the minimum dimension of WORK */ + + if (left) { + nq = *m; + nw = max(1,*n); + } else { + nq = *n; + nw = max(1,*m); + } + if (! left && ! lsame_(side, (char *)"R", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (! upper && ! lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + *info = -2; + } else if (! lsame_(trans, (char *)"N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, + (char *)"C", (ftnlen)1, (ftnlen)1)) { + *info = -3; + } else if (*m < 0) { + *info = -4; + } else if (*n < 0) { + *info = -5; + } else if (*lda < max(1,nq)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } else if (*lwork < nw && ! lquery) { + *info = -12; + } + + if (*info == 0) { + if (upper) { + if (left) { +/* Writing concatenation */ + i__1[0] = 1, a__1[0] = side; + i__1[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + i__2 = *m - 1; + i__3 = *m - 1; + nb = ilaenv_(&c__1, (char *)"ZUNMQL", ch__1, &i__2, n, &i__3, &c_n1, ( + ftnlen)6, (ftnlen)2); + } else { +/* Writing concatenation */ + i__1[0] = 1, a__1[0] = side; + i__1[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + i__2 = *n - 1; + i__3 = *n - 1; + nb = ilaenv_(&c__1, (char *)"ZUNMQL", ch__1, m, &i__2, &i__3, &c_n1, ( + ftnlen)6, (ftnlen)2); + } + } else { + if (left) { +/* Writing concatenation */ + i__1[0] = 1, a__1[0] = side; + i__1[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + i__2 = *m - 1; + i__3 = *m - 1; + nb = ilaenv_(&c__1, (char *)"ZUNMQR", ch__1, &i__2, n, &i__3, &c_n1, ( + ftnlen)6, (ftnlen)2); + } else { +/* Writing concatenation */ + i__1[0] = 1, a__1[0] = side; + i__1[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + i__2 = *n - 1; + i__3 = *n - 1; + nb = ilaenv_(&c__1, (char *)"ZUNMQR", ch__1, m, &i__2, &i__3, &c_n1, ( + ftnlen)6, (ftnlen)2); + } + } + lwkopt = nw * nb; + work[1].r = (doublereal) lwkopt, work[1].i = 0.; + } + + if (*info != 0) { + i__2 = -(*info); + xerbla_((char *)"ZUNMTR", &i__2, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || nq == 1) { + work[1].r = 1., work[1].i = 0.; + return 0; + } + + if (left) { + mi = *m - 1; + ni = *n; + } else { + mi = *m; + ni = *n - 1; + } + + if (upper) { + +/* Q was determined by a call to ZHETRD with UPLO = 'U' */ + + i__2 = nq - 1; + zunmql_(side, trans, &mi, &ni, &i__2, &a[(a_dim1 << 1) + 1], lda, & + tau[1], &c__[c_offset], ldc, &work[1], lwork, &iinfo, (ftnlen) + 1, (ftnlen)1); + } else { + +/* Q was determined by a call to ZHETRD with UPLO = 'L' */ + + if (left) { + i1 = 2; + i2 = 1; + } else { + i1 = 1; + i2 = 2; + } + i__2 = nq - 1; + zunmqr_(side, trans, &mi, &ni, &i__2, &a[a_dim1 + 2], lda, &tau[1], & + c__[i1 + i2 * c_dim1], ldc, &work[1], lwork, &iinfo, (ftnlen) + 1, (ftnlen)1); + } + work[1].r = (doublereal) lwkopt, work[1].i = 0.; + return 0; + +/* End of ZUNMTR */ + +} /* zunmtr_ */ + +#ifdef __cplusplus + } +#endif