From a1a2a54f724c73cd32111efdf7f9f01429428d81 Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer
Date: Sat, 9 Nov 2024 04:04:52 -0500
Subject: [PATCH] update linalg for ML-QUIP

---
 lib/linalg/dbdsdc.cpp   | 282 +++++++++++++
 lib/linalg/dcombssq.cpp |  26 ++
 lib/linalg/dgebak.cpp   | 117 ++++++
 lib/linalg/dgebal.cpp   | 513 ++++++++++++++++++++++
 lib/linalg/dgehd2.cpp   |  57 +++
 lib/linalg/dgehrd.cpp   | 144 +++++++
 lib/linalg/dgesdd.cpp   | 788 ++++++++++++++++++++++++++++++++++
 lib/linalg/dhseqr.cpp   | 145 +++++++
 lib/linalg/dlaexc.cpp   | 214 ++++++++++
 lib/linalg/dlahqr.cpp   | 311 +++++++++++++++
 lib/linalg/dlahr2.cpp   | 121 ++++++
 lib/linalg/dlaln2.cpp   | 298 ++++++++++++++
 lib/linalg/dlanv2.cpp   | 106 +++++
 lib/linalg/dlaqr0.cpp   | 306 ++++++++++++++
 lib/linalg/dlaqr1.cpp   |  52 +++
 lib/linalg/dlaqr2.cpp   | 359 +++++++++++++++++
 lib/linalg/dlaqr3.cpp   | 375 ++++++++++++++++++
 lib/linalg/dlaqr4.cpp   | 298 ++++++++++++++
 lib/linalg/dlaqr5.cpp   | 521 ++++++++++++++++++++++
 lib/linalg/dlarfx.cpp   | 552 ++++++++++++++++++++++++
 lib/linalg/dlasd0.cpp   | 143 +++++++
 lib/linalg/dlasd1.cpp   |  96 +++++
 lib/linalg/dlasd2.cpp   | 282 +++++++++++++
 lib/linalg/dlasd3.cpp   | 218 ++++++++++
 lib/linalg/dlasy2.cpp   | 284 +++++++++++++
 lib/linalg/dlasyf.cpp   | 337 ++++++++++++++++
 lib/linalg/dorghr.cpp   |  94 +++++
 lib/linalg/dormhr.cpp   | 111 ++++++
 lib/linalg/dsyconv.cpp  | 199 ++++++++++
 lib/linalg/dsyr.cpp     | 167 ++++++++
 lib/linalg/dsytf2.cpp   | 246 ++++++++++++
 lib/linalg/dsytrf.cpp   | 123 ++++++
 lib/linalg/dsytrs.cpp   | 214 ++++++++++
 lib/linalg/dsytrs2.cpp  | 180 +++++++++
 lib/linalg/dtrevc3.cpp  | 858 ++++++++++++++++++++++++++++++++++++
 lib/linalg/dtrexc.cpp   | 217 ++++++++++
 lib/linalg/dtrtrs.cpp   |  65 +++
 lib/linalg/izamax.cpp   |  46 +++
 lib/linalg/zcop.cpp     |  43 ++
 lib/linalg/zdotu.cpp    |  55 +++
 lib/linalg/zgetrf.cpp   |  90 +++++
 lib/linalg/zgetrf2.cpp  | 117 ++++++
 lib/linalg/zgetri.cpp   | 132 +++++++
 lib/linalg/zhegs2.cpp   | 197 +++++++++
 lib/linalg/zhegst.cpp   | 195 +++++++++
 lib/linalg/zhegv.cpp    | 115 ++++++
 lib/linalg/zhemm.cpp    | 271 +++++++++++++
 lib/linalg/zher.cpp     | 187 +++++++++
 lib/linalg/zherk.cpp    | 325 ++++++++++++++++
 lib/linalg/zhetf2.cpp   | 439 ++++++++++++++++++++
 lib/linalg/zhetrf.cpp   | 123 ++++++
 lib/linalg/zhetri.cpp   | 319 ++++++++++++++++
 lib/linalg/zlahef.cpp   | 520 ++++++++++++++++++++++
 lib/linalg/zlaswp.cpp   |  79 ++++
 lib/linalg/zlasyf.cpp   | 431 ++++++++++++++++++++
 lib/linalg/zlauu2.cpp   | 100 +++++
 lib/linalg/zlauum.cpp   | 103 +++++
 lib/linalg/zpotrf.cpp   | 115 ++++++
 lib/linalg/zpotrf2.cpp  |  89 +++++
 lib/linalg/zpotri.cpp   |  40 ++
 lib/linalg/zsymv.cpp    | 263 ++++++++++++
 lib/linalg/zsyr.cpp     | 141 +++++++
 lib/linalg/zsytf2.cpp   | 356 +++++++++++++++++
 lib/linalg/zsytrf.cpp   | 124 ++++++
 lib/linalg/zsytri.cpp   | 292 ++++++++++++++
 lib/linalg/ztrsm.cpp    | 443 +++++++++++++++++++++
 lib/linalg/ztrsv.cpp    | 330 ++++++++++++++++
 lib/linalg/ztrti2.cpp   |  88 +++++
 lib/linalg/ztrtri.cpp   | 112 ++++++
 69 files changed, 15699 insertions(+)
 create mode 100644 lib/linalg/dbdsdc.cpp
 create mode 100644 lib/linalg/dcombssq.cpp
 create mode 100644 lib/linalg/dgebak.cpp
 create mode 100644 lib/linalg/dgebal.cpp
 create mode 100644 lib/linalg/dgehd2.cpp
 create mode 100644 lib/linalg/dgehrd.cpp
 create mode 100644 lib/linalg/dgesdd.cpp
 create mode 100644 lib/linalg/dhseqr.cpp
 create mode 100644 lib/linalg/dlaexc.cpp
 create mode 100644 lib/linalg/dlahqr.cpp
 create mode 100644 lib/linalg/dlahr2.cpp
 create mode 100644 lib/linalg/dlaln2.cpp
 create mode 100644 lib/linalg/dlanv2.cpp
 create mode 100644 lib/linalg/dlaqr0.cpp
 create mode 100644 lib/linalg/dlaqr1.cpp
 create mode 100644 lib/linalg/dlaqr2.cpp
 create mode 100644 lib/linalg/dlaqr3.cpp
 create mode 100644 lib/linalg/dlaqr4.cpp
 create mode 100644 lib/linalg/dlaqr5.cpp
 create mode 100644 lib/linalg/dlarfx.cpp
 create mode 100644 lib/linalg/dlasd0.cpp
 create mode 100644 lib/linalg/dlasd1.cpp
 create mode 100644 lib/linalg/dlasd2.cpp
 create mode 100644 lib/linalg/dlasd3.cpp
 create mode 100644 lib/linalg/dlasy2.cpp
 create mode 100644 lib/linalg/dlasyf.cpp
 create mode 100644 lib/linalg/dorghr.cpp
 create mode 100644 lib/linalg/dormhr.cpp
 create mode 100644 lib/linalg/dsyconv.cpp
 create mode 100644 lib/linalg/dsyr.cpp
 create mode 100644 lib/linalg/dsytf2.cpp
 create mode 100644 lib/linalg/dsytrf.cpp
 create mode 100644 lib/linalg/dsytrs.cpp
 create mode 100644 lib/linalg/dsytrs2.cpp
 create mode 100644 lib/linalg/dtrevc3.cpp
 create mode 100644 lib/linalg/dtrexc.cpp
 create mode 100644 lib/linalg/dtrtrs.cpp
 create mode 100644 lib/linalg/izamax.cpp
 create mode 100644 lib/linalg/zcop.cpp
 create mode 100644 lib/linalg/zdotu.cpp
 create mode 100644 lib/linalg/zgetrf.cpp
 create mode 100644 lib/linalg/zgetrf2.cpp
 create mode 100644 lib/linalg/zgetri.cpp
 create mode 100644 lib/linalg/zhegs2.cpp
 create mode 100644 lib/linalg/zhegst.cpp
 create mode 100644 lib/linalg/zhegv.cpp
 create mode 100644 lib/linalg/zhemm.cpp
 create mode 100644 lib/linalg/zher.cpp
 create mode 100644 lib/linalg/zherk.cpp
 create mode 100644 lib/linalg/zhetf2.cpp
 create mode 100644 lib/linalg/zhetrf.cpp
 create mode 100644 lib/linalg/zhetri.cpp
 create mode 100644 lib/linalg/zlahef.cpp
 create mode 100644 lib/linalg/zlaswp.cpp
 create mode 100644 lib/linalg/zlasyf.cpp
 create mode 100644 lib/linalg/zlauu2.cpp
 create mode 100644 lib/linalg/zlauum.cpp
 create mode 100644 lib/linalg/zpotrf.cpp
 create mode 100644 lib/linalg/zpotrf2.cpp
 create mode 100644 lib/linalg/zpotri.cpp
 create mode 100644 lib/linalg/zsymv.cpp
 create mode 100644 lib/linalg/zsyr.cpp
 create mode 100644 lib/linalg/zsytf2.cpp
 create mode 100644 lib/linalg/zsytrf.cpp
 create mode 100644 lib/linalg/zsytri.cpp
 create mode 100644 lib/linalg/ztrsm.cpp
 create mode 100644 lib/linalg/ztrsv.cpp
 create mode 100644 lib/linalg/ztrti2.cpp
 create mode 100644 lib/linalg/ztrtri.cpp

diff --git a/lib/linalg/dbdsdc.cpp b/lib/linalg/dbdsdc.cpp
new file mode 100644
index 0000000000..7f362f3be9
--- /dev/null
+++ b/lib/linalg/dbdsdc.cpp
@@ -0,0 +1,282 @@
+#ifdef __cplusplus
+extern "C" {
+#endif
+#include "lmp_f2c.h"
+static integer c__9 = 9;
+static integer c__0 = 0;
+static doublereal c_b15 = 1.;
+static integer c__1 = 1;
+static doublereal c_b29 = 0.;
+int dbdsdc_(char *uplo, char *compq, integer *n, doublereal *d__, doublereal *e, doublereal *u,
+            integer *ldu, doublereal *vt, integer *ldvt, doublereal *q, integer *iq,
+            doublereal *work, integer *iwork, integer *info, ftnlen uplo_len, ftnlen compq_len)
+{
+    integer u_dim1, u_offset, vt_dim1, vt_offset, i__1, i__2;
+    doublereal d__1;
+    double d_lmp_sign(doublereal *, doublereal *), log(doublereal);
+    integer i__, j, k;
+    doublereal p, r__;
+    integer z__, ic, ii, kk;
+    doublereal cs;
+    integer is, iu;
+    doublereal sn;
+    integer nm1;
+    doublereal eps;
+    integer ivt, difl, difr, ierr, perm, mlvl, sqre;
+    extern logical lsame_(char *, char *, ftnlen, ftnlen);
+    extern int dlasr_(char *, char *, char *, integer *, integer *, doublereal *, doublereal *,
+                      doublereal *, integer *, ftnlen, ftnlen, ftnlen),
+        dcopy_(integer *, doublereal *, integer *, doublereal *, integer *),
+        dswap_(integer *, doublereal *, integer *, doublereal *, integer *);
+ integer poles, iuplo, nsize, start; + extern int dlasd0_(integer *, integer *, doublereal *, doublereal *, doublereal *, integer *, + doublereal *, integer *, integer *, integer *, doublereal *, integer *); + extern doublereal dlamch_(char *, ftnlen); + extern int dlasda_(integer *, integer *, integer *, integer *, doublereal *, doublereal *, + doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, + doublereal *, doublereal *, integer *, integer *, integer *, integer *, + doublereal *, doublereal *, doublereal *, doublereal *, integer *, + integer *), + dlascl_(char *, integer *, integer *, doublereal *, doublereal *, integer *, integer *, + doublereal *, integer *, integer *, ftnlen), + dlasdq_(char *, integer *, integer *, integer *, integer *, integer *, doublereal *, + doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *, doublereal *, integer *, ftnlen), + dlaset_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *, + ftnlen), + dlartg_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, + ftnlen, ftnlen); + extern int xerbla_(char *, integer *, ftnlen); + integer givcol; + extern doublereal dlanst_(char *, integer *, doublereal *, doublereal *, ftnlen); + integer icompq; + doublereal orgnrm; + integer givnum, givptr, qstart, smlsiz, wstart, smlszp; + --d__; + --e; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + vt_dim1 = *ldvt; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + --q; + --iq; + --work; + --iwork; + *info = 0; + iuplo = 0; + if (lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1)) { + iuplo = 1; + } + if (lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + iuplo = 2; + } + if (lsame_(compq, (char *)"N", (ftnlen)1, (ftnlen)1)) { + icompq = 0; + } else if (lsame_(compq, (char *)"P", (ftnlen)1, (ftnlen)1)) { + icompq = 1; + } else if (lsame_(compq, (char *)"I", (ftnlen)1, (ftnlen)1)) { + icompq = 2; + } else { + icompq = -1; + } + if (iuplo == 0) { + *info = -1; + } else if (icompq < 0) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*ldu < 1 || icompq == 2 && *ldu < *n) { + *info = -7; + } else if (*ldvt < 1 || icompq == 2 && *ldvt < *n) { + *info = -9; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DBDSDC", &i__1, (ftnlen)6); + return 0; + } + if (*n == 0) { + return 0; + } + smlsiz = ilaenv_(&c__9, (char *)"DBDSDC", (char *)" ", &c__0, &c__0, &c__0, &c__0, (ftnlen)6, (ftnlen)1); + if (*n == 1) { + if (icompq == 1) { + q[1] = d_lmp_sign(&c_b15, &d__[1]); + q[smlsiz * *n + 1] = 1.; + } else if (icompq == 2) { + u[u_dim1 + 1] = d_lmp_sign(&c_b15, &d__[1]); + vt[vt_dim1 + 1] = 1.; + } + d__[1] = abs(d__[1]); + return 0; + } + nm1 = *n - 1; + wstart = 1; + qstart = 3; + if (icompq == 1) { + dcopy_(n, &d__[1], &c__1, &q[1], &c__1); + i__1 = *n - 1; + dcopy_(&i__1, &e[1], &c__1, &q[*n + 1], &c__1); + } + if (iuplo == 2) { + qstart = 5; + if (icompq == 2) { + wstart = (*n << 1) - 1; + } + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + dlartg_(&d__[i__], &e[i__], &cs, &sn, &r__); + d__[i__] = r__; + e[i__] = sn * d__[i__ + 1]; + d__[i__ + 1] = cs * d__[i__ + 1]; + if (icompq == 1) { + q[i__ + (*n << 1)] = cs; + q[i__ + *n * 3] = sn; + } else if (icompq == 2) { + work[i__] = cs; + work[nm1 + i__] = -sn; + } + } + } + if (icompq == 0) { + dlasdq_((char *)"U", &c__0, n, &c__0, &c__0, &c__0, &d__[1], &e[1], &vt[vt_offset], ldvt, + 
&u[u_offset], ldu, &u[u_offset], ldu, &work[1], info, (ftnlen)1); + goto L40; + } + if (*n <= smlsiz) { + if (icompq == 2) { + dlaset_((char *)"A", n, n, &c_b29, &c_b15, &u[u_offset], ldu, (ftnlen)1); + dlaset_((char *)"A", n, n, &c_b29, &c_b15, &vt[vt_offset], ldvt, (ftnlen)1); + dlasdq_((char *)"U", &c__0, n, n, n, &c__0, &d__[1], &e[1], &vt[vt_offset], ldvt, &u[u_offset], + ldu, &u[u_offset], ldu, &work[wstart], info, (ftnlen)1); + } else if (icompq == 1) { + iu = 1; + ivt = iu + *n; + dlaset_((char *)"A", n, n, &c_b29, &c_b15, &q[iu + (qstart - 1) * *n], n, (ftnlen)1); + dlaset_((char *)"A", n, n, &c_b29, &c_b15, &q[ivt + (qstart - 1) * *n], n, (ftnlen)1); + dlasdq_((char *)"U", &c__0, n, n, n, &c__0, &d__[1], &e[1], &q[ivt + (qstart - 1) * *n], n, + &q[iu + (qstart - 1) * *n], n, &q[iu + (qstart - 1) * *n], n, &work[wstart], + info, (ftnlen)1); + } + goto L40; + } + if (icompq == 2) { + dlaset_((char *)"A", n, n, &c_b29, &c_b15, &u[u_offset], ldu, (ftnlen)1); + dlaset_((char *)"A", n, n, &c_b29, &c_b15, &vt[vt_offset], ldvt, (ftnlen)1); + } + orgnrm = dlanst_((char *)"M", n, &d__[1], &e[1], (ftnlen)1); + if (orgnrm == 0.) { + return 0; + } + dlascl_((char *)"G", &c__0, &c__0, &orgnrm, &c_b15, n, &c__1, &d__[1], n, &ierr, (ftnlen)1); + dlascl_((char *)"G", &c__0, &c__0, &orgnrm, &c_b15, &nm1, &c__1, &e[1], &nm1, &ierr, (ftnlen)1); + eps = dlamch_((char *)"Epsilon", (ftnlen)7) * .9; + mlvl = (integer)(log((doublereal)(*n) / (doublereal)(smlsiz + 1)) / log(2.)) + 1; + smlszp = smlsiz + 1; + if (icompq == 1) { + iu = 1; + ivt = smlsiz + 1; + difl = ivt + smlszp; + difr = difl + mlvl; + z__ = difr + (mlvl << 1); + ic = z__ + mlvl; + is = ic + 1; + poles = is + 1; + givnum = poles + (mlvl << 1); + k = 1; + givptr = 2; + perm = 3; + givcol = perm + mlvl; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if ((d__1 = d__[i__], abs(d__1)) < eps) { + d__[i__] = d_lmp_sign(&eps, &d__[i__]); + } + } + start = 1; + sqre = 0; + i__1 = nm1; + for (i__ = 1; i__ <= i__1; ++i__) { + if ((d__1 = e[i__], abs(d__1)) < eps || i__ == nm1) { + if (i__ < nm1) { + nsize = i__ - start + 1; + } else if ((d__1 = e[i__], abs(d__1)) >= eps) { + nsize = *n - start + 1; + } else { + nsize = i__ - start + 1; + if (icompq == 2) { + u[*n + *n * u_dim1] = d_lmp_sign(&c_b15, &d__[*n]); + vt[*n + *n * vt_dim1] = 1.; + } else if (icompq == 1) { + q[*n + (qstart - 1) * *n] = d_lmp_sign(&c_b15, &d__[*n]); + q[*n + (smlsiz + qstart - 1) * *n] = 1.; + } + d__[*n] = (d__1 = d__[*n], abs(d__1)); + } + if (icompq == 2) { + dlasd0_(&nsize, &sqre, &d__[start], &e[start], &u[start + start * u_dim1], ldu, + &vt[start + start * vt_dim1], ldvt, &smlsiz, &iwork[1], &work[wstart], + info); + } else { + dlasda_(&icompq, &smlsiz, &nsize, &sqre, &d__[start], &e[start], + &q[start + (iu + qstart - 2) * *n], n, &q[start + (ivt + qstart - 2) * *n], + &iq[start + k * *n], &q[start + (difl + qstart - 2) * *n], + &q[start + (difr + qstart - 2) * *n], &q[start + (z__ + qstart - 2) * *n], + &q[start + (poles + qstart - 2) * *n], &iq[start + givptr * *n], + &iq[start + givcol * *n], n, &iq[start + perm * *n], + &q[start + (givnum + qstart - 2) * *n], &q[start + (ic + qstart - 2) * *n], + &q[start + (is + qstart - 2) * *n], &work[wstart], &iwork[1], info); + } + if (*info != 0) { + return 0; + } + start = i__ + 1; + } + } + dlascl_((char *)"G", &c__0, &c__0, &c_b15, &orgnrm, n, &c__1, &d__[1], n, &ierr, (ftnlen)1); +L40: + i__1 = *n; + for (ii = 2; ii <= i__1; ++ii) { + i__ = ii - 1; + kk = i__; + p = d__[i__]; + i__2 = *n; + for (j = ii; j <= 
i__2; ++j) { + if (d__[j] > p) { + kk = j; + p = d__[j]; + } + } + if (kk != i__) { + d__[kk] = d__[i__]; + d__[i__] = p; + if (icompq == 1) { + iq[i__] = kk; + } else if (icompq == 2) { + dswap_(n, &u[i__ * u_dim1 + 1], &c__1, &u[kk * u_dim1 + 1], &c__1); + dswap_(n, &vt[i__ + vt_dim1], ldvt, &vt[kk + vt_dim1], ldvt); + } + } else if (icompq == 1) { + iq[i__] = i__; + } + } + if (icompq == 1) { + if (iuplo == 1) { + iq[*n] = 1; + } else { + iq[*n] = 0; + } + } + if (iuplo == 2 && icompq == 2) { + dlasr_((char *)"L", (char *)"V", (char *)"B", n, n, &work[1], &work[*n], &u[u_offset], ldu, (ftnlen)1, (ftnlen)1, + (ftnlen)1); + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dcombssq.cpp b/lib/linalg/dcombssq.cpp new file mode 100644 index 0000000000..179be8ad9e --- /dev/null +++ b/lib/linalg/dcombssq.cpp @@ -0,0 +1,26 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +int dcombssq_(doublereal *v1, doublereal *v2) +{ + doublereal d__1; + --v2; + --v1; + if (v1[1] >= v2[1]) { + if (v1[1] != 0.) { + d__1 = v2[1] / v1[1]; + v1[2] += d__1 * d__1 * v2[2]; + } else { + v1[2] += v2[2]; + } + } else { + d__1 = v1[1] / v2[1]; + v1[2] = v2[2] + d__1 * d__1 * v1[2]; + v1[1] = v2[1]; + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dgebak.cpp b/lib/linalg/dgebak.cpp new file mode 100644 index 0000000000..ba0db07641 --- /dev/null +++ b/lib/linalg/dgebak.cpp @@ -0,0 +1,117 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +int dgebak_(char *job, char *side, integer *n, integer *ilo, integer *ihi, doublereal *scale, + integer *m, doublereal *v, integer *ldv, integer *info, ftnlen job_len, ftnlen side_len) +{ + integer v_dim1, v_offset, i__1; + integer i__, k; + doublereal s; + integer ii; + extern int dscal_(integer *, doublereal *, doublereal *, integer *); + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern int dswap_(integer *, doublereal *, integer *, doublereal *, integer *); + logical leftv; + extern int xerbla_(char *, integer *, ftnlen); + logical rightv; + --scale; + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + rightv = lsame_(side, (char *)"R", (ftnlen)1, (ftnlen)1); + leftv = lsame_(side, (char *)"L", (ftnlen)1, (ftnlen)1); + *info = 0; + if (!lsame_(job, (char *)"N", (ftnlen)1, (ftnlen)1) && !lsame_(job, (char *)"P", (ftnlen)1, (ftnlen)1) && + !lsame_(job, (char *)"S", (ftnlen)1, (ftnlen)1) && !lsame_(job, (char *)"B", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (!rightv && !leftv) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*ilo < 1 || *ilo > max(1, *n)) { + *info = -4; + } else if (*ihi < min(*ilo, *n) || *ihi > *n) { + *info = -5; + } else if (*m < 0) { + *info = -7; + } else if (*ldv < max(1, *n)) { + *info = -9; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DGEBAK", &i__1, (ftnlen)6); + return 0; + } + if (*n == 0) { + return 0; + } + if (*m == 0) { + return 0; + } + if (lsame_(job, (char *)"N", (ftnlen)1, (ftnlen)1)) { + return 0; + } + if (*ilo == *ihi) { + goto L30; + } + if (lsame_(job, (char *)"S", (ftnlen)1, (ftnlen)1) || lsame_(job, (char *)"B", (ftnlen)1, (ftnlen)1)) { + if (rightv) { + i__1 = *ihi; + for (i__ = *ilo; i__ <= i__1; ++i__) { + s = scale[i__]; + dscal_(m, &s, &v[i__ + v_dim1], ldv); + } + } + if (leftv) { + i__1 = *ihi; + for (i__ = *ilo; i__ <= i__1; ++i__) { + s = 1. 
/ scale[i__]; + dscal_(m, &s, &v[i__ + v_dim1], ldv); + } + } + } +L30: + if (lsame_(job, (char *)"P", (ftnlen)1, (ftnlen)1) || lsame_(job, (char *)"B", (ftnlen)1, (ftnlen)1)) { + if (rightv) { + i__1 = *n; + for (ii = 1; ii <= i__1; ++ii) { + i__ = ii; + if (i__ >= *ilo && i__ <= *ihi) { + goto L40; + } + if (i__ < *ilo) { + i__ = *ilo - ii; + } + k = (integer)scale[i__]; + if (k == i__) { + goto L40; + } + dswap_(m, &v[i__ + v_dim1], ldv, &v[k + v_dim1], ldv); + L40:; + } + } + if (leftv) { + i__1 = *n; + for (ii = 1; ii <= i__1; ++ii) { + i__ = ii; + if (i__ >= *ilo && i__ <= *ihi) { + goto L50; + } + if (i__ < *ilo) { + i__ = *ilo - ii; + } + k = (integer)scale[i__]; + if (k == i__) { + goto L50; + } + dswap_(m, &v[i__ + v_dim1], ldv, &v[k + v_dim1], ldv); + L50:; + } + } + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dgebal.cpp b/lib/linalg/dgebal.cpp new file mode 100644 index 0000000000..c5301edcdd --- /dev/null +++ b/lib/linalg/dgebal.cpp @@ -0,0 +1,513 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static integer c__1 = 1; +static integer c__0 = 0; +static integer c_n1 = -1; +int dgebal_(char *job, integer *n, doublereal *a, integer *lda, integer *ilo, integer *ihi, + doublereal *scale, integer *info, ftnlen job_len) +{ + integer a_dim1, a_offset, i__1, i__2; + doublereal d__1, d__2; + doublereal c__, f, g; + integer i__, j, k, l, m; + doublereal r__, s, ca, ra; + integer ica, ira, iexc; + extern doublereal dnrm2_(integer *, doublereal *, integer *); + extern int dscal_(integer *, doublereal *, doublereal *, integer *); + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern int dswap_(integer *, doublereal *, integer *, doublereal *, integer *); + doublereal sfmin1, sfmin2, sfmax1, sfmax2; + extern doublereal dlamch_(char *, ftnlen); + extern integer idamax_(integer *, doublereal *, integer *); + extern logical disnan_(doublereal *); + extern int xerbla_(char *, integer *, ftnlen); + logical noconv; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --scale; + *info = 0; + if (!lsame_(job, (char *)"N", (ftnlen)1, (ftnlen)1) && !lsame_(job, (char *)"P", (ftnlen)1, (ftnlen)1) && + !lsame_(job, (char *)"S", (ftnlen)1, (ftnlen)1) && !lsame_(job, (char *)"B", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1, *n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DGEBAL", &i__1, (ftnlen)6); + return 0; + } + k = 1; + l = *n; + if (*n == 0) { + goto L210; + } + if (lsame_(job, (char *)"N", (ftnlen)1, (ftnlen)1)) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + scale[i__] = 1.; + } + goto L210; + } + if (lsame_(job, (char *)"S", (ftnlen)1, (ftnlen)1)) { + goto L120; + } + goto L50; +L20: + scale[m] = (doublereal)j; + if (j == m) { + goto L30; + } + dswap_(&l, &a[j * a_dim1 + 1], &c__1, &a[m * a_dim1 + 1], &c__1); + i__1 = *n - k + 1; + dswap_(&i__1, &a[j + k * a_dim1], lda, &a[m + k * a_dim1], lda); +L30: + switch (iexc) { + case 1: + goto L40; + case 2: + goto L80; + } +L40: + if (l == 1) { + goto L210; + } + --l; +L50: + for (j = l; j >= 1; --j) { + i__1 = l; + for (i__ = 1; i__ <= i__1; ++i__) { + if (i__ == j) { + goto L60; + } + if (a[j + i__ * a_dim1] != 0.) { + goto L70; + } + L60:; + } + m = l; + iexc = 1; + goto L20; + L70:; + } + goto L90; +L80: + ++k; +L90: + i__1 = l; + for (j = k; j <= i__1; ++j) { + i__2 = l; + for (i__ = k; i__ <= i__2; ++i__) { + if (i__ == j) { + goto L100; + } + if (a[i__ + j * a_dim1] != 0.) 
{ + goto L110; + } + L100:; + } + m = k; + iexc = 2; + goto L20; + L110:; + } +L120: + i__1 = l; + for (i__ = k; i__ <= i__1; ++i__) { + scale[i__] = 1.; + } + if (lsame_(job, (char *)"P", (ftnlen)1, (ftnlen)1)) { + goto L210; + } + sfmin1 = dlamch_((char *)"S", (ftnlen)1) / dlamch_((char *)"P", (ftnlen)1); + sfmax1 = 1. / sfmin1; + sfmin2 = sfmin1 * 2.; + sfmax2 = 1. / sfmin2; +L140: + noconv = FALSE_; + i__1 = l; + for (i__ = k; i__ <= i__1; ++i__) { + i__2 = l - k + 1; + c__ = dnrm2_(&i__2, &a[k + i__ * a_dim1], &c__1); + i__2 = l - k + 1; + r__ = dnrm2_(&i__2, &a[i__ + k * a_dim1], lda); + ica = idamax_(&l, &a[i__ * a_dim1 + 1], &c__1); + ca = (d__1 = a[ica + i__ * a_dim1], abs(d__1)); + i__2 = *n - k + 1; + ira = idamax_(&i__2, &a[i__ + k * a_dim1], lda); + ra = (d__1 = a[i__ + (ira + k - 1) * a_dim1], abs(d__1)); + if (c__ == 0. || r__ == 0.) { + goto L200; + } + g = r__ / 2.; + f = 1.; + s = c__ + r__; + L160: + d__1 = max(f, c__); + d__2 = min(r__, g); + if (c__ >= g || max(d__1, ca) >= sfmax2 || min(d__2, ra) <= sfmin2) { + goto L170; + } + d__1 = c__ + f + ca + r__ + g + ra; + if (disnan_(&d__1)) { + *info = -3; + i__2 = -(*info); + xerbla_((char *)"DGEBAL", &i__2, (ftnlen)6); + return 0; + } + f *= 2.; + c__ *= 2.; + ca *= 2.; + r__ /= 2.; + g /= 2.; + ra /= 2.; + goto L160; + L170: + g = c__ / 2.; + L180: + d__1 = min(f, c__), d__1 = min(d__1, g); + if (g < r__ || max(r__, ra) >= sfmax2 || min(d__1, ca) <= sfmin2) { + goto L190; + } + f /= 2.; + c__ /= 2.; + g /= 2.; + ca /= 2.; + r__ *= 2.; + ra *= 2.; + goto L180; + L190: + if (c__ + r__ >= s * .95) { + goto L200; + } + if (f < 1. && scale[i__] < 1.) { + if (f * scale[i__] <= sfmin1) { + goto L200; + } + } + if (f > 1. && scale[i__] > 1.) { + if (scale[i__] >= sfmax1 / f) { + goto L200; + } + } + g = 1. 
/ f; + scale[i__] *= f; + noconv = TRUE_; + i__2 = *n - k + 1; + dscal_(&i__2, &g, &a[i__ + k * a_dim1], lda); + dscal_(&l, &f, &a[i__ * a_dim1 + 1], &c__1); + L200:; + } + if (noconv) { + goto L140; + } +L210: + *ilo = k; + *ihi = l; + return 0; +} +int dgeev_(char *jobvl, char *jobvr, integer *n, doublereal *a, integer *lda, doublereal *wr, + doublereal *wi, doublereal *vl, integer *ldvl, doublereal *vr, integer *ldvr, + doublereal *work, integer *lwork, integer *info, ftnlen jobvl_len, ftnlen jobvr_len) +{ + integer a_dim1, a_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1, i__2, i__3; + doublereal d__1, d__2; + double sqrt(doublereal); + integer i__, k; + doublereal r__, cs, sn; + integer ihi; + doublereal scl; + integer ilo; + doublereal dum[1], eps; + integer lwork_trevc__, ibal; + char side[1]; + doublereal anrm; + integer ierr, itau; + extern int drot_(integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, + doublereal *); + integer iwrk, nout; + extern doublereal dnrm2_(integer *, doublereal *, integer *); + extern int dscal_(integer *, doublereal *, doublereal *, integer *); + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern doublereal dlapy2_(doublereal *, doublereal *); + extern int dlabad_(doublereal *, doublereal *), + dgebak_(char *, char *, integer *, integer *, integer *, doublereal *, integer *, + doublereal *, integer *, integer *, ftnlen, ftnlen), + dgebal_(char *, integer *, doublereal *, integer *, integer *, integer *, doublereal *, + integer *, ftnlen); + logical scalea; + extern doublereal dlamch_(char *, ftnlen); + doublereal cscale; + extern doublereal dlange_(char *, integer *, integer *, doublereal *, integer *, doublereal *, + ftnlen); + extern int dgehrd_(integer *, integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, integer *), + dlascl_(char *, integer *, integer *, doublereal *, doublereal *, integer *, integer *, + doublereal *, integer *, integer *, ftnlen); + extern integer idamax_(integer *, doublereal *, integer *); + extern int dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, + integer *, ftnlen), + dlartg_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *), + xerbla_(char *, integer *, ftnlen); + logical select[1]; + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, + ftnlen, ftnlen); + doublereal bignum; + extern int dorghr_(integer *, integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, integer *), + dhseqr_(char *, char *, integer *, integer *, integer *, doublereal *, integer *, + doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *, + integer *, ftnlen, ftnlen); + integer minwrk, maxwrk; + logical wantvl; + doublereal smlnum; + integer hswork; + logical lquery, wantvr; + extern int dtrevc3_(char *, char *, logical *, integer *, doublereal *, integer *, doublereal *, + integer *, doublereal *, integer *, integer *, integer *, doublereal *, + integer *, integer *, ftnlen, ftnlen); + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --wr; + --wi; + vl_dim1 = *ldvl; + vl_offset = 1 + vl_dim1; + vl -= vl_offset; + vr_dim1 = *ldvr; + vr_offset = 1 + vr_dim1; + vr -= vr_offset; + --work; + *info = 0; + lquery = *lwork == -1; + wantvl = lsame_(jobvl, (char *)"V", (ftnlen)1, (ftnlen)1); + wantvr = lsame_(jobvr, (char *)"V", (ftnlen)1, (ftnlen)1); + if (!wantvl && !lsame_(jobvl, (char *)"N", (ftnlen)1, (ftnlen)1)) { + *info 
= -1; + } else if (!wantvr && !lsame_(jobvr, (char *)"N", (ftnlen)1, (ftnlen)1)) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1, *n)) { + *info = -5; + } else if (*ldvl < 1 || wantvl && *ldvl < *n) { + *info = -9; + } else if (*ldvr < 1 || wantvr && *ldvr < *n) { + *info = -11; + } + if (*info == 0) { + if (*n == 0) { + minwrk = 1; + maxwrk = 1; + } else { + maxwrk = (*n << 1) + + *n * ilaenv_(&c__1, (char *)"DGEHRD", (char *)" ", n, &c__1, n, &c__0, (ftnlen)6, (ftnlen)1); + if (wantvl) { + minwrk = *n << 2; + i__1 = maxwrk, + i__2 = (*n << 1) + (*n - 1) * ilaenv_(&c__1, (char *)"DORGHR", (char *)" ", n, &c__1, n, &c_n1, + (ftnlen)6, (ftnlen)1); + maxwrk = max(i__1, i__2); + dhseqr_((char *)"S", (char *)"V", n, &c__1, n, &a[a_offset], lda, &wr[1], &wi[1], &vl[vl_offset], + ldvl, &work[1], &c_n1, info, (ftnlen)1, (ftnlen)1); + hswork = (integer)work[1]; + i__1 = maxwrk, i__2 = *n + 1, i__1 = max(i__1, i__2), i__2 = *n + hswork; + maxwrk = max(i__1, i__2); + dtrevc3_((char *)"L", (char *)"B", select, n, &a[a_offset], lda, &vl[vl_offset], ldvl, + &vr[vr_offset], ldvr, n, &nout, &work[1], &c_n1, &ierr, (ftnlen)1, + (ftnlen)1); + lwork_trevc__ = (integer)work[1]; + i__1 = maxwrk, i__2 = *n + lwork_trevc__; + maxwrk = max(i__1, i__2); + i__1 = maxwrk, i__2 = *n << 2; + maxwrk = max(i__1, i__2); + } else if (wantvr) { + minwrk = *n << 2; + i__1 = maxwrk, + i__2 = (*n << 1) + (*n - 1) * ilaenv_(&c__1, (char *)"DORGHR", (char *)" ", n, &c__1, n, &c_n1, + (ftnlen)6, (ftnlen)1); + maxwrk = max(i__1, i__2); + dhseqr_((char *)"S", (char *)"V", n, &c__1, n, &a[a_offset], lda, &wr[1], &wi[1], &vr[vr_offset], + ldvr, &work[1], &c_n1, info, (ftnlen)1, (ftnlen)1); + hswork = (integer)work[1]; + i__1 = maxwrk, i__2 = *n + 1, i__1 = max(i__1, i__2), i__2 = *n + hswork; + maxwrk = max(i__1, i__2); + dtrevc3_((char *)"R", (char *)"B", select, n, &a[a_offset], lda, &vl[vl_offset], ldvl, + &vr[vr_offset], ldvr, n, &nout, &work[1], &c_n1, &ierr, (ftnlen)1, + (ftnlen)1); + lwork_trevc__ = (integer)work[1]; + i__1 = maxwrk, i__2 = *n + lwork_trevc__; + maxwrk = max(i__1, i__2); + i__1 = maxwrk, i__2 = *n << 2; + maxwrk = max(i__1, i__2); + } else { + minwrk = *n * 3; + dhseqr_((char *)"E", (char *)"N", n, &c__1, n, &a[a_offset], lda, &wr[1], &wi[1], &vr[vr_offset], + ldvr, &work[1], &c_n1, info, (ftnlen)1, (ftnlen)1); + hswork = (integer)work[1]; + i__1 = maxwrk, i__2 = *n + 1, i__1 = max(i__1, i__2), i__2 = *n + hswork; + maxwrk = max(i__1, i__2); + } + maxwrk = max(maxwrk, minwrk); + } + work[1] = (doublereal)maxwrk; + if (*lwork < minwrk && !lquery) { + *info = -13; + } + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DGEEV ", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + if (*n == 0) { + return 0; + } + eps = dlamch_((char *)"P", (ftnlen)1); + smlnum = dlamch_((char *)"S", (ftnlen)1); + bignum = 1. / smlnum; + dlabad_(&smlnum, &bignum); + smlnum = sqrt(smlnum) / eps; + bignum = 1. / smlnum; + anrm = dlange_((char *)"M", n, n, &a[a_offset], lda, dum, (ftnlen)1); + scalea = FALSE_; + if (anrm > 0. 
&& anrm < smlnum) { + scalea = TRUE_; + cscale = smlnum; + } else if (anrm > bignum) { + scalea = TRUE_; + cscale = bignum; + } + if (scalea) { + dlascl_((char *)"G", &c__0, &c__0, &anrm, &cscale, n, n, &a[a_offset], lda, &ierr, (ftnlen)1); + } + ibal = 1; + dgebal_((char *)"B", n, &a[a_offset], lda, &ilo, &ihi, &work[ibal], &ierr, (ftnlen)1); + itau = ibal + *n; + iwrk = itau + *n; + i__1 = *lwork - iwrk + 1; + dgehrd_(n, &ilo, &ihi, &a[a_offset], lda, &work[itau], &work[iwrk], &i__1, &ierr); + if (wantvl) { + *(unsigned char *)side = 'L'; + dlacpy_((char *)"L", n, n, &a[a_offset], lda, &vl[vl_offset], ldvl, (ftnlen)1); + i__1 = *lwork - iwrk + 1; + dorghr_(n, &ilo, &ihi, &vl[vl_offset], ldvl, &work[itau], &work[iwrk], &i__1, &ierr); + iwrk = itau; + i__1 = *lwork - iwrk + 1; + dhseqr_((char *)"S", (char *)"V", n, &ilo, &ihi, &a[a_offset], lda, &wr[1], &wi[1], &vl[vl_offset], ldvl, + &work[iwrk], &i__1, info, (ftnlen)1, (ftnlen)1); + if (wantvr) { + *(unsigned char *)side = 'B'; + dlacpy_((char *)"F", n, n, &vl[vl_offset], ldvl, &vr[vr_offset], ldvr, (ftnlen)1); + } + } else if (wantvr) { + *(unsigned char *)side = 'R'; + dlacpy_((char *)"L", n, n, &a[a_offset], lda, &vr[vr_offset], ldvr, (ftnlen)1); + i__1 = *lwork - iwrk + 1; + dorghr_(n, &ilo, &ihi, &vr[vr_offset], ldvr, &work[itau], &work[iwrk], &i__1, &ierr); + iwrk = itau; + i__1 = *lwork - iwrk + 1; + dhseqr_((char *)"S", (char *)"V", n, &ilo, &ihi, &a[a_offset], lda, &wr[1], &wi[1], &vr[vr_offset], ldvr, + &work[iwrk], &i__1, info, (ftnlen)1, (ftnlen)1); + } else { + iwrk = itau; + i__1 = *lwork - iwrk + 1; + dhseqr_((char *)"E", (char *)"N", n, &ilo, &ihi, &a[a_offset], lda, &wr[1], &wi[1], &vr[vr_offset], ldvr, + &work[iwrk], &i__1, info, (ftnlen)1, (ftnlen)1); + } + if (*info != 0) { + goto L50; + } + if (wantvl || wantvr) { + i__1 = *lwork - iwrk + 1; + dtrevc3_(side, (char *)"B", select, n, &a[a_offset], lda, &vl[vl_offset], ldvl, &vr[vr_offset], + ldvr, n, &nout, &work[iwrk], &i__1, &ierr, (ftnlen)1, (ftnlen)1); + } + if (wantvl) { + dgebak_((char *)"B", (char *)"L", n, &ilo, &ihi, &work[ibal], n, &vl[vl_offset], ldvl, &ierr, (ftnlen)1, + (ftnlen)1); + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if (wi[i__] == 0.) { + scl = 1. / dnrm2_(n, &vl[i__ * vl_dim1 + 1], &c__1); + dscal_(n, &scl, &vl[i__ * vl_dim1 + 1], &c__1); + } else if (wi[i__] > 0.) { + d__1 = dnrm2_(n, &vl[i__ * vl_dim1 + 1], &c__1); + d__2 = dnrm2_(n, &vl[(i__ + 1) * vl_dim1 + 1], &c__1); + scl = 1. / dlapy2_(&d__1, &d__2); + dscal_(n, &scl, &vl[i__ * vl_dim1 + 1], &c__1); + dscal_(n, &scl, &vl[(i__ + 1) * vl_dim1 + 1], &c__1); + i__2 = *n; + for (k = 1; k <= i__2; ++k) { + d__1 = vl[k + i__ * vl_dim1]; + d__2 = vl[k + (i__ + 1) * vl_dim1]; + work[iwrk + k - 1] = d__1 * d__1 + d__2 * d__2; + } + k = idamax_(n, &work[iwrk], &c__1); + dlartg_(&vl[k + i__ * vl_dim1], &vl[k + (i__ + 1) * vl_dim1], &cs, &sn, &r__); + drot_(n, &vl[i__ * vl_dim1 + 1], &c__1, &vl[(i__ + 1) * vl_dim1 + 1], &c__1, &cs, + &sn); + vl[k + (i__ + 1) * vl_dim1] = 0.; + } + } + } + if (wantvr) { + dgebak_((char *)"B", (char *)"R", n, &ilo, &ihi, &work[ibal], n, &vr[vr_offset], ldvr, &ierr, (ftnlen)1, + (ftnlen)1); + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if (wi[i__] == 0.) { + scl = 1. / dnrm2_(n, &vr[i__ * vr_dim1 + 1], &c__1); + dscal_(n, &scl, &vr[i__ * vr_dim1 + 1], &c__1); + } else if (wi[i__] > 0.) { + d__1 = dnrm2_(n, &vr[i__ * vr_dim1 + 1], &c__1); + d__2 = dnrm2_(n, &vr[(i__ + 1) * vr_dim1 + 1], &c__1); + scl = 1. 
/ dlapy2_(&d__1, &d__2); + dscal_(n, &scl, &vr[i__ * vr_dim1 + 1], &c__1); + dscal_(n, &scl, &vr[(i__ + 1) * vr_dim1 + 1], &c__1); + i__2 = *n; + for (k = 1; k <= i__2; ++k) { + d__1 = vr[k + i__ * vr_dim1]; + d__2 = vr[k + (i__ + 1) * vr_dim1]; + work[iwrk + k - 1] = d__1 * d__1 + d__2 * d__2; + } + k = idamax_(n, &work[iwrk], &c__1); + dlartg_(&vr[k + i__ * vr_dim1], &vr[k + (i__ + 1) * vr_dim1], &cs, &sn, &r__); + drot_(n, &vr[i__ * vr_dim1 + 1], &c__1, &vr[(i__ + 1) * vr_dim1 + 1], &c__1, &cs, + &sn); + vr[k + (i__ + 1) * vr_dim1] = 0.; + } + } + } +L50: + if (scalea) { + i__1 = *n - *info; + i__3 = *n - *info; + i__2 = max(i__3, 1); + dlascl_((char *)"G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wr[*info + 1], &i__2, &ierr, + (ftnlen)1); + i__1 = *n - *info; + i__3 = *n - *info; + i__2 = max(i__3, 1); + dlascl_((char *)"G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wi[*info + 1], &i__2, &ierr, + (ftnlen)1); + if (*info > 0) { + i__1 = ilo - 1; + dlascl_((char *)"G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wr[1], n, &ierr, (ftnlen)1); + i__1 = ilo - 1; + dlascl_((char *)"G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wi[1], n, &ierr, (ftnlen)1); + } + } + work[1] = (doublereal)maxwrk; + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dgehd2.cpp b/lib/linalg/dgehd2.cpp new file mode 100644 index 0000000000..9eaa873bd3 --- /dev/null +++ b/lib/linalg/dgehd2.cpp @@ -0,0 +1,57 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static integer c__1 = 1; +int dgehd2_(integer *n, integer *ilo, integer *ihi, doublereal *a, integer *lda, doublereal *tau, + doublereal *work, integer *info) +{ + integer a_dim1, a_offset, i__1, i__2, i__3; + integer i__; + doublereal aii; + extern int dlarf_(char *, integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, doublereal *, ftnlen), + dlarfg_(integer *, doublereal *, doublereal *, integer *, doublereal *), + xerbla_(char *, integer *, ftnlen); + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + *info = 0; + if (*n < 0) { + *info = -1; + } else if (*ilo < 1 || *ilo > max(1, *n)) { + *info = -2; + } else if (*ihi < min(*ilo, *n) || *ihi > *n) { + *info = -3; + } else if (*lda < max(1, *n)) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DGEHD2", &i__1, (ftnlen)6); + return 0; + } + i__1 = *ihi - 1; + for (i__ = *ilo; i__ <= i__1; ++i__) { + i__2 = *ihi - i__; + i__3 = i__ + 2; + dlarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3, *n) + i__ * a_dim1], &c__1, + &tau[i__]); + aii = a[i__ + 1 + i__ * a_dim1]; + a[i__ + 1 + i__ * a_dim1] = 1.; + i__2 = *ihi - i__; + dlarf_((char *)"Right", ihi, &i__2, &a[i__ + 1 + i__ * a_dim1], &c__1, &tau[i__], + &a[(i__ + 1) * a_dim1 + 1], lda, &work[1], (ftnlen)5); + i__2 = *ihi - i__; + i__3 = *n - i__; + dlarf_((char *)"Left", &i__2, &i__3, &a[i__ + 1 + i__ * a_dim1], &c__1, &tau[i__], + &a[i__ + 1 + (i__ + 1) * a_dim1], lda, &work[1], (ftnlen)4); + a[i__ + 1 + i__ * a_dim1] = aii; + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dgehrd.cpp b/lib/linalg/dgehrd.cpp new file mode 100644 index 0000000000..eb152b90ed --- /dev/null +++ b/lib/linalg/dgehrd.cpp @@ -0,0 +1,144 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static integer c__1 = 1; +static integer c_n1 = -1; +static integer c__3 = 3; +static integer c__2 = 2; +static integer c__65 = 65; +static doublereal c_b25 = -1.; +static doublereal c_b26 = 1.; +int dgehrd_(integer *n, 
integer *ilo, integer *ihi, doublereal *a, integer *lda, doublereal *tau, + doublereal *work, integer *lwork, integer *info) +{ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + integer i__, j, ib; + doublereal ei; + integer nb, nh, nx, iwt; + extern int dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, + ftnlen, ftnlen); + integer nbmin, iinfo; + extern int dtrmm_(char *, char *, char *, char *, integer *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, ftnlen, ftnlen, ftnlen, + ftnlen), + daxpy_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *), + dgehd2_(integer *, integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *), + dlahr2_(integer *, integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *), + dlarfb_(char *, char *, char *, char *, integer *, integer *, integer *, doublereal *, + integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *, ftnlen, ftnlen, ftnlen, ftnlen), + xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, + ftnlen, ftnlen); + integer ldwork, lwkopt; + logical lquery; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + *info = 0; + lquery = *lwork == -1; + if (*n < 0) { + *info = -1; + } else if (*ilo < 1 || *ilo > max(1, *n)) { + *info = -2; + } else if (*ihi < min(*ilo, *n) || *ihi > *n) { + *info = -3; + } else if (*lda < max(1, *n)) { + *info = -5; + } else if (*lwork < max(1, *n) && !lquery) { + *info = -8; + } + if (*info == 0) { + i__1 = 64, i__2 = ilaenv_(&c__1, (char *)"DGEHRD", (char *)" ", n, ilo, ihi, &c_n1, (ftnlen)6, (ftnlen)1); + nb = min(i__1, i__2); + lwkopt = *n * nb + 4160; + work[1] = (doublereal)lwkopt; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DGEHRD", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + i__1 = *ilo - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + tau[i__] = 0.; + } + i__1 = *n - 1; + for (i__ = max(1, *ihi); i__ <= i__1; ++i__) { + tau[i__] = 0.; + } + nh = *ihi - *ilo + 1; + if (nh <= 1) { + work[1] = 1.; + return 0; + } + i__1 = 64, i__2 = ilaenv_(&c__1, (char *)"DGEHRD", (char *)" ", n, ilo, ihi, &c_n1, (ftnlen)6, (ftnlen)1); + nb = min(i__1, i__2); + nbmin = 2; + if (nb > 1 && nb < nh) { + i__1 = nb, i__2 = ilaenv_(&c__3, (char *)"DGEHRD", (char *)" ", n, ilo, ihi, &c_n1, (ftnlen)6, (ftnlen)1); + nx = max(i__1, i__2); + if (nx < nh) { + if (*lwork < *n * nb + 4160) { + i__1 = 2, + i__2 = ilaenv_(&c__2, (char *)"DGEHRD", (char *)" ", n, ilo, ihi, &c_n1, (ftnlen)6, (ftnlen)1); + nbmin = max(i__1, i__2); + if (*lwork >= *n * nbmin + 4160) { + nb = (*lwork - 4160) / *n; + } else { + nb = 1; + } + } + } + } + ldwork = *n; + if (nb < nbmin || nb >= nh) { + i__ = *ilo; + } else { + iwt = *n * nb + 1; + i__1 = *ihi - 1 - nx; + i__2 = nb; + for (i__ = *ilo; i__2 < 0 ? 
i__ >= i__1 : i__ <= i__1; i__ += i__2) { + i__3 = nb, i__4 = *ihi - i__; + ib = min(i__3, i__4); + dlahr2_(ihi, &i__, &ib, &a[i__ * a_dim1 + 1], lda, &tau[i__], &work[iwt], &c__65, + &work[1], &ldwork); + ei = a[i__ + ib + (i__ + ib - 1) * a_dim1]; + a[i__ + ib + (i__ + ib - 1) * a_dim1] = 1.; + i__3 = *ihi - i__ - ib + 1; + dgemm_((char *)"No transpose", (char *)"Transpose", ihi, &i__3, &ib, &c_b25, &work[1], &ldwork, + &a[i__ + ib + i__ * a_dim1], lda, &c_b26, &a[(i__ + ib) * a_dim1 + 1], lda, + (ftnlen)12, (ftnlen)9); + a[i__ + ib + (i__ + ib - 1) * a_dim1] = ei; + i__3 = ib - 1; + dtrmm_((char *)"Right", (char *)"Lower", (char *)"Transpose", (char *)"Unit", &i__, &i__3, &c_b26, + &a[i__ + 1 + i__ * a_dim1], lda, &work[1], &ldwork, (ftnlen)5, (ftnlen)5, + (ftnlen)9, (ftnlen)4); + i__3 = ib - 2; + for (j = 0; j <= i__3; ++j) { + daxpy_(&i__, &c_b25, &work[ldwork * j + 1], &c__1, &a[(i__ + j + 1) * a_dim1 + 1], + &c__1); + } + i__3 = *ihi - i__; + i__4 = *n - i__ - ib + 1; + dlarfb_((char *)"Left", (char *)"Transpose", (char *)"Forward", (char *)"Columnwise", &i__3, &i__4, &ib, + &a[i__ + 1 + i__ * a_dim1], lda, &work[iwt], &c__65, + &a[i__ + 1 + (i__ + ib) * a_dim1], lda, &work[1], &ldwork, (ftnlen)4, (ftnlen)9, + (ftnlen)7, (ftnlen)10); + } + } + dgehd2_(n, &i__, ihi, &a[a_offset], lda, &tau[1], &work[1], &iinfo); + work[1] = (doublereal)lwkopt; + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dgesdd.cpp b/lib/linalg/dgesdd.cpp new file mode 100644 index 0000000000..59dbee7210 --- /dev/null +++ b/lib/linalg/dgesdd.cpp @@ -0,0 +1,788 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static integer c_n1 = -1; +static integer c__0 = 0; +static doublereal c_b63 = 0.; +static integer c__1 = 1; +static doublereal c_b84 = 1.; +int dgesdd_(char *jobz, integer *m, integer *n, doublereal *a, integer *lda, doublereal *s, + doublereal *u, integer *ldu, doublereal *vt, integer *ldvt, doublereal *work, + integer *lwork, integer *iwork, integer *info, ftnlen jobz_len) +{ + integer a_dim1, a_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1, i__2, i__3; + double sqrt(doublereal); + integer lwork_dorglq_mn__, lwork_dorglq_nn__, lwork_dorgqr_mm__, lwork_dorgqr_mn__, i__, ie, + lwork_dorgbr_p_mm__, il, lwork_dorgbr_q_nn__, ir, iu, blk; + doublereal dum[1], eps; + integer ivt, iscl; + doublereal anrm; + integer idum[1], ierr, itau, lwork_dormbr_qln_mm__, lwork_dormbr_qln_mn__, + lwork_dormbr_qln_nn__, lwork_dormbr_prt_mm__, lwork_dormbr_prt_mn__, lwork_dormbr_prt_nn__; + extern int dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, + ftnlen, ftnlen); + extern logical lsame_(char *, char *, ftnlen, ftnlen); + integer chunk, minmn, wrkbl, itaup, itauq, mnthr; + logical wntqa; + integer nwork; + logical wntqn, wntqo, wntqs; + extern int dbdsdc_(char *, char *, integer *, doublereal *, doublereal *, doublereal *, + integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *, integer *, ftnlen, ftnlen), + dgebrd_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, integer *, integer *); + extern doublereal dlamch_(char *, ftnlen), + dlange_(char *, integer *, integer *, doublereal *, integer *, doublereal *, ftnlen); + integer bdspac; + extern int dgelqf_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, integer *), + dlascl_(char *, 
integer *, integer *, doublereal *, doublereal *, integer *, integer *, + doublereal *, integer *, integer *, ftnlen), + dgeqrf_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, integer *), + dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, + ftnlen), + dlaset_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *, + ftnlen), + xerbla_(char *, integer *, ftnlen), + dorgbr_(char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, integer *, ftnlen); + doublereal bignum; + extern int dormbr_(char *, char *, char *, integer *, integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, + integer *, ftnlen, ftnlen, ftnlen), + dorglq_(integer *, integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, integer *), + dorgqr_(integer *, integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, integer *); + integer ldwrkl, ldwrkr, minwrk, ldwrku, maxwrk, ldwkvt; + doublereal smlnum; + logical wntqas, lquery; + integer lwork_dgebrd_mm__, lwork_dgebrd_mn__, lwork_dgebrd_nn__, lwork_dgelqf_mn__, + lwork_dgeqrf_mn__; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --s; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + vt_dim1 = *ldvt; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + --work; + --iwork; + *info = 0; + minmn = min(*m, *n); + wntqa = lsame_(jobz, (char *)"A", (ftnlen)1, (ftnlen)1); + wntqs = lsame_(jobz, (char *)"S", (ftnlen)1, (ftnlen)1); + wntqas = wntqa || wntqs; + wntqo = lsame_(jobz, (char *)"O", (ftnlen)1, (ftnlen)1); + wntqn = lsame_(jobz, (char *)"N", (ftnlen)1, (ftnlen)1); + lquery = *lwork == -1; + if (!(wntqa || wntqs || wntqo || wntqn)) { + *info = -1; + } else if (*m < 0) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1, *m)) { + *info = -5; + } else if (*ldu < 1 || wntqas && *ldu < *m || wntqo && *m < *n && *ldu < *m) { + *info = -8; + } else if (*ldvt < 1 || wntqa && *ldvt < *n || wntqs && *ldvt < minmn || + wntqo && *m >= *n && *ldvt < *n) { + *info = -10; + } + if (*info == 0) { + minwrk = 1; + maxwrk = 1; + bdspac = 0; + mnthr = (integer)(minmn * 11. 
/ 6.); + if (*m >= *n && minmn > 0) { + if (wntqn) { + bdspac = *n * 7; + } else { + bdspac = *n * 3 * *n + (*n << 2); + } + dgebrd_(m, n, dum, m, dum, dum, dum, dum, dum, &c_n1, &ierr); + lwork_dgebrd_mn__ = (integer)dum[0]; + dgebrd_(n, n, dum, n, dum, dum, dum, dum, dum, &c_n1, &ierr); + lwork_dgebrd_nn__ = (integer)dum[0]; + dgeqrf_(m, n, dum, m, dum, dum, &c_n1, &ierr); + lwork_dgeqrf_mn__ = (integer)dum[0]; + dorgbr_((char *)"Q", n, n, n, dum, n, dum, dum, &c_n1, &ierr, (ftnlen)1); + lwork_dorgbr_q_nn__ = (integer)dum[0]; + dorgqr_(m, m, n, dum, m, dum, dum, &c_n1, &ierr); + lwork_dorgqr_mm__ = (integer)dum[0]; + dorgqr_(m, n, n, dum, m, dum, dum, &c_n1, &ierr); + lwork_dorgqr_mn__ = (integer)dum[0]; + dormbr_((char *)"P", (char *)"R", (char *)"T", n, n, n, dum, n, dum, dum, n, dum, &c_n1, &ierr, (ftnlen)1, + (ftnlen)1, (ftnlen)1); + lwork_dormbr_prt_nn__ = (integer)dum[0]; + dormbr_((char *)"Q", (char *)"L", (char *)"N", n, n, n, dum, n, dum, dum, n, dum, &c_n1, &ierr, (ftnlen)1, + (ftnlen)1, (ftnlen)1); + lwork_dormbr_qln_nn__ = (integer)dum[0]; + dormbr_((char *)"Q", (char *)"L", (char *)"N", m, n, n, dum, m, dum, dum, m, dum, &c_n1, &ierr, (ftnlen)1, + (ftnlen)1, (ftnlen)1); + lwork_dormbr_qln_mn__ = (integer)dum[0]; + dormbr_((char *)"Q", (char *)"L", (char *)"N", m, m, n, dum, m, dum, dum, m, dum, &c_n1, &ierr, (ftnlen)1, + (ftnlen)1, (ftnlen)1); + lwork_dormbr_qln_mm__ = (integer)dum[0]; + if (*m >= mnthr) { + if (wntqn) { + wrkbl = *n + lwork_dgeqrf_mn__; + i__1 = wrkbl, i__2 = *n * 3 + lwork_dgebrd_nn__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = bdspac + *n; + maxwrk = max(i__1, i__2); + minwrk = bdspac + *n; + } else if (wntqo) { + wrkbl = *n + lwork_dgeqrf_mn__; + i__1 = wrkbl, i__2 = *n + lwork_dorgqr_mn__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *n * 3 + lwork_dgebrd_nn__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *n * 3 + lwork_dormbr_qln_nn__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *n * 3 + lwork_dormbr_prt_nn__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *n * 3 + bdspac; + wrkbl = max(i__1, i__2); + maxwrk = wrkbl + (*n << 1) * *n; + minwrk = bdspac + (*n << 1) * *n + *n * 3; + } else if (wntqs) { + wrkbl = *n + lwork_dgeqrf_mn__; + i__1 = wrkbl, i__2 = *n + lwork_dorgqr_mn__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *n * 3 + lwork_dgebrd_nn__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *n * 3 + lwork_dormbr_qln_nn__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *n * 3 + lwork_dormbr_prt_nn__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *n * 3 + bdspac; + wrkbl = max(i__1, i__2); + maxwrk = wrkbl + *n * *n; + minwrk = bdspac + *n * *n + *n * 3; + } else if (wntqa) { + wrkbl = *n + lwork_dgeqrf_mn__; + i__1 = wrkbl, i__2 = *n + lwork_dorgqr_mm__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *n * 3 + lwork_dgebrd_nn__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *n * 3 + lwork_dormbr_qln_nn__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *n * 3 + lwork_dormbr_prt_nn__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *n * 3 + bdspac; + wrkbl = max(i__1, i__2); + maxwrk = wrkbl + *n * *n; + i__1 = *n * 3 + bdspac, i__2 = *n + *m; + minwrk = *n * *n + max(i__1, i__2); + } + } else { + wrkbl = *n * 3 + lwork_dgebrd_mn__; + if (wntqn) { + i__1 = wrkbl, i__2 = *n * 3 + bdspac; + maxwrk = max(i__1, i__2); + minwrk = *n * 3 + max(*m, bdspac); + } else if (wntqo) { + i__1 = wrkbl, i__2 = *n * 3 + lwork_dormbr_prt_nn__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *n * 3 + 
lwork_dormbr_qln_mn__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *n * 3 + bdspac; + wrkbl = max(i__1, i__2); + maxwrk = wrkbl + *m * *n; + i__1 = *m, i__2 = *n * *n + bdspac; + minwrk = *n * 3 + max(i__1, i__2); + } else if (wntqs) { + i__1 = wrkbl, i__2 = *n * 3 + lwork_dormbr_qln_mn__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *n * 3 + lwork_dormbr_prt_nn__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *n * 3 + bdspac; + maxwrk = max(i__1, i__2); + minwrk = *n * 3 + max(*m, bdspac); + } else if (wntqa) { + i__1 = wrkbl, i__2 = *n * 3 + lwork_dormbr_qln_mm__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *n * 3 + lwork_dormbr_prt_nn__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *n * 3 + bdspac; + maxwrk = max(i__1, i__2); + minwrk = *n * 3 + max(*m, bdspac); + } + } + } else if (minmn > 0) { + if (wntqn) { + bdspac = *m * 7; + } else { + bdspac = *m * 3 * *m + (*m << 2); + } + dgebrd_(m, n, dum, m, dum, dum, dum, dum, dum, &c_n1, &ierr); + lwork_dgebrd_mn__ = (integer)dum[0]; + dgebrd_(m, m, &a[a_offset], m, &s[1], dum, dum, dum, dum, &c_n1, &ierr); + lwork_dgebrd_mm__ = (integer)dum[0]; + dgelqf_(m, n, &a[a_offset], m, dum, dum, &c_n1, &ierr); + lwork_dgelqf_mn__ = (integer)dum[0]; + dorglq_(n, n, m, dum, n, dum, dum, &c_n1, &ierr); + lwork_dorglq_nn__ = (integer)dum[0]; + dorglq_(m, n, m, &a[a_offset], m, dum, dum, &c_n1, &ierr); + lwork_dorglq_mn__ = (integer)dum[0]; + dorgbr_((char *)"P", m, m, m, &a[a_offset], n, dum, dum, &c_n1, &ierr, (ftnlen)1); + lwork_dorgbr_p_mm__ = (integer)dum[0]; + dormbr_((char *)"P", (char *)"R", (char *)"T", m, m, m, dum, m, dum, dum, m, dum, &c_n1, &ierr, (ftnlen)1, + (ftnlen)1, (ftnlen)1); + lwork_dormbr_prt_mm__ = (integer)dum[0]; + dormbr_((char *)"P", (char *)"R", (char *)"T", m, n, m, dum, m, dum, dum, m, dum, &c_n1, &ierr, (ftnlen)1, + (ftnlen)1, (ftnlen)1); + lwork_dormbr_prt_mn__ = (integer)dum[0]; + dormbr_((char *)"P", (char *)"R", (char *)"T", n, n, m, dum, n, dum, dum, n, dum, &c_n1, &ierr, (ftnlen)1, + (ftnlen)1, (ftnlen)1); + lwork_dormbr_prt_nn__ = (integer)dum[0]; + dormbr_((char *)"Q", (char *)"L", (char *)"N", m, m, m, dum, m, dum, dum, m, dum, &c_n1, &ierr, (ftnlen)1, + (ftnlen)1, (ftnlen)1); + lwork_dormbr_qln_mm__ = (integer)dum[0]; + if (*n >= mnthr) { + if (wntqn) { + wrkbl = *m + lwork_dgelqf_mn__; + i__1 = wrkbl, i__2 = *m * 3 + lwork_dgebrd_mm__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = bdspac + *m; + maxwrk = max(i__1, i__2); + minwrk = bdspac + *m; + } else if (wntqo) { + wrkbl = *m + lwork_dgelqf_mn__; + i__1 = wrkbl, i__2 = *m + lwork_dorglq_mn__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *m * 3 + lwork_dgebrd_mm__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *m * 3 + lwork_dormbr_qln_mm__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *m * 3 + lwork_dormbr_prt_mm__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *m * 3 + bdspac; + wrkbl = max(i__1, i__2); + maxwrk = wrkbl + (*m << 1) * *m; + minwrk = bdspac + (*m << 1) * *m + *m * 3; + } else if (wntqs) { + wrkbl = *m + lwork_dgelqf_mn__; + i__1 = wrkbl, i__2 = *m + lwork_dorglq_mn__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *m * 3 + lwork_dgebrd_mm__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *m * 3 + lwork_dormbr_qln_mm__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *m * 3 + lwork_dormbr_prt_mm__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *m * 3 + bdspac; + wrkbl = max(i__1, i__2); + maxwrk = wrkbl + *m * *m; + minwrk = bdspac + *m * *m + *m * 3; + } else if (wntqa) { + wrkbl = *m 
+ lwork_dgelqf_mn__; + i__1 = wrkbl, i__2 = *m + lwork_dorglq_nn__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *m * 3 + lwork_dgebrd_mm__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *m * 3 + lwork_dormbr_qln_mm__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *m * 3 + lwork_dormbr_prt_mm__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *m * 3 + bdspac; + wrkbl = max(i__1, i__2); + maxwrk = wrkbl + *m * *m; + i__1 = *m * 3 + bdspac, i__2 = *m + *n; + minwrk = *m * *m + max(i__1, i__2); + } + } else { + wrkbl = *m * 3 + lwork_dgebrd_mn__; + if (wntqn) { + i__1 = wrkbl, i__2 = *m * 3 + bdspac; + maxwrk = max(i__1, i__2); + minwrk = *m * 3 + max(*n, bdspac); + } else if (wntqo) { + i__1 = wrkbl, i__2 = *m * 3 + lwork_dormbr_qln_mm__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *m * 3 + lwork_dormbr_prt_mn__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *m * 3 + bdspac; + wrkbl = max(i__1, i__2); + maxwrk = wrkbl + *m * *n; + i__1 = *n, i__2 = *m * *m + bdspac; + minwrk = *m * 3 + max(i__1, i__2); + } else if (wntqs) { + i__1 = wrkbl, i__2 = *m * 3 + lwork_dormbr_qln_mm__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *m * 3 + lwork_dormbr_prt_mn__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *m * 3 + bdspac; + maxwrk = max(i__1, i__2); + minwrk = *m * 3 + max(*n, bdspac); + } else if (wntqa) { + i__1 = wrkbl, i__2 = *m * 3 + lwork_dormbr_qln_mm__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *m * 3 + lwork_dormbr_prt_nn__; + wrkbl = max(i__1, i__2); + i__1 = wrkbl, i__2 = *m * 3 + bdspac; + maxwrk = max(i__1, i__2); + minwrk = *m * 3 + max(*n, bdspac); + } + } + } + maxwrk = max(maxwrk, minwrk); + work[1] = (doublereal)maxwrk; + if (*lwork < minwrk && !lquery) { + *info = -12; + } + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DGESDD", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + if (*m == 0 || *n == 0) { + return 0; + } + eps = dlamch_((char *)"P", (ftnlen)1); + smlnum = sqrt(dlamch_((char *)"S", (ftnlen)1)) / eps; + bignum = 1. / smlnum; + anrm = dlange_((char *)"M", m, n, &a[a_offset], lda, dum, (ftnlen)1); + iscl = 0; + if (anrm > 0. 
&& anrm < smlnum) { + iscl = 1; + dlascl_((char *)"G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, &ierr, (ftnlen)1); + } else if (anrm > bignum) { + iscl = 1; + dlascl_((char *)"G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, &ierr, (ftnlen)1); + } + if (*m >= *n) { + if (*m >= mnthr) { + if (wntqn) { + itau = 1; + nwork = itau + *n; + i__1 = *lwork - nwork + 1; + dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__1, &ierr); + i__1 = *n - 1; + i__2 = *n - 1; + dlaset_((char *)"L", &i__1, &i__2, &c_b63, &c_b63, &a[a_dim1 + 2], lda, (ftnlen)1); + ie = 1; + itauq = ie + *n; + itaup = itauq + *n; + nwork = itaup + *n; + i__1 = *lwork - nwork + 1; + dgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], &work[itaup], + &work[nwork], &i__1, &ierr); + nwork = ie + *n; + dbdsdc_((char *)"U", (char *)"N", n, &s[1], &work[ie], dum, &c__1, dum, &c__1, dum, idum, + &work[nwork], &iwork[1], info, (ftnlen)1, (ftnlen)1); + } else if (wntqo) { + ir = 1; + if (*lwork >= *lda * *n + *n * *n + *n * 3 + bdspac) { + ldwrkr = *lda; + } else { + ldwrkr = (*lwork - *n * *n - *n * 3 - bdspac) / *n; + } + itau = ir + ldwrkr * *n; + nwork = itau + *n; + i__1 = *lwork - nwork + 1; + dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__1, &ierr); + dlacpy_((char *)"U", n, n, &a[a_offset], lda, &work[ir], &ldwrkr, (ftnlen)1); + i__1 = *n - 1; + i__2 = *n - 1; + dlaset_((char *)"L", &i__1, &i__2, &c_b63, &c_b63, &work[ir + 1], &ldwrkr, (ftnlen)1); + i__1 = *lwork - nwork + 1; + dorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__1, &ierr); + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + nwork = itaup + *n; + i__1 = *lwork - nwork + 1; + dgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &work[ie], &work[itauq], &work[itaup], + &work[nwork], &i__1, &ierr); + iu = nwork; + nwork = iu + *n * *n; + dbdsdc_((char *)"U", (char *)"I", n, &s[1], &work[ie], &work[iu], n, &vt[vt_offset], ldvt, dum, + idum, &work[nwork], &iwork[1], info, (ftnlen)1, (ftnlen)1); + i__1 = *lwork - nwork + 1; + dormbr_((char *)"Q", (char *)"L", (char *)"N", n, n, n, &work[ir], &ldwrkr, &work[itauq], &work[iu], n, + &work[nwork], &i__1, &ierr, (ftnlen)1, (ftnlen)1, (ftnlen)1); + i__1 = *lwork - nwork + 1; + dormbr_((char *)"P", (char *)"R", (char *)"T", n, n, n, &work[ir], &ldwrkr, &work[itaup], &vt[vt_offset], + ldvt, &work[nwork], &i__1, &ierr, (ftnlen)1, (ftnlen)1, (ftnlen)1); + i__1 = *m; + i__2 = ldwrkr; + for (i__ = 1; i__2 < 0 ? 
i__ >= i__1 : i__ <= i__1; i__ += i__2) { + i__3 = *m - i__ + 1; + chunk = min(i__3, ldwrkr); + dgemm_((char *)"N", (char *)"N", &chunk, n, n, &c_b84, &a[i__ + a_dim1], lda, &work[iu], n, + &c_b63, &work[ir], &ldwrkr, (ftnlen)1, (ftnlen)1); + dlacpy_((char *)"F", &chunk, n, &work[ir], &ldwrkr, &a[i__ + a_dim1], lda, (ftnlen)1); + } + } else if (wntqs) { + ir = 1; + ldwrkr = *n; + itau = ir + ldwrkr * *n; + nwork = itau + *n; + i__2 = *lwork - nwork + 1; + dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__2, &ierr); + dlacpy_((char *)"U", n, n, &a[a_offset], lda, &work[ir], &ldwrkr, (ftnlen)1); + i__2 = *n - 1; + i__1 = *n - 1; + dlaset_((char *)"L", &i__2, &i__1, &c_b63, &c_b63, &work[ir + 1], &ldwrkr, (ftnlen)1); + i__2 = *lwork - nwork + 1; + dorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__2, &ierr); + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + nwork = itaup + *n; + i__2 = *lwork - nwork + 1; + dgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &work[ie], &work[itauq], &work[itaup], + &work[nwork], &i__2, &ierr); + dbdsdc_((char *)"U", (char *)"I", n, &s[1], &work[ie], &u[u_offset], ldu, &vt[vt_offset], ldvt, dum, + idum, &work[nwork], &iwork[1], info, (ftnlen)1, (ftnlen)1); + i__2 = *lwork - nwork + 1; + dormbr_((char *)"Q", (char *)"L", (char *)"N", n, n, n, &work[ir], &ldwrkr, &work[itauq], &u[u_offset], ldu, + &work[nwork], &i__2, &ierr, (ftnlen)1, (ftnlen)1, (ftnlen)1); + i__2 = *lwork - nwork + 1; + dormbr_((char *)"P", (char *)"R", (char *)"T", n, n, n, &work[ir], &ldwrkr, &work[itaup], &vt[vt_offset], + ldvt, &work[nwork], &i__2, &ierr, (ftnlen)1, (ftnlen)1, (ftnlen)1); + dlacpy_((char *)"F", n, n, &u[u_offset], ldu, &work[ir], &ldwrkr, (ftnlen)1); + dgemm_((char *)"N", (char *)"N", m, n, n, &c_b84, &a[a_offset], lda, &work[ir], &ldwrkr, &c_b63, + &u[u_offset], ldu, (ftnlen)1, (ftnlen)1); + } else if (wntqa) { + iu = 1; + ldwrku = *n; + itau = iu + ldwrku * *n; + nwork = itau + *n; + i__2 = *lwork - nwork + 1; + dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__2, &ierr); + dlacpy_((char *)"L", m, n, &a[a_offset], lda, &u[u_offset], ldu, (ftnlen)1); + i__2 = *lwork - nwork + 1; + dorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], &work[nwork], &i__2, &ierr); + i__2 = *n - 1; + i__1 = *n - 1; + dlaset_((char *)"L", &i__2, &i__1, &c_b63, &c_b63, &a[a_dim1 + 2], lda, (ftnlen)1); + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + nwork = itaup + *n; + i__2 = *lwork - nwork + 1; + dgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], &work[itaup], + &work[nwork], &i__2, &ierr); + dbdsdc_((char *)"U", (char *)"I", n, &s[1], &work[ie], &work[iu], n, &vt[vt_offset], ldvt, dum, + idum, &work[nwork], &iwork[1], info, (ftnlen)1, (ftnlen)1); + i__2 = *lwork - nwork + 1; + dormbr_((char *)"Q", (char *)"L", (char *)"N", n, n, n, &a[a_offset], lda, &work[itauq], &work[iu], &ldwrku, + &work[nwork], &i__2, &ierr, (ftnlen)1, (ftnlen)1, (ftnlen)1); + i__2 = *lwork - nwork + 1; + dormbr_((char *)"P", (char *)"R", (char *)"T", n, n, n, &a[a_offset], lda, &work[itaup], &vt[vt_offset], + ldvt, &work[nwork], &i__2, &ierr, (ftnlen)1, (ftnlen)1, (ftnlen)1); + dgemm_((char *)"N", (char *)"N", m, n, n, &c_b84, &u[u_offset], ldu, &work[iu], &ldwrku, &c_b63, + &a[a_offset], lda, (ftnlen)1, (ftnlen)1); + dlacpy_((char *)"F", m, n, &a[a_offset], lda, &u[u_offset], ldu, (ftnlen)1); + } + } else { + ie = 1; + itauq = ie + *n; + itaup = itauq + *n; + nwork = itaup + *n; + i__2 = *lwork - nwork + 1; + dgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], 
&work[itauq], &work[itaup], + &work[nwork], &i__2, &ierr); + if (wntqn) { + dbdsdc_((char *)"U", (char *)"N", n, &s[1], &work[ie], dum, &c__1, dum, &c__1, dum, idum, + &work[nwork], &iwork[1], info, (ftnlen)1, (ftnlen)1); + } else if (wntqo) { + iu = nwork; + if (*lwork >= *m * *n + *n * 3 + bdspac) { + ldwrku = *m; + nwork = iu + ldwrku * *n; + dlaset_((char *)"F", m, n, &c_b63, &c_b63, &work[iu], &ldwrku, (ftnlen)1); + ir = -1; + } else { + ldwrku = *n; + nwork = iu + ldwrku * *n; + ir = nwork; + ldwrkr = (*lwork - *n * *n - *n * 3) / *n; + } + nwork = iu + ldwrku * *n; + dbdsdc_((char *)"U", (char *)"I", n, &s[1], &work[ie], &work[iu], &ldwrku, &vt[vt_offset], ldvt, + dum, idum, &work[nwork], &iwork[1], info, (ftnlen)1, (ftnlen)1); + i__2 = *lwork - nwork + 1; + dormbr_((char *)"P", (char *)"R", (char *)"T", n, n, n, &a[a_offset], lda, &work[itaup], &vt[vt_offset], + ldvt, &work[nwork], &i__2, &ierr, (ftnlen)1, (ftnlen)1, (ftnlen)1); + if (*lwork >= *m * *n + *n * 3 + bdspac) { + i__2 = *lwork - nwork + 1; + dormbr_((char *)"Q", (char *)"L", (char *)"N", m, n, n, &a[a_offset], lda, &work[itauq], &work[iu], + &ldwrku, &work[nwork], &i__2, &ierr, (ftnlen)1, (ftnlen)1, (ftnlen)1); + dlacpy_((char *)"F", m, n, &work[iu], &ldwrku, &a[a_offset], lda, (ftnlen)1); + } else { + i__2 = *lwork - nwork + 1; + dorgbr_((char *)"Q", m, n, n, &a[a_offset], lda, &work[itauq], &work[nwork], &i__2, + &ierr, (ftnlen)1); + i__2 = *m; + i__1 = ldwrkr; + for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) { + i__3 = *m - i__ + 1; + chunk = min(i__3, ldwrkr); + dgemm_((char *)"N", (char *)"N", &chunk, n, n, &c_b84, &a[i__ + a_dim1], lda, &work[iu], + &ldwrku, &c_b63, &work[ir], &ldwrkr, (ftnlen)1, (ftnlen)1); + dlacpy_((char *)"F", &chunk, n, &work[ir], &ldwrkr, &a[i__ + a_dim1], lda, + (ftnlen)1); + } + } + } else if (wntqs) { + dlaset_((char *)"F", m, n, &c_b63, &c_b63, &u[u_offset], ldu, (ftnlen)1); + dbdsdc_((char *)"U", (char *)"I", n, &s[1], &work[ie], &u[u_offset], ldu, &vt[vt_offset], ldvt, dum, + idum, &work[nwork], &iwork[1], info, (ftnlen)1, (ftnlen)1); + i__1 = *lwork - nwork + 1; + dormbr_((char *)"Q", (char *)"L", (char *)"N", m, n, n, &a[a_offset], lda, &work[itauq], &u[u_offset], ldu, + &work[nwork], &i__1, &ierr, (ftnlen)1, (ftnlen)1, (ftnlen)1); + i__1 = *lwork - nwork + 1; + dormbr_((char *)"P", (char *)"R", (char *)"T", n, n, n, &a[a_offset], lda, &work[itaup], &vt[vt_offset], + ldvt, &work[nwork], &i__1, &ierr, (ftnlen)1, (ftnlen)1, (ftnlen)1); + } else if (wntqa) { + dlaset_((char *)"F", m, m, &c_b63, &c_b63, &u[u_offset], ldu, (ftnlen)1); + dbdsdc_((char *)"U", (char *)"I", n, &s[1], &work[ie], &u[u_offset], ldu, &vt[vt_offset], ldvt, dum, + idum, &work[nwork], &iwork[1], info, (ftnlen)1, (ftnlen)1); + if (*m > *n) { + i__1 = *m - *n; + i__2 = *m - *n; + dlaset_((char *)"F", &i__1, &i__2, &c_b63, &c_b84, &u[*n + 1 + (*n + 1) * u_dim1], ldu, + (ftnlen)1); + } + i__1 = *lwork - nwork + 1; + dormbr_((char *)"Q", (char *)"L", (char *)"N", m, m, n, &a[a_offset], lda, &work[itauq], &u[u_offset], ldu, + &work[nwork], &i__1, &ierr, (ftnlen)1, (ftnlen)1, (ftnlen)1); + i__1 = *lwork - nwork + 1; + dormbr_((char *)"P", (char *)"R", (char *)"T", n, n, m, &a[a_offset], lda, &work[itaup], &vt[vt_offset], + ldvt, &work[nwork], &i__1, &ierr, (ftnlen)1, (ftnlen)1, (ftnlen)1); + } + } + } else { + if (*n >= mnthr) { + if (wntqn) { + itau = 1; + nwork = itau + *m; + i__1 = *lwork - nwork + 1; + dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__1, &ierr); + i__1 = *m - 1; + i__2 = 
*m - 1; + dlaset_((char *)"U", &i__1, &i__2, &c_b63, &c_b63, &a[(a_dim1 << 1) + 1], lda, (ftnlen)1); + ie = 1; + itauq = ie + *m; + itaup = itauq + *m; + nwork = itaup + *m; + i__1 = *lwork - nwork + 1; + dgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], &work[itaup], + &work[nwork], &i__1, &ierr); + nwork = ie + *m; + dbdsdc_((char *)"U", (char *)"N", m, &s[1], &work[ie], dum, &c__1, dum, &c__1, dum, idum, + &work[nwork], &iwork[1], info, (ftnlen)1, (ftnlen)1); + } else if (wntqo) { + ivt = 1; + il = ivt + *m * *m; + if (*lwork >= *m * *n + *m * *m + *m * 3 + bdspac) { + ldwrkl = *m; + chunk = *n; + } else { + ldwrkl = *m; + chunk = (*lwork - *m * *m) / *m; + } + itau = il + ldwrkl * *m; + nwork = itau + *m; + i__1 = *lwork - nwork + 1; + dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__1, &ierr); + dlacpy_((char *)"L", m, m, &a[a_offset], lda, &work[il], &ldwrkl, (ftnlen)1); + i__1 = *m - 1; + i__2 = *m - 1; + dlaset_((char *)"U", &i__1, &i__2, &c_b63, &c_b63, &work[il + ldwrkl], &ldwrkl, (ftnlen)1); + i__1 = *lwork - nwork + 1; + dorglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[nwork], &i__1, &ierr); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; + nwork = itaup + *m; + i__1 = *lwork - nwork + 1; + dgebrd_(m, m, &work[il], &ldwrkl, &s[1], &work[ie], &work[itauq], &work[itaup], + &work[nwork], &i__1, &ierr); + dbdsdc_((char *)"U", (char *)"I", m, &s[1], &work[ie], &u[u_offset], ldu, &work[ivt], m, dum, idum, + &work[nwork], &iwork[1], info, (ftnlen)1, (ftnlen)1); + i__1 = *lwork - nwork + 1; + dormbr_((char *)"Q", (char *)"L", (char *)"N", m, m, m, &work[il], &ldwrkl, &work[itauq], &u[u_offset], ldu, + &work[nwork], &i__1, &ierr, (ftnlen)1, (ftnlen)1, (ftnlen)1); + i__1 = *lwork - nwork + 1; + dormbr_((char *)"P", (char *)"R", (char *)"T", m, m, m, &work[il], &ldwrkl, &work[itaup], &work[ivt], m, + &work[nwork], &i__1, &ierr, (ftnlen)1, (ftnlen)1, (ftnlen)1); + i__1 = *n; + i__2 = chunk; + for (i__ = 1; i__2 < 0 ? 
i__ >= i__1 : i__ <= i__1; i__ += i__2) { + i__3 = *n - i__ + 1; + blk = min(i__3, chunk); + dgemm_((char *)"N", (char *)"N", m, &blk, m, &c_b84, &work[ivt], m, &a[i__ * a_dim1 + 1], lda, + &c_b63, &work[il], &ldwrkl, (ftnlen)1, (ftnlen)1); + dlacpy_((char *)"F", m, &blk, &work[il], &ldwrkl, &a[i__ * a_dim1 + 1], lda, (ftnlen)1); + } + } else if (wntqs) { + il = 1; + ldwrkl = *m; + itau = il + ldwrkl * *m; + nwork = itau + *m; + i__2 = *lwork - nwork + 1; + dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__2, &ierr); + dlacpy_((char *)"L", m, m, &a[a_offset], lda, &work[il], &ldwrkl, (ftnlen)1); + i__2 = *m - 1; + i__1 = *m - 1; + dlaset_((char *)"U", &i__2, &i__1, &c_b63, &c_b63, &work[il + ldwrkl], &ldwrkl, (ftnlen)1); + i__2 = *lwork - nwork + 1; + dorglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[nwork], &i__2, &ierr); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; + nwork = itaup + *m; + i__2 = *lwork - nwork + 1; + dgebrd_(m, m, &work[il], &ldwrkl, &s[1], &work[ie], &work[itauq], &work[itaup], + &work[nwork], &i__2, &ierr); + dbdsdc_((char *)"U", (char *)"I", m, &s[1], &work[ie], &u[u_offset], ldu, &vt[vt_offset], ldvt, dum, + idum, &work[nwork], &iwork[1], info, (ftnlen)1, (ftnlen)1); + i__2 = *lwork - nwork + 1; + dormbr_((char *)"Q", (char *)"L", (char *)"N", m, m, m, &work[il], &ldwrkl, &work[itauq], &u[u_offset], ldu, + &work[nwork], &i__2, &ierr, (ftnlen)1, (ftnlen)1, (ftnlen)1); + i__2 = *lwork - nwork + 1; + dormbr_((char *)"P", (char *)"R", (char *)"T", m, m, m, &work[il], &ldwrkl, &work[itaup], &vt[vt_offset], + ldvt, &work[nwork], &i__2, &ierr, (ftnlen)1, (ftnlen)1, (ftnlen)1); + dlacpy_((char *)"F", m, m, &vt[vt_offset], ldvt, &work[il], &ldwrkl, (ftnlen)1); + dgemm_((char *)"N", (char *)"N", m, n, m, &c_b84, &work[il], &ldwrkl, &a[a_offset], lda, &c_b63, + &vt[vt_offset], ldvt, (ftnlen)1, (ftnlen)1); + } else if (wntqa) { + ivt = 1; + ldwkvt = *m; + itau = ivt + ldwkvt * *m; + nwork = itau + *m; + i__2 = *lwork - nwork + 1; + dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__2, &ierr); + dlacpy_((char *)"U", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt, (ftnlen)1); + i__2 = *lwork - nwork + 1; + dorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], &work[nwork], &i__2, &ierr); + i__2 = *m - 1; + i__1 = *m - 1; + dlaset_((char *)"U", &i__2, &i__1, &c_b63, &c_b63, &a[(a_dim1 << 1) + 1], lda, (ftnlen)1); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; + nwork = itaup + *m; + i__2 = *lwork - nwork + 1; + dgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], &work[itaup], + &work[nwork], &i__2, &ierr); + dbdsdc_((char *)"U", (char *)"I", m, &s[1], &work[ie], &u[u_offset], ldu, &work[ivt], &ldwkvt, dum, + idum, &work[nwork], &iwork[1], info, (ftnlen)1, (ftnlen)1); + i__2 = *lwork - nwork + 1; + dormbr_((char *)"Q", (char *)"L", (char *)"N", m, m, m, &a[a_offset], lda, &work[itauq], &u[u_offset], ldu, + &work[nwork], &i__2, &ierr, (ftnlen)1, (ftnlen)1, (ftnlen)1); + i__2 = *lwork - nwork + 1; + dormbr_((char *)"P", (char *)"R", (char *)"T", m, m, m, &a[a_offset], lda, &work[itaup], &work[ivt], + &ldwkvt, &work[nwork], &i__2, &ierr, (ftnlen)1, (ftnlen)1, (ftnlen)1); + dgemm_((char *)"N", (char *)"N", m, n, m, &c_b84, &work[ivt], &ldwkvt, &vt[vt_offset], ldvt, &c_b63, + &a[a_offset], lda, (ftnlen)1, (ftnlen)1); + dlacpy_((char *)"F", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt, (ftnlen)1); + } + } else { + ie = 1; + itauq = ie + *m; + itaup = itauq + *m; + nwork = itaup + *m; + i__2 = *lwork - nwork + 1; + dgebrd_(m, n, 
&a[a_offset], lda, &s[1], &work[ie], &work[itauq], &work[itaup], + &work[nwork], &i__2, &ierr); + if (wntqn) { + dbdsdc_((char *)"L", (char *)"N", m, &s[1], &work[ie], dum, &c__1, dum, &c__1, dum, idum, + &work[nwork], &iwork[1], info, (ftnlen)1, (ftnlen)1); + } else if (wntqo) { + ldwkvt = *m; + ivt = nwork; + if (*lwork >= *m * *n + *m * 3 + bdspac) { + dlaset_((char *)"F", m, n, &c_b63, &c_b63, &work[ivt], &ldwkvt, (ftnlen)1); + nwork = ivt + ldwkvt * *n; + il = -1; + } else { + nwork = ivt + ldwkvt * *m; + il = nwork; + chunk = (*lwork - *m * *m - *m * 3) / *m; + } + dbdsdc_((char *)"L", (char *)"I", m, &s[1], &work[ie], &u[u_offset], ldu, &work[ivt], &ldwkvt, dum, + idum, &work[nwork], &iwork[1], info, (ftnlen)1, (ftnlen)1); + i__2 = *lwork - nwork + 1; + dormbr_((char *)"Q", (char *)"L", (char *)"N", m, m, n, &a[a_offset], lda, &work[itauq], &u[u_offset], ldu, + &work[nwork], &i__2, &ierr, (ftnlen)1, (ftnlen)1, (ftnlen)1); + if (*lwork >= *m * *n + *m * 3 + bdspac) { + i__2 = *lwork - nwork + 1; + dormbr_((char *)"P", (char *)"R", (char *)"T", m, n, m, &a[a_offset], lda, &work[itaup], &work[ivt], + &ldwkvt, &work[nwork], &i__2, &ierr, (ftnlen)1, (ftnlen)1, (ftnlen)1); + dlacpy_((char *)"F", m, n, &work[ivt], &ldwkvt, &a[a_offset], lda, (ftnlen)1); + } else { + i__2 = *lwork - nwork + 1; + dorgbr_((char *)"P", m, n, m, &a[a_offset], lda, &work[itaup], &work[nwork], &i__2, + &ierr, (ftnlen)1); + i__2 = *n; + i__1 = chunk; + for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) { + i__3 = *n - i__ + 1; + blk = min(i__3, chunk); + dgemm_((char *)"N", (char *)"N", m, &blk, m, &c_b84, &work[ivt], &ldwkvt, + &a[i__ * a_dim1 + 1], lda, &c_b63, &work[il], m, (ftnlen)1, + (ftnlen)1); + dlacpy_((char *)"F", m, &blk, &work[il], m, &a[i__ * a_dim1 + 1], lda, (ftnlen)1); + } + } + } else if (wntqs) { + dlaset_((char *)"F", m, n, &c_b63, &c_b63, &vt[vt_offset], ldvt, (ftnlen)1); + dbdsdc_((char *)"L", (char *)"I", m, &s[1], &work[ie], &u[u_offset], ldu, &vt[vt_offset], ldvt, dum, + idum, &work[nwork], &iwork[1], info, (ftnlen)1, (ftnlen)1); + i__1 = *lwork - nwork + 1; + dormbr_((char *)"Q", (char *)"L", (char *)"N", m, m, n, &a[a_offset], lda, &work[itauq], &u[u_offset], ldu, + &work[nwork], &i__1, &ierr, (ftnlen)1, (ftnlen)1, (ftnlen)1); + i__1 = *lwork - nwork + 1; + dormbr_((char *)"P", (char *)"R", (char *)"T", m, n, m, &a[a_offset], lda, &work[itaup], &vt[vt_offset], + ldvt, &work[nwork], &i__1, &ierr, (ftnlen)1, (ftnlen)1, (ftnlen)1); + } else if (wntqa) { + dlaset_((char *)"F", n, n, &c_b63, &c_b63, &vt[vt_offset], ldvt, (ftnlen)1); + dbdsdc_((char *)"L", (char *)"I", m, &s[1], &work[ie], &u[u_offset], ldu, &vt[vt_offset], ldvt, dum, + idum, &work[nwork], &iwork[1], info, (ftnlen)1, (ftnlen)1); + if (*n > *m) { + i__1 = *n - *m; + i__2 = *n - *m; + dlaset_((char *)"F", &i__1, &i__2, &c_b63, &c_b84, &vt[*m + 1 + (*m + 1) * vt_dim1], + ldvt, (ftnlen)1); + } + i__1 = *lwork - nwork + 1; + dormbr_((char *)"Q", (char *)"L", (char *)"N", m, m, n, &a[a_offset], lda, &work[itauq], &u[u_offset], ldu, + &work[nwork], &i__1, &ierr, (ftnlen)1, (ftnlen)1, (ftnlen)1); + i__1 = *lwork - nwork + 1; + dormbr_((char *)"P", (char *)"R", (char *)"T", n, n, m, &a[a_offset], lda, &work[itaup], &vt[vt_offset], + ldvt, &work[nwork], &i__1, &ierr, (ftnlen)1, (ftnlen)1, (ftnlen)1); + } + } + } + if (iscl == 1) { + if (anrm > bignum) { + dlascl_((char *)"G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], &minmn, &ierr, + (ftnlen)1); + } + if (anrm < smlnum) { + dlascl_((char *)"G", &c__0, &c__0, 
&smlnum, &anrm, &minmn, &c__1, &s[1], &minmn, &ierr, + (ftnlen)1); + } + } + work[1] = (doublereal)maxwrk; + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dhseqr.cpp b/lib/linalg/dhseqr.cpp new file mode 100644 index 0000000000..2ac0219858 --- /dev/null +++ b/lib/linalg/dhseqr.cpp @@ -0,0 +1,145 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static doublereal c_b11 = 0.; +static doublereal c_b12 = 1.; +static integer c__12 = 12; +static integer c__2 = 2; +static integer c__49 = 49; +int dhseqr_(char *job, char *compz, integer *n, integer *ilo, integer *ihi, doublereal *h__, + integer *ldh, doublereal *wr, doublereal *wi, doublereal *z__, integer *ldz, + doublereal *work, integer *lwork, integer *info, ftnlen job_len, ftnlen compz_len) +{ + address a__1[2]; + integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2[2], i__3; + doublereal d__1; + char ch__1[2]; + int s_lmp_cat(char *, char **, integer *, integer *, ftnlen); + integer i__; + doublereal hl[2401]; + integer kbot, nmin; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + logical initz; + doublereal workl[49]; + logical wantt, wantz; + extern int dlaqr0_(logical *, logical *, integer *, integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, doublereal *, integer *, integer *), + dlahqr_(logical *, logical *, integer *, integer *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, + integer *), + dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, + ftnlen), + dlaset_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *, + ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, + ftnlen, ftnlen); + extern int xerbla_(char *, integer *, ftnlen); + logical lquery; + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + --wr; + --wi; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --work; + wantt = lsame_(job, (char *)"S", (ftnlen)1, (ftnlen)1); + initz = lsame_(compz, (char *)"I", (ftnlen)1, (ftnlen)1); + wantz = initz || lsame_(compz, (char *)"V", (ftnlen)1, (ftnlen)1); + work[1] = (doublereal)max(1, *n); + lquery = *lwork == -1; + *info = 0; + if (!lsame_(job, (char *)"E", (ftnlen)1, (ftnlen)1) && !wantt) { + *info = -1; + } else if (!lsame_(compz, (char *)"N", (ftnlen)1, (ftnlen)1) && !wantz) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*ilo < 1 || *ilo > max(1, *n)) { + *info = -4; + } else if (*ihi < min(*ilo, *n) || *ihi > *n) { + *info = -5; + } else if (*ldh < max(1, *n)) { + *info = -7; + } else if (*ldz < 1 || wantz && *ldz < max(1, *n)) { + *info = -11; + } else if (*lwork < max(1, *n) && !lquery) { + *info = -13; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DHSEQR", &i__1, (ftnlen)6); + return 0; + } else if (*n == 0) { + return 0; + } else if (lquery) { + dlaqr0_(&wantt, &wantz, n, ilo, ihi, &h__[h_offset], ldh, &wr[1], &wi[1], ilo, ihi, + &z__[z_offset], ldz, &work[1], lwork, info); + d__1 = (doublereal)max(1, *n); + work[1] = max(d__1, work[1]); + return 0; + } else { + i__1 = *ilo - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + wr[i__] = h__[i__ + i__ * h_dim1]; + wi[i__] = 0.; + } + i__1 = *n; + for (i__ = *ihi + 1; i__ <= i__1; ++i__) { + wr[i__] = h__[i__ + i__ * h_dim1]; + wi[i__] = 0.; + } + if (initz) { + dlaset_((char *)"A", n, n, &c_b11, &c_b12, &z__[z_offset], 
ldz, (ftnlen)1); + } + if (*ilo == *ihi) { + wr[*ilo] = h__[*ilo + *ilo * h_dim1]; + wi[*ilo] = 0.; + return 0; + } + i__2[0] = 1, a__1[0] = job; + i__2[1] = 1, a__1[1] = compz; + s_lmp_cat(ch__1, a__1, i__2, &c__2, (ftnlen)2); + nmin = ilaenv_(&c__12, (char *)"DHSEQR", ch__1, n, ilo, ihi, lwork, (ftnlen)6, (ftnlen)2); + nmin = max(11, nmin); + if (*n > nmin) { + dlaqr0_(&wantt, &wantz, n, ilo, ihi, &h__[h_offset], ldh, &wr[1], &wi[1], ilo, ihi, + &z__[z_offset], ldz, &work[1], lwork, info); + } else { + dlahqr_(&wantt, &wantz, n, ilo, ihi, &h__[h_offset], ldh, &wr[1], &wi[1], ilo, ihi, + &z__[z_offset], ldz, info); + if (*info > 0) { + kbot = *info; + if (*n >= 49) { + dlaqr0_(&wantt, &wantz, n, ilo, &kbot, &h__[h_offset], ldh, &wr[1], &wi[1], ilo, + ihi, &z__[z_offset], ldz, &work[1], lwork, info); + } else { + dlacpy_((char *)"A", n, n, &h__[h_offset], ldh, hl, &c__49, (ftnlen)1); + hl[*n + 1 + *n * 49 - 50] = 0.; + i__1 = 49 - *n; + dlaset_((char *)"A", &c__49, &i__1, &c_b11, &c_b11, &hl[(*n + 1) * 49 - 49], &c__49, + (ftnlen)1); + dlaqr0_(&wantt, &wantz, &c__49, ilo, &kbot, hl, &c__49, &wr[1], &wi[1], ilo, + ihi, &z__[z_offset], ldz, workl, &c__49, info); + if (wantt || *info != 0) { + dlacpy_((char *)"A", n, n, hl, &c__49, &h__[h_offset], ldh, (ftnlen)1); + } + } + } + } + if ((wantt || *info != 0) && *n > 2) { + i__1 = *n - 2; + i__3 = *n - 2; + dlaset_((char *)"L", &i__1, &i__3, &c_b11, &c_b11, &h__[h_dim1 + 3], ldh, (ftnlen)1); + } + d__1 = (doublereal)max(1, *n); + work[1] = max(d__1, work[1]); + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dlaexc.cpp b/lib/linalg/dlaexc.cpp new file mode 100644 index 0000000000..9d528080cf --- /dev/null +++ b/lib/linalg/dlaexc.cpp @@ -0,0 +1,214 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static integer c__1 = 1; +static integer c__4 = 4; +static logical c_false = FALSE_; +static integer c_n1 = -1; +static integer c__2 = 2; +static integer c__3 = 3; +int dlaexc_(logical *wantq, integer *n, doublereal *t, integer *ldt, doublereal *q, integer *ldq, + integer *j1, integer *n1, integer *n2, doublereal *work, integer *info) +{ + integer q_dim1, q_offset, t_dim1, t_offset, i__1; + doublereal d__1, d__2, d__3; + doublereal d__[16]; + integer k; + doublereal u[3], x[4]; + integer j2, j3, j4; + doublereal u1[3], u2[3]; + integer nd; + doublereal cs, t11, t22, t33, sn, wi1, wi2, wr1, wr2, eps, tau, tau1, tau2; + integer ierr; + doublereal temp; + extern int drot_(integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, + doublereal *); + doublereal scale, dnorm, xnorm; + extern int dlanv2_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, doublereal *), + dlasy2_(logical *, logical *, integer *, integer *, integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *); + extern doublereal dlamch_(char *, ftnlen), + dlange_(char *, integer *, integer *, doublereal *, integer *, doublereal *, ftnlen); + extern int dlarfg_(integer *, doublereal *, doublereal *, integer *, doublereal *), + dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, + ftnlen), + dlartg_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *), + dlarfx_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *, + doublereal *, ftnlen); + doublereal thresh, smlnum; + t_dim1 = 
*ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --work; + *info = 0; + if (*n == 0 || *n1 == 0 || *n2 == 0) { + return 0; + } + if (*j1 + *n1 > *n) { + return 0; + } + j2 = *j1 + 1; + j3 = *j1 + 2; + j4 = *j1 + 3; + if (*n1 == 1 && *n2 == 1) { + t11 = t[*j1 + *j1 * t_dim1]; + t22 = t[j2 + j2 * t_dim1]; + d__1 = t22 - t11; + dlartg_(&t[*j1 + j2 * t_dim1], &d__1, &cs, &sn, &temp); + if (j3 <= *n) { + i__1 = *n - *j1 - 1; + drot_(&i__1, &t[*j1 + j3 * t_dim1], ldt, &t[j2 + j3 * t_dim1], ldt, &cs, &sn); + } + i__1 = *j1 - 1; + drot_(&i__1, &t[*j1 * t_dim1 + 1], &c__1, &t[j2 * t_dim1 + 1], &c__1, &cs, &sn); + t[*j1 + *j1 * t_dim1] = t22; + t[j2 + j2 * t_dim1] = t11; + if (*wantq) { + drot_(n, &q[*j1 * q_dim1 + 1], &c__1, &q[j2 * q_dim1 + 1], &c__1, &cs, &sn); + } + } else { + nd = *n1 + *n2; + dlacpy_((char *)"Full", &nd, &nd, &t[*j1 + *j1 * t_dim1], ldt, d__, &c__4, (ftnlen)4); + dnorm = dlange_((char *)"Max", &nd, &nd, d__, &c__4, &work[1], (ftnlen)3); + eps = dlamch_((char *)"P", (ftnlen)1); + smlnum = dlamch_((char *)"S", (ftnlen)1) / eps; + d__1 = eps * 10. * dnorm; + thresh = max(d__1, smlnum); + dlasy2_(&c_false, &c_false, &c_n1, n1, n2, d__, &c__4, &d__[*n1 + 1 + (*n1 + 1 << 2) - 5], + &c__4, &d__[(*n1 + 1 << 2) - 4], &c__4, &scale, x, &c__2, &xnorm, &ierr); + k = *n1 + *n1 + *n2 - 3; + switch (k) { + case 1: + goto L10; + case 2: + goto L20; + case 3: + goto L30; + } + L10: + u[0] = scale; + u[1] = x[0]; + u[2] = x[2]; + dlarfg_(&c__3, &u[2], u, &c__1, &tau); + u[2] = 1.; + t11 = t[*j1 + *j1 * t_dim1]; + dlarfx_((char *)"L", &c__3, &c__3, u, &tau, d__, &c__4, &work[1], (ftnlen)1); + dlarfx_((char *)"R", &c__3, &c__3, u, &tau, d__, &c__4, &work[1], (ftnlen)1); + d__2 = abs(d__[2]), d__3 = abs(d__[6]), d__2 = max(d__2, d__3), + d__3 = (d__1 = d__[10] - t11, abs(d__1)); + if (max(d__2, d__3) > thresh) { + goto L50; + } + i__1 = *n - *j1 + 1; + dlarfx_((char *)"L", &c__3, &i__1, u, &tau, &t[*j1 + *j1 * t_dim1], ldt, &work[1], (ftnlen)1); + dlarfx_((char *)"R", &j2, &c__3, u, &tau, &t[*j1 * t_dim1 + 1], ldt, &work[1], (ftnlen)1); + t[j3 + *j1 * t_dim1] = 0.; + t[j3 + j2 * t_dim1] = 0.; + t[j3 + j3 * t_dim1] = t11; + if (*wantq) { + dlarfx_((char *)"R", n, &c__3, u, &tau, &q[*j1 * q_dim1 + 1], ldq, &work[1], (ftnlen)1); + } + goto L40; + L20: + u[0] = -x[0]; + u[1] = -x[1]; + u[2] = scale; + dlarfg_(&c__3, u, &u[1], &c__1, &tau); + u[0] = 1.; + t33 = t[j3 + j3 * t_dim1]; + dlarfx_((char *)"L", &c__3, &c__3, u, &tau, d__, &c__4, &work[1], (ftnlen)1); + dlarfx_((char *)"R", &c__3, &c__3, u, &tau, d__, &c__4, &work[1], (ftnlen)1); + d__2 = abs(d__[1]), d__3 = abs(d__[2]), d__2 = max(d__2, d__3), + d__3 = (d__1 = d__[0] - t33, abs(d__1)); + if (max(d__2, d__3) > thresh) { + goto L50; + } + dlarfx_((char *)"R", &j3, &c__3, u, &tau, &t[*j1 * t_dim1 + 1], ldt, &work[1], (ftnlen)1); + i__1 = *n - *j1; + dlarfx_((char *)"L", &c__3, &i__1, u, &tau, &t[*j1 + j2 * t_dim1], ldt, &work[1], (ftnlen)1); + t[*j1 + *j1 * t_dim1] = t33; + t[j2 + *j1 * t_dim1] = 0.; + t[j3 + *j1 * t_dim1] = 0.; + if (*wantq) { + dlarfx_((char *)"R", n, &c__3, u, &tau, &q[*j1 * q_dim1 + 1], ldq, &work[1], (ftnlen)1); + } + goto L40; + L30: + u1[0] = -x[0]; + u1[1] = -x[1]; + u1[2] = scale; + dlarfg_(&c__3, u1, &u1[1], &c__1, &tau1); + u1[0] = 1.; + temp = -tau1 * (x[2] + u1[1] * x[3]); + u2[0] = -temp * u1[1] - x[3]; + u2[1] = -temp * u1[2]; + u2[2] = scale; + dlarfg_(&c__3, u2, &u2[1], &c__1, &tau2); + u2[0] = 1.; + dlarfx_((char *)"L", &c__3, &c__4, u1, &tau1, d__, 
&c__4, &work[1], (ftnlen)1); + dlarfx_((char *)"R", &c__4, &c__3, u1, &tau1, d__, &c__4, &work[1], (ftnlen)1); + dlarfx_((char *)"L", &c__3, &c__4, u2, &tau2, &d__[1], &c__4, &work[1], (ftnlen)1); + dlarfx_((char *)"R", &c__4, &c__3, u2, &tau2, &d__[4], &c__4, &work[1], (ftnlen)1); + d__1 = abs(d__[2]), d__2 = abs(d__[6]), d__1 = max(d__1, d__2), d__2 = abs(d__[3]), + d__1 = max(d__1, d__2), d__2 = abs(d__[7]); + if (max(d__1, d__2) > thresh) { + goto L50; + } + i__1 = *n - *j1 + 1; + dlarfx_((char *)"L", &c__3, &i__1, u1, &tau1, &t[*j1 + *j1 * t_dim1], ldt, &work[1], (ftnlen)1); + dlarfx_((char *)"R", &j4, &c__3, u1, &tau1, &t[*j1 * t_dim1 + 1], ldt, &work[1], (ftnlen)1); + i__1 = *n - *j1 + 1; + dlarfx_((char *)"L", &c__3, &i__1, u2, &tau2, &t[j2 + *j1 * t_dim1], ldt, &work[1], (ftnlen)1); + dlarfx_((char *)"R", &j4, &c__3, u2, &tau2, &t[j2 * t_dim1 + 1], ldt, &work[1], (ftnlen)1); + t[j3 + *j1 * t_dim1] = 0.; + t[j3 + j2 * t_dim1] = 0.; + t[j4 + *j1 * t_dim1] = 0.; + t[j4 + j2 * t_dim1] = 0.; + if (*wantq) { + dlarfx_((char *)"R", n, &c__3, u1, &tau1, &q[*j1 * q_dim1 + 1], ldq, &work[1], (ftnlen)1); + dlarfx_((char *)"R", n, &c__3, u2, &tau2, &q[j2 * q_dim1 + 1], ldq, &work[1], (ftnlen)1); + } + L40: + if (*n2 == 2) { + dlanv2_(&t[*j1 + *j1 * t_dim1], &t[*j1 + j2 * t_dim1], &t[j2 + *j1 * t_dim1], + &t[j2 + j2 * t_dim1], &wr1, &wi1, &wr2, &wi2, &cs, &sn); + i__1 = *n - *j1 - 1; + drot_(&i__1, &t[*j1 + (*j1 + 2) * t_dim1], ldt, &t[j2 + (*j1 + 2) * t_dim1], ldt, &cs, + &sn); + i__1 = *j1 - 1; + drot_(&i__1, &t[*j1 * t_dim1 + 1], &c__1, &t[j2 * t_dim1 + 1], &c__1, &cs, &sn); + if (*wantq) { + drot_(n, &q[*j1 * q_dim1 + 1], &c__1, &q[j2 * q_dim1 + 1], &c__1, &cs, &sn); + } + } + if (*n1 == 2) { + j3 = *j1 + *n2; + j4 = j3 + 1; + dlanv2_(&t[j3 + j3 * t_dim1], &t[j3 + j4 * t_dim1], &t[j4 + j3 * t_dim1], + &t[j4 + j4 * t_dim1], &wr1, &wi1, &wr2, &wi2, &cs, &sn); + if (j3 + 2 <= *n) { + i__1 = *n - j3 - 1; + drot_(&i__1, &t[j3 + (j3 + 2) * t_dim1], ldt, &t[j4 + (j3 + 2) * t_dim1], ldt, &cs, + &sn); + } + i__1 = j3 - 1; + drot_(&i__1, &t[j3 * t_dim1 + 1], &c__1, &t[j4 * t_dim1 + 1], &c__1, &cs, &sn); + if (*wantq) { + drot_(n, &q[j3 * q_dim1 + 1], &c__1, &q[j4 * q_dim1 + 1], &c__1, &cs, &sn); + } + } + } + return 0; +L50: + *info = 1; + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dlahqr.cpp b/lib/linalg/dlahqr.cpp new file mode 100644 index 0000000000..c2f2775b9b --- /dev/null +++ b/lib/linalg/dlahqr.cpp @@ -0,0 +1,311 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static integer c__1 = 1; +int dlahqr_(logical *wantt, logical *wantz, integer *n, integer *ilo, integer *ihi, doublereal *h__, + integer *ldh, doublereal *wr, doublereal *wi, integer *iloz, integer *ihiz, + doublereal *z__, integer *ldz, integer *info) +{ + integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4; + doublereal d__1, d__2, d__3, d__4; + double sqrt(doublereal); + integer i__, j, k, l, m; + doublereal s, v[3]; + integer i1, i2; + doublereal t1, t2, t3, v2, v3, aa, ab, ba, bb, h11, h12, h21, h22, cs; + integer nh; + doublereal sn; + integer nr; + doublereal tr; + integer nz; + doublereal det, h21s; + integer its; + doublereal ulp, sum, tst, rt1i, rt2i, rt1r, rt2r; + extern int drot_(integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, + doublereal *), + dcopy_(integer *, doublereal *, integer *, doublereal *, integer *); + integer itmax; + extern int dlanv2_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, 
doublereal *, doublereal *, doublereal *, doublereal *), + dlabad_(doublereal *, doublereal *); + extern doublereal dlamch_(char *, ftnlen); + extern int dlarfg_(integer *, doublereal *, doublereal *, integer *, doublereal *); + doublereal safmin, safmax, rtdisc, smlnum; + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + --wr; + --wi; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + *info = 0; + if (*n == 0) { + return 0; + } + if (*ilo == *ihi) { + wr[*ilo] = h__[*ilo + *ilo * h_dim1]; + wi[*ilo] = 0.; + return 0; + } + i__1 = *ihi - 3; + for (j = *ilo; j <= i__1; ++j) { + h__[j + 2 + j * h_dim1] = 0.; + h__[j + 3 + j * h_dim1] = 0.; + } + if (*ilo <= *ihi - 2) { + h__[*ihi + (*ihi - 2) * h_dim1] = 0.; + } + nh = *ihi - *ilo + 1; + nz = *ihiz - *iloz + 1; + safmin = dlamch_((char *)"SAFE MINIMUM", (ftnlen)12); + safmax = 1. / safmin; + dlabad_(&safmin, &safmax); + ulp = dlamch_((char *)"PRECISION", (ftnlen)9); + smlnum = safmin * ((doublereal)nh / ulp); + if (*wantt) { + i1 = 1; + i2 = *n; + } + itmax = max(10, nh) * 30; + i__ = *ihi; +L20: + l = *ilo; + if (i__ < *ilo) { + goto L160; + } + i__1 = itmax; + for (its = 0; its <= i__1; ++its) { + i__2 = l + 1; + for (k = i__; k >= i__2; --k) { + if ((d__1 = h__[k + (k - 1) * h_dim1], abs(d__1)) <= smlnum) { + goto L40; + } + tst = (d__1 = h__[k - 1 + (k - 1) * h_dim1], abs(d__1)) + + (d__2 = h__[k + k * h_dim1], abs(d__2)); + if (tst == 0.) { + if (k - 2 >= *ilo) { + tst += (d__1 = h__[k - 1 + (k - 2) * h_dim1], abs(d__1)); + } + if (k + 1 <= *ihi) { + tst += (d__1 = h__[k + 1 + k * h_dim1], abs(d__1)); + } + } + if ((d__1 = h__[k + (k - 1) * h_dim1], abs(d__1)) <= ulp * tst) { + d__3 = (d__1 = h__[k + (k - 1) * h_dim1], abs(d__1)), + d__4 = (d__2 = h__[k - 1 + k * h_dim1], abs(d__2)); + ab = max(d__3, d__4); + d__3 = (d__1 = h__[k + (k - 1) * h_dim1], abs(d__1)), + d__4 = (d__2 = h__[k - 1 + k * h_dim1], abs(d__2)); + ba = min(d__3, d__4); + d__3 = (d__1 = h__[k + k * h_dim1], abs(d__1)), + d__4 = (d__2 = h__[k - 1 + (k - 1) * h_dim1] - h__[k + k * h_dim1], abs(d__2)); + aa = max(d__3, d__4); + d__3 = (d__1 = h__[k + k * h_dim1], abs(d__1)), + d__4 = (d__2 = h__[k - 1 + (k - 1) * h_dim1] - h__[k + k * h_dim1], abs(d__2)); + bb = min(d__3, d__4); + s = aa + ab; + d__1 = smlnum, d__2 = ulp * (bb * (aa / s)); + if (ba * (ab / s) <= max(d__1, d__2)) { + goto L40; + } + } + } + L40: + l = k; + if (l > *ilo) { + h__[l + (l - 1) * h_dim1] = 0.; + } + if (l >= i__ - 1) { + goto L150; + } + if (!(*wantt)) { + i1 = l; + i2 = i__; + } + if (its == 10) { + s = (d__1 = h__[l + 1 + l * h_dim1], abs(d__1)) + + (d__2 = h__[l + 2 + (l + 1) * h_dim1], abs(d__2)); + h11 = s * .75 + h__[l + l * h_dim1]; + h12 = s * -.4375; + h21 = s; + h22 = h11; + } else if (its == 20) { + s = (d__1 = h__[i__ + (i__ - 1) * h_dim1], abs(d__1)) + + (d__2 = h__[i__ - 1 + (i__ - 2) * h_dim1], abs(d__2)); + h11 = s * .75 + h__[i__ + i__ * h_dim1]; + h12 = s * -.4375; + h21 = s; + h22 = h11; + } else { + h11 = h__[i__ - 1 + (i__ - 1) * h_dim1]; + h21 = h__[i__ + (i__ - 1) * h_dim1]; + h12 = h__[i__ - 1 + i__ * h_dim1]; + h22 = h__[i__ + i__ * h_dim1]; + } + s = abs(h11) + abs(h12) + abs(h21) + abs(h22); + if (s == 0.) { + rt1r = 0.; + rt1i = 0.; + rt2r = 0.; + rt2i = 0.; + } else { + h11 /= s; + h21 /= s; + h12 /= s; + h22 /= s; + tr = (h11 + h22) / 2.; + det = (h11 - tr) * (h22 - tr) - h12 * h21; + rtdisc = sqrt((abs(det))); + if (det >= 0.) 
{ + rt1r = tr * s; + rt2r = rt1r; + rt1i = rtdisc * s; + rt2i = -rt1i; + } else { + rt1r = tr + rtdisc; + rt2r = tr - rtdisc; + if ((d__1 = rt1r - h22, abs(d__1)) <= (d__2 = rt2r - h22, abs(d__2))) { + rt1r *= s; + rt2r = rt1r; + } else { + rt2r *= s; + rt1r = rt2r; + } + rt1i = 0.; + rt2i = 0.; + } + } + i__2 = l; + for (m = i__ - 2; m >= i__2; --m) { + h21s = h__[m + 1 + m * h_dim1]; + s = (d__1 = h__[m + m * h_dim1] - rt2r, abs(d__1)) + abs(rt2i) + abs(h21s); + h21s = h__[m + 1 + m * h_dim1] / s; + v[0] = h21s * h__[m + (m + 1) * h_dim1] + + (h__[m + m * h_dim1] - rt1r) * ((h__[m + m * h_dim1] - rt2r) / s) - + rt1i * (rt2i / s); + v[1] = h21s * (h__[m + m * h_dim1] + h__[m + 1 + (m + 1) * h_dim1] - rt1r - rt2r); + v[2] = h21s * h__[m + 2 + (m + 1) * h_dim1]; + s = abs(v[0]) + abs(v[1]) + abs(v[2]); + v[0] /= s; + v[1] /= s; + v[2] /= s; + if (m == l) { + goto L60; + } + if ((d__1 = h__[m + (m - 1) * h_dim1], abs(d__1)) * (abs(v[1]) + abs(v[2])) <= + ulp * abs(v[0]) * + ((d__2 = h__[m - 1 + (m - 1) * h_dim1], abs(d__2)) + + (d__3 = h__[m + m * h_dim1], abs(d__3)) + + (d__4 = h__[m + 1 + (m + 1) * h_dim1], abs(d__4)))) { + goto L60; + } + } + L60: + i__2 = i__ - 1; + for (k = m; k <= i__2; ++k) { + i__3 = 3, i__4 = i__ - k + 1; + nr = min(i__3, i__4); + if (k > m) { + dcopy_(&nr, &h__[k + (k - 1) * h_dim1], &c__1, v, &c__1); + } + dlarfg_(&nr, v, &v[1], &c__1, &t1); + if (k > m) { + h__[k + (k - 1) * h_dim1] = v[0]; + h__[k + 1 + (k - 1) * h_dim1] = 0.; + if (k < i__ - 1) { + h__[k + 2 + (k - 1) * h_dim1] = 0.; + } + } else if (m > l) { + h__[k + (k - 1) * h_dim1] *= 1. - t1; + } + v2 = v[1]; + t2 = t1 * v2; + if (nr == 3) { + v3 = v[2]; + t3 = t1 * v3; + i__3 = i2; + for (j = k; j <= i__3; ++j) { + sum = h__[k + j * h_dim1] + v2 * h__[k + 1 + j * h_dim1] + + v3 * h__[k + 2 + j * h_dim1]; + h__[k + j * h_dim1] -= sum * t1; + h__[k + 1 + j * h_dim1] -= sum * t2; + h__[k + 2 + j * h_dim1] -= sum * t3; + } + i__4 = k + 3; + i__3 = min(i__4, i__); + for (j = i1; j <= i__3; ++j) { + sum = h__[j + k * h_dim1] + v2 * h__[j + (k + 1) * h_dim1] + + v3 * h__[j + (k + 2) * h_dim1]; + h__[j + k * h_dim1] -= sum * t1; + h__[j + (k + 1) * h_dim1] -= sum * t2; + h__[j + (k + 2) * h_dim1] -= sum * t3; + } + if (*wantz) { + i__3 = *ihiz; + for (j = *iloz; j <= i__3; ++j) { + sum = z__[j + k * z_dim1] + v2 * z__[j + (k + 1) * z_dim1] + + v3 * z__[j + (k + 2) * z_dim1]; + z__[j + k * z_dim1] -= sum * t1; + z__[j + (k + 1) * z_dim1] -= sum * t2; + z__[j + (k + 2) * z_dim1] -= sum * t3; + } + } + } else if (nr == 2) { + i__3 = i2; + for (j = k; j <= i__3; ++j) { + sum = h__[k + j * h_dim1] + v2 * h__[k + 1 + j * h_dim1]; + h__[k + j * h_dim1] -= sum * t1; + h__[k + 1 + j * h_dim1] -= sum * t2; + } + i__3 = i__; + for (j = i1; j <= i__3; ++j) { + sum = h__[j + k * h_dim1] + v2 * h__[j + (k + 1) * h_dim1]; + h__[j + k * h_dim1] -= sum * t1; + h__[j + (k + 1) * h_dim1] -= sum * t2; + } + if (*wantz) { + i__3 = *ihiz; + for (j = *iloz; j <= i__3; ++j) { + sum = z__[j + k * z_dim1] + v2 * z__[j + (k + 1) * z_dim1]; + z__[j + k * z_dim1] -= sum * t1; + z__[j + (k + 1) * z_dim1] -= sum * t2; + } + } + } + } + } + *info = i__; + return 0; +L150: + if (l == i__) { + wr[i__] = h__[i__ + i__ * h_dim1]; + wi[i__] = 0.; + } else if (l == i__ - 1) { + dlanv2_(&h__[i__ - 1 + (i__ - 1) * h_dim1], &h__[i__ - 1 + i__ * h_dim1], + &h__[i__ + (i__ - 1) * h_dim1], &h__[i__ + i__ * h_dim1], &wr[i__ - 1], + &wi[i__ - 1], &wr[i__], &wi[i__], &cs, &sn); + if (*wantt) { + if (i2 > i__) { + i__1 = i2 - i__; + drot_(&i__1, &h__[i__ - 1 
+ (i__ + 1) * h_dim1], ldh, + &h__[i__ + (i__ + 1) * h_dim1], ldh, &cs, &sn); + } + i__1 = i__ - i1 - 1; + drot_(&i__1, &h__[i1 + (i__ - 1) * h_dim1], &c__1, &h__[i1 + i__ * h_dim1], &c__1, &cs, + &sn); + } + if (*wantz) { + drot_(&nz, &z__[*iloz + (i__ - 1) * z_dim1], &c__1, &z__[*iloz + i__ * z_dim1], &c__1, + &cs, &sn); + } + } + i__ = l - 1; + goto L20; +L160: + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dlahr2.cpp b/lib/linalg/dlahr2.cpp new file mode 100644 index 0000000000..36264e950f --- /dev/null +++ b/lib/linalg/dlahr2.cpp @@ -0,0 +1,121 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static doublereal c_b4 = -1.; +static doublereal c_b5 = 1.; +static integer c__1 = 1; +static doublereal c_b38 = 0.; +int dlahr2_(integer *n, integer *k, integer *nb, doublereal *a, integer *lda, doublereal *tau, + doublereal *t, integer *ldt, doublereal *y, integer *ldy) +{ + integer a_dim1, a_offset, t_dim1, t_offset, y_dim1, y_offset, i__1, i__2, i__3; + doublereal d__1; + integer i__; + doublereal ei; + extern int dscal_(integer *, doublereal *, doublereal *, integer *), + dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, ftnlen, + ftnlen), + dgemv_(char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, ftnlen), + dcopy_(integer *, doublereal *, integer *, doublereal *, integer *), + dtrmm_(char *, char *, char *, char *, integer *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, ftnlen, ftnlen, ftnlen, ftnlen), + daxpy_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *), + dtrmv_(char *, char *, char *, integer *, doublereal *, integer *, doublereal *, integer *, + ftnlen, ftnlen, ftnlen), + dlarfg_(integer *, doublereal *, doublereal *, integer *, doublereal *), + dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, + ftnlen); + --tau; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + y_dim1 = *ldy; + y_offset = 1 + y_dim1; + y -= y_offset; + if (*n <= 1) { + return 0; + } + i__1 = *nb; + for (i__ = 1; i__ <= i__1; ++i__) { + if (i__ > 1) { + i__2 = *n - *k; + i__3 = i__ - 1; + dgemv_((char *)"NO TRANSPOSE", &i__2, &i__3, &c_b4, &y[*k + 1 + y_dim1], ldy, + &a[*k + i__ - 1 + a_dim1], lda, &c_b5, &a[*k + 1 + i__ * a_dim1], &c__1, + (ftnlen)12); + i__2 = i__ - 1; + dcopy_(&i__2, &a[*k + 1 + i__ * a_dim1], &c__1, &t[*nb * t_dim1 + 1], &c__1); + i__2 = i__ - 1; + dtrmv_((char *)"Lower", (char *)"Transpose", (char *)"UNIT", &i__2, &a[*k + 1 + a_dim1], lda, + &t[*nb * t_dim1 + 1], &c__1, (ftnlen)5, (ftnlen)9, (ftnlen)4); + i__2 = *n - *k - i__ + 1; + i__3 = i__ - 1; + dgemv_((char *)"Transpose", &i__2, &i__3, &c_b5, &a[*k + i__ + a_dim1], lda, + &a[*k + i__ + i__ * a_dim1], &c__1, &c_b5, &t[*nb * t_dim1 + 1], &c__1, + (ftnlen)9); + i__2 = i__ - 1; + dtrmv_((char *)"Upper", (char *)"Transpose", (char *)"NON-UNIT", &i__2, &t[t_offset], ldt, &t[*nb * t_dim1 + 1], + &c__1, (ftnlen)5, (ftnlen)9, (ftnlen)8); + i__2 = *n - *k - i__ + 1; + i__3 = i__ - 1; + dgemv_((char *)"NO TRANSPOSE", &i__2, &i__3, &c_b4, &a[*k + i__ + a_dim1], lda, + &t[*nb * t_dim1 + 1], &c__1, &c_b5, &a[*k + i__ + i__ * a_dim1], &c__1, + (ftnlen)12); + i__2 = i__ - 1; + dtrmv_((char *)"Lower", (char *)"NO TRANSPOSE", (char *)"UNIT", &i__2, &a[*k + 1 
+ a_dim1], lda, + &t[*nb * t_dim1 + 1], &c__1, (ftnlen)5, (ftnlen)12, (ftnlen)4); + i__2 = i__ - 1; + daxpy_(&i__2, &c_b4, &t[*nb * t_dim1 + 1], &c__1, &a[*k + 1 + i__ * a_dim1], &c__1); + a[*k + i__ - 1 + (i__ - 1) * a_dim1] = ei; + } + i__2 = *n - *k - i__ + 1; + i__3 = *k + i__ + 1; + dlarfg_(&i__2, &a[*k + i__ + i__ * a_dim1], &a[min(i__3, *n) + i__ * a_dim1], &c__1, + &tau[i__]); + ei = a[*k + i__ + i__ * a_dim1]; + a[*k + i__ + i__ * a_dim1] = 1.; + i__2 = *n - *k; + i__3 = *n - *k - i__ + 1; + dgemv_((char *)"NO TRANSPOSE", &i__2, &i__3, &c_b5, &a[*k + 1 + (i__ + 1) * a_dim1], lda, + &a[*k + i__ + i__ * a_dim1], &c__1, &c_b38, &y[*k + 1 + i__ * y_dim1], &c__1, + (ftnlen)12); + i__2 = *n - *k - i__ + 1; + i__3 = i__ - 1; + dgemv_((char *)"Transpose", &i__2, &i__3, &c_b5, &a[*k + i__ + a_dim1], lda, + &a[*k + i__ + i__ * a_dim1], &c__1, &c_b38, &t[i__ * t_dim1 + 1], &c__1, (ftnlen)9); + i__2 = *n - *k; + i__3 = i__ - 1; + dgemv_((char *)"NO TRANSPOSE", &i__2, &i__3, &c_b4, &y[*k + 1 + y_dim1], ldy, &t[i__ * t_dim1 + 1], + &c__1, &c_b5, &y[*k + 1 + i__ * y_dim1], &c__1, (ftnlen)12); + i__2 = *n - *k; + dscal_(&i__2, &tau[i__], &y[*k + 1 + i__ * y_dim1], &c__1); + i__2 = i__ - 1; + d__1 = -tau[i__]; + dscal_(&i__2, &d__1, &t[i__ * t_dim1 + 1], &c__1); + i__2 = i__ - 1; + dtrmv_((char *)"Upper", (char *)"No Transpose", (char *)"NON-UNIT", &i__2, &t[t_offset], ldt, &t[i__ * t_dim1 + 1], + &c__1, (ftnlen)5, (ftnlen)12, (ftnlen)8); + t[i__ + i__ * t_dim1] = tau[i__]; + } + a[*k + *nb + *nb * a_dim1] = ei; + dlacpy_((char *)"ALL", k, nb, &a[(a_dim1 << 1) + 1], lda, &y[y_offset], ldy, (ftnlen)3); + dtrmm_((char *)"RIGHT", (char *)"Lower", (char *)"NO TRANSPOSE", (char *)"UNIT", k, nb, &c_b5, &a[*k + 1 + a_dim1], lda, + &y[y_offset], ldy, (ftnlen)5, (ftnlen)5, (ftnlen)12, (ftnlen)4); + if (*n > *k + *nb) { + i__1 = *n - *k - *nb; + dgemm_((char *)"NO TRANSPOSE", (char *)"NO TRANSPOSE", k, nb, &i__1, &c_b5, &a[(*nb + 2) * a_dim1 + 1], lda, + &a[*k + 1 + *nb + a_dim1], lda, &c_b5, &y[y_offset], ldy, (ftnlen)12, (ftnlen)12); + } + dtrmm_((char *)"RIGHT", (char *)"Upper", (char *)"NO TRANSPOSE", (char *)"NON-UNIT", k, nb, &c_b5, &t[t_offset], ldt, + &y[y_offset], ldy, (ftnlen)5, (ftnlen)5, (ftnlen)12, (ftnlen)8); + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dlaln2.cpp b/lib/linalg/dlaln2.cpp new file mode 100644 index 0000000000..220eaae63a --- /dev/null +++ b/lib/linalg/dlaln2.cpp @@ -0,0 +1,298 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +int dlaln2_(logical *ltrans, integer *na, integer *nw, doublereal *smin, doublereal *ca, + doublereal *a, integer *lda, doublereal *d1, doublereal *d2, doublereal *b, + integer *ldb, doublereal *wr, doublereal *wi, doublereal *x, integer *ldx, + doublereal *scale, doublereal *xnorm, integer *info) +{ + static logical zswap[4] = {FALSE_, FALSE_, TRUE_, TRUE_}; + static logical rswap[4] = {FALSE_, TRUE_, FALSE_, TRUE_}; + static integer ipivot[16] = {1, 2, 3, 4, 2, 1, 4, 3, 3, 4, 1, 2, 4, 3, 2, 1}; + integer a_dim1, a_offset, b_dim1, b_offset, x_dim1, x_offset; + doublereal d__1, d__2, d__3, d__4, d__5, d__6; + static doublereal equiv_0[4], equiv_1[4]; + integer j; +#define ci (equiv_0) +#define cr (equiv_1) + doublereal bi1, bi2, br1, br2, xi1, xi2, xr1, xr2, ci21, ci22, cr21, cr22, li21, csi, ui11, + lr21, ui12, ui22; +#define civ (equiv_0) + doublereal csr, ur11, ur12, ur22; +#define crv (equiv_1) + doublereal bbnd, cmax, ui11r, ui12s, temp, ur11r, ur12s, u22abs; + integer icmax; + doublereal bnorm, cnorm, smini; 
+ extern doublereal dlamch_(char *, ftnlen); + extern int dladiv_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *); + doublereal bignum, smlnum; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + x_dim1 = *ldx; + x_offset = 1 + x_dim1; + x -= x_offset; + smlnum = 2. * dlamch_((char *)"Safe minimum", (ftnlen)12); + bignum = 1. / smlnum; + smini = max(*smin, smlnum); + *info = 0; + *scale = 1.; + if (*na == 1) { + if (*nw == 1) { + csr = *ca * a[a_dim1 + 1] - *wr * *d1; + cnorm = abs(csr); + if (cnorm < smini) { + csr = smini; + cnorm = smini; + *info = 1; + } + bnorm = (d__1 = b[b_dim1 + 1], abs(d__1)); + if (cnorm < 1. && bnorm > 1.) { + if (bnorm > bignum * cnorm) { + *scale = 1. / bnorm; + } + } + x[x_dim1 + 1] = b[b_dim1 + 1] * *scale / csr; + *xnorm = (d__1 = x[x_dim1 + 1], abs(d__1)); + } else { + csr = *ca * a[a_dim1 + 1] - *wr * *d1; + csi = -(*wi) * *d1; + cnorm = abs(csr) + abs(csi); + if (cnorm < smini) { + csr = smini; + csi = 0.; + cnorm = smini; + *info = 1; + } + bnorm = (d__1 = b[b_dim1 + 1], abs(d__1)) + (d__2 = b[(b_dim1 << 1) + 1], abs(d__2)); + if (cnorm < 1. && bnorm > 1.) { + if (bnorm > bignum * cnorm) { + *scale = 1. / bnorm; + } + } + d__1 = *scale * b[b_dim1 + 1]; + d__2 = *scale * b[(b_dim1 << 1) + 1]; + dladiv_(&d__1, &d__2, &csr, &csi, &x[x_dim1 + 1], &x[(x_dim1 << 1) + 1]); + *xnorm = (d__1 = x[x_dim1 + 1], abs(d__1)) + (d__2 = x[(x_dim1 << 1) + 1], abs(d__2)); + } + } else { + cr[0] = *ca * a[a_dim1 + 1] - *wr * *d1; + cr[3] = *ca * a[(a_dim1 << 1) + 2] - *wr * *d2; + if (*ltrans) { + cr[2] = *ca * a[a_dim1 + 2]; + cr[1] = *ca * a[(a_dim1 << 1) + 1]; + } else { + cr[1] = *ca * a[a_dim1 + 2]; + cr[2] = *ca * a[(a_dim1 << 1) + 1]; + } + if (*nw == 1) { + cmax = 0.; + icmax = 0; + for (j = 1; j <= 4; ++j) { + if ((d__1 = crv[j - 1], abs(d__1)) > cmax) { + cmax = (d__1 = crv[j - 1], abs(d__1)); + icmax = j; + } + } + if (cmax < smini) { + d__3 = (d__1 = b[b_dim1 + 1], abs(d__1)), d__4 = (d__2 = b[b_dim1 + 2], abs(d__2)); + bnorm = max(d__3, d__4); + if (smini < 1. && bnorm > 1.) { + if (bnorm > bignum * smini) { + *scale = 1. / bnorm; + } + } + temp = *scale / smini; + x[x_dim1 + 1] = temp * b[b_dim1 + 1]; + x[x_dim1 + 2] = temp * b[b_dim1 + 2]; + *xnorm = temp * bnorm; + *info = 1; + return 0; + } + ur11 = crv[icmax - 1]; + cr21 = crv[ipivot[(icmax << 2) - 3] - 1]; + ur12 = crv[ipivot[(icmax << 2) - 2] - 1]; + cr22 = crv[ipivot[(icmax << 2) - 1] - 1]; + ur11r = 1. / ur11; + lr21 = ur11r * cr21; + ur22 = cr22 - ur12 * lr21; + if (abs(ur22) < smini) { + ur22 = smini; + *info = 1; + } + if (rswap[icmax - 1]) { + br1 = b[b_dim1 + 2]; + br2 = b[b_dim1 + 1]; + } else { + br1 = b[b_dim1 + 1]; + br2 = b[b_dim1 + 2]; + } + br2 -= lr21 * br1; + d__2 = (d__1 = br1 * (ur22 * ur11r), abs(d__1)), d__3 = abs(br2); + bbnd = max(d__2, d__3); + if (bbnd > 1. && abs(ur22) < 1.) { + if (bbnd >= bignum * abs(ur22)) { + *scale = 1. / bbnd; + } + } + xr2 = br2 * *scale / ur22; + xr1 = *scale * br1 * ur11r - xr2 * (ur11r * ur12); + if (zswap[icmax - 1]) { + x[x_dim1 + 1] = xr2; + x[x_dim1 + 2] = xr1; + } else { + x[x_dim1 + 1] = xr1; + x[x_dim1 + 2] = xr2; + } + d__1 = abs(xr1), d__2 = abs(xr2); + *xnorm = max(d__1, d__2); + if (*xnorm > 1. && cmax > 1.) 
{ + if (*xnorm > bignum / cmax) { + temp = cmax / bignum; + x[x_dim1 + 1] = temp * x[x_dim1 + 1]; + x[x_dim1 + 2] = temp * x[x_dim1 + 2]; + *xnorm = temp * *xnorm; + *scale = temp * *scale; + } + } + } else { + ci[0] = -(*wi) * *d1; + ci[1] = 0.; + ci[2] = 0.; + ci[3] = -(*wi) * *d2; + cmax = 0.; + icmax = 0; + for (j = 1; j <= 4; ++j) { + if ((d__1 = crv[j - 1], abs(d__1)) + (d__2 = civ[j - 1], abs(d__2)) > cmax) { + cmax = (d__1 = crv[j - 1], abs(d__1)) + (d__2 = civ[j - 1], abs(d__2)); + icmax = j; + } + } + if (cmax < smini) { + d__5 = (d__1 = b[b_dim1 + 1], abs(d__1)) + (d__2 = b[(b_dim1 << 1) + 1], abs(d__2)), + d__6 = (d__3 = b[b_dim1 + 2], abs(d__3)) + (d__4 = b[(b_dim1 << 1) + 2], abs(d__4)); + bnorm = max(d__5, d__6); + if (smini < 1. && bnorm > 1.) { + if (bnorm > bignum * smini) { + *scale = 1. / bnorm; + } + } + temp = *scale / smini; + x[x_dim1 + 1] = temp * b[b_dim1 + 1]; + x[x_dim1 + 2] = temp * b[b_dim1 + 2]; + x[(x_dim1 << 1) + 1] = temp * b[(b_dim1 << 1) + 1]; + x[(x_dim1 << 1) + 2] = temp * b[(b_dim1 << 1) + 2]; + *xnorm = temp * bnorm; + *info = 1; + return 0; + } + ur11 = crv[icmax - 1]; + ui11 = civ[icmax - 1]; + cr21 = crv[ipivot[(icmax << 2) - 3] - 1]; + ci21 = civ[ipivot[(icmax << 2) - 3] - 1]; + ur12 = crv[ipivot[(icmax << 2) - 2] - 1]; + ui12 = civ[ipivot[(icmax << 2) - 2] - 1]; + cr22 = crv[ipivot[(icmax << 2) - 1] - 1]; + ci22 = civ[ipivot[(icmax << 2) - 1] - 1]; + if (icmax == 1 || icmax == 4) { + if (abs(ur11) > abs(ui11)) { + temp = ui11 / ur11; + d__1 = temp; + ur11r = 1. / (ur11 * (d__1 * d__1 + 1.)); + ui11r = -temp * ur11r; + } else { + temp = ur11 / ui11; + d__1 = temp; + ui11r = -1. / (ui11 * (d__1 * d__1 + 1.)); + ur11r = -temp * ui11r; + } + lr21 = cr21 * ur11r; + li21 = cr21 * ui11r; + ur12s = ur12 * ur11r; + ui12s = ur12 * ui11r; + ur22 = cr22 - ur12 * lr21; + ui22 = ci22 - ur12 * li21; + } else { + ur11r = 1. / ur11; + ui11r = 0.; + lr21 = cr21 * ur11r; + li21 = ci21 * ur11r; + ur12s = ur12 * ur11r; + ui12s = ui12 * ur11r; + ur22 = cr22 - ur12 * lr21 + ui12 * li21; + ui22 = -ur12 * li21 - ui12 * lr21; + } + u22abs = abs(ur22) + abs(ui22); + if (u22abs < smini) { + ur22 = smini; + ui22 = 0.; + *info = 1; + } + if (rswap[icmax - 1]) { + br2 = b[b_dim1 + 1]; + br1 = b[b_dim1 + 2]; + bi2 = b[(b_dim1 << 1) + 1]; + bi1 = b[(b_dim1 << 1) + 2]; + } else { + br1 = b[b_dim1 + 1]; + br2 = b[b_dim1 + 2]; + bi1 = b[(b_dim1 << 1) + 1]; + bi2 = b[(b_dim1 << 1) + 2]; + } + br2 = br2 - lr21 * br1 + li21 * bi1; + bi2 = bi2 - li21 * br1 - lr21 * bi1; + d__1 = (abs(br1) + abs(bi1)) * (u22abs * (abs(ur11r) + abs(ui11r))), + d__2 = abs(br2) + abs(bi2); + bbnd = max(d__1, d__2); + if (bbnd > 1. && u22abs < 1.) { + if (bbnd >= bignum * u22abs) { + *scale = 1. / bbnd; + br1 = *scale * br1; + bi1 = *scale * bi1; + br2 = *scale * br2; + bi2 = *scale * bi2; + } + } + dladiv_(&br2, &bi2, &ur22, &ui22, &xr2, &xi2); + xr1 = ur11r * br1 - ui11r * bi1 - ur12s * xr2 + ui12s * xi2; + xi1 = ui11r * br1 + ur11r * bi1 - ui12s * xr2 - ur12s * xi2; + if (zswap[icmax - 1]) { + x[x_dim1 + 1] = xr2; + x[x_dim1 + 2] = xr1; + x[(x_dim1 << 1) + 1] = xi2; + x[(x_dim1 << 1) + 2] = xi1; + } else { + x[x_dim1 + 1] = xr1; + x[x_dim1 + 2] = xr2; + x[(x_dim1 << 1) + 1] = xi1; + x[(x_dim1 << 1) + 2] = xi2; + } + d__1 = abs(xr1) + abs(xi1), d__2 = abs(xr2) + abs(xi2); + *xnorm = max(d__1, d__2); + if (*xnorm > 1. && cmax > 1.) 
{ + if (*xnorm > bignum / cmax) { + temp = cmax / bignum; + x[x_dim1 + 1] = temp * x[x_dim1 + 1]; + x[x_dim1 + 2] = temp * x[x_dim1 + 2]; + x[(x_dim1 << 1) + 1] = temp * x[(x_dim1 << 1) + 1]; + x[(x_dim1 << 1) + 2] = temp * x[(x_dim1 << 1) + 2]; + *xnorm = temp * *xnorm; + *scale = temp * *scale; + } + } + } + } + return 0; +} +#undef crv +#undef civ +#undef cr +#undef ci +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dlanv2.cpp b/lib/linalg/dlanv2.cpp new file mode 100644 index 0000000000..29a511bf31 --- /dev/null +++ b/lib/linalg/dlanv2.cpp @@ -0,0 +1,106 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static doublereal c_b3 = 1.; +int dlanv2_(doublereal *a, doublereal *b, doublereal *c__, doublereal *d__, doublereal *rt1r, + doublereal *rt1i, doublereal *rt2r, doublereal *rt2i, doublereal *cs, doublereal *sn) +{ + doublereal d__1, d__2; + double d_lmp_sign(doublereal *, doublereal *), sqrt(doublereal); + doublereal p, z__, aa, bb, cc, dd, cs1, sn1, sab, sac, eps, tau, temp, scale, bcmax, bcmis, + sigma; + extern doublereal dlapy2_(doublereal *, doublereal *), dlamch_(char *, ftnlen); + eps = dlamch_((char *)"P", (ftnlen)1); + if (*c__ == 0.) { + *cs = 1.; + *sn = 0.; + } else if (*b == 0.) { + *cs = 0.; + *sn = 1.; + temp = *d__; + *d__ = *a; + *a = temp; + *b = -(*c__); + *c__ = 0.; + } else if (*a - *d__ == 0. && d_lmp_sign(&c_b3, b) != d_lmp_sign(&c_b3, c__)) { + *cs = 1.; + *sn = 0.; + } else { + temp = *a - *d__; + p = temp * .5; + d__1 = abs(*b), d__2 = abs(*c__); + bcmax = max(d__1, d__2); + d__1 = abs(*b), d__2 = abs(*c__); + bcmis = min(d__1, d__2) * d_lmp_sign(&c_b3, b) * d_lmp_sign(&c_b3, c__); + d__1 = abs(p); + scale = max(d__1, bcmax); + z__ = p / scale * p + bcmax / scale * bcmis; + if (z__ >= eps * 4.) { + d__1 = sqrt(scale) * sqrt(z__); + z__ = p + d_lmp_sign(&d__1, &p); + *a = *d__ + z__; + *d__ -= bcmax / z__ * bcmis; + tau = dlapy2_(c__, &z__); + *cs = z__ / tau; + *sn = *c__ / tau; + *b -= *c__; + *c__ = 0.; + } else { + sigma = *b + *c__; + tau = dlapy2_(&sigma, &temp); + *cs = sqrt((abs(sigma) / tau + 1.) * .5); + *sn = -(p / (tau * *cs)) * d_lmp_sign(&c_b3, &sigma); + aa = *a * *cs + *b * *sn; + bb = -(*a) * *sn + *b * *cs; + cc = *c__ * *cs + *d__ * *sn; + dd = -(*c__) * *sn + *d__ * *cs; + *a = aa * *cs + cc * *sn; + *b = bb * *cs + dd * *sn; + *c__ = -aa * *sn + cc * *cs; + *d__ = -bb * *sn + dd * *cs; + temp = (*a + *d__) * .5; + *a = temp; + *d__ = temp; + if (*c__ != 0.) { + if (*b != 0.) { + if (d_lmp_sign(&c_b3, b) == d_lmp_sign(&c_b3, c__)) { + sab = sqrt((abs(*b))); + sac = sqrt((abs(*c__))); + d__1 = sab * sac; + p = d_lmp_sign(&d__1, c__); + tau = 1. / sqrt((d__1 = *b + *c__, abs(d__1))); + *a = temp + p; + *d__ = temp - p; + *b -= *c__; + *c__ = 0.; + cs1 = sab * tau; + sn1 = sac * tau; + temp = *cs * cs1 - *sn * sn1; + *sn = *cs * sn1 + *sn * cs1; + *cs = temp; + } + } else { + *b = -(*c__); + *c__ = 0.; + temp = *cs; + *cs = -(*sn); + *sn = temp; + } + } + } + } + *rt1r = *a; + *rt2r = *d__; + if (*c__ == 0.) 
{ + *rt1i = 0.; + *rt2i = 0.; + } else { + *rt1i = sqrt((abs(*b))) * sqrt((abs(*c__))); + *rt2i = -(*rt1i); + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dlaqr0.cpp b/lib/linalg/dlaqr0.cpp new file mode 100644 index 0000000000..31a265c3e9 --- /dev/null +++ b/lib/linalg/dlaqr0.cpp @@ -0,0 +1,306 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static integer c__13 = 13; +static integer c__15 = 15; +static integer c_n1 = -1; +static integer c__12 = 12; +static integer c__14 = 14; +static integer c__16 = 16; +static logical c_false = FALSE_; +static integer c__1 = 1; +static integer c__3 = 3; +int dlaqr0_(logical *wantt, logical *wantz, integer *n, integer *ilo, integer *ihi, doublereal *h__, + integer *ldh, doublereal *wr, doublereal *wi, integer *iloz, integer *ihiz, + doublereal *z__, integer *ldz, doublereal *work, integer *lwork, integer *info) +{ + integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4, i__5; + doublereal d__1, d__2, d__3, d__4; + integer i__, k; + doublereal aa, bb, cc, dd; + integer ld; + doublereal cs; + integer nh, it, ks, kt; + doublereal sn; + integer ku, kv, ls, ns; + doublereal ss; + integer nw, inf, kdu, nho, nve, kwh, nsr, nwr, kwv, ndec, ndfl, kbot, nmin; + doublereal swap; + integer ktop; + doublereal zdum[1]; + integer kacc22, itmax, nsmax, nwmax, kwtop; + extern int dlanv2_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, doublereal *), + dlaqr3_(logical *, logical *, integer *, integer *, integer *, integer *, doublereal *, + integer *, integer *, integer *, doublereal *, integer *, integer *, integer *, + doublereal *, doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *, doublereal *, integer *, doublereal *, integer *), + dlaqr4_(logical *, logical *, integer *, integer *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, + doublereal *, integer *, integer *), + dlaqr5_(logical *, logical *, integer *, integer *, integer *, integer *, integer *, + doublereal *, doublereal *, doublereal *, integer *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, + integer *, doublereal *, integer *, integer *, doublereal *, integer *); + integer nibble; + extern int dlahqr_(logical *, logical *, integer *, integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *), + dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, + ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, + ftnlen, ftnlen); + char jbcmpz[2]; + integer nwupbd; + logical sorted; + integer lwkopt; + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + --wr; + --wi; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --work; + *info = 0; + if (*n == 0) { + work[1] = 1.; + return 0; + } + if (*n <= 11) { + lwkopt = 1; + if (*lwork != -1) { + dlahqr_(wantt, wantz, n, ilo, ihi, &h__[h_offset], ldh, &wr[1], &wi[1], iloz, ihiz, + &z__[z_offset], ldz, info); + } + } else { + *info = 0; + if (*wantt) { + *(unsigned char *)jbcmpz = 'S'; + } else { + *(unsigned char *)jbcmpz = 'E'; + } + if (*wantz) { + *(unsigned char *)&jbcmpz[1] = 'V'; + } else { + *(unsigned char *)&jbcmpz[1] = 'N'; + } + nwr = ilaenv_(&c__13, (char *)"DLAQR0", jbcmpz, n, ilo, ihi, 
lwork, (ftnlen)6, (ftnlen)2); + nwr = max(2, nwr); + i__1 = *ihi - *ilo + 1, i__2 = (*n - 1) / 3, i__1 = min(i__1, i__2); + nwr = min(i__1, nwr); + nsr = ilaenv_(&c__15, (char *)"DLAQR0", jbcmpz, n, ilo, ihi, lwork, (ftnlen)6, (ftnlen)2); + i__1 = nsr, i__2 = (*n + 6) / 9, i__1 = min(i__1, i__2), i__2 = *ihi - *ilo; + nsr = min(i__1, i__2); + i__1 = 2, i__2 = nsr - nsr % 2; + nsr = max(i__1, i__2); + i__1 = nwr + 1; + dlaqr3_(wantt, wantz, n, ilo, ihi, &i__1, &h__[h_offset], ldh, iloz, ihiz, &z__[z_offset], + ldz, &ls, &ld, &wr[1], &wi[1], &h__[h_offset], ldh, n, &h__[h_offset], ldh, n, + &h__[h_offset], ldh, &work[1], &c_n1); + i__1 = nsr * 3 / 2, i__2 = (integer)work[1]; + lwkopt = max(i__1, i__2); + if (*lwork == -1) { + work[1] = (doublereal)lwkopt; + return 0; + } + nmin = ilaenv_(&c__12, (char *)"DLAQR0", jbcmpz, n, ilo, ihi, lwork, (ftnlen)6, (ftnlen)2); + nmin = max(11, nmin); + nibble = ilaenv_(&c__14, (char *)"DLAQR0", jbcmpz, n, ilo, ihi, lwork, (ftnlen)6, (ftnlen)2); + nibble = max(0, nibble); + kacc22 = ilaenv_(&c__16, (char *)"DLAQR0", jbcmpz, n, ilo, ihi, lwork, (ftnlen)6, (ftnlen)2); + kacc22 = max(0, kacc22); + kacc22 = min(2, kacc22); + i__1 = (*n - 1) / 3, i__2 = *lwork / 2; + nwmax = min(i__1, i__2); + nw = nwmax; + i__1 = (*n + 6) / 9, i__2 = (*lwork << 1) / 3; + nsmax = min(i__1, i__2); + nsmax -= nsmax % 2; + ndfl = 1; + i__1 = 10, i__2 = *ihi - *ilo + 1; + itmax = max(i__1, i__2) * 30; + kbot = *ihi; + i__1 = itmax; + for (it = 1; it <= i__1; ++it) { + if (kbot < *ilo) { + goto L90; + } + i__2 = *ilo + 1; + for (k = kbot; k >= i__2; --k) { + if (h__[k + (k - 1) * h_dim1] == 0.) { + goto L20; + } + } + k = *ilo; + L20: + ktop = k; + nh = kbot - ktop + 1; + nwupbd = min(nh, nwmax); + if (ndfl < 5) { + nw = min(nwupbd, nwr); + } else { + i__2 = nwupbd, i__3 = nw << 1; + nw = min(i__2, i__3); + } + if (nw < nwmax) { + if (nw >= nh - 1) { + nw = nh; + } else { + kwtop = kbot - nw + 1; + if ((d__1 = h__[kwtop + (kwtop - 1) * h_dim1], abs(d__1)) > + (d__2 = h__[kwtop - 1 + (kwtop - 2) * h_dim1], abs(d__2))) { + ++nw; + } + } + } + if (ndfl < 5) { + ndec = -1; + } else if (ndec >= 0 || nw >= nwupbd) { + ++ndec; + if (nw - ndec < 2) { + ndec = 0; + } + nw -= ndec; + } + kv = *n - nw + 1; + kt = nw + 1; + nho = *n - nw - 1 - kt + 1; + kwv = nw + 2; + nve = *n - nw - kwv + 1; + dlaqr3_(wantt, wantz, n, &ktop, &kbot, &nw, &h__[h_offset], ldh, iloz, ihiz, + &z__[z_offset], ldz, &ls, &ld, &wr[1], &wi[1], &h__[kv + h_dim1], ldh, &nho, + &h__[kv + kt * h_dim1], ldh, &nve, &h__[kwv + h_dim1], ldh, &work[1], lwork); + kbot -= ld; + ks = kbot - ls + 1; + if (ld == 0 || ld * 100 <= nw * nibble && kbot - ktop + 1 > min(nmin, nwmax)) { + i__4 = 2, i__5 = kbot - ktop; + i__2 = min(nsmax, nsr), i__3 = max(i__4, i__5); + ns = min(i__2, i__3); + ns -= ns % 2; + if (ndfl % 6 == 0) { + ks = kbot - ns + 1; + i__3 = ks + 1, i__4 = ktop + 2; + i__2 = max(i__3, i__4); + for (i__ = kbot; i__ >= i__2; i__ += -2) { + ss = (d__1 = h__[i__ + (i__ - 1) * h_dim1], abs(d__1)) + + (d__2 = h__[i__ - 1 + (i__ - 2) * h_dim1], abs(d__2)); + aa = ss * .75 + h__[i__ + i__ * h_dim1]; + bb = ss; + cc = ss * -.4375; + dd = aa; + dlanv2_(&aa, &bb, &cc, &dd, &wr[i__ - 1], &wi[i__ - 1], &wr[i__], &wi[i__], + &cs, &sn); + } + if (ks == ktop) { + wr[ks + 1] = h__[ks + 1 + (ks + 1) * h_dim1]; + wi[ks + 1] = 0.; + wr[ks] = wr[ks + 1]; + wi[ks] = wi[ks + 1]; + } + } else { + if (kbot - ks + 1 <= ns / 2) { + ks = kbot - ns + 1; + kt = *n - ns + 1; + dlacpy_((char *)"A", &ns, &ns, &h__[ks + ks * h_dim1], ldh, &h__[kt + 
h_dim1], ldh, + (ftnlen)1); + if (ns > nmin) { + dlaqr4_(&c_false, &c_false, &ns, &c__1, &ns, &h__[kt + h_dim1], ldh, + &wr[ks], &wi[ks], &c__1, &c__1, zdum, &c__1, &work[1], lwork, + &inf); + } else { + dlahqr_(&c_false, &c_false, &ns, &c__1, &ns, &h__[kt + h_dim1], ldh, + &wr[ks], &wi[ks], &c__1, &c__1, zdum, &c__1, &inf); + } + ks += inf; + if (ks >= kbot) { + aa = h__[kbot - 1 + (kbot - 1) * h_dim1]; + cc = h__[kbot + (kbot - 1) * h_dim1]; + bb = h__[kbot - 1 + kbot * h_dim1]; + dd = h__[kbot + kbot * h_dim1]; + dlanv2_(&aa, &bb, &cc, &dd, &wr[kbot - 1], &wi[kbot - 1], &wr[kbot], + &wi[kbot], &cs, &sn); + ks = kbot - 1; + } + } + if (kbot - ks + 1 > ns) { + sorted = FALSE_; + i__2 = ks + 1; + for (k = kbot; k >= i__2; --k) { + if (sorted) { + goto L60; + } + sorted = TRUE_; + i__3 = k - 1; + for (i__ = ks; i__ <= i__3; ++i__) { + if ((d__1 = wr[i__], abs(d__1)) + (d__2 = wi[i__], abs(d__2)) < + (d__3 = wr[i__ + 1], abs(d__3)) + + (d__4 = wi[i__ + 1], abs(d__4))) { + sorted = FALSE_; + swap = wr[i__]; + wr[i__] = wr[i__ + 1]; + wr[i__ + 1] = swap; + swap = wi[i__]; + wi[i__] = wi[i__ + 1]; + wi[i__ + 1] = swap; + } + } + } + L60:; + } + i__2 = ks + 2; + for (i__ = kbot; i__ >= i__2; i__ += -2) { + if (wi[i__] != -wi[i__ - 1]) { + swap = wr[i__]; + wr[i__] = wr[i__ - 1]; + wr[i__ - 1] = wr[i__ - 2]; + wr[i__ - 2] = swap; + swap = wi[i__]; + wi[i__] = wi[i__ - 1]; + wi[i__ - 1] = wi[i__ - 2]; + wi[i__ - 2] = swap; + } + } + } + if (kbot - ks + 1 == 2) { + if (wi[kbot] == 0.) { + if ((d__1 = wr[kbot] - h__[kbot + kbot * h_dim1], abs(d__1)) < + (d__2 = wr[kbot - 1] - h__[kbot + kbot * h_dim1], abs(d__2))) { + wr[kbot - 1] = wr[kbot]; + } else { + wr[kbot] = wr[kbot - 1]; + } + } + } + i__2 = ns, i__3 = kbot - ks + 1; + ns = min(i__2, i__3); + ns -= ns % 2; + ks = kbot - ns + 1; + kdu = ns * 3 - 3; + ku = *n - kdu + 1; + kwh = kdu + 1; + nho = *n - kdu - 3 - (kdu + 1) + 1; + kwv = kdu + 4; + nve = *n - kdu - kwv + 1; + dlaqr5_(wantt, wantz, &kacc22, n, &ktop, &kbot, &ns, &wr[ks], &wi[ks], + &h__[h_offset], ldh, iloz, ihiz, &z__[z_offset], ldz, &work[1], &c__3, + &h__[ku + h_dim1], ldh, &nve, &h__[kwv + h_dim1], ldh, &nho, + &h__[ku + kwh * h_dim1], ldh); + } + if (ld > 0) { + ndfl = 1; + } else { + ++ndfl; + } + } + *info = kbot; + L90:; + } + work[1] = (doublereal)lwkopt; + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dlaqr1.cpp b/lib/linalg/dlaqr1.cpp new file mode 100644 index 0000000000..292dce0f45 --- /dev/null +++ b/lib/linalg/dlaqr1.cpp @@ -0,0 +1,52 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +int dlaqr1_(integer *n, doublereal *h__, integer *ldh, doublereal *sr1, doublereal *si1, + doublereal *sr2, doublereal *si2, doublereal *v) +{ + integer h_dim1, h_offset; + doublereal d__1, d__2, d__3; + doublereal s, h21s, h31s; + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + --v; + if (*n != 2 && *n != 3) { + return 0; + } + if (*n == 2) { + s = (d__1 = h__[h_dim1 + 1] - *sr2, abs(d__1)) + abs(*si2) + + (d__2 = h__[h_dim1 + 2], abs(d__2)); + if (s == 0.) { + v[1] = 0.; + v[2] = 0.; + } else { + h21s = h__[h_dim1 + 2] / s; + v[1] = h21s * h__[(h_dim1 << 1) + 1] + + (h__[h_dim1 + 1] - *sr1) * ((h__[h_dim1 + 1] - *sr2) / s) - *si1 * (*si2 / s); + v[2] = h21s * (h__[h_dim1 + 1] + h__[(h_dim1 << 1) + 2] - *sr1 - *sr2); + } + } else { + s = (d__1 = h__[h_dim1 + 1] - *sr2, abs(d__1)) + abs(*si2) + + (d__2 = h__[h_dim1 + 2], abs(d__2)) + (d__3 = h__[h_dim1 + 3], abs(d__3)); + if (s == 0.) 
{ + v[1] = 0.; + v[2] = 0.; + v[3] = 0.; + } else { + h21s = h__[h_dim1 + 2] / s; + h31s = h__[h_dim1 + 3] / s; + v[1] = (h__[h_dim1 + 1] - *sr1) * ((h__[h_dim1 + 1] - *sr2) / s) - *si1 * (*si2 / s) + + h__[(h_dim1 << 1) + 1] * h21s + h__[h_dim1 * 3 + 1] * h31s; + v[2] = h21s * (h__[h_dim1 + 1] + h__[(h_dim1 << 1) + 2] - *sr1 - *sr2) + + h__[h_dim1 * 3 + 2] * h31s; + v[3] = h31s * (h__[h_dim1 + 1] + h__[h_dim1 * 3 + 3] - *sr1 - *sr2) + + h21s * h__[(h_dim1 << 1) + 3]; + } + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dlaqr2.cpp b/lib/linalg/dlaqr2.cpp new file mode 100644 index 0000000000..102433a90d --- /dev/null +++ b/lib/linalg/dlaqr2.cpp @@ -0,0 +1,359 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static integer c__1 = 1; +static integer c_n1 = -1; +static doublereal c_b12 = 0.; +static doublereal c_b13 = 1.; +static logical c_true = TRUE_; +int dlaqr2_(logical *wantt, logical *wantz, integer *n, integer *ktop, integer *kbot, integer *nw, + doublereal *h__, integer *ldh, integer *iloz, integer *ihiz, doublereal *z__, + integer *ldz, integer *ns, integer *nd, doublereal *sr, doublereal *si, doublereal *v, + integer *ldv, integer *nh, doublereal *t, integer *ldt, integer *nv, doublereal *wv, + integer *ldwv, doublereal *work, integer *lwork) +{ + integer h_dim1, h_offset, t_dim1, t_offset, v_dim1, v_offset, wv_dim1, wv_offset, z_dim1, + z_offset, i__1, i__2, i__3, i__4; + doublereal d__1, d__2, d__3, d__4, d__5, d__6; + double sqrt(doublereal); + integer i__, j, k; + doublereal s, aa, bb, cc, dd, cs, sn; + integer jw; + doublereal evi, evk, foo; + integer kln; + doublereal tau, ulp; + integer lwk1, lwk2; + doublereal beta; + integer kend, kcol, info, ifst, ilst, ltop, krow; + extern int dlarf_(char *, integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, doublereal *, ftnlen), + dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, ftnlen, + ftnlen); + logical bulge; + extern int dcopy_(integer *, doublereal *, integer *, doublereal *, integer *); + integer infqr, kwtop; + extern int dlanv2_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, doublereal *), + dlabad_(doublereal *, doublereal *); + extern doublereal dlamch_(char *, ftnlen); + extern int dgehrd_(integer *, integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, integer *), + dlarfg_(integer *, doublereal *, doublereal *, integer *, doublereal *), + dlahqr_(logical *, logical *, integer *, integer *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, + integer *), + dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, + ftnlen); + doublereal safmin; + extern int dlaset_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, + integer *, ftnlen); + doublereal safmax; + extern int dtrexc_(char *, integer *, doublereal *, integer *, doublereal *, integer *, + integer *, integer *, doublereal *, integer *, ftnlen), + dormhr_(char *, char *, integer *, integer *, integer *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, integer *, ftnlen, + ftnlen); + logical sorted; + doublereal smlnum; + integer lwkopt; + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + 
z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --sr; + --si; + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + wv_dim1 = *ldwv; + wv_offset = 1 + wv_dim1; + wv -= wv_offset; + --work; + i__1 = *nw, i__2 = *kbot - *ktop + 1; + jw = min(i__1, i__2); + if (jw <= 2) { + lwkopt = 1; + } else { + i__1 = jw - 1; + dgehrd_(&jw, &c__1, &i__1, &t[t_offset], ldt, &work[1], &work[1], &c_n1, &info); + lwk1 = (integer)work[1]; + i__1 = jw - 1; + dormhr_((char *)"R", (char *)"N", &jw, &jw, &c__1, &i__1, &t[t_offset], ldt, &work[1], &v[v_offset], ldv, + &work[1], &c_n1, &info, (ftnlen)1, (ftnlen)1); + lwk2 = (integer)work[1]; + lwkopt = jw + max(lwk1, lwk2); + } + if (*lwork == -1) { + work[1] = (doublereal)lwkopt; + return 0; + } + *ns = 0; + *nd = 0; + work[1] = 1.; + if (*ktop > *kbot) { + return 0; + } + if (*nw < 1) { + return 0; + } + safmin = dlamch_((char *)"SAFE MINIMUM", (ftnlen)12); + safmax = 1. / safmin; + dlabad_(&safmin, &safmax); + ulp = dlamch_((char *)"PRECISION", (ftnlen)9); + smlnum = safmin * ((doublereal)(*n) / ulp); + i__1 = *nw, i__2 = *kbot - *ktop + 1; + jw = min(i__1, i__2); + kwtop = *kbot - jw + 1; + if (kwtop == *ktop) { + s = 0.; + } else { + s = h__[kwtop + (kwtop - 1) * h_dim1]; + } + if (*kbot == kwtop) { + sr[kwtop] = h__[kwtop + kwtop * h_dim1]; + si[kwtop] = 0.; + *ns = 1; + *nd = 0; + d__2 = smlnum, d__3 = ulp * (d__1 = h__[kwtop + kwtop * h_dim1], abs(d__1)); + if (abs(s) <= max(d__2, d__3)) { + *ns = 0; + *nd = 1; + if (kwtop > *ktop) { + h__[kwtop + (kwtop - 1) * h_dim1] = 0.; + } + } + work[1] = 1.; + return 0; + } + dlacpy_((char *)"U", &jw, &jw, &h__[kwtop + kwtop * h_dim1], ldh, &t[t_offset], ldt, (ftnlen)1); + i__1 = jw - 1; + i__2 = *ldh + 1; + i__3 = *ldt + 1; + dcopy_(&i__1, &h__[kwtop + 1 + kwtop * h_dim1], &i__2, &t[t_dim1 + 2], &i__3); + dlaset_((char *)"A", &jw, &jw, &c_b12, &c_b13, &v[v_offset], ldv, (ftnlen)1); + dlahqr_(&c_true, &c_true, &jw, &c__1, &jw, &t[t_offset], ldt, &sr[kwtop], &si[kwtop], &c__1, + &jw, &v[v_offset], ldv, &infqr); + i__1 = jw - 3; + for (j = 1; j <= i__1; ++j) { + t[j + 2 + j * t_dim1] = 0.; + t[j + 3 + j * t_dim1] = 0.; + } + if (jw > 2) { + t[jw + (jw - 2) * t_dim1] = 0.; + } + *ns = jw; + ilst = infqr + 1; +L20: + if (ilst <= *ns) { + if (*ns == 1) { + bulge = FALSE_; + } else { + bulge = t[*ns + (*ns - 1) * t_dim1] != 0.; + } + if (!bulge) { + foo = (d__1 = t[*ns + *ns * t_dim1], abs(d__1)); + if (foo == 0.) { + foo = abs(s); + } + d__2 = smlnum, d__3 = ulp * foo; + if ((d__1 = s * v[*ns * v_dim1 + 1], abs(d__1)) <= max(d__2, d__3)) { + --(*ns); + } else { + ifst = *ns; + dtrexc_((char *)"V", &jw, &t[t_offset], ldt, &v[v_offset], ldv, &ifst, &ilst, &work[1], + &info, (ftnlen)1); + ++ilst; + } + } else { + foo = (d__3 = t[*ns + *ns * t_dim1], abs(d__3)) + + sqrt((d__1 = t[*ns + (*ns - 1) * t_dim1], abs(d__1))) * + sqrt((d__2 = t[*ns - 1 + *ns * t_dim1], abs(d__2))); + if (foo == 0.) 
{ + foo = abs(s); + } + d__3 = (d__1 = s * v[*ns * v_dim1 + 1], abs(d__1)), + d__4 = (d__2 = s * v[(*ns - 1) * v_dim1 + 1], abs(d__2)); + d__5 = smlnum, d__6 = ulp * foo; + if (max(d__3, d__4) <= max(d__5, d__6)) { + *ns += -2; + } else { + ifst = *ns; + dtrexc_((char *)"V", &jw, &t[t_offset], ldt, &v[v_offset], ldv, &ifst, &ilst, &work[1], + &info, (ftnlen)1); + ilst += 2; + } + } + goto L20; + } + if (*ns == 0) { + s = 0.; + } + if (*ns < jw) { + sorted = FALSE_; + i__ = *ns + 1; + L30: + if (sorted) { + goto L50; + } + sorted = TRUE_; + kend = i__ - 1; + i__ = infqr + 1; + if (i__ == *ns) { + k = i__ + 1; + } else if (t[i__ + 1 + i__ * t_dim1] == 0.) { + k = i__ + 1; + } else { + k = i__ + 2; + } + L40: + if (k <= kend) { + if (k == i__ + 1) { + evi = (d__1 = t[i__ + i__ * t_dim1], abs(d__1)); + } else { + evi = (d__3 = t[i__ + i__ * t_dim1], abs(d__3)) + + sqrt((d__1 = t[i__ + 1 + i__ * t_dim1], abs(d__1))) * + sqrt((d__2 = t[i__ + (i__ + 1) * t_dim1], abs(d__2))); + } + if (k == kend) { + evk = (d__1 = t[k + k * t_dim1], abs(d__1)); + } else if (t[k + 1 + k * t_dim1] == 0.) { + evk = (d__1 = t[k + k * t_dim1], abs(d__1)); + } else { + evk = (d__3 = t[k + k * t_dim1], abs(d__3)) + + sqrt((d__1 = t[k + 1 + k * t_dim1], abs(d__1))) * + sqrt((d__2 = t[k + (k + 1) * t_dim1], abs(d__2))); + } + if (evi >= evk) { + i__ = k; + } else { + sorted = FALSE_; + ifst = i__; + ilst = k; + dtrexc_((char *)"V", &jw, &t[t_offset], ldt, &v[v_offset], ldv, &ifst, &ilst, &work[1], + &info, (ftnlen)1); + if (info == 0) { + i__ = ilst; + } else { + i__ = k; + } + } + if (i__ == kend) { + k = i__ + 1; + } else if (t[i__ + 1 + i__ * t_dim1] == 0.) { + k = i__ + 1; + } else { + k = i__ + 2; + } + goto L40; + } + goto L30; + L50:; + } + i__ = jw; +L60: + if (i__ >= infqr + 1) { + if (i__ == infqr + 1) { + sr[kwtop + i__ - 1] = t[i__ + i__ * t_dim1]; + si[kwtop + i__ - 1] = 0.; + --i__; + } else if (t[i__ + (i__ - 1) * t_dim1] == 0.) { + sr[kwtop + i__ - 1] = t[i__ + i__ * t_dim1]; + si[kwtop + i__ - 1] = 0.; + --i__; + } else { + aa = t[i__ - 1 + (i__ - 1) * t_dim1]; + cc = t[i__ + (i__ - 1) * t_dim1]; + bb = t[i__ - 1 + i__ * t_dim1]; + dd = t[i__ + i__ * t_dim1]; + dlanv2_(&aa, &bb, &cc, &dd, &sr[kwtop + i__ - 2], &si[kwtop + i__ - 2], + &sr[kwtop + i__ - 1], &si[kwtop + i__ - 1], &cs, &sn); + i__ += -2; + } + goto L60; + } + if (*ns < jw || s == 0.) { + if (*ns > 1 && s != 0.) { + dcopy_(ns, &v[v_offset], ldv, &work[1], &c__1); + beta = work[1]; + dlarfg_(ns, &beta, &work[2], &c__1, &tau); + work[1] = 1.; + i__1 = jw - 2; + i__2 = jw - 2; + dlaset_((char *)"L", &i__1, &i__2, &c_b12, &c_b12, &t[t_dim1 + 3], ldt, (ftnlen)1); + dlarf_((char *)"L", ns, &jw, &work[1], &c__1, &tau, &t[t_offset], ldt, &work[jw + 1], + (ftnlen)1); + dlarf_((char *)"R", ns, ns, &work[1], &c__1, &tau, &t[t_offset], ldt, &work[jw + 1], (ftnlen)1); + dlarf_((char *)"R", &jw, ns, &work[1], &c__1, &tau, &v[v_offset], ldv, &work[jw + 1], + (ftnlen)1); + i__1 = *lwork - jw; + dgehrd_(&jw, &c__1, ns, &t[t_offset], ldt, &work[1], &work[jw + 1], &i__1, &info); + } + if (kwtop > 1) { + h__[kwtop + (kwtop - 1) * h_dim1] = s * v[v_dim1 + 1]; + } + dlacpy_((char *)"U", &jw, &jw, &t[t_offset], ldt, &h__[kwtop + kwtop * h_dim1], ldh, (ftnlen)1); + i__1 = jw - 1; + i__2 = *ldt + 1; + i__3 = *ldh + 1; + dcopy_(&i__1, &t[t_dim1 + 2], &i__2, &h__[kwtop + 1 + kwtop * h_dim1], &i__3); + if (*ns > 1 && s != 0.) 
{ + i__1 = *lwork - jw; + dormhr_((char *)"R", (char *)"N", &jw, ns, &c__1, ns, &t[t_offset], ldt, &work[1], &v[v_offset], ldv, + &work[jw + 1], &i__1, &info, (ftnlen)1, (ftnlen)1); + } + if (*wantt) { + ltop = 1; + } else { + ltop = *ktop; + } + i__1 = kwtop - 1; + i__2 = *nv; + for (krow = ltop; i__2 < 0 ? krow >= i__1 : krow <= i__1; krow += i__2) { + i__3 = *nv, i__4 = kwtop - krow; + kln = min(i__3, i__4); + dgemm_((char *)"N", (char *)"N", &kln, &jw, &jw, &c_b13, &h__[krow + kwtop * h_dim1], ldh, &v[v_offset], + ldv, &c_b12, &wv[wv_offset], ldwv, (ftnlen)1, (ftnlen)1); + dlacpy_((char *)"A", &kln, &jw, &wv[wv_offset], ldwv, &h__[krow + kwtop * h_dim1], ldh, + (ftnlen)1); + } + if (*wantt) { + i__2 = *n; + i__1 = *nh; + for (kcol = *kbot + 1; i__1 < 0 ? kcol >= i__2 : kcol <= i__2; kcol += i__1) { + i__3 = *nh, i__4 = *n - kcol + 1; + kln = min(i__3, i__4); + dgemm_((char *)"C", (char *)"N", &jw, &kln, &jw, &c_b13, &v[v_offset], ldv, + &h__[kwtop + kcol * h_dim1], ldh, &c_b12, &t[t_offset], ldt, (ftnlen)1, + (ftnlen)1); + dlacpy_((char *)"A", &jw, &kln, &t[t_offset], ldt, &h__[kwtop + kcol * h_dim1], ldh, + (ftnlen)1); + } + } + if (*wantz) { + i__1 = *ihiz; + i__2 = *nv; + for (krow = *iloz; i__2 < 0 ? krow >= i__1 : krow <= i__1; krow += i__2) { + i__3 = *nv, i__4 = *ihiz - krow + 1; + kln = min(i__3, i__4); + dgemm_((char *)"N", (char *)"N", &kln, &jw, &jw, &c_b13, &z__[krow + kwtop * z_dim1], ldz, + &v[v_offset], ldv, &c_b12, &wv[wv_offset], ldwv, (ftnlen)1, (ftnlen)1); + dlacpy_((char *)"A", &kln, &jw, &wv[wv_offset], ldwv, &z__[krow + kwtop * z_dim1], ldz, + (ftnlen)1); + } + } + } + *nd = jw - *ns; + *ns -= infqr; + work[1] = (doublereal)lwkopt; + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dlaqr3.cpp b/lib/linalg/dlaqr3.cpp new file mode 100644 index 0000000000..5711a3e349 --- /dev/null +++ b/lib/linalg/dlaqr3.cpp @@ -0,0 +1,375 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static integer c__1 = 1; +static integer c_n1 = -1; +static logical c_true = TRUE_; +static doublereal c_b17 = 0.; +static doublereal c_b18 = 1.; +static integer c__12 = 12; +int dlaqr3_(logical *wantt, logical *wantz, integer *n, integer *ktop, integer *kbot, integer *nw, + doublereal *h__, integer *ldh, integer *iloz, integer *ihiz, doublereal *z__, + integer *ldz, integer *ns, integer *nd, doublereal *sr, doublereal *si, doublereal *v, + integer *ldv, integer *nh, doublereal *t, integer *ldt, integer *nv, doublereal *wv, + integer *ldwv, doublereal *work, integer *lwork) +{ + integer h_dim1, h_offset, t_dim1, t_offset, v_dim1, v_offset, wv_dim1, wv_offset, z_dim1, + z_offset, i__1, i__2, i__3, i__4; + doublereal d__1, d__2, d__3, d__4, d__5, d__6; + double sqrt(doublereal); + integer i__, j, k; + doublereal s, aa, bb, cc, dd, cs, sn; + integer jw; + doublereal evi, evk, foo; + integer kln; + doublereal tau, ulp; + integer lwk1, lwk2, lwk3; + doublereal beta; + integer kend, kcol, info, nmin, ifst, ilst, ltop, krow; + extern int dlarf_(char *, integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, doublereal *, ftnlen), + dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, ftnlen, + ftnlen); + logical bulge; + extern int dcopy_(integer *, doublereal *, integer *, doublereal *, integer *); + integer infqr, kwtop; + extern int dlanv2_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, 
doublereal *, doublereal *, doublereal *, doublereal *), + dlaqr4_(logical *, logical *, integer *, integer *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, + doublereal *, integer *, integer *), + dlabad_(doublereal *, doublereal *); + extern doublereal dlamch_(char *, ftnlen); + extern int dgehrd_(integer *, integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, integer *), + dlarfg_(integer *, doublereal *, doublereal *, integer *, doublereal *), + dlahqr_(logical *, logical *, integer *, integer *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, + integer *), + dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, + ftnlen); + doublereal safmin; + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, + ftnlen, ftnlen); + doublereal safmax; + extern int dlaset_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, + integer *, ftnlen), + dtrexc_(char *, integer *, doublereal *, integer *, doublereal *, integer *, integer *, + integer *, doublereal *, integer *, ftnlen), + dormhr_(char *, char *, integer *, integer *, integer *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, integer *, ftnlen, + ftnlen); + logical sorted; + doublereal smlnum; + integer lwkopt; + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --sr; + --si; + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + wv_dim1 = *ldwv; + wv_offset = 1 + wv_dim1; + wv -= wv_offset; + --work; + i__1 = *nw, i__2 = *kbot - *ktop + 1; + jw = min(i__1, i__2); + if (jw <= 2) { + lwkopt = 1; + } else { + i__1 = jw - 1; + dgehrd_(&jw, &c__1, &i__1, &t[t_offset], ldt, &work[1], &work[1], &c_n1, &info); + lwk1 = (integer)work[1]; + i__1 = jw - 1; + dormhr_((char *)"R", (char *)"N", &jw, &jw, &c__1, &i__1, &t[t_offset], ldt, &work[1], &v[v_offset], ldv, + &work[1], &c_n1, &info, (ftnlen)1, (ftnlen)1); + lwk2 = (integer)work[1]; + dlaqr4_(&c_true, &c_true, &jw, &c__1, &jw, &t[t_offset], ldt, &sr[1], &si[1], &c__1, &jw, + &v[v_offset], ldv, &work[1], &c_n1, &infqr); + lwk3 = (integer)work[1]; + i__1 = jw + max(lwk1, lwk2); + lwkopt = max(i__1, lwk3); + } + if (*lwork == -1) { + work[1] = (doublereal)lwkopt; + return 0; + } + *ns = 0; + *nd = 0; + work[1] = 1.; + if (*ktop > *kbot) { + return 0; + } + if (*nw < 1) { + return 0; + } + safmin = dlamch_((char *)"SAFE MINIMUM", (ftnlen)12); + safmax = 1. 
/ safmin; + dlabad_(&safmin, &safmax); + ulp = dlamch_((char *)"PRECISION", (ftnlen)9); + smlnum = safmin * ((doublereal)(*n) / ulp); + i__1 = *nw, i__2 = *kbot - *ktop + 1; + jw = min(i__1, i__2); + kwtop = *kbot - jw + 1; + if (kwtop == *ktop) { + s = 0.; + } else { + s = h__[kwtop + (kwtop - 1) * h_dim1]; + } + if (*kbot == kwtop) { + sr[kwtop] = h__[kwtop + kwtop * h_dim1]; + si[kwtop] = 0.; + *ns = 1; + *nd = 0; + d__2 = smlnum, d__3 = ulp * (d__1 = h__[kwtop + kwtop * h_dim1], abs(d__1)); + if (abs(s) <= max(d__2, d__3)) { + *ns = 0; + *nd = 1; + if (kwtop > *ktop) { + h__[kwtop + (kwtop - 1) * h_dim1] = 0.; + } + } + work[1] = 1.; + return 0; + } + dlacpy_((char *)"U", &jw, &jw, &h__[kwtop + kwtop * h_dim1], ldh, &t[t_offset], ldt, (ftnlen)1); + i__1 = jw - 1; + i__2 = *ldh + 1; + i__3 = *ldt + 1; + dcopy_(&i__1, &h__[kwtop + 1 + kwtop * h_dim1], &i__2, &t[t_dim1 + 2], &i__3); + dlaset_((char *)"A", &jw, &jw, &c_b17, &c_b18, &v[v_offset], ldv, (ftnlen)1); + nmin = ilaenv_(&c__12, (char *)"DLAQR3", (char *)"SV", &jw, &c__1, &jw, lwork, (ftnlen)6, (ftnlen)2); + if (jw > nmin) { + dlaqr4_(&c_true, &c_true, &jw, &c__1, &jw, &t[t_offset], ldt, &sr[kwtop], &si[kwtop], &c__1, + &jw, &v[v_offset], ldv, &work[1], lwork, &infqr); + } else { + dlahqr_(&c_true, &c_true, &jw, &c__1, &jw, &t[t_offset], ldt, &sr[kwtop], &si[kwtop], &c__1, + &jw, &v[v_offset], ldv, &infqr); + } + i__1 = jw - 3; + for (j = 1; j <= i__1; ++j) { + t[j + 2 + j * t_dim1] = 0.; + t[j + 3 + j * t_dim1] = 0.; + } + if (jw > 2) { + t[jw + (jw - 2) * t_dim1] = 0.; + } + *ns = jw; + ilst = infqr + 1; +L20: + if (ilst <= *ns) { + if (*ns == 1) { + bulge = FALSE_; + } else { + bulge = t[*ns + (*ns - 1) * t_dim1] != 0.; + } + if (!bulge) { + foo = (d__1 = t[*ns + *ns * t_dim1], abs(d__1)); + if (foo == 0.) { + foo = abs(s); + } + d__2 = smlnum, d__3 = ulp * foo; + if ((d__1 = s * v[*ns * v_dim1 + 1], abs(d__1)) <= max(d__2, d__3)) { + --(*ns); + } else { + ifst = *ns; + dtrexc_((char *)"V", &jw, &t[t_offset], ldt, &v[v_offset], ldv, &ifst, &ilst, &work[1], + &info, (ftnlen)1); + ++ilst; + } + } else { + foo = (d__3 = t[*ns + *ns * t_dim1], abs(d__3)) + + sqrt((d__1 = t[*ns + (*ns - 1) * t_dim1], abs(d__1))) * + sqrt((d__2 = t[*ns - 1 + *ns * t_dim1], abs(d__2))); + if (foo == 0.) { + foo = abs(s); + } + d__3 = (d__1 = s * v[*ns * v_dim1 + 1], abs(d__1)), + d__4 = (d__2 = s * v[(*ns - 1) * v_dim1 + 1], abs(d__2)); + d__5 = smlnum, d__6 = ulp * foo; + if (max(d__3, d__4) <= max(d__5, d__6)) { + *ns += -2; + } else { + ifst = *ns; + dtrexc_((char *)"V", &jw, &t[t_offset], ldt, &v[v_offset], ldv, &ifst, &ilst, &work[1], + &info, (ftnlen)1); + ilst += 2; + } + } + goto L20; + } + if (*ns == 0) { + s = 0.; + } + if (*ns < jw) { + sorted = FALSE_; + i__ = *ns + 1; + L30: + if (sorted) { + goto L50; + } + sorted = TRUE_; + kend = i__ - 1; + i__ = infqr + 1; + if (i__ == *ns) { + k = i__ + 1; + } else if (t[i__ + 1 + i__ * t_dim1] == 0.) { + k = i__ + 1; + } else { + k = i__ + 2; + } + L40: + if (k <= kend) { + if (k == i__ + 1) { + evi = (d__1 = t[i__ + i__ * t_dim1], abs(d__1)); + } else { + evi = (d__3 = t[i__ + i__ * t_dim1], abs(d__3)) + + sqrt((d__1 = t[i__ + 1 + i__ * t_dim1], abs(d__1))) * + sqrt((d__2 = t[i__ + (i__ + 1) * t_dim1], abs(d__2))); + } + if (k == kend) { + evk = (d__1 = t[k + k * t_dim1], abs(d__1)); + } else if (t[k + 1 + k * t_dim1] == 0.) 
{ + evk = (d__1 = t[k + k * t_dim1], abs(d__1)); + } else { + evk = (d__3 = t[k + k * t_dim1], abs(d__3)) + + sqrt((d__1 = t[k + 1 + k * t_dim1], abs(d__1))) * + sqrt((d__2 = t[k + (k + 1) * t_dim1], abs(d__2))); + } + if (evi >= evk) { + i__ = k; + } else { + sorted = FALSE_; + ifst = i__; + ilst = k; + dtrexc_((char *)"V", &jw, &t[t_offset], ldt, &v[v_offset], ldv, &ifst, &ilst, &work[1], + &info, (ftnlen)1); + if (info == 0) { + i__ = ilst; + } else { + i__ = k; + } + } + if (i__ == kend) { + k = i__ + 1; + } else if (t[i__ + 1 + i__ * t_dim1] == 0.) { + k = i__ + 1; + } else { + k = i__ + 2; + } + goto L40; + } + goto L30; + L50:; + } + i__ = jw; +L60: + if (i__ >= infqr + 1) { + if (i__ == infqr + 1) { + sr[kwtop + i__ - 1] = t[i__ + i__ * t_dim1]; + si[kwtop + i__ - 1] = 0.; + --i__; + } else if (t[i__ + (i__ - 1) * t_dim1] == 0.) { + sr[kwtop + i__ - 1] = t[i__ + i__ * t_dim1]; + si[kwtop + i__ - 1] = 0.; + --i__; + } else { + aa = t[i__ - 1 + (i__ - 1) * t_dim1]; + cc = t[i__ + (i__ - 1) * t_dim1]; + bb = t[i__ - 1 + i__ * t_dim1]; + dd = t[i__ + i__ * t_dim1]; + dlanv2_(&aa, &bb, &cc, &dd, &sr[kwtop + i__ - 2], &si[kwtop + i__ - 2], + &sr[kwtop + i__ - 1], &si[kwtop + i__ - 1], &cs, &sn); + i__ += -2; + } + goto L60; + } + if (*ns < jw || s == 0.) { + if (*ns > 1 && s != 0.) { + dcopy_(ns, &v[v_offset], ldv, &work[1], &c__1); + beta = work[1]; + dlarfg_(ns, &beta, &work[2], &c__1, &tau); + work[1] = 1.; + i__1 = jw - 2; + i__2 = jw - 2; + dlaset_((char *)"L", &i__1, &i__2, &c_b17, &c_b17, &t[t_dim1 + 3], ldt, (ftnlen)1); + dlarf_((char *)"L", ns, &jw, &work[1], &c__1, &tau, &t[t_offset], ldt, &work[jw + 1], + (ftnlen)1); + dlarf_((char *)"R", ns, ns, &work[1], &c__1, &tau, &t[t_offset], ldt, &work[jw + 1], (ftnlen)1); + dlarf_((char *)"R", &jw, ns, &work[1], &c__1, &tau, &v[v_offset], ldv, &work[jw + 1], + (ftnlen)1); + i__1 = *lwork - jw; + dgehrd_(&jw, &c__1, ns, &t[t_offset], ldt, &work[1], &work[jw + 1], &i__1, &info); + } + if (kwtop > 1) { + h__[kwtop + (kwtop - 1) * h_dim1] = s * v[v_dim1 + 1]; + } + dlacpy_((char *)"U", &jw, &jw, &t[t_offset], ldt, &h__[kwtop + kwtop * h_dim1], ldh, (ftnlen)1); + i__1 = jw - 1; + i__2 = *ldt + 1; + i__3 = *ldh + 1; + dcopy_(&i__1, &t[t_dim1 + 2], &i__2, &h__[kwtop + 1 + kwtop * h_dim1], &i__3); + if (*ns > 1 && s != 0.) { + i__1 = *lwork - jw; + dormhr_((char *)"R", (char *)"N", &jw, ns, &c__1, ns, &t[t_offset], ldt, &work[1], &v[v_offset], ldv, + &work[jw + 1], &i__1, &info, (ftnlen)1, (ftnlen)1); + } + if (*wantt) { + ltop = 1; + } else { + ltop = *ktop; + } + i__1 = kwtop - 1; + i__2 = *nv; + for (krow = ltop; i__2 < 0 ? krow >= i__1 : krow <= i__1; krow += i__2) { + i__3 = *nv, i__4 = kwtop - krow; + kln = min(i__3, i__4); + dgemm_((char *)"N", (char *)"N", &kln, &jw, &jw, &c_b18, &h__[krow + kwtop * h_dim1], ldh, &v[v_offset], + ldv, &c_b17, &wv[wv_offset], ldwv, (ftnlen)1, (ftnlen)1); + dlacpy_((char *)"A", &kln, &jw, &wv[wv_offset], ldwv, &h__[krow + kwtop * h_dim1], ldh, + (ftnlen)1); + } + if (*wantt) { + i__2 = *n; + i__1 = *nh; + for (kcol = *kbot + 1; i__1 < 0 ? kcol >= i__2 : kcol <= i__2; kcol += i__1) { + i__3 = *nh, i__4 = *n - kcol + 1; + kln = min(i__3, i__4); + dgemm_((char *)"C", (char *)"N", &jw, &kln, &jw, &c_b18, &v[v_offset], ldv, + &h__[kwtop + kcol * h_dim1], ldh, &c_b17, &t[t_offset], ldt, (ftnlen)1, + (ftnlen)1); + dlacpy_((char *)"A", &jw, &kln, &t[t_offset], ldt, &h__[kwtop + kcol * h_dim1], ldh, + (ftnlen)1); + } + } + if (*wantz) { + i__1 = *ihiz; + i__2 = *nv; + for (krow = *iloz; i__2 < 0 ? 
krow >= i__1 : krow <= i__1; krow += i__2) { + i__3 = *nv, i__4 = *ihiz - krow + 1; + kln = min(i__3, i__4); + dgemm_((char *)"N", (char *)"N", &kln, &jw, &jw, &c_b18, &z__[krow + kwtop * z_dim1], ldz, + &v[v_offset], ldv, &c_b17, &wv[wv_offset], ldwv, (ftnlen)1, (ftnlen)1); + dlacpy_((char *)"A", &kln, &jw, &wv[wv_offset], ldwv, &z__[krow + kwtop * z_dim1], ldz, + (ftnlen)1); + } + } + } + *nd = jw - *ns; + *ns -= infqr; + work[1] = (doublereal)lwkopt; + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dlaqr4.cpp b/lib/linalg/dlaqr4.cpp new file mode 100644 index 0000000000..e32193ee2d --- /dev/null +++ b/lib/linalg/dlaqr4.cpp @@ -0,0 +1,298 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static integer c__13 = 13; +static integer c__15 = 15; +static integer c_n1 = -1; +static integer c__12 = 12; +static integer c__14 = 14; +static integer c__16 = 16; +static logical c_false = FALSE_; +static integer c__1 = 1; +static integer c__3 = 3; +int dlaqr4_(logical *wantt, logical *wantz, integer *n, integer *ilo, integer *ihi, doublereal *h__, + integer *ldh, doublereal *wr, doublereal *wi, integer *iloz, integer *ihiz, + doublereal *z__, integer *ldz, doublereal *work, integer *lwork, integer *info) +{ + integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4, i__5; + doublereal d__1, d__2, d__3, d__4; + integer i__, k; + doublereal aa, bb, cc, dd; + integer ld; + doublereal cs; + integer nh, it, ks, kt; + doublereal sn; + integer ku, kv, ls, ns; + doublereal ss; + integer nw, inf, kdu, nho, nve, kwh, nsr, nwr, kwv, ndec, ndfl, kbot, nmin; + doublereal swap; + integer ktop; + doublereal zdum[1]; + integer kacc22, itmax, nsmax, nwmax, kwtop; + extern int dlaqr2_(logical *, logical *, integer *, integer *, integer *, integer *, + doublereal *, integer *, integer *, integer *, doublereal *, integer *, + integer *, integer *, doublereal *, doublereal *, doublereal *, integer *, + integer *, doublereal *, integer *, integer *, doublereal *, integer *, + doublereal *, integer *), + dlanv2_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *), + dlaqr5_(logical *, logical *, integer *, integer *, integer *, integer *, integer *, + doublereal *, doublereal *, doublereal *, integer *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, + integer *, doublereal *, integer *, integer *, doublereal *, integer *); + integer nibble; + extern int dlahqr_(logical *, logical *, integer *, integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *), + dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, + ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, + ftnlen, ftnlen); + char jbcmpz[2]; + integer nwupbd; + logical sorted; + integer lwkopt; + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + --wr; + --wi; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --work; + *info = 0; + if (*n == 0) { + work[1] = 1.; + return 0; + } + if (*n <= 11) { + lwkopt = 1; + if (*lwork != -1) { + dlahqr_(wantt, wantz, n, ilo, ihi, &h__[h_offset], ldh, &wr[1], &wi[1], iloz, ihiz, + &z__[z_offset], ldz, info); + } + } else { + *info = 0; + if (*wantt) { + *(unsigned char *)jbcmpz = 'S'; + } else { + *(unsigned char *)jbcmpz = 'E'; + } + if (*wantz) { + 
*(unsigned char *)&jbcmpz[1] = 'V'; + } else { + *(unsigned char *)&jbcmpz[1] = 'N'; + } + nwr = ilaenv_(&c__13, (char *)"DLAQR4", jbcmpz, n, ilo, ihi, lwork, (ftnlen)6, (ftnlen)2); + nwr = max(2, nwr); + i__1 = *ihi - *ilo + 1, i__2 = (*n - 1) / 3, i__1 = min(i__1, i__2); + nwr = min(i__1, nwr); + nsr = ilaenv_(&c__15, (char *)"DLAQR4", jbcmpz, n, ilo, ihi, lwork, (ftnlen)6, (ftnlen)2); + i__1 = nsr, i__2 = (*n + 6) / 9, i__1 = min(i__1, i__2), i__2 = *ihi - *ilo; + nsr = min(i__1, i__2); + i__1 = 2, i__2 = nsr - nsr % 2; + nsr = max(i__1, i__2); + i__1 = nwr + 1; + dlaqr2_(wantt, wantz, n, ilo, ihi, &i__1, &h__[h_offset], ldh, iloz, ihiz, &z__[z_offset], + ldz, &ls, &ld, &wr[1], &wi[1], &h__[h_offset], ldh, n, &h__[h_offset], ldh, n, + &h__[h_offset], ldh, &work[1], &c_n1); + i__1 = nsr * 3 / 2, i__2 = (integer)work[1]; + lwkopt = max(i__1, i__2); + if (*lwork == -1) { + work[1] = (doublereal)lwkopt; + return 0; + } + nmin = ilaenv_(&c__12, (char *)"DLAQR4", jbcmpz, n, ilo, ihi, lwork, (ftnlen)6, (ftnlen)2); + nmin = max(11, nmin); + nibble = ilaenv_(&c__14, (char *)"DLAQR4", jbcmpz, n, ilo, ihi, lwork, (ftnlen)6, (ftnlen)2); + nibble = max(0, nibble); + kacc22 = ilaenv_(&c__16, (char *)"DLAQR4", jbcmpz, n, ilo, ihi, lwork, (ftnlen)6, (ftnlen)2); + kacc22 = max(0, kacc22); + kacc22 = min(2, kacc22); + i__1 = (*n - 1) / 3, i__2 = *lwork / 2; + nwmax = min(i__1, i__2); + nw = nwmax; + i__1 = (*n + 6) / 9, i__2 = (*lwork << 1) / 3; + nsmax = min(i__1, i__2); + nsmax -= nsmax % 2; + ndfl = 1; + i__1 = 10, i__2 = *ihi - *ilo + 1; + itmax = max(i__1, i__2) * 30; + kbot = *ihi; + i__1 = itmax; + for (it = 1; it <= i__1; ++it) { + if (kbot < *ilo) { + goto L90; + } + i__2 = *ilo + 1; + for (k = kbot; k >= i__2; --k) { + if (h__[k + (k - 1) * h_dim1] == 0.) 
{ + goto L20; + } + } + k = *ilo; + L20: + ktop = k; + nh = kbot - ktop + 1; + nwupbd = min(nh, nwmax); + if (ndfl < 5) { + nw = min(nwupbd, nwr); + } else { + i__2 = nwupbd, i__3 = nw << 1; + nw = min(i__2, i__3); + } + if (nw < nwmax) { + if (nw >= nh - 1) { + nw = nh; + } else { + kwtop = kbot - nw + 1; + if ((d__1 = h__[kwtop + (kwtop - 1) * h_dim1], abs(d__1)) > + (d__2 = h__[kwtop - 1 + (kwtop - 2) * h_dim1], abs(d__2))) { + ++nw; + } + } + } + if (ndfl < 5) { + ndec = -1; + } else if (ndec >= 0 || nw >= nwupbd) { + ++ndec; + if (nw - ndec < 2) { + ndec = 0; + } + nw -= ndec; + } + kv = *n - nw + 1; + kt = nw + 1; + nho = *n - nw - 1 - kt + 1; + kwv = nw + 2; + nve = *n - nw - kwv + 1; + dlaqr2_(wantt, wantz, n, &ktop, &kbot, &nw, &h__[h_offset], ldh, iloz, ihiz, + &z__[z_offset], ldz, &ls, &ld, &wr[1], &wi[1], &h__[kv + h_dim1], ldh, &nho, + &h__[kv + kt * h_dim1], ldh, &nve, &h__[kwv + h_dim1], ldh, &work[1], lwork); + kbot -= ld; + ks = kbot - ls + 1; + if (ld == 0 || ld * 100 <= nw * nibble && kbot - ktop + 1 > min(nmin, nwmax)) { + i__4 = 2, i__5 = kbot - ktop; + i__2 = min(nsmax, nsr), i__3 = max(i__4, i__5); + ns = min(i__2, i__3); + ns -= ns % 2; + if (ndfl % 6 == 0) { + ks = kbot - ns + 1; + i__3 = ks + 1, i__4 = ktop + 2; + i__2 = max(i__3, i__4); + for (i__ = kbot; i__ >= i__2; i__ += -2) { + ss = (d__1 = h__[i__ + (i__ - 1) * h_dim1], abs(d__1)) + + (d__2 = h__[i__ - 1 + (i__ - 2) * h_dim1], abs(d__2)); + aa = ss * .75 + h__[i__ + i__ * h_dim1]; + bb = ss; + cc = ss * -.4375; + dd = aa; + dlanv2_(&aa, &bb, &cc, &dd, &wr[i__ - 1], &wi[i__ - 1], &wr[i__], &wi[i__], + &cs, &sn); + } + if (ks == ktop) { + wr[ks + 1] = h__[ks + 1 + (ks + 1) * h_dim1]; + wi[ks + 1] = 0.; + wr[ks] = wr[ks + 1]; + wi[ks] = wi[ks + 1]; + } + } else { + if (kbot - ks + 1 <= ns / 2) { + ks = kbot - ns + 1; + kt = *n - ns + 1; + dlacpy_((char *)"A", &ns, &ns, &h__[ks + ks * h_dim1], ldh, &h__[kt + h_dim1], ldh, + (ftnlen)1); + dlahqr_(&c_false, &c_false, &ns, &c__1, &ns, &h__[kt + h_dim1], ldh, + &wr[ks], &wi[ks], &c__1, &c__1, zdum, &c__1, &inf); + ks += inf; + if (ks >= kbot) { + aa = h__[kbot - 1 + (kbot - 1) * h_dim1]; + cc = h__[kbot + (kbot - 1) * h_dim1]; + bb = h__[kbot - 1 + kbot * h_dim1]; + dd = h__[kbot + kbot * h_dim1]; + dlanv2_(&aa, &bb, &cc, &dd, &wr[kbot - 1], &wi[kbot - 1], &wr[kbot], + &wi[kbot], &cs, &sn); + ks = kbot - 1; + } + } + if (kbot - ks + 1 > ns) { + sorted = FALSE_; + i__2 = ks + 1; + for (k = kbot; k >= i__2; --k) { + if (sorted) { + goto L60; + } + sorted = TRUE_; + i__3 = k - 1; + for (i__ = ks; i__ <= i__3; ++i__) { + if ((d__1 = wr[i__], abs(d__1)) + (d__2 = wi[i__], abs(d__2)) < + (d__3 = wr[i__ + 1], abs(d__3)) + + (d__4 = wi[i__ + 1], abs(d__4))) { + sorted = FALSE_; + swap = wr[i__]; + wr[i__] = wr[i__ + 1]; + wr[i__ + 1] = swap; + swap = wi[i__]; + wi[i__] = wi[i__ + 1]; + wi[i__ + 1] = swap; + } + } + } + L60:; + } + i__2 = ks + 2; + for (i__ = kbot; i__ >= i__2; i__ += -2) { + if (wi[i__] != -wi[i__ - 1]) { + swap = wr[i__]; + wr[i__] = wr[i__ - 1]; + wr[i__ - 1] = wr[i__ - 2]; + wr[i__ - 2] = swap; + swap = wi[i__]; + wi[i__] = wi[i__ - 1]; + wi[i__ - 1] = wi[i__ - 2]; + wi[i__ - 2] = swap; + } + } + } + if (kbot - ks + 1 == 2) { + if (wi[kbot] == 0.) 
{ + if ((d__1 = wr[kbot] - h__[kbot + kbot * h_dim1], abs(d__1)) < + (d__2 = wr[kbot - 1] - h__[kbot + kbot * h_dim1], abs(d__2))) { + wr[kbot - 1] = wr[kbot]; + } else { + wr[kbot] = wr[kbot - 1]; + } + } + } + i__2 = ns, i__3 = kbot - ks + 1; + ns = min(i__2, i__3); + ns -= ns % 2; + ks = kbot - ns + 1; + kdu = ns * 3 - 3; + ku = *n - kdu + 1; + kwh = kdu + 1; + nho = *n - kdu - 3 - (kdu + 1) + 1; + kwv = kdu + 4; + nve = *n - kdu - kwv + 1; + dlaqr5_(wantt, wantz, &kacc22, n, &ktop, &kbot, &ns, &wr[ks], &wi[ks], + &h__[h_offset], ldh, iloz, ihiz, &z__[z_offset], ldz, &work[1], &c__3, + &h__[ku + h_dim1], ldh, &nve, &h__[kwv + h_dim1], ldh, &nho, + &h__[ku + kwh * h_dim1], ldh); + } + if (ld > 0) { + ndfl = 1; + } else { + ++ndfl; + } + } + *info = kbot; + L90:; + } + work[1] = (doublereal)lwkopt; + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dlaqr5.cpp b/lib/linalg/dlaqr5.cpp new file mode 100644 index 0000000000..1cd0ac9d88 --- /dev/null +++ b/lib/linalg/dlaqr5.cpp @@ -0,0 +1,521 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static doublereal c_b7 = 0.; +static doublereal c_b8 = 1.; +static integer c__3 = 3; +static integer c__1 = 1; +static integer c__2 = 2; +int dlaqr5_(logical *wantt, logical *wantz, integer *kacc22, integer *n, integer *ktop, + integer *kbot, integer *nshfts, doublereal *sr, doublereal *si, doublereal *h__, + integer *ldh, integer *iloz, integer *ihiz, doublereal *z__, integer *ldz, + doublereal *v, integer *ldv, doublereal *u, integer *ldu, integer *nv, doublereal *wv, + integer *ldwv, integer *nh, doublereal *wh, integer *ldwh) +{ + integer h_dim1, h_offset, u_dim1, u_offset, v_dim1, v_offset, wh_dim1, wh_offset, wv_dim1, + wv_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7; + doublereal d__1, d__2, d__3, d__4, d__5; + integer i__, j, k, m, i2, j2, i4, j4, k1; + doublereal h11, h12, h21, h22; + integer m22, ns, nu; + doublereal vt[3], scl; + integer kdu, kms; + doublereal ulp; + integer knz, kzs; + doublereal tst1, tst2, beta; + logical blk22, bmp22; + integer mend, jcol, jlen, jbot, mbot; + doublereal swap; + integer jtop, jrow, mtop; + doublereal alpha; + logical accum; + extern int dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, + ftnlen, ftnlen); + integer ndcol, incol, krcol, nbmps; + extern int dtrmm_(char *, char *, char *, char *, integer *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, ftnlen, ftnlen, ftnlen, + ftnlen), + dlaqr1_(integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *), + dlabad_(doublereal *, doublereal *); + extern doublereal dlamch_(char *, ftnlen); + extern int dlarfg_(integer *, doublereal *, doublereal *, integer *, doublereal *), + dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, + ftnlen); + doublereal safmin; + extern int dlaset_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, + integer *, ftnlen); + doublereal safmax, refsum; + integer mstart; + doublereal smlnum; + --sr; + --si; + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + wv_dim1 = *ldwv; + wv_offset = 1 + wv_dim1; + wv -= wv_offset; + wh_dim1 = *ldwh; + wh_offset = 
1 + wh_dim1; + wh -= wh_offset; + if (*nshfts < 2) { + return 0; + } + if (*ktop >= *kbot) { + return 0; + } + i__1 = *nshfts - 2; + for (i__ = 1; i__ <= i__1; i__ += 2) { + if (si[i__] != -si[i__ + 1]) { + swap = sr[i__]; + sr[i__] = sr[i__ + 1]; + sr[i__ + 1] = sr[i__ + 2]; + sr[i__ + 2] = swap; + swap = si[i__]; + si[i__] = si[i__ + 1]; + si[i__ + 1] = si[i__ + 2]; + si[i__ + 2] = swap; + } + } + ns = *nshfts - *nshfts % 2; + safmin = dlamch_((char *)"SAFE MINIMUM", (ftnlen)12); + safmax = 1. / safmin; + dlabad_(&safmin, &safmax); + ulp = dlamch_((char *)"PRECISION", (ftnlen)9); + smlnum = safmin * ((doublereal)(*n) / ulp); + accum = *kacc22 == 1 || *kacc22 == 2; + blk22 = ns > 2 && *kacc22 == 2; + if (*ktop + 2 <= *kbot) { + h__[*ktop + 2 + *ktop * h_dim1] = 0.; + } + nbmps = ns / 2; + kdu = nbmps * 6 - 3; + i__1 = *kbot - 2; + i__2 = nbmps * 3 - 2; + for (incol = (1 - nbmps) * 3 + *ktop - 1; i__2 < 0 ? incol >= i__1 : incol <= i__1; + incol += i__2) { + ndcol = incol + kdu; + if (accum) { + dlaset_((char *)"ALL", &kdu, &kdu, &c_b7, &c_b8, &u[u_offset], ldu, (ftnlen)3); + } + i__4 = incol + nbmps * 3 - 3, i__5 = *kbot - 2; + i__3 = min(i__4, i__5); + for (krcol = incol; krcol <= i__3; ++krcol) { + i__4 = 1, i__5 = (*ktop - 1 - krcol + 2) / 3 + 1; + mtop = max(i__4, i__5); + i__4 = nbmps, i__5 = (*kbot - krcol) / 3; + mbot = min(i__4, i__5); + m22 = mbot + 1; + bmp22 = mbot < nbmps && krcol + (m22 - 1) * 3 == *kbot - 2; + i__4 = mbot; + for (m = mtop; m <= i__4; ++m) { + k = krcol + (m - 1) * 3; + if (k == *ktop - 1) { + dlaqr1_(&c__3, &h__[*ktop + *ktop * h_dim1], ldh, &sr[(m << 1) - 1], + &si[(m << 1) - 1], &sr[m * 2], &si[m * 2], &v[m * v_dim1 + 1]); + alpha = v[m * v_dim1 + 1]; + dlarfg_(&c__3, &alpha, &v[m * v_dim1 + 2], &c__1, &v[m * v_dim1 + 1]); + } else { + beta = h__[k + 1 + k * h_dim1]; + v[m * v_dim1 + 2] = h__[k + 2 + k * h_dim1]; + v[m * v_dim1 + 3] = h__[k + 3 + k * h_dim1]; + dlarfg_(&c__3, &beta, &v[m * v_dim1 + 2], &c__1, &v[m * v_dim1 + 1]); + if (h__[k + 3 + k * h_dim1] != 0. || h__[k + 3 + (k + 1) * h_dim1] != 0. || + h__[k + 3 + (k + 2) * h_dim1] == 0.) 
{ + h__[k + 1 + k * h_dim1] = beta; + h__[k + 2 + k * h_dim1] = 0.; + h__[k + 3 + k * h_dim1] = 0.; + } else { + dlaqr1_(&c__3, &h__[k + 1 + (k + 1) * h_dim1], ldh, &sr[(m << 1) - 1], + &si[(m << 1) - 1], &sr[m * 2], &si[m * 2], vt); + alpha = vt[0]; + dlarfg_(&c__3, &alpha, &vt[1], &c__1, vt); + refsum = + vt[0] * (h__[k + 1 + k * h_dim1] + vt[1] * h__[k + 2 + k * h_dim1]); + if ((d__1 = h__[k + 2 + k * h_dim1] - refsum * vt[1], abs(d__1)) + + (d__2 = refsum * vt[2], abs(d__2)) > + ulp * ((d__3 = h__[k + k * h_dim1], abs(d__3)) + + (d__4 = h__[k + 1 + (k + 1) * h_dim1], abs(d__4)) + + (d__5 = h__[k + 2 + (k + 2) * h_dim1], abs(d__5)))) { + h__[k + 1 + k * h_dim1] = beta; + h__[k + 2 + k * h_dim1] = 0.; + h__[k + 3 + k * h_dim1] = 0.; + } else { + h__[k + 1 + k * h_dim1] -= refsum; + h__[k + 2 + k * h_dim1] = 0.; + h__[k + 3 + k * h_dim1] = 0.; + v[m * v_dim1 + 1] = vt[0]; + v[m * v_dim1 + 2] = vt[1]; + v[m * v_dim1 + 3] = vt[2]; + } + } + } + } + k = krcol + (m22 - 1) * 3; + if (bmp22) { + if (k == *ktop - 1) { + dlaqr1_(&c__2, &h__[k + 1 + (k + 1) * h_dim1], ldh, &sr[(m22 << 1) - 1], + &si[(m22 << 1) - 1], &sr[m22 * 2], &si[m22 * 2], &v[m22 * v_dim1 + 1]); + beta = v[m22 * v_dim1 + 1]; + dlarfg_(&c__2, &beta, &v[m22 * v_dim1 + 2], &c__1, &v[m22 * v_dim1 + 1]); + } else { + beta = h__[k + 1 + k * h_dim1]; + v[m22 * v_dim1 + 2] = h__[k + 2 + k * h_dim1]; + dlarfg_(&c__2, &beta, &v[m22 * v_dim1 + 2], &c__1, &v[m22 * v_dim1 + 1]); + h__[k + 1 + k * h_dim1] = beta; + h__[k + 2 + k * h_dim1] = 0.; + } + } + if (accum) { + jbot = min(ndcol, *kbot); + } else if (*wantt) { + jbot = *n; + } else { + jbot = *kbot; + } + i__4 = jbot; + for (j = max(*ktop, krcol); j <= i__4; ++j) { + i__5 = mbot, i__6 = (j - krcol + 2) / 3; + mend = min(i__5, i__6); + i__5 = mend; + for (m = mtop; m <= i__5; ++m) { + k = krcol + (m - 1) * 3; + refsum = v[m * v_dim1 + 1] * (h__[k + 1 + j * h_dim1] + + v[m * v_dim1 + 2] * h__[k + 2 + j * h_dim1] + + v[m * v_dim1 + 3] * h__[k + 3 + j * h_dim1]); + h__[k + 1 + j * h_dim1] -= refsum; + h__[k + 2 + j * h_dim1] -= refsum * v[m * v_dim1 + 2]; + h__[k + 3 + j * h_dim1] -= refsum * v[m * v_dim1 + 3]; + } + } + if (bmp22) { + k = krcol + (m22 - 1) * 3; + i__4 = k + 1; + i__5 = jbot; + for (j = max(i__4, *ktop); j <= i__5; ++j) { + refsum = v[m22 * v_dim1 + 1] * (h__[k + 1 + j * h_dim1] + + v[m22 * v_dim1 + 2] * h__[k + 2 + j * h_dim1]); + h__[k + 1 + j * h_dim1] -= refsum; + h__[k + 2 + j * h_dim1] -= refsum * v[m22 * v_dim1 + 2]; + } + } + if (accum) { + jtop = max(*ktop, incol); + } else if (*wantt) { + jtop = 1; + } else { + jtop = *ktop; + } + i__5 = mbot; + for (m = mtop; m <= i__5; ++m) { + if (v[m * v_dim1 + 1] != 0.) 
{ + k = krcol + (m - 1) * 3; + i__6 = *kbot, i__7 = k + 3; + i__4 = min(i__6, i__7); + for (j = jtop; j <= i__4; ++j) { + refsum = + v[m * v_dim1 + 1] * (h__[j + (k + 1) * h_dim1] + + v[m * v_dim1 + 2] * h__[j + (k + 2) * h_dim1] + + v[m * v_dim1 + 3] * h__[j + (k + 3) * h_dim1]); + h__[j + (k + 1) * h_dim1] -= refsum; + h__[j + (k + 2) * h_dim1] -= refsum * v[m * v_dim1 + 2]; + h__[j + (k + 3) * h_dim1] -= refsum * v[m * v_dim1 + 3]; + } + if (accum) { + kms = k - incol; + i__4 = 1, i__6 = *ktop - incol; + i__7 = kdu; + for (j = max(i__4, i__6); j <= i__7; ++j) { + refsum = + v[m * v_dim1 + 1] * (u[j + (kms + 1) * u_dim1] + + v[m * v_dim1 + 2] * u[j + (kms + 2) * u_dim1] + + v[m * v_dim1 + 3] * u[j + (kms + 3) * u_dim1]); + u[j + (kms + 1) * u_dim1] -= refsum; + u[j + (kms + 2) * u_dim1] -= refsum * v[m * v_dim1 + 2]; + u[j + (kms + 3) * u_dim1] -= refsum * v[m * v_dim1 + 3]; + } + } else if (*wantz) { + i__7 = *ihiz; + for (j = *iloz; j <= i__7; ++j) { + refsum = + v[m * v_dim1 + 1] * (z__[j + (k + 1) * z_dim1] + + v[m * v_dim1 + 2] * z__[j + (k + 2) * z_dim1] + + v[m * v_dim1 + 3] * z__[j + (k + 3) * z_dim1]); + z__[j + (k + 1) * z_dim1] -= refsum; + z__[j + (k + 2) * z_dim1] -= refsum * v[m * v_dim1 + 2]; + z__[j + (k + 3) * z_dim1] -= refsum * v[m * v_dim1 + 3]; + } + } + } + } + k = krcol + (m22 - 1) * 3; + if (bmp22) { + if (v[m22 * v_dim1 + 1] != 0.) { + i__7 = *kbot, i__4 = k + 3; + i__5 = min(i__7, i__4); + for (j = jtop; j <= i__5; ++j) { + refsum = + v[m22 * v_dim1 + 1] * (h__[j + (k + 1) * h_dim1] + + v[m22 * v_dim1 + 2] * h__[j + (k + 2) * h_dim1]); + h__[j + (k + 1) * h_dim1] -= refsum; + h__[j + (k + 2) * h_dim1] -= refsum * v[m22 * v_dim1 + 2]; + } + if (accum) { + kms = k - incol; + i__5 = 1, i__7 = *ktop - incol; + i__4 = kdu; + for (j = max(i__5, i__7); j <= i__4; ++j) { + refsum = v[m22 * v_dim1 + 1] * + (u[j + (kms + 1) * u_dim1] + + v[m22 * v_dim1 + 2] * u[j + (kms + 2) * u_dim1]); + u[j + (kms + 1) * u_dim1] -= refsum; + u[j + (kms + 2) * u_dim1] -= refsum * v[m22 * v_dim1 + 2]; + } + } else if (*wantz) { + i__4 = *ihiz; + for (j = *iloz; j <= i__4; ++j) { + refsum = v[m22 * v_dim1 + 1] * + (z__[j + (k + 1) * z_dim1] + + v[m22 * v_dim1 + 2] * z__[j + (k + 2) * z_dim1]); + z__[j + (k + 1) * z_dim1] -= refsum; + z__[j + (k + 2) * z_dim1] -= refsum * v[m22 * v_dim1 + 2]; + } + } + } + } + mstart = mtop; + if (krcol + (mstart - 1) * 3 < *ktop) { + ++mstart; + } + mend = mbot; + if (bmp22) { + ++mend; + } + if (krcol == *kbot - 2) { + ++mend; + } + i__4 = mend; + for (m = mstart; m <= i__4; ++m) { + i__5 = *kbot - 1, i__7 = krcol + (m - 1) * 3; + k = min(i__5, i__7); + if (h__[k + 1 + k * h_dim1] != 0.) { + tst1 = (d__1 = h__[k + k * h_dim1], abs(d__1)) + + (d__2 = h__[k + 1 + (k + 1) * h_dim1], abs(d__2)); + if (tst1 == 0.) 
{ + if (k >= *ktop + 1) { + tst1 += (d__1 = h__[k + (k - 1) * h_dim1], abs(d__1)); + } + if (k >= *ktop + 2) { + tst1 += (d__1 = h__[k + (k - 2) * h_dim1], abs(d__1)); + } + if (k >= *ktop + 3) { + tst1 += (d__1 = h__[k + (k - 3) * h_dim1], abs(d__1)); + } + if (k <= *kbot - 2) { + tst1 += (d__1 = h__[k + 2 + (k + 1) * h_dim1], abs(d__1)); + } + if (k <= *kbot - 3) { + tst1 += (d__1 = h__[k + 3 + (k + 1) * h_dim1], abs(d__1)); + } + if (k <= *kbot - 4) { + tst1 += (d__1 = h__[k + 4 + (k + 1) * h_dim1], abs(d__1)); + } + } + d__2 = smlnum, d__3 = ulp * tst1; + if ((d__1 = h__[k + 1 + k * h_dim1], abs(d__1)) <= max(d__2, d__3)) { + d__3 = (d__1 = h__[k + 1 + k * h_dim1], abs(d__1)), + d__4 = (d__2 = h__[k + (k + 1) * h_dim1], abs(d__2)); + h12 = max(d__3, d__4); + d__3 = (d__1 = h__[k + 1 + k * h_dim1], abs(d__1)), + d__4 = (d__2 = h__[k + (k + 1) * h_dim1], abs(d__2)); + h21 = min(d__3, d__4); + d__3 = (d__1 = h__[k + 1 + (k + 1) * h_dim1], abs(d__1)), + d__4 = + (d__2 = h__[k + k * h_dim1] - h__[k + 1 + (k + 1) * h_dim1], abs(d__2)); + h11 = max(d__3, d__4); + d__3 = (d__1 = h__[k + 1 + (k + 1) * h_dim1], abs(d__1)), + d__4 = + (d__2 = h__[k + k * h_dim1] - h__[k + 1 + (k + 1) * h_dim1], abs(d__2)); + h22 = min(d__3, d__4); + scl = h11 + h12; + tst2 = h22 * (h11 / scl); + d__1 = smlnum, d__2 = ulp * tst2; + if (tst2 == 0. || h21 * (h12 / scl) <= max(d__1, d__2)) { + h__[k + 1 + k * h_dim1] = 0.; + } + } + } + } + i__4 = nbmps, i__5 = (*kbot - krcol - 1) / 3; + mend = min(i__4, i__5); + i__4 = mend; + for (m = mtop; m <= i__4; ++m) { + k = krcol + (m - 1) * 3; + refsum = v[m * v_dim1 + 1] * v[m * v_dim1 + 3] * h__[k + 4 + (k + 3) * h_dim1]; + h__[k + 4 + (k + 1) * h_dim1] = -refsum; + h__[k + 4 + (k + 2) * h_dim1] = -refsum * v[m * v_dim1 + 2]; + h__[k + 4 + (k + 3) * h_dim1] -= refsum * v[m * v_dim1 + 3]; + } + } + if (accum) { + if (*wantt) { + jtop = 1; + jbot = *n; + } else { + jtop = *ktop; + jbot = *kbot; + } + if (!blk22 || incol < *ktop || ndcol > *kbot || ns <= 2) { + i__3 = 1, i__4 = *ktop - incol; + k1 = max(i__3, i__4); + i__3 = 0, i__4 = ndcol - *kbot; + nu = kdu - max(i__3, i__4) - k1 + 1; + i__3 = jbot; + i__4 = *nh; + for (jcol = min(ndcol, *kbot) + 1; i__4 < 0 ? jcol >= i__3 : jcol <= i__3; + jcol += i__4) { + i__5 = *nh, i__7 = jbot - jcol + 1; + jlen = min(i__5, i__7); + dgemm_((char *)"C", (char *)"N", &nu, &jlen, &nu, &c_b8, &u[k1 + k1 * u_dim1], ldu, + &h__[incol + k1 + jcol * h_dim1], ldh, &c_b7, &wh[wh_offset], ldwh, + (ftnlen)1, (ftnlen)1); + dlacpy_((char *)"ALL", &nu, &jlen, &wh[wh_offset], ldwh, + &h__[incol + k1 + jcol * h_dim1], ldh, (ftnlen)3); + } + i__4 = max(*ktop, incol) - 1; + i__3 = *nv; + for (jrow = jtop; i__3 < 0 ? jrow >= i__4 : jrow <= i__4; jrow += i__3) { + i__5 = *nv, i__7 = max(*ktop, incol) - jrow; + jlen = min(i__5, i__7); + dgemm_((char *)"N", (char *)"N", &jlen, &nu, &nu, &c_b8, &h__[jrow + (incol + k1) * h_dim1], + ldh, &u[k1 + k1 * u_dim1], ldu, &c_b7, &wv[wv_offset], ldwv, (ftnlen)1, + (ftnlen)1); + dlacpy_((char *)"ALL", &jlen, &nu, &wv[wv_offset], ldwv, + &h__[jrow + (incol + k1) * h_dim1], ldh, (ftnlen)3); + } + if (*wantz) { + i__3 = *ihiz; + i__4 = *nv; + for (jrow = *iloz; i__4 < 0 ? 
jrow >= i__3 : jrow <= i__3; jrow += i__4) { + i__5 = *nv, i__7 = *ihiz - jrow + 1; + jlen = min(i__5, i__7); + dgemm_((char *)"N", (char *)"N", &jlen, &nu, &nu, &c_b8, &z__[jrow + (incol + k1) * z_dim1], + ldz, &u[k1 + k1 * u_dim1], ldu, &c_b7, &wv[wv_offset], ldwv, + (ftnlen)1, (ftnlen)1); + dlacpy_((char *)"ALL", &jlen, &nu, &wv[wv_offset], ldwv, + &z__[jrow + (incol + k1) * z_dim1], ldz, (ftnlen)3); + } + } + } else { + i2 = (kdu + 1) / 2; + i4 = kdu; + j2 = i4 - i2; + j4 = kdu; + kzs = j4 - j2 - (ns + 1); + knz = ns + 1; + i__4 = jbot; + i__3 = *nh; + for (jcol = min(ndcol, *kbot) + 1; i__3 < 0 ? jcol >= i__4 : jcol <= i__4; + jcol += i__3) { + i__5 = *nh, i__7 = jbot - jcol + 1; + jlen = min(i__5, i__7); + dlacpy_((char *)"ALL", &knz, &jlen, &h__[incol + 1 + j2 + jcol * h_dim1], ldh, + &wh[kzs + 1 + wh_dim1], ldwh, (ftnlen)3); + dlaset_((char *)"ALL", &kzs, &jlen, &c_b7, &c_b7, &wh[wh_offset], ldwh, (ftnlen)3); + dtrmm_((char *)"L", (char *)"U", (char *)"C", (char *)"N", &knz, &jlen, &c_b8, &u[j2 + 1 + (kzs + 1) * u_dim1], + ldu, &wh[kzs + 1 + wh_dim1], ldwh, (ftnlen)1, (ftnlen)1, (ftnlen)1, + (ftnlen)1); + dgemm_((char *)"C", (char *)"N", &i2, &jlen, &j2, &c_b8, &u[u_offset], ldu, + &h__[incol + 1 + jcol * h_dim1], ldh, &c_b8, &wh[wh_offset], ldwh, + (ftnlen)1, (ftnlen)1); + dlacpy_((char *)"ALL", &j2, &jlen, &h__[incol + 1 + jcol * h_dim1], ldh, + &wh[i2 + 1 + wh_dim1], ldwh, (ftnlen)3); + dtrmm_((char *)"L", (char *)"L", (char *)"C", (char *)"N", &j2, &jlen, &c_b8, &u[(i2 + 1) * u_dim1 + 1], ldu, + &wh[i2 + 1 + wh_dim1], ldwh, (ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); + i__5 = i4 - i2; + i__7 = j4 - j2; + dgemm_((char *)"C", (char *)"N", &i__5, &jlen, &i__7, &c_b8, &u[j2 + 1 + (i2 + 1) * u_dim1], + ldu, &h__[incol + 1 + j2 + jcol * h_dim1], ldh, &c_b8, + &wh[i2 + 1 + wh_dim1], ldwh, (ftnlen)1, (ftnlen)1); + dlacpy_((char *)"ALL", &kdu, &jlen, &wh[wh_offset], ldwh, + &h__[incol + 1 + jcol * h_dim1], ldh, (ftnlen)3); + } + i__3 = max(incol, *ktop) - 1; + i__4 = *nv; + for (jrow = jtop; i__4 < 0 ? jrow >= i__3 : jrow <= i__3; jrow += i__4) { + i__5 = *nv, i__7 = max(incol, *ktop) - jrow; + jlen = min(i__5, i__7); + dlacpy_((char *)"ALL", &jlen, &knz, &h__[jrow + (incol + 1 + j2) * h_dim1], ldh, + &wv[(kzs + 1) * wv_dim1 + 1], ldwv, (ftnlen)3); + dlaset_((char *)"ALL", &jlen, &kzs, &c_b7, &c_b7, &wv[wv_offset], ldwv, (ftnlen)3); + dtrmm_((char *)"R", (char *)"U", (char *)"N", (char *)"N", &jlen, &knz, &c_b8, &u[j2 + 1 + (kzs + 1) * u_dim1], + ldu, &wv[(kzs + 1) * wv_dim1 + 1], ldwv, (ftnlen)1, (ftnlen)1, (ftnlen)1, + (ftnlen)1); + dgemm_((char *)"N", (char *)"N", &jlen, &i2, &j2, &c_b8, &h__[jrow + (incol + 1) * h_dim1], ldh, + &u[u_offset], ldu, &c_b8, &wv[wv_offset], ldwv, (ftnlen)1, (ftnlen)1); + dlacpy_((char *)"ALL", &jlen, &j2, &h__[jrow + (incol + 1) * h_dim1], ldh, + &wv[(i2 + 1) * wv_dim1 + 1], ldwv, (ftnlen)3); + i__5 = i4 - i2; + dtrmm_((char *)"R", (char *)"L", (char *)"N", (char *)"N", &jlen, &i__5, &c_b8, &u[(i2 + 1) * u_dim1 + 1], ldu, + &wv[(i2 + 1) * wv_dim1 + 1], ldwv, (ftnlen)1, (ftnlen)1, (ftnlen)1, + (ftnlen)1); + i__5 = i4 - i2; + i__7 = j4 - j2; + dgemm_((char *)"N", (char *)"N", &jlen, &i__5, &i__7, &c_b8, + &h__[jrow + (incol + 1 + j2) * h_dim1], ldh, + &u[j2 + 1 + (i2 + 1) * u_dim1], ldu, &c_b8, &wv[(i2 + 1) * wv_dim1 + 1], + ldwv, (ftnlen)1, (ftnlen)1); + dlacpy_((char *)"ALL", &jlen, &kdu, &wv[wv_offset], ldwv, + &h__[jrow + (incol + 1) * h_dim1], ldh, (ftnlen)3); + } + if (*wantz) { + i__4 = *ihiz; + i__3 = *nv; + for (jrow = *iloz; i__3 < 0 ? 
jrow >= i__4 : jrow <= i__4; jrow += i__3) { + i__5 = *nv, i__7 = *ihiz - jrow + 1; + jlen = min(i__5, i__7); + dlacpy_((char *)"ALL", &jlen, &knz, &z__[jrow + (incol + 1 + j2) * z_dim1], ldz, + &wv[(kzs + 1) * wv_dim1 + 1], ldwv, (ftnlen)3); + dlaset_((char *)"ALL", &jlen, &kzs, &c_b7, &c_b7, &wv[wv_offset], ldwv, (ftnlen)3); + dtrmm_((char *)"R", (char *)"U", (char *)"N", (char *)"N", &jlen, &knz, &c_b8, + &u[j2 + 1 + (kzs + 1) * u_dim1], ldu, &wv[(kzs + 1) * wv_dim1 + 1], + ldwv, (ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); + dgemm_((char *)"N", (char *)"N", &jlen, &i2, &j2, &c_b8, &z__[jrow + (incol + 1) * z_dim1], + ldz, &u[u_offset], ldu, &c_b8, &wv[wv_offset], ldwv, (ftnlen)1, + (ftnlen)1); + dlacpy_((char *)"ALL", &jlen, &j2, &z__[jrow + (incol + 1) * z_dim1], ldz, + &wv[(i2 + 1) * wv_dim1 + 1], ldwv, (ftnlen)3); + i__5 = i4 - i2; + dtrmm_((char *)"R", (char *)"L", (char *)"N", (char *)"N", &jlen, &i__5, &c_b8, &u[(i2 + 1) * u_dim1 + 1], + ldu, &wv[(i2 + 1) * wv_dim1 + 1], ldwv, (ftnlen)1, (ftnlen)1, + (ftnlen)1, (ftnlen)1); + i__5 = i4 - i2; + i__7 = j4 - j2; + dgemm_((char *)"N", (char *)"N", &jlen, &i__5, &i__7, &c_b8, + &z__[jrow + (incol + 1 + j2) * z_dim1], ldz, + &u[j2 + 1 + (i2 + 1) * u_dim1], ldu, &c_b8, + &wv[(i2 + 1) * wv_dim1 + 1], ldwv, (ftnlen)1, (ftnlen)1); + dlacpy_((char *)"ALL", &jlen, &kdu, &wv[wv_offset], ldwv, + &z__[jrow + (incol + 1) * z_dim1], ldz, (ftnlen)3); + } + } + } + } + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dlarfx.cpp b/lib/linalg/dlarfx.cpp new file mode 100644 index 0000000000..44d73f27a9 --- /dev/null +++ b/lib/linalg/dlarfx.cpp @@ -0,0 +1,552 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static integer c__1 = 1; +int dlarfx_(char *side, integer *m, integer *n, doublereal *v, doublereal *tau, doublereal *c__, + integer *ldc, doublereal *work, ftnlen side_len) +{ + integer c_dim1, c_offset, i__1; + integer j; + doublereal t1, t2, t3, t4, t5, t6, t7, t8, t9, v1, v2, v3, v4, v5, v6, v7, v8, v9, t10, v10, + sum; + extern int dlarf_(char *, integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, doublereal *, ftnlen); + extern logical lsame_(char *, char *, ftnlen, ftnlen); + --v; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + if (*tau == 0.) { + return 0; + } + if (lsame_(side, (char *)"L", (ftnlen)1, (ftnlen)1)) { + switch (*m) { + case 1: + goto L10; + case 2: + goto L30; + case 3: + goto L50; + case 4: + goto L70; + case 5: + goto L90; + case 6: + goto L110; + case 7: + goto L130; + case 8: + goto L150; + case 9: + goto L170; + case 10: + goto L190; + } + dlarf_(side, m, n, &v[1], &c__1, tau, &c__[c_offset], ldc, &work[1], (ftnlen)1); + goto L410; + L10: + t1 = 1. 
- *tau * v[1] * v[1]; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + c__[j * c_dim1 + 1] = t1 * c__[j * c_dim1 + 1]; + } + goto L410; + L30: + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2]; + c__[j * c_dim1 + 1] -= sum * t1; + c__[j * c_dim1 + 2] -= sum * t2; + } + goto L410; + L50: + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * c__[j * c_dim1 + 3]; + c__[j * c_dim1 + 1] -= sum * t1; + c__[j * c_dim1 + 2] -= sum * t2; + c__[j * c_dim1 + 3] -= sum * t3; + } + goto L410; + L70: + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * c__[j * c_dim1 + 3] + + v4 * c__[j * c_dim1 + 4]; + c__[j * c_dim1 + 1] -= sum * t1; + c__[j * c_dim1 + 2] -= sum * t2; + c__[j * c_dim1 + 3] -= sum * t3; + c__[j * c_dim1 + 4] -= sum * t4; + } + goto L410; + L90: + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * c__[j * c_dim1 + 3] + + v4 * c__[j * c_dim1 + 4] + v5 * c__[j * c_dim1 + 5]; + c__[j * c_dim1 + 1] -= sum * t1; + c__[j * c_dim1 + 2] -= sum * t2; + c__[j * c_dim1 + 3] -= sum * t3; + c__[j * c_dim1 + 4] -= sum * t4; + c__[j * c_dim1 + 5] -= sum * t5; + } + goto L410; + L110: + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + v6 = v[6]; + t6 = *tau * v6; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * c__[j * c_dim1 + 3] + + v4 * c__[j * c_dim1 + 4] + v5 * c__[j * c_dim1 + 5] + v6 * c__[j * c_dim1 + 6]; + c__[j * c_dim1 + 1] -= sum * t1; + c__[j * c_dim1 + 2] -= sum * t2; + c__[j * c_dim1 + 3] -= sum * t3; + c__[j * c_dim1 + 4] -= sum * t4; + c__[j * c_dim1 + 5] -= sum * t5; + c__[j * c_dim1 + 6] -= sum * t6; + } + goto L410; + L130: + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + v6 = v[6]; + t6 = *tau * v6; + v7 = v[7]; + t7 = *tau * v7; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * c__[j * c_dim1 + 3] + + v4 * c__[j * c_dim1 + 4] + v5 * c__[j * c_dim1 + 5] + v6 * c__[j * c_dim1 + 6] + + v7 * c__[j * c_dim1 + 7]; + c__[j * c_dim1 + 1] -= sum * t1; + c__[j * c_dim1 + 2] -= sum * t2; + c__[j * c_dim1 + 3] -= sum * t3; + c__[j * c_dim1 + 4] -= sum * t4; + c__[j * c_dim1 + 5] -= sum * t5; + c__[j * c_dim1 + 6] -= sum * t6; + c__[j * c_dim1 + 7] -= sum * t7; + } + goto L410; + L150: + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + v6 = v[6]; + t6 = *tau * v6; + v7 = v[7]; + t7 = *tau * v7; + v8 = v[8]; + t8 = *tau * v8; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * c__[j * c_dim1 + 3] + + v4 * c__[j * c_dim1 + 4] + v5 * c__[j * c_dim1 + 
5] + v6 * c__[j * c_dim1 + 6] + + v7 * c__[j * c_dim1 + 7] + v8 * c__[j * c_dim1 + 8]; + c__[j * c_dim1 + 1] -= sum * t1; + c__[j * c_dim1 + 2] -= sum * t2; + c__[j * c_dim1 + 3] -= sum * t3; + c__[j * c_dim1 + 4] -= sum * t4; + c__[j * c_dim1 + 5] -= sum * t5; + c__[j * c_dim1 + 6] -= sum * t6; + c__[j * c_dim1 + 7] -= sum * t7; + c__[j * c_dim1 + 8] -= sum * t8; + } + goto L410; + L170: + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + v6 = v[6]; + t6 = *tau * v6; + v7 = v[7]; + t7 = *tau * v7; + v8 = v[8]; + t8 = *tau * v8; + v9 = v[9]; + t9 = *tau * v9; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * c__[j * c_dim1 + 3] + + v4 * c__[j * c_dim1 + 4] + v5 * c__[j * c_dim1 + 5] + v6 * c__[j * c_dim1 + 6] + + v7 * c__[j * c_dim1 + 7] + v8 * c__[j * c_dim1 + 8] + v9 * c__[j * c_dim1 + 9]; + c__[j * c_dim1 + 1] -= sum * t1; + c__[j * c_dim1 + 2] -= sum * t2; + c__[j * c_dim1 + 3] -= sum * t3; + c__[j * c_dim1 + 4] -= sum * t4; + c__[j * c_dim1 + 5] -= sum * t5; + c__[j * c_dim1 + 6] -= sum * t6; + c__[j * c_dim1 + 7] -= sum * t7; + c__[j * c_dim1 + 8] -= sum * t8; + c__[j * c_dim1 + 9] -= sum * t9; + } + goto L410; + L190: + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + v6 = v[6]; + t6 = *tau * v6; + v7 = v[7]; + t7 = *tau * v7; + v8 = v[8]; + t8 = *tau * v8; + v9 = v[9]; + t9 = *tau * v9; + v10 = v[10]; + t10 = *tau * v10; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * c__[j * c_dim1 + 3] + + v4 * c__[j * c_dim1 + 4] + v5 * c__[j * c_dim1 + 5] + v6 * c__[j * c_dim1 + 6] + + v7 * c__[j * c_dim1 + 7] + v8 * c__[j * c_dim1 + 8] + v9 * c__[j * c_dim1 + 9] + + v10 * c__[j * c_dim1 + 10]; + c__[j * c_dim1 + 1] -= sum * t1; + c__[j * c_dim1 + 2] -= sum * t2; + c__[j * c_dim1 + 3] -= sum * t3; + c__[j * c_dim1 + 4] -= sum * t4; + c__[j * c_dim1 + 5] -= sum * t5; + c__[j * c_dim1 + 6] -= sum * t6; + c__[j * c_dim1 + 7] -= sum * t7; + c__[j * c_dim1 + 8] -= sum * t8; + c__[j * c_dim1 + 9] -= sum * t9; + c__[j * c_dim1 + 10] -= sum * t10; + } + goto L410; + } else { + switch (*n) { + case 1: + goto L210; + case 2: + goto L230; + case 3: + goto L250; + case 4: + goto L270; + case 5: + goto L290; + case 6: + goto L310; + case 7: + goto L330; + case 8: + goto L350; + case 9: + goto L370; + case 10: + goto L390; + } + dlarf_(side, m, n, &v[1], &c__1, tau, &c__[c_offset], ldc, &work[1], (ftnlen)1); + goto L410; + L210: + t1 = 1. 
- *tau * v[1] * v[1]; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + c__[j + c_dim1] = t1 * c__[j + c_dim1]; + } + goto L410; + L230: + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)]; + c__[j + c_dim1] -= sum * t1; + c__[j + (c_dim1 << 1)] -= sum * t2; + } + goto L410; + L250: + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * c__[j + c_dim1 * 3]; + c__[j + c_dim1] -= sum * t1; + c__[j + (c_dim1 << 1)] -= sum * t2; + c__[j + c_dim1 * 3] -= sum * t3; + } + goto L410; + L270: + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * c__[j + c_dim1 * 3] + + v4 * c__[j + (c_dim1 << 2)]; + c__[j + c_dim1] -= sum * t1; + c__[j + (c_dim1 << 1)] -= sum * t2; + c__[j + c_dim1 * 3] -= sum * t3; + c__[j + (c_dim1 << 2)] -= sum * t4; + } + goto L410; + L290: + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * c__[j + c_dim1 * 3] + + v4 * c__[j + (c_dim1 << 2)] + v5 * c__[j + c_dim1 * 5]; + c__[j + c_dim1] -= sum * t1; + c__[j + (c_dim1 << 1)] -= sum * t2; + c__[j + c_dim1 * 3] -= sum * t3; + c__[j + (c_dim1 << 2)] -= sum * t4; + c__[j + c_dim1 * 5] -= sum * t5; + } + goto L410; + L310: + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + v6 = v[6]; + t6 = *tau * v6; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * c__[j + c_dim1 * 3] + + v4 * c__[j + (c_dim1 << 2)] + v5 * c__[j + c_dim1 * 5] + v6 * c__[j + c_dim1 * 6]; + c__[j + c_dim1] -= sum * t1; + c__[j + (c_dim1 << 1)] -= sum * t2; + c__[j + c_dim1 * 3] -= sum * t3; + c__[j + (c_dim1 << 2)] -= sum * t4; + c__[j + c_dim1 * 5] -= sum * t5; + c__[j + c_dim1 * 6] -= sum * t6; + } + goto L410; + L330: + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + v6 = v[6]; + t6 = *tau * v6; + v7 = v[7]; + t7 = *tau * v7; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * c__[j + c_dim1 * 3] + + v4 * c__[j + (c_dim1 << 2)] + v5 * c__[j + c_dim1 * 5] + + v6 * c__[j + c_dim1 * 6] + v7 * c__[j + c_dim1 * 7]; + c__[j + c_dim1] -= sum * t1; + c__[j + (c_dim1 << 1)] -= sum * t2; + c__[j + c_dim1 * 3] -= sum * t3; + c__[j + (c_dim1 << 2)] -= sum * t4; + c__[j + c_dim1 * 5] -= sum * t5; + c__[j + c_dim1 * 6] -= sum * t6; + c__[j + c_dim1 * 7] -= sum * t7; + } + goto L410; + L350: + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + v6 = v[6]; + t6 = *tau * v6; + v7 = v[7]; + t7 = *tau * v7; + v8 = v[8]; + t8 = *tau * v8; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * c__[j + c_dim1 * 3] + + v4 * c__[j + (c_dim1 << 2)] + v5 * c__[j + 
c_dim1 * 5] + + v6 * c__[j + c_dim1 * 6] + v7 * c__[j + c_dim1 * 7] + v8 * c__[j + (c_dim1 << 3)]; + c__[j + c_dim1] -= sum * t1; + c__[j + (c_dim1 << 1)] -= sum * t2; + c__[j + c_dim1 * 3] -= sum * t3; + c__[j + (c_dim1 << 2)] -= sum * t4; + c__[j + c_dim1 * 5] -= sum * t5; + c__[j + c_dim1 * 6] -= sum * t6; + c__[j + c_dim1 * 7] -= sum * t7; + c__[j + (c_dim1 << 3)] -= sum * t8; + } + goto L410; + L370: + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + v6 = v[6]; + t6 = *tau * v6; + v7 = v[7]; + t7 = *tau * v7; + v8 = v[8]; + t8 = *tau * v8; + v9 = v[9]; + t9 = *tau * v9; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * c__[j + c_dim1 * 3] + + v4 * c__[j + (c_dim1 << 2)] + v5 * c__[j + c_dim1 * 5] + + v6 * c__[j + c_dim1 * 6] + v7 * c__[j + c_dim1 * 7] + + v8 * c__[j + (c_dim1 << 3)] + v9 * c__[j + c_dim1 * 9]; + c__[j + c_dim1] -= sum * t1; + c__[j + (c_dim1 << 1)] -= sum * t2; + c__[j + c_dim1 * 3] -= sum * t3; + c__[j + (c_dim1 << 2)] -= sum * t4; + c__[j + c_dim1 * 5] -= sum * t5; + c__[j + c_dim1 * 6] -= sum * t6; + c__[j + c_dim1 * 7] -= sum * t7; + c__[j + (c_dim1 << 3)] -= sum * t8; + c__[j + c_dim1 * 9] -= sum * t9; + } + goto L410; + L390: + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + v6 = v[6]; + t6 = *tau * v6; + v7 = v[7]; + t7 = *tau * v7; + v8 = v[8]; + t8 = *tau * v8; + v9 = v[9]; + t9 = *tau * v9; + v10 = v[10]; + t10 = *tau * v10; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * c__[j + c_dim1 * 3] + + v4 * c__[j + (c_dim1 << 2)] + v5 * c__[j + c_dim1 * 5] + + v6 * c__[j + c_dim1 * 6] + v7 * c__[j + c_dim1 * 7] + + v8 * c__[j + (c_dim1 << 3)] + v9 * c__[j + c_dim1 * 9] + + v10 * c__[j + c_dim1 * 10]; + c__[j + c_dim1] -= sum * t1; + c__[j + (c_dim1 << 1)] -= sum * t2; + c__[j + c_dim1 * 3] -= sum * t3; + c__[j + (c_dim1 << 2)] -= sum * t4; + c__[j + c_dim1 * 5] -= sum * t5; + c__[j + c_dim1 * 6] -= sum * t6; + c__[j + c_dim1 * 7] -= sum * t7; + c__[j + (c_dim1 << 3)] -= sum * t8; + c__[j + c_dim1 * 9] -= sum * t9; + c__[j + c_dim1 * 10] -= sum * t10; + } + goto L410; + } +L410: + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dlasd0.cpp b/lib/linalg/dlasd0.cpp new file mode 100644 index 0000000000..006c379fa9 --- /dev/null +++ b/lib/linalg/dlasd0.cpp @@ -0,0 +1,143 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static integer c__0 = 0; +static integer c__2 = 2; +int dlasd0_(integer *n, integer *sqre, doublereal *d__, doublereal *e, doublereal *u, integer *ldu, + doublereal *vt, integer *ldvt, integer *smlsiz, integer *iwork, doublereal *work, + integer *info) +{ + integer u_dim1, u_offset, vt_dim1, vt_offset, i__1, i__2; + integer pow_lmp_ii(integer *, integer *); + integer i__, j, m, i1, ic, lf, nd, ll, nl, nr, im1, ncc, nlf, nrf, iwk, lvl, ndb1, nlp1, nrp1; + doublereal beta; + integer idxq, nlvl; + doublereal alpha; + integer inode, ndiml, idxqc, ndimr, itemp, sqrei; + extern int dlasd1_(integer *, integer *, integer *, doublereal *, doublereal *, doublereal *, + doublereal *, integer *, doublereal *, integer *, integer *, integer *, + doublereal *, integer *), + dlasdq_(char *, integer *, integer *, integer *, integer *, integer *, doublereal *, + doublereal *, doublereal *, integer *, 
doublereal *, integer *, doublereal *, + integer *, doublereal *, integer *, ftnlen), + dlasdt_(integer *, integer *, integer *, integer *, integer *, integer *, integer *), + xerbla_(char *, integer *, ftnlen); + --d__; + --e; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + vt_dim1 = *ldvt; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + --iwork; + --work; + *info = 0; + if (*n < 0) { + *info = -1; + } else if (*sqre < 0 || *sqre > 1) { + *info = -2; + } + m = *n + *sqre; + if (*ldu < *n) { + *info = -6; + } else if (*ldvt < m) { + *info = -8; + } else if (*smlsiz < 3) { + *info = -9; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DLASD0", &i__1, (ftnlen)6); + return 0; + } + if (*n <= *smlsiz) { + dlasdq_((char *)"U", sqre, n, &m, n, &c__0, &d__[1], &e[1], &vt[vt_offset], ldvt, &u[u_offset], ldu, + &u[u_offset], ldu, &work[1], info, (ftnlen)1); + return 0; + } + inode = 1; + ndiml = inode + *n; + ndimr = ndiml + *n; + idxq = ndimr + *n; + iwk = idxq + *n; + dlasdt_(n, &nlvl, &nd, &iwork[inode], &iwork[ndiml], &iwork[ndimr], smlsiz); + ndb1 = (nd + 1) / 2; + ncc = 0; + i__1 = nd; + for (i__ = ndb1; i__ <= i__1; ++i__) { + i1 = i__ - 1; + ic = iwork[inode + i1]; + nl = iwork[ndiml + i1]; + nlp1 = nl + 1; + nr = iwork[ndimr + i1]; + nrp1 = nr + 1; + nlf = ic - nl; + nrf = ic + 1; + sqrei = 1; + dlasdq_((char *)"U", &sqrei, &nl, &nlp1, &nl, &ncc, &d__[nlf], &e[nlf], &vt[nlf + nlf * vt_dim1], + ldvt, &u[nlf + nlf * u_dim1], ldu, &u[nlf + nlf * u_dim1], ldu, &work[1], info, + (ftnlen)1); + if (*info != 0) { + return 0; + } + itemp = idxq + nlf - 2; + i__2 = nl; + for (j = 1; j <= i__2; ++j) { + iwork[itemp + j] = j; + } + if (i__ == nd) { + sqrei = *sqre; + } else { + sqrei = 1; + } + nrp1 = nr + sqrei; + dlasdq_((char *)"U", &sqrei, &nr, &nrp1, &nr, &ncc, &d__[nrf], &e[nrf], &vt[nrf + nrf * vt_dim1], + ldvt, &u[nrf + nrf * u_dim1], ldu, &u[nrf + nrf * u_dim1], ldu, &work[1], info, + (ftnlen)1); + if (*info != 0) { + return 0; + } + itemp = idxq + ic; + i__2 = nr; + for (j = 1; j <= i__2; ++j) { + iwork[itemp + j - 1] = j; + } + } + for (lvl = nlvl; lvl >= 1; --lvl) { + if (lvl == 1) { + lf = 1; + ll = 1; + } else { + i__1 = lvl - 1; + lf = pow_lmp_ii(&c__2, &i__1); + ll = (lf << 1) - 1; + } + i__1 = ll; + for (i__ = lf; i__ <= i__1; ++i__) { + im1 = i__ - 1; + ic = iwork[inode + im1]; + nl = iwork[ndiml + im1]; + nr = iwork[ndimr + im1]; + nlf = ic - nl; + if (*sqre == 0 && i__ == ll) { + sqrei = *sqre; + } else { + sqrei = 1; + } + idxqc = idxq + nlf - 1; + alpha = d__[ic]; + beta = e[ic]; + dlasd1_(&nl, &nr, &sqrei, &d__[nlf], &alpha, &beta, &u[nlf + nlf * u_dim1], ldu, + &vt[nlf + nlf * vt_dim1], ldvt, &iwork[idxqc], &iwork[iwk], &work[1], info); + if (*info != 0) { + return 0; + } + } + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dlasd1.cpp b/lib/linalg/dlasd1.cpp new file mode 100644 index 0000000000..e7b7fba747 --- /dev/null +++ b/lib/linalg/dlasd1.cpp @@ -0,0 +1,96 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static integer c__0 = 0; +static doublereal c_b7 = 1.; +static integer c__1 = 1; +static integer c_n1 = -1; +int dlasd1_(integer *nl, integer *nr, integer *sqre, doublereal *d__, doublereal *alpha, + doublereal *beta, doublereal *u, integer *ldu, doublereal *vt, integer *ldvt, + integer *idxq, integer *iwork, doublereal *work, integer *info) +{ + integer u_dim1, u_offset, vt_dim1, vt_offset, i__1; + doublereal d__1, d__2; + integer i__, k, m, n, n1, n2, iq, iz, iu2, ldq, idx, ldu2, ivt2, idxc, idxp, 
ldvt2; + extern int dlasd2_(integer *, integer *, integer *, integer *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, integer *, + integer *, integer *, integer *, integer *, integer *), + dlasd3_(integer *, integer *, integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *, doublereal *, integer *, integer *, integer *, doublereal *, integer *), + dlascl_(char *, integer *, integer *, doublereal *, doublereal *, integer *, integer *, + doublereal *, integer *, integer *, ftnlen), + dlamrg_(integer *, integer *, doublereal *, integer *, integer *, integer *); + integer isigma; + extern int xerbla_(char *, integer *, ftnlen); + doublereal orgnrm; + integer coltyp; + --d__; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + vt_dim1 = *ldvt; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + --idxq; + --iwork; + --work; + *info = 0; + if (*nl < 1) { + *info = -1; + } else if (*nr < 1) { + *info = -2; + } else if (*sqre < 0 || *sqre > 1) { + *info = -3; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DLASD1", &i__1, (ftnlen)6); + return 0; + } + n = *nl + *nr + 1; + m = n + *sqre; + ldu2 = n; + ldvt2 = m; + iz = 1; + isigma = iz + m; + iu2 = isigma + n; + ivt2 = iu2 + ldu2 * n; + iq = ivt2 + ldvt2 * m; + idx = 1; + idxc = idx + n; + coltyp = idxc + n; + idxp = coltyp + n; + d__1 = abs(*alpha), d__2 = abs(*beta); + orgnrm = max(d__1, d__2); + d__[*nl + 1] = 0.; + i__1 = n; + for (i__ = 1; i__ <= i__1; ++i__) { + if ((d__1 = d__[i__], abs(d__1)) > orgnrm) { + orgnrm = (d__1 = d__[i__], abs(d__1)); + } + } + dlascl_((char *)"G", &c__0, &c__0, &orgnrm, &c_b7, &n, &c__1, &d__[1], &n, info, (ftnlen)1); + *alpha /= orgnrm; + *beta /= orgnrm; + dlasd2_(nl, nr, sqre, &k, &d__[1], &work[iz], alpha, beta, &u[u_offset], ldu, &vt[vt_offset], + ldvt, &work[isigma], &work[iu2], &ldu2, &work[ivt2], &ldvt2, &iwork[idxp], &iwork[idx], + &iwork[idxc], &idxq[1], &iwork[coltyp], info); + ldq = k; + dlasd3_(nl, nr, sqre, &k, &d__[1], &work[iq], &ldq, &work[isigma], &u[u_offset], ldu, + &work[iu2], &ldu2, &vt[vt_offset], ldvt, &work[ivt2], &ldvt2, &iwork[idxc], + &iwork[coltyp], &work[iz], info); + if (*info != 0) { + return 0; + } + dlascl_((char *)"G", &c__0, &c__0, &c_b7, &orgnrm, &n, &c__1, &d__[1], &n, info, (ftnlen)1); + n1 = k; + n2 = n - k; + dlamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &idxq[1]); + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dlasd2.cpp b/lib/linalg/dlasd2.cpp new file mode 100644 index 0000000000..36562850e0 --- /dev/null +++ b/lib/linalg/dlasd2.cpp @@ -0,0 +1,282 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static integer c__1 = 1; +static doublereal c_b30 = 0.; +int dlasd2_(integer *nl, integer *nr, integer *sqre, integer *k, doublereal *d__, doublereal *z__, + doublereal *alpha, doublereal *beta, doublereal *u, integer *ldu, doublereal *vt, + integer *ldvt, doublereal *dsigma, doublereal *u2, integer *ldu2, doublereal *vt2, + integer *ldvt2, integer *idxp, integer *idx, integer *idxc, integer *idxq, + integer *coltyp, integer *info) +{ + integer u_dim1, u_offset, u2_dim1, u2_offset, vt_dim1, vt_offset, vt2_dim1, vt2_offset, i__1; + doublereal d__1, d__2; + doublereal c__; + integer i__, j, m, n; + doublereal s; + integer k2; + doublereal z1; + integer ct, jp; + doublereal eps, tau, tol; + integer psm[4], 
nlp1, nlp2, idxi, idxj; + extern int drot_(integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, + doublereal *); + integer ctot[4], idxjp; + extern int dcopy_(integer *, doublereal *, integer *, doublereal *, integer *); + integer jprev; + extern doublereal dlapy2_(doublereal *, doublereal *), dlamch_(char *, ftnlen); + extern int dlamrg_(integer *, integer *, doublereal *, integer *, integer *, integer *), + dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, + ftnlen), + dlaset_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *, + ftnlen), + xerbla_(char *, integer *, ftnlen); + doublereal hlftol; + --d__; + --z__; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + vt_dim1 = *ldvt; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + --dsigma; + u2_dim1 = *ldu2; + u2_offset = 1 + u2_dim1; + u2 -= u2_offset; + vt2_dim1 = *ldvt2; + vt2_offset = 1 + vt2_dim1; + vt2 -= vt2_offset; + --idxp; + --idx; + --idxc; + --idxq; + --coltyp; + *info = 0; + if (*nl < 1) { + *info = -1; + } else if (*nr < 1) { + *info = -2; + } else if (*sqre != 1 && *sqre != 0) { + *info = -3; + } + n = *nl + *nr + 1; + m = n + *sqre; + if (*ldu < n) { + *info = -10; + } else if (*ldvt < m) { + *info = -12; + } else if (*ldu2 < n) { + *info = -15; + } else if (*ldvt2 < m) { + *info = -17; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DLASD2", &i__1, (ftnlen)6); + return 0; + } + nlp1 = *nl + 1; + nlp2 = *nl + 2; + z1 = *alpha * vt[nlp1 + nlp1 * vt_dim1]; + z__[1] = z1; + for (i__ = *nl; i__ >= 1; --i__) { + z__[i__ + 1] = *alpha * vt[i__ + nlp1 * vt_dim1]; + d__[i__ + 1] = d__[i__]; + idxq[i__ + 1] = idxq[i__] + 1; + } + i__1 = m; + for (i__ = nlp2; i__ <= i__1; ++i__) { + z__[i__] = *beta * vt[i__ + nlp2 * vt_dim1]; + } + i__1 = nlp1; + for (i__ = 2; i__ <= i__1; ++i__) { + coltyp[i__] = 1; + } + i__1 = n; + for (i__ = nlp2; i__ <= i__1; ++i__) { + coltyp[i__] = 2; + } + i__1 = n; + for (i__ = nlp2; i__ <= i__1; ++i__) { + idxq[i__] += nlp1; + } + i__1 = n; + for (i__ = 2; i__ <= i__1; ++i__) { + dsigma[i__] = d__[idxq[i__]]; + u2[i__ + u2_dim1] = z__[idxq[i__]]; + idxc[i__] = coltyp[idxq[i__]]; + } + dlamrg_(nl, nr, &dsigma[2], &c__1, &c__1, &idx[2]); + i__1 = n; + for (i__ = 2; i__ <= i__1; ++i__) { + idxi = idx[i__] + 1; + d__[i__] = dsigma[idxi]; + z__[i__] = u2[idxi + u2_dim1]; + coltyp[i__] = idxc[idxi]; + } + eps = dlamch_((char *)"Epsilon", (ftnlen)7); + d__1 = abs(*alpha), d__2 = abs(*beta); + tol = max(d__1, d__2); + d__2 = (d__1 = d__[n], abs(d__1)); + tol = eps * 8. 
* max(d__2, tol); + *k = 1; + k2 = n + 1; + i__1 = n; + for (j = 2; j <= i__1; ++j) { + if ((d__1 = z__[j], abs(d__1)) <= tol) { + --k2; + idxp[k2] = j; + coltyp[j] = 4; + if (j == n) { + goto L120; + } + } else { + jprev = j; + goto L90; + } + } +L90: + j = jprev; +L100: + ++j; + if (j > n) { + goto L110; + } + if ((d__1 = z__[j], abs(d__1)) <= tol) { + --k2; + idxp[k2] = j; + coltyp[j] = 4; + } else { + if ((d__1 = d__[j] - d__[jprev], abs(d__1)) <= tol) { + s = z__[jprev]; + c__ = z__[j]; + tau = dlapy2_(&c__, &s); + c__ /= tau; + s = -s / tau; + z__[j] = tau; + z__[jprev] = 0.; + idxjp = idxq[idx[jprev] + 1]; + idxj = idxq[idx[j] + 1]; + if (idxjp <= nlp1) { + --idxjp; + } + if (idxj <= nlp1) { + --idxj; + } + drot_(&n, &u[idxjp * u_dim1 + 1], &c__1, &u[idxj * u_dim1 + 1], &c__1, &c__, &s); + drot_(&m, &vt[idxjp + vt_dim1], ldvt, &vt[idxj + vt_dim1], ldvt, &c__, &s); + if (coltyp[j] != coltyp[jprev]) { + coltyp[j] = 3; + } + coltyp[jprev] = 4; + --k2; + idxp[k2] = jprev; + jprev = j; + } else { + ++(*k); + u2[*k + u2_dim1] = z__[jprev]; + dsigma[*k] = d__[jprev]; + idxp[*k] = jprev; + jprev = j; + } + } + goto L100; +L110: + ++(*k); + u2[*k + u2_dim1] = z__[jprev]; + dsigma[*k] = d__[jprev]; + idxp[*k] = jprev; +L120: + for (j = 1; j <= 4; ++j) { + ctot[j - 1] = 0; + } + i__1 = n; + for (j = 2; j <= i__1; ++j) { + ct = coltyp[j]; + ++ctot[ct - 1]; + } + psm[0] = 2; + psm[1] = ctot[0] + 2; + psm[2] = psm[1] + ctot[1]; + psm[3] = psm[2] + ctot[2]; + i__1 = n; + for (j = 2; j <= i__1; ++j) { + jp = idxp[j]; + ct = coltyp[jp]; + idxc[psm[ct - 1]] = j; + ++psm[ct - 1]; + } + i__1 = n; + for (j = 2; j <= i__1; ++j) { + jp = idxp[j]; + dsigma[j] = d__[jp]; + idxj = idxq[idx[idxp[idxc[j]]] + 1]; + if (idxj <= nlp1) { + --idxj; + } + dcopy_(&n, &u[idxj * u_dim1 + 1], &c__1, &u2[j * u2_dim1 + 1], &c__1); + dcopy_(&m, &vt[idxj + vt_dim1], ldvt, &vt2[j + vt2_dim1], ldvt2); + } + dsigma[1] = 0.; + hlftol = tol / 2.; + if (abs(dsigma[2]) <= hlftol) { + dsigma[2] = hlftol; + } + if (m > n) { + z__[1] = dlapy2_(&z1, &z__[m]); + if (z__[1] <= tol) { + c__ = 1.; + s = 0.; + z__[1] = tol; + } else { + c__ = z1 / z__[1]; + s = z__[m] / z__[1]; + } + } else { + if (abs(z1) <= tol) { + z__[1] = tol; + } else { + z__[1] = z1; + } + } + i__1 = *k - 1; + dcopy_(&i__1, &u2[u2_dim1 + 2], &c__1, &z__[2], &c__1); + dlaset_((char *)"A", &n, &c__1, &c_b30, &c_b30, &u2[u2_offset], ldu2, (ftnlen)1); + u2[nlp1 + u2_dim1] = 1.; + if (m > n) { + i__1 = nlp1; + for (i__ = 1; i__ <= i__1; ++i__) { + vt[m + i__ * vt_dim1] = -s * vt[nlp1 + i__ * vt_dim1]; + vt2[i__ * vt2_dim1 + 1] = c__ * vt[nlp1 + i__ * vt_dim1]; + } + i__1 = m; + for (i__ = nlp2; i__ <= i__1; ++i__) { + vt2[i__ * vt2_dim1 + 1] = s * vt[m + i__ * vt_dim1]; + vt[m + i__ * vt_dim1] = c__ * vt[m + i__ * vt_dim1]; + } + } else { + dcopy_(&m, &vt[nlp1 + vt_dim1], ldvt, &vt2[vt2_dim1 + 1], ldvt2); + } + if (m > n) { + dcopy_(&m, &vt[m + vt_dim1], ldvt, &vt2[m + vt2_dim1], ldvt2); + } + if (n > *k) { + i__1 = n - *k; + dcopy_(&i__1, &dsigma[*k + 1], &c__1, &d__[*k + 1], &c__1); + i__1 = n - *k; + dlacpy_((char *)"A", &n, &i__1, &u2[(*k + 1) * u2_dim1 + 1], ldu2, &u[(*k + 1) * u_dim1 + 1], ldu, + (ftnlen)1); + i__1 = n - *k; + dlacpy_((char *)"A", &i__1, &m, &vt2[*k + 1 + vt2_dim1], ldvt2, &vt[*k + 1 + vt_dim1], ldvt, + (ftnlen)1); + } + for (j = 1; j <= 4; ++j) { + coltyp[j] = ctot[j - 1]; + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dlasd3.cpp b/lib/linalg/dlasd3.cpp new file mode 100644 index 0000000000..745c613e08 --- /dev/null +++ 
b/lib/linalg/dlasd3.cpp @@ -0,0 +1,218 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static integer c__1 = 1; +static integer c__0 = 0; +static doublereal c_b13 = 1.; +static doublereal c_b26 = 0.; +int dlasd3_(integer *nl, integer *nr, integer *sqre, integer *k, doublereal *d__, doublereal *q, + integer *ldq, doublereal *dsigma, doublereal *u, integer *ldu, doublereal *u2, + integer *ldu2, doublereal *vt, integer *ldvt, doublereal *vt2, integer *ldvt2, + integer *idxc, integer *ctot, doublereal *z__, integer *info) +{ + integer q_dim1, q_offset, u_dim1, u_offset, u2_dim1, u2_offset, vt_dim1, vt_offset, vt2_dim1, + vt2_offset, i__1, i__2; + doublereal d__1, d__2; + double sqrt(doublereal), d_lmp_sign(doublereal *, doublereal *); + integer i__, j, m, n, jc; + doublereal rho; + integer nlp1, nlp2, nrp1; + doublereal temp; + extern doublereal dnrm2_(integer *, doublereal *, integer *); + extern int dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, + ftnlen, ftnlen); + integer ctemp; + extern int dcopy_(integer *, doublereal *, integer *, doublereal *, integer *); + integer ktemp; + extern doublereal dlamc3_(doublereal *, doublereal *); + extern int dlasd4_(integer *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, integer *), + dlascl_(char *, integer *, integer *, doublereal *, doublereal *, integer *, integer *, + doublereal *, integer *, integer *, ftnlen), + dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, + ftnlen), + xerbla_(char *, integer *, ftnlen); + --d__; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --dsigma; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + u2_dim1 = *ldu2; + u2_offset = 1 + u2_dim1; + u2 -= u2_offset; + vt_dim1 = *ldvt; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + vt2_dim1 = *ldvt2; + vt2_offset = 1 + vt2_dim1; + vt2 -= vt2_offset; + --idxc; + --ctot; + --z__; + *info = 0; + if (*nl < 1) { + *info = -1; + } else if (*nr < 1) { + *info = -2; + } else if (*sqre != 1 && *sqre != 0) { + *info = -3; + } + n = *nl + *nr + 1; + m = n + *sqre; + nlp1 = *nl + 1; + nlp2 = *nl + 2; + if (*k < 1 || *k > n) { + *info = -4; + } else if (*ldq < *k) { + *info = -7; + } else if (*ldu < n) { + *info = -10; + } else if (*ldu2 < n) { + *info = -12; + } else if (*ldvt < m) { + *info = -14; + } else if (*ldvt2 < m) { + *info = -16; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DLASD3", &i__1, (ftnlen)6); + return 0; + } + if (*k == 1) { + d__[1] = abs(z__[1]); + dcopy_(&m, &vt2[vt2_dim1 + 1], ldvt2, &vt[vt_dim1 + 1], ldvt); + if (z__[1] > 0.) 
{ + dcopy_(&n, &u2[u2_dim1 + 1], &c__1, &u[u_dim1 + 1], &c__1); + } else { + i__1 = n; + for (i__ = 1; i__ <= i__1; ++i__) { + u[i__ + u_dim1] = -u2[i__ + u2_dim1]; + } + } + return 0; + } + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + dsigma[i__] = dlamc3_(&dsigma[i__], &dsigma[i__]) - dsigma[i__]; + } + dcopy_(k, &z__[1], &c__1, &q[q_offset], &c__1); + rho = dnrm2_(k, &z__[1], &c__1); + dlascl_((char *)"G", &c__0, &c__0, &rho, &c_b13, k, &c__1, &z__[1], k, info, (ftnlen)1); + rho *= rho; + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + dlasd4_(k, &j, &dsigma[1], &z__[1], &u[j * u_dim1 + 1], &rho, &d__[j], &vt[j * vt_dim1 + 1], + info); + if (*info != 0) { + return 0; + } + } + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + z__[i__] = u[i__ + *k * u_dim1] * vt[i__ + *k * vt_dim1]; + i__2 = i__ - 1; + for (j = 1; j <= i__2; ++j) { + z__[i__] *= u[i__ + j * u_dim1] * vt[i__ + j * vt_dim1] / (dsigma[i__] - dsigma[j]) / + (dsigma[i__] + dsigma[j]); + } + i__2 = *k - 1; + for (j = i__; j <= i__2; ++j) { + z__[i__] *= u[i__ + j * u_dim1] * vt[i__ + j * vt_dim1] / + (dsigma[i__] - dsigma[j + 1]) / (dsigma[i__] + dsigma[j + 1]); + } + d__2 = sqrt((d__1 = z__[i__], abs(d__1))); + z__[i__] = d_lmp_sign(&d__2, &q[i__ + q_dim1]); + } + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + vt[i__ * vt_dim1 + 1] = z__[1] / u[i__ * u_dim1 + 1] / vt[i__ * vt_dim1 + 1]; + u[i__ * u_dim1 + 1] = -1.; + i__2 = *k; + for (j = 2; j <= i__2; ++j) { + vt[j + i__ * vt_dim1] = z__[j] / u[j + i__ * u_dim1] / vt[j + i__ * vt_dim1]; + u[j + i__ * u_dim1] = dsigma[j] * vt[j + i__ * vt_dim1]; + } + temp = dnrm2_(k, &u[i__ * u_dim1 + 1], &c__1); + q[i__ * q_dim1 + 1] = u[i__ * u_dim1 + 1] / temp; + i__2 = *k; + for (j = 2; j <= i__2; ++j) { + jc = idxc[j]; + q[j + i__ * q_dim1] = u[jc + i__ * u_dim1] / temp; + } + } + if (*k == 2) { + dgemm_((char *)"N", (char *)"N", &n, k, k, &c_b13, &u2[u2_offset], ldu2, &q[q_offset], ldq, &c_b26, + &u[u_offset], ldu, (ftnlen)1, (ftnlen)1); + goto L100; + } + if (ctot[1] > 0) { + dgemm_((char *)"N", (char *)"N", nl, k, &ctot[1], &c_b13, &u2[(u2_dim1 << 1) + 1], ldu2, &q[q_dim1 + 2], + ldq, &c_b26, &u[u_dim1 + 1], ldu, (ftnlen)1, (ftnlen)1); + if (ctot[3] > 0) { + ktemp = ctot[1] + 2 + ctot[2]; + dgemm_((char *)"N", (char *)"N", nl, k, &ctot[3], &c_b13, &u2[ktemp * u2_dim1 + 1], ldu2, + &q[ktemp + q_dim1], ldq, &c_b13, &u[u_dim1 + 1], ldu, (ftnlen)1, (ftnlen)1); + } + } else if (ctot[3] > 0) { + ktemp = ctot[1] + 2 + ctot[2]; + dgemm_((char *)"N", (char *)"N", nl, k, &ctot[3], &c_b13, &u2[ktemp * u2_dim1 + 1], ldu2, + &q[ktemp + q_dim1], ldq, &c_b26, &u[u_dim1 + 1], ldu, (ftnlen)1, (ftnlen)1); + } else { + dlacpy_((char *)"F", nl, k, &u2[u2_offset], ldu2, &u[u_offset], ldu, (ftnlen)1); + } + dcopy_(k, &q[q_dim1 + 1], ldq, &u[nlp1 + u_dim1], ldu); + ktemp = ctot[1] + 2; + ctemp = ctot[2] + ctot[3]; + dgemm_((char *)"N", (char *)"N", nr, k, &ctemp, &c_b13, &u2[nlp2 + ktemp * u2_dim1], ldu2, &q[ktemp + q_dim1], + ldq, &c_b26, &u[nlp2 + u_dim1], ldu, (ftnlen)1, (ftnlen)1); +L100: + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + temp = dnrm2_(k, &vt[i__ * vt_dim1 + 1], &c__1); + q[i__ + q_dim1] = vt[i__ * vt_dim1 + 1] / temp; + i__2 = *k; + for (j = 2; j <= i__2; ++j) { + jc = idxc[j]; + q[i__ + j * q_dim1] = vt[jc + i__ * vt_dim1] / temp; + } + } + if (*k == 2) { + dgemm_((char *)"N", (char *)"N", k, &m, k, &c_b13, &q[q_offset], ldq, &vt2[vt2_offset], ldvt2, &c_b26, + &vt[vt_offset], ldvt, (ftnlen)1, (ftnlen)1); + return 0; + } + ktemp = ctot[1] + 1; + dgemm_((char *)"N", (char 
*)"N", k, &nlp1, &ktemp, &c_b13, &q[q_dim1 + 1], ldq, &vt2[vt2_dim1 + 1], ldvt2, + &c_b26, &vt[vt_dim1 + 1], ldvt, (ftnlen)1, (ftnlen)1); + ktemp = ctot[1] + 2 + ctot[2]; + if (ktemp <= *ldvt2) { + dgemm_((char *)"N", (char *)"N", k, &nlp1, &ctot[3], &c_b13, &q[ktemp * q_dim1 + 1], ldq, + &vt2[ktemp + vt2_dim1], ldvt2, &c_b13, &vt[vt_dim1 + 1], ldvt, (ftnlen)1, (ftnlen)1); + } + ktemp = ctot[1] + 1; + nrp1 = *nr + *sqre; + if (ktemp > 1) { + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + q[i__ + ktemp * q_dim1] = q[i__ + q_dim1]; + } + i__1 = m; + for (i__ = nlp2; i__ <= i__1; ++i__) { + vt2[ktemp + i__ * vt2_dim1] = vt2[i__ * vt2_dim1 + 1]; + } + } + ctemp = ctot[2] + 1 + ctot[3]; + dgemm_((char *)"N", (char *)"N", k, &nrp1, &ctemp, &c_b13, &q[ktemp * q_dim1 + 1], ldq, + &vt2[ktemp + nlp2 * vt2_dim1], ldvt2, &c_b26, &vt[nlp2 * vt_dim1 + 1], ldvt, (ftnlen)1, + (ftnlen)1); + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dlasy2.cpp b/lib/linalg/dlasy2.cpp new file mode 100644 index 0000000000..94e9ed0e7c --- /dev/null +++ b/lib/linalg/dlasy2.cpp @@ -0,0 +1,284 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static integer c__4 = 4; +static integer c__1 = 1; +static integer c__16 = 16; +static integer c__0 = 0; +int dlasy2_(logical *ltranl, logical *ltranr, integer *isgn, integer *n1, integer *n2, + doublereal *tl, integer *ldtl, doublereal *tr, integer *ldtr, doublereal *b, + integer *ldb, doublereal *scale, doublereal *x, integer *ldx, doublereal *xnorm, + integer *info) +{ + static integer locu12[4] = {3, 4, 1, 2}; + static integer locl21[4] = {2, 1, 4, 3}; + static integer locu22[4] = {4, 3, 2, 1}; + static logical xswpiv[4] = {FALSE_, FALSE_, TRUE_, TRUE_}; + static logical bswpiv[4] = {FALSE_, TRUE_, FALSE_, TRUE_}; + integer b_dim1, b_offset, tl_dim1, tl_offset, tr_dim1, tr_offset, x_dim1, x_offset; + doublereal d__1, d__2, d__3, d__4, d__5, d__6, d__7, d__8; + integer i__, j, k; + doublereal x2[2], l21, u11, u12; + integer ip, jp; + doublereal u22, t16[16], gam, bet, eps, sgn, tmp[4], tau1, btmp[4], smin; + integer ipiv; + doublereal temp; + integer jpiv[4]; + doublereal xmax; + integer ipsv, jpsv; + logical bswap; + extern int dcopy_(integer *, doublereal *, integer *, doublereal *, integer *), + dswap_(integer *, doublereal *, integer *, doublereal *, integer *); + logical xswap; + extern doublereal dlamch_(char *, ftnlen); + extern integer idamax_(integer *, doublereal *, integer *); + doublereal smlnum; + tl_dim1 = *ldtl; + tl_offset = 1 + tl_dim1; + tl -= tl_offset; + tr_dim1 = *ldtr; + tr_offset = 1 + tr_dim1; + tr -= tr_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + x_dim1 = *ldx; + x_offset = 1 + x_dim1; + x -= x_offset; + *info = 0; + if (*n1 == 0 || *n2 == 0) { + return 0; + } + eps = dlamch_((char *)"P", (ftnlen)1); + smlnum = dlamch_((char *)"S", (ftnlen)1) / eps; + sgn = (doublereal)(*isgn); + k = *n1 + *n1 + *n2 - 2; + switch (k) { + case 1: + goto L10; + case 2: + goto L20; + case 3: + goto L30; + case 4: + goto L50; + } +L10: + tau1 = tl[tl_dim1 + 1] + sgn * tr[tr_dim1 + 1]; + bet = abs(tau1); + if (bet <= smlnum) { + tau1 = smlnum; + bet = smlnum; + *info = 1; + } + *scale = 1.; + gam = (d__1 = b[b_dim1 + 1], abs(d__1)); + if (smlnum * gam > bet) { + *scale = 1. 
/ gam; + } + x[x_dim1 + 1] = b[b_dim1 + 1] * *scale / tau1; + *xnorm = (d__1 = x[x_dim1 + 1], abs(d__1)); + return 0; +L20: + d__7 = (d__1 = tl[tl_dim1 + 1], abs(d__1)), d__8 = (d__2 = tr[tr_dim1 + 1], abs(d__2)), + d__7 = max(d__7, d__8), d__8 = (d__3 = tr[(tr_dim1 << 1) + 1], abs(d__3)), + d__7 = max(d__7, d__8), d__8 = (d__4 = tr[tr_dim1 + 2], abs(d__4)), d__7 = max(d__7, d__8), + d__8 = (d__5 = tr[(tr_dim1 << 1) + 2], abs(d__5)); + d__6 = eps * max(d__7, d__8); + smin = max(d__6, smlnum); + tmp[0] = tl[tl_dim1 + 1] + sgn * tr[tr_dim1 + 1]; + tmp[3] = tl[tl_dim1 + 1] + sgn * tr[(tr_dim1 << 1) + 2]; + if (*ltranr) { + tmp[1] = sgn * tr[tr_dim1 + 2]; + tmp[2] = sgn * tr[(tr_dim1 << 1) + 1]; + } else { + tmp[1] = sgn * tr[(tr_dim1 << 1) + 1]; + tmp[2] = sgn * tr[tr_dim1 + 2]; + } + btmp[0] = b[b_dim1 + 1]; + btmp[1] = b[(b_dim1 << 1) + 1]; + goto L40; +L30: + d__7 = (d__1 = tr[tr_dim1 + 1], abs(d__1)), d__8 = (d__2 = tl[tl_dim1 + 1], abs(d__2)), + d__7 = max(d__7, d__8), d__8 = (d__3 = tl[(tl_dim1 << 1) + 1], abs(d__3)), + d__7 = max(d__7, d__8), d__8 = (d__4 = tl[tl_dim1 + 2], abs(d__4)), d__7 = max(d__7, d__8), + d__8 = (d__5 = tl[(tl_dim1 << 1) + 2], abs(d__5)); + d__6 = eps * max(d__7, d__8); + smin = max(d__6, smlnum); + tmp[0] = tl[tl_dim1 + 1] + sgn * tr[tr_dim1 + 1]; + tmp[3] = tl[(tl_dim1 << 1) + 2] + sgn * tr[tr_dim1 + 1]; + if (*ltranl) { + tmp[1] = tl[(tl_dim1 << 1) + 1]; + tmp[2] = tl[tl_dim1 + 2]; + } else { + tmp[1] = tl[tl_dim1 + 2]; + tmp[2] = tl[(tl_dim1 << 1) + 1]; + } + btmp[0] = b[b_dim1 + 1]; + btmp[1] = b[b_dim1 + 2]; +L40: + ipiv = idamax_(&c__4, tmp, &c__1); + u11 = tmp[ipiv - 1]; + if (abs(u11) <= smin) { + *info = 1; + u11 = smin; + } + u12 = tmp[locu12[ipiv - 1] - 1]; + l21 = tmp[locl21[ipiv - 1] - 1] / u11; + u22 = tmp[locu22[ipiv - 1] - 1] - u12 * l21; + xswap = xswpiv[ipiv - 1]; + bswap = bswpiv[ipiv - 1]; + if (abs(u22) <= smin) { + *info = 1; + u22 = smin; + } + if (bswap) { + temp = btmp[1]; + btmp[1] = btmp[0] - l21 * temp; + btmp[0] = temp; + } else { + btmp[1] -= l21 * btmp[0]; + } + *scale = 1.; + if (smlnum * 2. * abs(btmp[1]) > abs(u22) || smlnum * 2. 
* abs(btmp[0]) > abs(u11)) { + d__1 = abs(btmp[0]), d__2 = abs(btmp[1]); + *scale = .5 / max(d__1, d__2); + btmp[0] *= *scale; + btmp[1] *= *scale; + } + x2[1] = btmp[1] / u22; + x2[0] = btmp[0] / u11 - u12 / u11 * x2[1]; + if (xswap) { + temp = x2[1]; + x2[1] = x2[0]; + x2[0] = temp; + } + x[x_dim1 + 1] = x2[0]; + if (*n1 == 1) { + x[(x_dim1 << 1) + 1] = x2[1]; + *xnorm = (d__1 = x[x_dim1 + 1], abs(d__1)) + (d__2 = x[(x_dim1 << 1) + 1], abs(d__2)); + } else { + x[x_dim1 + 2] = x2[1]; + d__3 = (d__1 = x[x_dim1 + 1], abs(d__1)), d__4 = (d__2 = x[x_dim1 + 2], abs(d__2)); + *xnorm = max(d__3, d__4); + } + return 0; +L50: + d__5 = (d__1 = tr[tr_dim1 + 1], abs(d__1)), d__6 = (d__2 = tr[(tr_dim1 << 1) + 1], abs(d__2)), + d__5 = max(d__5, d__6), d__6 = (d__3 = tr[tr_dim1 + 2], abs(d__3)), d__5 = max(d__5, d__6), + d__6 = (d__4 = tr[(tr_dim1 << 1) + 2], abs(d__4)); + smin = max(d__5, d__6); + d__5 = smin, d__6 = (d__1 = tl[tl_dim1 + 1], abs(d__1)), d__5 = max(d__5, d__6), + d__6 = (d__2 = tl[(tl_dim1 << 1) + 1], abs(d__2)), d__5 = max(d__5, d__6), + d__6 = (d__3 = tl[tl_dim1 + 2], abs(d__3)), d__5 = max(d__5, d__6), + d__6 = (d__4 = tl[(tl_dim1 << 1) + 2], abs(d__4)); + smin = max(d__5, d__6); + d__1 = eps * smin; + smin = max(d__1, smlnum); + btmp[0] = 0.; + dcopy_(&c__16, btmp, &c__0, t16, &c__1); + t16[0] = tl[tl_dim1 + 1] + sgn * tr[tr_dim1 + 1]; + t16[5] = tl[(tl_dim1 << 1) + 2] + sgn * tr[tr_dim1 + 1]; + t16[10] = tl[tl_dim1 + 1] + sgn * tr[(tr_dim1 << 1) + 2]; + t16[15] = tl[(tl_dim1 << 1) + 2] + sgn * tr[(tr_dim1 << 1) + 2]; + if (*ltranl) { + t16[4] = tl[tl_dim1 + 2]; + t16[1] = tl[(tl_dim1 << 1) + 1]; + t16[14] = tl[tl_dim1 + 2]; + t16[11] = tl[(tl_dim1 << 1) + 1]; + } else { + t16[4] = tl[(tl_dim1 << 1) + 1]; + t16[1] = tl[tl_dim1 + 2]; + t16[14] = tl[(tl_dim1 << 1) + 1]; + t16[11] = tl[tl_dim1 + 2]; + } + if (*ltranr) { + t16[8] = sgn * tr[(tr_dim1 << 1) + 1]; + t16[13] = sgn * tr[(tr_dim1 << 1) + 1]; + t16[2] = sgn * tr[tr_dim1 + 2]; + t16[7] = sgn * tr[tr_dim1 + 2]; + } else { + t16[8] = sgn * tr[tr_dim1 + 2]; + t16[13] = sgn * tr[tr_dim1 + 2]; + t16[2] = sgn * tr[(tr_dim1 << 1) + 1]; + t16[7] = sgn * tr[(tr_dim1 << 1) + 1]; + } + btmp[0] = b[b_dim1 + 1]; + btmp[1] = b[b_dim1 + 2]; + btmp[2] = b[(b_dim1 << 1) + 1]; + btmp[3] = b[(b_dim1 << 1) + 2]; + for (i__ = 1; i__ <= 3; ++i__) { + xmax = 0.; + for (ip = i__; ip <= 4; ++ip) { + for (jp = i__; jp <= 4; ++jp) { + if ((d__1 = t16[ip + (jp << 2) - 5], abs(d__1)) >= xmax) { + xmax = (d__1 = t16[ip + (jp << 2) - 5], abs(d__1)); + ipsv = ip; + jpsv = jp; + } + } + } + if (ipsv != i__) { + dswap_(&c__4, &t16[ipsv - 1], &c__4, &t16[i__ - 1], &c__4); + temp = btmp[i__ - 1]; + btmp[i__ - 1] = btmp[ipsv - 1]; + btmp[ipsv - 1] = temp; + } + if (jpsv != i__) { + dswap_(&c__4, &t16[(jpsv << 2) - 4], &c__1, &t16[(i__ << 2) - 4], &c__1); + } + jpiv[i__ - 1] = jpsv; + if ((d__1 = t16[i__ + (i__ << 2) - 5], abs(d__1)) < smin) { + *info = 1; + t16[i__ + (i__ << 2) - 5] = smin; + } + for (j = i__ + 1; j <= 4; ++j) { + t16[j + (i__ << 2) - 5] /= t16[i__ + (i__ << 2) - 5]; + btmp[j - 1] -= t16[j + (i__ << 2) - 5] * btmp[i__ - 1]; + for (k = i__ + 1; k <= 4; ++k) { + t16[j + (k << 2) - 5] -= t16[j + (i__ << 2) - 5] * t16[i__ + (k << 2) - 5]; + } + } + } + if (abs(t16[15]) < smin) { + *info = 1; + t16[15] = smin; + } + *scale = 1.; + if (smlnum * 8. * abs(btmp[0]) > abs(t16[0]) || smlnum * 8. * abs(btmp[1]) > abs(t16[5]) || + smlnum * 8. * abs(btmp[2]) > abs(t16[10]) || smlnum * 8. 
* abs(btmp[3]) > abs(t16[15])) { + d__1 = abs(btmp[0]), d__2 = abs(btmp[1]), d__1 = max(d__1, d__2), d__2 = abs(btmp[2]), + d__1 = max(d__1, d__2), d__2 = abs(btmp[3]); + *scale = .125 / max(d__1, d__2); + btmp[0] *= *scale; + btmp[1] *= *scale; + btmp[2] *= *scale; + btmp[3] *= *scale; + } + for (i__ = 1; i__ <= 4; ++i__) { + k = 5 - i__; + temp = 1. / t16[k + (k << 2) - 5]; + tmp[k - 1] = btmp[k - 1] * temp; + for (j = k + 1; j <= 4; ++j) { + tmp[k - 1] -= temp * t16[k + (j << 2) - 5] * tmp[j - 1]; + } + } + for (i__ = 1; i__ <= 3; ++i__) { + if (jpiv[4 - i__ - 1] != 4 - i__) { + temp = tmp[4 - i__ - 1]; + tmp[4 - i__ - 1] = tmp[jpiv[4 - i__ - 1] - 1]; + tmp[jpiv[4 - i__ - 1] - 1] = temp; + } + } + x[x_dim1 + 1] = tmp[0]; + x[x_dim1 + 2] = tmp[1]; + x[(x_dim1 << 1) + 1] = tmp[2]; + x[(x_dim1 << 1) + 2] = tmp[3]; + d__1 = abs(tmp[0]) + abs(tmp[2]), d__2 = abs(tmp[1]) + abs(tmp[3]); + *xnorm = max(d__1, d__2); + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dlasyf.cpp b/lib/linalg/dlasyf.cpp new file mode 100644 index 0000000000..aaafd1a88f --- /dev/null +++ b/lib/linalg/dlasyf.cpp @@ -0,0 +1,337 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static integer c__1 = 1; +static doublereal c_b8 = -1.; +static doublereal c_b9 = 1.; +int dlasyf_(char *uplo, integer *n, integer *nb, integer *kb, doublereal *a, integer *lda, + integer *ipiv, doublereal *w, integer *ldw, integer *info, ftnlen uplo_len) +{ + integer a_dim1, a_offset, w_dim1, w_offset, i__1, i__2, i__3, i__4, i__5; + doublereal d__1, d__2, d__3; + double sqrt(doublereal); + integer j, k; + doublereal t, r1, d11, d21, d22; + integer jb, jj, kk, jp, kp, kw, kkw, imax, jmax; + doublereal alpha; + extern int dscal_(integer *, doublereal *, doublereal *, integer *), + dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, ftnlen, + ftnlen); + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern int dgemv_(char *, integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, ftnlen), + dcopy_(integer *, doublereal *, integer *, doublereal *, integer *), + dswap_(integer *, doublereal *, integer *, doublereal *, integer *); + integer kstep; + doublereal absakk; + extern integer idamax_(integer *, doublereal *, integer *); + doublereal colmax, rowmax; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + w_dim1 = *ldw; + w_offset = 1 + w_dim1; + w -= w_offset; + *info = 0; + alpha = (sqrt(17.) + 1.) / 8.; + if (lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1)) { + k = *n; + L10: + kw = *nb + k - *n; + if (k <= *n - *nb + 1 && *nb < *n || k < 1) { + goto L30; + } + dcopy_(&k, &a[k * a_dim1 + 1], &c__1, &w[kw * w_dim1 + 1], &c__1); + if (k < *n) { + i__1 = *n - k; + dgemv_((char *)"No transpose", &k, &i__1, &c_b8, &a[(k + 1) * a_dim1 + 1], lda, + &w[k + (kw + 1) * w_dim1], ldw, &c_b9, &w[kw * w_dim1 + 1], &c__1, (ftnlen)12); + } + kstep = 1; + absakk = (d__1 = w[k + kw * w_dim1], abs(d__1)); + if (k > 1) { + i__1 = k - 1; + imax = idamax_(&i__1, &w[kw * w_dim1 + 1], &c__1); + colmax = (d__1 = w[imax + kw * w_dim1], abs(d__1)); + } else { + colmax = 0.; + } + if (max(absakk, colmax) == 0.) 
{ + if (*info == 0) { + *info = k; + } + kp = k; + } else { + if (absakk >= alpha * colmax) { + kp = k; + } else { + dcopy_(&imax, &a[imax * a_dim1 + 1], &c__1, &w[(kw - 1) * w_dim1 + 1], &c__1); + i__1 = k - imax; + dcopy_(&i__1, &a[imax + (imax + 1) * a_dim1], lda, &w[imax + 1 + (kw - 1) * w_dim1], + &c__1); + if (k < *n) { + i__1 = *n - k; + dgemv_((char *)"No transpose", &k, &i__1, &c_b8, &a[(k + 1) * a_dim1 + 1], lda, + &w[imax + (kw + 1) * w_dim1], ldw, &c_b9, &w[(kw - 1) * w_dim1 + 1], + &c__1, (ftnlen)12); + } + i__1 = k - imax; + jmax = imax + idamax_(&i__1, &w[imax + 1 + (kw - 1) * w_dim1], &c__1); + rowmax = (d__1 = w[jmax + (kw - 1) * w_dim1], abs(d__1)); + if (imax > 1) { + i__1 = imax - 1; + jmax = idamax_(&i__1, &w[(kw - 1) * w_dim1 + 1], &c__1); + d__2 = rowmax, d__3 = (d__1 = w[jmax + (kw - 1) * w_dim1], abs(d__1)); + rowmax = max(d__2, d__3); + } + if (absakk >= alpha * colmax * (colmax / rowmax)) { + kp = k; + } else if ((d__1 = w[imax + (kw - 1) * w_dim1], abs(d__1)) >= alpha * rowmax) { + kp = imax; + dcopy_(&k, &w[(kw - 1) * w_dim1 + 1], &c__1, &w[kw * w_dim1 + 1], &c__1); + } else { + kp = imax; + kstep = 2; + } + } + kk = k - kstep + 1; + kkw = *nb + kk - *n; + if (kp != kk) { + a[kp + kp * a_dim1] = a[kk + kk * a_dim1]; + i__1 = kk - 1 - kp; + dcopy_(&i__1, &a[kp + 1 + kk * a_dim1], &c__1, &a[kp + (kp + 1) * a_dim1], lda); + if (kp > 1) { + i__1 = kp - 1; + dcopy_(&i__1, &a[kk * a_dim1 + 1], &c__1, &a[kp * a_dim1 + 1], &c__1); + } + if (k < *n) { + i__1 = *n - k; + dswap_(&i__1, &a[kk + (k + 1) * a_dim1], lda, &a[kp + (k + 1) * a_dim1], lda); + } + i__1 = *n - kk + 1; + dswap_(&i__1, &w[kk + kkw * w_dim1], ldw, &w[kp + kkw * w_dim1], ldw); + } + if (kstep == 1) { + dcopy_(&k, &w[kw * w_dim1 + 1], &c__1, &a[k * a_dim1 + 1], &c__1); + r1 = 1. / a[k + k * a_dim1]; + i__1 = k - 1; + dscal_(&i__1, &r1, &a[k * a_dim1 + 1], &c__1); + } else { + if (k > 2) { + d21 = w[k - 1 + kw * w_dim1]; + d11 = w[k + kw * w_dim1] / d21; + d22 = w[k - 1 + (kw - 1) * w_dim1] / d21; + t = 1. / (d11 * d22 - 1.); + d21 = t / d21; + i__1 = k - 2; + for (j = 1; j <= i__1; ++j) { + a[j + (k - 1) * a_dim1] = + d21 * (d11 * w[j + (kw - 1) * w_dim1] - w[j + kw * w_dim1]); + a[j + k * a_dim1] = + d21 * (d22 * w[j + kw * w_dim1] - w[j + (kw - 1) * w_dim1]); + } + } + a[k - 1 + (k - 1) * a_dim1] = w[k - 1 + (kw - 1) * w_dim1]; + a[k - 1 + k * a_dim1] = w[k - 1 + kw * w_dim1]; + a[k + k * a_dim1] = w[k + kw * w_dim1]; + } + } + if (kstep == 1) { + ipiv[k] = kp; + } else { + ipiv[k] = -kp; + ipiv[k - 1] = -kp; + } + k -= kstep; + goto L10; + L30: + i__1 = -(*nb); + for (j = (k - 1) / *nb * *nb + 1; i__1 < 0 ? 
j >= 1 : j <= 1; j += i__1) { + i__2 = *nb, i__3 = k - j + 1; + jb = min(i__2, i__3); + i__2 = j + jb - 1; + for (jj = j; jj <= i__2; ++jj) { + i__3 = jj - j + 1; + i__4 = *n - k; + dgemv_((char *)"No transpose", &i__3, &i__4, &c_b8, &a[j + (k + 1) * a_dim1], lda, + &w[jj + (kw + 1) * w_dim1], ldw, &c_b9, &a[j + jj * a_dim1], &c__1, + (ftnlen)12); + } + i__2 = j - 1; + i__3 = *n - k; + dgemm_((char *)"No transpose", (char *)"Transpose", &i__2, &jb, &i__3, &c_b8, &a[(k + 1) * a_dim1 + 1], + lda, &w[j + (kw + 1) * w_dim1], ldw, &c_b9, &a[j * a_dim1 + 1], lda, (ftnlen)12, + (ftnlen)9); + } + j = k + 1; + L60: + jj = j; + jp = ipiv[j]; + if (jp < 0) { + jp = -jp; + ++j; + } + ++j; + if (jp != jj && j <= *n) { + i__1 = *n - j + 1; + dswap_(&i__1, &a[jp + j * a_dim1], lda, &a[jj + j * a_dim1], lda); + } + if (j < *n) { + goto L60; + } + *kb = *n - k; + } else { + k = 1; + L70: + if (k >= *nb && *nb < *n || k > *n) { + goto L90; + } + i__1 = *n - k + 1; + dcopy_(&i__1, &a[k + k * a_dim1], &c__1, &w[k + k * w_dim1], &c__1); + i__1 = *n - k + 1; + i__2 = k - 1; + dgemv_((char *)"No transpose", &i__1, &i__2, &c_b8, &a[k + a_dim1], lda, &w[k + w_dim1], ldw, &c_b9, + &w[k + k * w_dim1], &c__1, (ftnlen)12); + kstep = 1; + absakk = (d__1 = w[k + k * w_dim1], abs(d__1)); + if (k < *n) { + i__1 = *n - k; + imax = k + idamax_(&i__1, &w[k + 1 + k * w_dim1], &c__1); + colmax = (d__1 = w[imax + k * w_dim1], abs(d__1)); + } else { + colmax = 0.; + } + if (max(absakk, colmax) == 0.) { + if (*info == 0) { + *info = k; + } + kp = k; + } else { + if (absakk >= alpha * colmax) { + kp = k; + } else { + i__1 = imax - k; + dcopy_(&i__1, &a[imax + k * a_dim1], lda, &w[k + (k + 1) * w_dim1], &c__1); + i__1 = *n - imax + 1; + dcopy_(&i__1, &a[imax + imax * a_dim1], &c__1, &w[imax + (k + 1) * w_dim1], &c__1); + i__1 = *n - k + 1; + i__2 = k - 1; + dgemv_((char *)"No transpose", &i__1, &i__2, &c_b8, &a[k + a_dim1], lda, &w[imax + w_dim1], + ldw, &c_b9, &w[k + (k + 1) * w_dim1], &c__1, (ftnlen)12); + i__1 = imax - k; + jmax = k - 1 + idamax_(&i__1, &w[k + (k + 1) * w_dim1], &c__1); + rowmax = (d__1 = w[jmax + (k + 1) * w_dim1], abs(d__1)); + if (imax < *n) { + i__1 = *n - imax; + jmax = imax + idamax_(&i__1, &w[imax + 1 + (k + 1) * w_dim1], &c__1); + d__2 = rowmax, d__3 = (d__1 = w[jmax + (k + 1) * w_dim1], abs(d__1)); + rowmax = max(d__2, d__3); + } + if (absakk >= alpha * colmax * (colmax / rowmax)) { + kp = k; + } else if ((d__1 = w[imax + (k + 1) * w_dim1], abs(d__1)) >= alpha * rowmax) { + kp = imax; + i__1 = *n - k + 1; + dcopy_(&i__1, &w[k + (k + 1) * w_dim1], &c__1, &w[k + k * w_dim1], &c__1); + } else { + kp = imax; + kstep = 2; + } + } + kk = k + kstep - 1; + if (kp != kk) { + a[kp + kp * a_dim1] = a[kk + kk * a_dim1]; + i__1 = kp - kk - 1; + dcopy_(&i__1, &a[kk + 1 + kk * a_dim1], &c__1, &a[kp + (kk + 1) * a_dim1], lda); + if (kp < *n) { + i__1 = *n - kp; + dcopy_(&i__1, &a[kp + 1 + kk * a_dim1], &c__1, &a[kp + 1 + kp * a_dim1], &c__1); + } + if (k > 1) { + i__1 = k - 1; + dswap_(&i__1, &a[kk + a_dim1], lda, &a[kp + a_dim1], lda); + } + dswap_(&kk, &w[kk + w_dim1], ldw, &w[kp + w_dim1], ldw); + } + if (kstep == 1) { + i__1 = *n - k + 1; + dcopy_(&i__1, &w[k + k * w_dim1], &c__1, &a[k + k * a_dim1], &c__1); + if (k < *n) { + r1 = 1. / a[k + k * a_dim1]; + i__1 = *n - k; + dscal_(&i__1, &r1, &a[k + 1 + k * a_dim1], &c__1); + } + } else { + if (k < *n - 1) { + d21 = w[k + 1 + k * w_dim1]; + d11 = w[k + 1 + (k + 1) * w_dim1] / d21; + d22 = w[k + k * w_dim1] / d21; + t = 1. 
/ (d11 * d22 - 1.); + d21 = t / d21; + i__1 = *n; + for (j = k + 2; j <= i__1; ++j) { + a[j + k * a_dim1] = + d21 * (d11 * w[j + k * w_dim1] - w[j + (k + 1) * w_dim1]); + a[j + (k + 1) * a_dim1] = + d21 * (d22 * w[j + (k + 1) * w_dim1] - w[j + k * w_dim1]); + } + } + a[k + k * a_dim1] = w[k + k * w_dim1]; + a[k + 1 + k * a_dim1] = w[k + 1 + k * w_dim1]; + a[k + 1 + (k + 1) * a_dim1] = w[k + 1 + (k + 1) * w_dim1]; + } + } + if (kstep == 1) { + ipiv[k] = kp; + } else { + ipiv[k] = -kp; + ipiv[k + 1] = -kp; + } + k += kstep; + goto L70; + L90: + i__1 = *n; + i__2 = *nb; + for (j = k; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { + i__3 = *nb, i__4 = *n - j + 1; + jb = min(i__3, i__4); + i__3 = j + jb - 1; + for (jj = j; jj <= i__3; ++jj) { + i__4 = j + jb - jj; + i__5 = k - 1; + dgemv_((char *)"No transpose", &i__4, &i__5, &c_b8, &a[jj + a_dim1], lda, &w[jj + w_dim1], + ldw, &c_b9, &a[jj + jj * a_dim1], &c__1, (ftnlen)12); + } + if (j + jb <= *n) { + i__3 = *n - j - jb + 1; + i__4 = k - 1; + dgemm_((char *)"No transpose", (char *)"Transpose", &i__3, &jb, &i__4, &c_b8, &a[j + jb + a_dim1], + lda, &w[j + w_dim1], ldw, &c_b9, &a[j + jb + j * a_dim1], lda, (ftnlen)12, + (ftnlen)9); + } + } + j = k - 1; + L120: + jj = j; + jp = ipiv[j]; + if (jp < 0) { + jp = -jp; + --j; + } + --j; + if (jp != jj && j >= 1) { + dswap_(&j, &a[jp + a_dim1], lda, &a[jj + a_dim1], lda); + } + if (j > 1) { + goto L120; + } + *kb = k - 1; + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dorghr.cpp b/lib/linalg/dorghr.cpp new file mode 100644 index 0000000000..80ffa7dbc5 --- /dev/null +++ b/lib/linalg/dorghr.cpp @@ -0,0 +1,94 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static integer c__1 = 1; +static integer c_n1 = -1; +int dorghr_(integer *n, integer *ilo, integer *ihi, doublereal *a, integer *lda, doublereal *tau, + doublereal *work, integer *lwork, integer *info) +{ + integer a_dim1, a_offset, i__1, i__2; + integer i__, j, nb, nh, iinfo; + extern int xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, + ftnlen, ftnlen); + extern int dorgqr_(integer *, integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, integer *); + integer lwkopt; + logical lquery; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + *info = 0; + nh = *ihi - *ilo; + lquery = *lwork == -1; + if (*n < 0) { + *info = -1; + } else if (*ilo < 1 || *ilo > max(1, *n)) { + *info = -2; + } else if (*ihi < min(*ilo, *n) || *ihi > *n) { + *info = -3; + } else if (*lda < max(1, *n)) { + *info = -5; + } else if (*lwork < max(1, nh) && !lquery) { + *info = -8; + } + if (*info == 0) { + nb = ilaenv_(&c__1, (char *)"DORGQR", (char *)" ", &nh, &nh, &nh, &c_n1, (ftnlen)6, (ftnlen)1); + lwkopt = max(1, nh) * nb; + work[1] = (doublereal)lwkopt; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DORGHR", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + if (*n == 0) { + work[1] = 1.; + return 0; + } + i__1 = *ilo + 1; + for (j = *ihi; j >= i__1; --j) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = 0.; + } + i__2 = *ihi; + for (i__ = j + 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = a[i__ + (j - 1) * a_dim1]; + } + i__2 = *n; + for (i__ = *ihi + 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = 0.; + } + } + i__1 = *ilo; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * 
a_dim1] = 0.; + } + a[j + j * a_dim1] = 1.; + } + i__1 = *n; + for (j = *ihi + 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = 0.; + } + a[j + j * a_dim1] = 1.; + } + if (nh > 0) { + dorgqr_(&nh, &nh, &nh, &a[*ilo + 1 + (*ilo + 1) * a_dim1], lda, &tau[*ilo], &work[1], lwork, + &iinfo); + } + work[1] = (doublereal)lwkopt; + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dormhr.cpp b/lib/linalg/dormhr.cpp new file mode 100644 index 0000000000..9cb0cd6690 --- /dev/null +++ b/lib/linalg/dormhr.cpp @@ -0,0 +1,111 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static integer c__1 = 1; +static integer c_n1 = -1; +static integer c__2 = 2; +int dormhr_(char *side, char *trans, integer *m, integer *n, integer *ilo, integer *ihi, + doublereal *a, integer *lda, doublereal *tau, doublereal *c__, integer *ldc, + doublereal *work, integer *lwork, integer *info, ftnlen side_len, ftnlen trans_len) +{ + address a__1[2]; + integer a_dim1, a_offset, c_dim1, c_offset, i__1[2], i__2; + char ch__1[2]; + int s_lmp_cat(char *, char **, integer *, integer *, ftnlen); + integer i1, i2, nb, mi, nh, ni, nq, nw; + logical left; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + integer iinfo; + extern int xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, + ftnlen, ftnlen); + extern int dormqr_(char *, char *, integer *, integer *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, integer *, + ftnlen, ftnlen); + integer lwkopt; + logical lquery; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + *info = 0; + nh = *ihi - *ilo; + left = lsame_(side, (char *)"L", (ftnlen)1, (ftnlen)1); + lquery = *lwork == -1; + if (left) { + nq = *m; + nw = *n; + } else { + nq = *n; + nw = *m; + } + if (!left && !lsame_(side, (char *)"R", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (!lsame_(trans, (char *)"N", (ftnlen)1, (ftnlen)1) && + !lsame_(trans, (char *)"T", (ftnlen)1, (ftnlen)1)) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*ilo < 1 || *ilo > max(1, nq)) { + *info = -5; + } else if (*ihi < min(*ilo, nq) || *ihi > nq) { + *info = -6; + } else if (*lda < max(1, nq)) { + *info = -8; + } else if (*ldc < max(1, *m)) { + *info = -11; + } else if (*lwork < max(1, nw) && !lquery) { + *info = -13; + } + if (*info == 0) { + if (left) { + i__1[0] = 1, a__1[0] = side; + i__1[1] = 1, a__1[1] = trans; + s_lmp_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + nb = ilaenv_(&c__1, (char *)"DORMQR", ch__1, &nh, n, &nh, &c_n1, (ftnlen)6, (ftnlen)2); + } else { + i__1[0] = 1, a__1[0] = side; + i__1[1] = 1, a__1[1] = trans; + s_lmp_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + nb = ilaenv_(&c__1, (char *)"DORMQR", ch__1, m, &nh, &nh, &c_n1, (ftnlen)6, (ftnlen)2); + } + lwkopt = max(1, nw) * nb; + work[1] = (doublereal)lwkopt; + } + if (*info != 0) { + i__2 = -(*info); + xerbla_((char *)"DORMHR", &i__2, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + if (*m == 0 || *n == 0 || nh == 0) { + work[1] = 1.; + return 0; + } + if (left) { + mi = nh; + ni = *n; + i1 = *ilo + 1; + i2 = 1; + } else { + mi = *m; + ni = nh; + i1 = 1; + i2 = *ilo + 1; + } + dormqr_(side, trans, &mi, &ni, &nh, &a[*ilo + 1 + *ilo * a_dim1], lda, &tau[*ilo], + &c__[i1 + i2 * c_dim1], ldc, 
&work[1], lwork, &iinfo, (ftnlen)1, (ftnlen)1); + work[1] = (doublereal)lwkopt; + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dsyconv.cpp b/lib/linalg/dsyconv.cpp new file mode 100644 index 0000000000..9d4a2908ae --- /dev/null +++ b/lib/linalg/dsyconv.cpp @@ -0,0 +1,199 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +int dsyconv_(char *uplo, char *way, integer *n, doublereal *a, integer *lda, integer *ipiv, + doublereal *e, integer *info, ftnlen uplo_len, ftnlen way_len) +{ + integer a_dim1, a_offset, i__1; + integer i__, j, ip; + doublereal temp; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + logical upper; + extern int xerbla_(char *, integer *, ftnlen); + logical convert; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + --e; + *info = 0; + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + convert = lsame_(way, (char *)"C", (ftnlen)1, (ftnlen)1); + if (!upper && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (!convert && !lsame_(way, (char *)"R", (ftnlen)1, (ftnlen)1)) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1, *n)) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DSYCONV", &i__1, (ftnlen)7); + return 0; + } + if (*n == 0) { + return 0; + } + if (upper) { + if (convert) { + i__ = *n; + e[1] = 0.; + while (i__ > 1) { + if (ipiv[i__] < 0) { + e[i__] = a[i__ - 1 + i__ * a_dim1]; + e[i__ - 1] = 0.; + a[i__ - 1 + i__ * a_dim1] = 0.; + --i__; + } else { + e[i__] = 0.; + } + --i__; + } + i__ = *n; + while (i__ >= 1) { + if (ipiv[i__] > 0) { + ip = ipiv[i__]; + if (i__ < *n) { + i__1 = *n; + for (j = i__ + 1; j <= i__1; ++j) { + temp = a[ip + j * a_dim1]; + a[ip + j * a_dim1] = a[i__ + j * a_dim1]; + a[i__ + j * a_dim1] = temp; + } + } + } else { + ip = -ipiv[i__]; + if (i__ < *n) { + i__1 = *n; + for (j = i__ + 1; j <= i__1; ++j) { + temp = a[ip + j * a_dim1]; + a[ip + j * a_dim1] = a[i__ - 1 + j * a_dim1]; + a[i__ - 1 + j * a_dim1] = temp; + } + } + --i__; + } + --i__; + } + } else { + i__ = 1; + while (i__ <= *n) { + if (ipiv[i__] > 0) { + ip = ipiv[i__]; + if (i__ < *n) { + i__1 = *n; + for (j = i__ + 1; j <= i__1; ++j) { + temp = a[ip + j * a_dim1]; + a[ip + j * a_dim1] = a[i__ + j * a_dim1]; + a[i__ + j * a_dim1] = temp; + } + } + } else { + ip = -ipiv[i__]; + ++i__; + if (i__ < *n) { + i__1 = *n; + for (j = i__ + 1; j <= i__1; ++j) { + temp = a[ip + j * a_dim1]; + a[ip + j * a_dim1] = a[i__ - 1 + j * a_dim1]; + a[i__ - 1 + j * a_dim1] = temp; + } + } + } + ++i__; + } + i__ = *n; + while (i__ > 1) { + if (ipiv[i__] < 0) { + a[i__ - 1 + i__ * a_dim1] = e[i__]; + --i__; + } + --i__; + } + } + } else { + if (convert) { + i__ = 1; + e[*n] = 0.; + while (i__ <= *n) { + if (i__ < *n && ipiv[i__] < 0) { + e[i__] = a[i__ + 1 + i__ * a_dim1]; + e[i__ + 1] = 0.; + a[i__ + 1 + i__ * a_dim1] = 0.; + ++i__; + } else { + e[i__] = 0.; + } + ++i__; + } + i__ = 1; + while (i__ <= *n) { + if (ipiv[i__] > 0) { + ip = ipiv[i__]; + if (i__ > 1) { + i__1 = i__ - 1; + for (j = 1; j <= i__1; ++j) { + temp = a[ip + j * a_dim1]; + a[ip + j * a_dim1] = a[i__ + j * a_dim1]; + a[i__ + j * a_dim1] = temp; + } + } + } else { + ip = -ipiv[i__]; + if (i__ > 1) { + i__1 = i__ - 1; + for (j = 1; j <= i__1; ++j) { + temp = a[ip + j * a_dim1]; + a[ip + j * a_dim1] = a[i__ + 1 + j * a_dim1]; + a[i__ + 1 + j * a_dim1] = temp; + } + } + ++i__; + } + ++i__; + } + } else { + i__ = *n; + while (i__ >= 1) { + if (ipiv[i__] > 0) { + ip = ipiv[i__]; + if 
(i__ > 1) { + i__1 = i__ - 1; + for (j = 1; j <= i__1; ++j) { + temp = a[i__ + j * a_dim1]; + a[i__ + j * a_dim1] = a[ip + j * a_dim1]; + a[ip + j * a_dim1] = temp; + } + } + } else { + ip = -ipiv[i__]; + --i__; + if (i__ > 1) { + i__1 = i__ - 1; + for (j = 1; j <= i__1; ++j) { + temp = a[i__ + 1 + j * a_dim1]; + a[i__ + 1 + j * a_dim1] = a[ip + j * a_dim1]; + a[ip + j * a_dim1] = temp; + } + } + } + --i__; + } + i__ = 1; + while (i__ <= *n - 1) { + if (ipiv[i__] < 0) { + a[i__ + 1 + i__ * a_dim1] = e[i__]; + ++i__; + } + ++i__; + } + } + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dsyr.cpp b/lib/linalg/dsyr.cpp new file mode 100644 index 0000000000..6806baea29 --- /dev/null +++ b/lib/linalg/dsyr.cpp @@ -0,0 +1,167 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static integer c_n1 = -1; +int dsyr_(char *uplo, integer *n, doublereal *alpha, doublereal *x, integer *incx, doublereal *a, + integer *lda, ftnlen uplo_len) +{ + integer a_dim1, a_offset, i__1, i__2; + integer i__, j, ix, jx, kx, info; + doublereal temp; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern int xerbla_(char *, integer *, ftnlen); + --x; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + info = 0; + if (!lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1) && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + info = 1; + } else if (*n < 0) { + info = 2; + } else if (*incx == 0) { + info = 5; + } else if (*lda < max(1, *n)) { + info = 7; + } + if (info != 0) { + xerbla_((char *)"DSYR ", &info, (ftnlen)6); + return 0; + } + if (*n == 0 || *alpha == 0.) { + return 0; + } + if (*incx <= 0) { + kx = 1 - (*n - 1) * *incx; + } else if (*incx != 1) { + kx = 1; + } + if (lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1)) { + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[j] != 0.) { + temp = *alpha * x[j]; + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] += x[i__] * temp; + } + } + } + } else { + jx = kx; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[jx] != 0.) { + temp = *alpha * x[jx]; + ix = kx; + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] += x[ix] * temp; + ix += *incx; + } + } + jx += *incx; + } + } + } else { + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[j] != 0.) { + temp = *alpha * x[j]; + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] += x[i__] * temp; + } + } + } + } else { + jx = kx; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[jx] != 0.) 
{ + temp = *alpha * x[jx]; + ix = jx; + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] += x[ix] * temp; + ix += *incx; + } + } + jx += *incx; + } + } + } + return 0; +} +int dsysv_(char *uplo, integer *n, integer *nrhs, doublereal *a, integer *lda, integer *ipiv, + doublereal *b, integer *ldb, doublereal *work, integer *lwork, integer *info, + ftnlen uplo_len) +{ + integer a_dim1, a_offset, b_dim1, b_offset, i__1; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern int xerbla_(char *, integer *, ftnlen), + dsytrf_(char *, integer *, doublereal *, integer *, integer *, doublereal *, integer *, + integer *, ftnlen); + integer lwkopt; + logical lquery; + extern int dsytrs_(char *, integer *, integer *, doublereal *, integer *, integer *, + doublereal *, integer *, integer *, ftnlen), + dsytrs2_(char *, integer *, integer *, doublereal *, integer *, integer *, doublereal *, + integer *, doublereal *, integer *, ftnlen); + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + --work; + *info = 0; + lquery = *lwork == -1; + if (!lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1) && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*lda < max(1, *n)) { + *info = -5; + } else if (*ldb < max(1, *n)) { + *info = -8; + } else if (*lwork < 1 && !lquery) { + *info = -10; + } + if (*info == 0) { + if (*n == 0) { + lwkopt = 1; + } else { + dsytrf_(uplo, n, &a[a_offset], lda, &ipiv[1], &work[1], &c_n1, info, (ftnlen)1); + lwkopt = (integer)work[1]; + } + work[1] = (doublereal)lwkopt; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DSYSV ", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + dsytrf_(uplo, n, &a[a_offset], lda, &ipiv[1], &work[1], lwork, info, (ftnlen)1); + if (*info == 0) { + if (*lwork < *n) { + dsytrs_(uplo, n, nrhs, &a[a_offset], lda, &ipiv[1], &b[b_offset], ldb, info, (ftnlen)1); + } else { + dsytrs2_(uplo, n, nrhs, &a[a_offset], lda, &ipiv[1], &b[b_offset], ldb, &work[1], info, + (ftnlen)1); + } + } + work[1] = (doublereal)lwkopt; + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dsytf2.cpp b/lib/linalg/dsytf2.cpp new file mode 100644 index 0000000000..8b48de1da4 --- /dev/null +++ b/lib/linalg/dsytf2.cpp @@ -0,0 +1,246 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static integer c__1 = 1; +int dsytf2_(char *uplo, integer *n, doublereal *a, integer *lda, integer *ipiv, integer *info, + ftnlen uplo_len) +{ + integer a_dim1, a_offset, i__1, i__2; + doublereal d__1, d__2, d__3; + double sqrt(doublereal); + integer i__, j, k; + doublereal t, r1, d11, d12, d21, d22; + integer kk, kp; + doublereal wk, wkm1, wkp1; + integer imax, jmax; + extern int dsyr_(char *, integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, ftnlen); + doublereal alpha; + extern int dscal_(integer *, doublereal *, doublereal *, integer *); + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern int dswap_(integer *, doublereal *, integer *, doublereal *, integer *); + integer kstep; + logical upper; + doublereal absakk; + extern integer idamax_(integer *, doublereal *, integer *); + extern logical disnan_(doublereal *); + extern int xerbla_(char *, integer *, ftnlen); + doublereal colmax, rowmax; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + *info = 0; + upper = lsame_(uplo, 
(char *)"U", (ftnlen)1, (ftnlen)1); + if (!upper && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1, *n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DSYTF2", &i__1, (ftnlen)6); + return 0; + } + alpha = (sqrt(17.) + 1.) / 8.; + if (upper) { + k = *n; + L10: + if (k < 1) { + goto L70; + } + kstep = 1; + absakk = (d__1 = a[k + k * a_dim1], abs(d__1)); + if (k > 1) { + i__1 = k - 1; + imax = idamax_(&i__1, &a[k * a_dim1 + 1], &c__1); + colmax = (d__1 = a[imax + k * a_dim1], abs(d__1)); + } else { + colmax = 0.; + } + if (max(absakk, colmax) == 0. || disnan_(&absakk)) { + if (*info == 0) { + *info = k; + } + kp = k; + } else { + if (absakk >= alpha * colmax) { + kp = k; + } else { + i__1 = k - imax; + jmax = imax + idamax_(&i__1, &a[imax + (imax + 1) * a_dim1], lda); + rowmax = (d__1 = a[imax + jmax * a_dim1], abs(d__1)); + if (imax > 1) { + i__1 = imax - 1; + jmax = idamax_(&i__1, &a[imax * a_dim1 + 1], &c__1); + d__2 = rowmax, d__3 = (d__1 = a[jmax + imax * a_dim1], abs(d__1)); + rowmax = max(d__2, d__3); + } + if (absakk >= alpha * colmax * (colmax / rowmax)) { + kp = k; + } else if ((d__1 = a[imax + imax * a_dim1], abs(d__1)) >= alpha * rowmax) { + kp = imax; + } else { + kp = imax; + kstep = 2; + } + } + kk = k - kstep + 1; + if (kp != kk) { + i__1 = kp - 1; + dswap_(&i__1, &a[kk * a_dim1 + 1], &c__1, &a[kp * a_dim1 + 1], &c__1); + i__1 = kk - kp - 1; + dswap_(&i__1, &a[kp + 1 + kk * a_dim1], &c__1, &a[kp + (kp + 1) * a_dim1], lda); + t = a[kk + kk * a_dim1]; + a[kk + kk * a_dim1] = a[kp + kp * a_dim1]; + a[kp + kp * a_dim1] = t; + if (kstep == 2) { + t = a[k - 1 + k * a_dim1]; + a[k - 1 + k * a_dim1] = a[kp + k * a_dim1]; + a[kp + k * a_dim1] = t; + } + } + if (kstep == 1) { + r1 = 1. / a[k + k * a_dim1]; + i__1 = k - 1; + d__1 = -r1; + dsyr_(uplo, &i__1, &d__1, &a[k * a_dim1 + 1], &c__1, &a[a_offset], lda, (ftnlen)1); + i__1 = k - 1; + dscal_(&i__1, &r1, &a[k * a_dim1 + 1], &c__1); + } else { + if (k > 2) { + d12 = a[k - 1 + k * a_dim1]; + d22 = a[k - 1 + (k - 1) * a_dim1] / d12; + d11 = a[k + k * a_dim1] / d12; + t = 1. / (d11 * d22 - 1.); + d12 = t / d12; + for (j = k - 2; j >= 1; --j) { + wkm1 = d12 * (d11 * a[j + (k - 1) * a_dim1] - a[j + k * a_dim1]); + wk = d12 * (d22 * a[j + k * a_dim1] - a[j + (k - 1) * a_dim1]); + for (i__ = j; i__ >= 1; --i__) { + a[i__ + j * a_dim1] = a[i__ + j * a_dim1] - a[i__ + k * a_dim1] * wk - + a[i__ + (k - 1) * a_dim1] * wkm1; + } + a[j + k * a_dim1] = wk; + a[j + (k - 1) * a_dim1] = wkm1; + } + } + } + } + if (kstep == 1) { + ipiv[k] = kp; + } else { + ipiv[k] = -kp; + ipiv[k - 1] = -kp; + } + k -= kstep; + goto L10; + } else { + k = 1; + L40: + if (k > *n) { + goto L70; + } + kstep = 1; + absakk = (d__1 = a[k + k * a_dim1], abs(d__1)); + if (k < *n) { + i__1 = *n - k; + imax = k + idamax_(&i__1, &a[k + 1 + k * a_dim1], &c__1); + colmax = (d__1 = a[imax + k * a_dim1], abs(d__1)); + } else { + colmax = 0.; + } + if (max(absakk, colmax) == 0. 
|| disnan_(&absakk)) { + if (*info == 0) { + *info = k; + } + kp = k; + } else { + if (absakk >= alpha * colmax) { + kp = k; + } else { + i__1 = imax - k; + jmax = k - 1 + idamax_(&i__1, &a[imax + k * a_dim1], lda); + rowmax = (d__1 = a[imax + jmax * a_dim1], abs(d__1)); + if (imax < *n) { + i__1 = *n - imax; + jmax = imax + idamax_(&i__1, &a[imax + 1 + imax * a_dim1], &c__1); + d__2 = rowmax, d__3 = (d__1 = a[jmax + imax * a_dim1], abs(d__1)); + rowmax = max(d__2, d__3); + } + if (absakk >= alpha * colmax * (colmax / rowmax)) { + kp = k; + } else if ((d__1 = a[imax + imax * a_dim1], abs(d__1)) >= alpha * rowmax) { + kp = imax; + } else { + kp = imax; + kstep = 2; + } + } + kk = k + kstep - 1; + if (kp != kk) { + if (kp < *n) { + i__1 = *n - kp; + dswap_(&i__1, &a[kp + 1 + kk * a_dim1], &c__1, &a[kp + 1 + kp * a_dim1], &c__1); + } + i__1 = kp - kk - 1; + dswap_(&i__1, &a[kk + 1 + kk * a_dim1], &c__1, &a[kp + (kk + 1) * a_dim1], lda); + t = a[kk + kk * a_dim1]; + a[kk + kk * a_dim1] = a[kp + kp * a_dim1]; + a[kp + kp * a_dim1] = t; + if (kstep == 2) { + t = a[k + 1 + k * a_dim1]; + a[k + 1 + k * a_dim1] = a[kp + k * a_dim1]; + a[kp + k * a_dim1] = t; + } + } + if (kstep == 1) { + if (k < *n) { + d11 = 1. / a[k + k * a_dim1]; + i__1 = *n - k; + d__1 = -d11; + dsyr_(uplo, &i__1, &d__1, &a[k + 1 + k * a_dim1], &c__1, + &a[k + 1 + (k + 1) * a_dim1], lda, (ftnlen)1); + i__1 = *n - k; + dscal_(&i__1, &d11, &a[k + 1 + k * a_dim1], &c__1); + } + } else { + if (k < *n - 1) { + d21 = a[k + 1 + k * a_dim1]; + d11 = a[k + 1 + (k + 1) * a_dim1] / d21; + d22 = a[k + k * a_dim1] / d21; + t = 1. / (d11 * d22 - 1.); + d21 = t / d21; + i__1 = *n; + for (j = k + 2; j <= i__1; ++j) { + wk = d21 * (d11 * a[j + k * a_dim1] - a[j + (k + 1) * a_dim1]); + wkp1 = d21 * (d22 * a[j + (k + 1) * a_dim1] - a[j + k * a_dim1]); + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = a[i__ + j * a_dim1] - a[i__ + k * a_dim1] * wk - + a[i__ + (k + 1) * a_dim1] * wkp1; + } + a[j + k * a_dim1] = wk; + a[j + (k + 1) * a_dim1] = wkp1; + } + } + } + } + if (kstep == 1) { + ipiv[k] = kp; + } else { + ipiv[k] = -kp; + ipiv[k + 1] = -kp; + } + k += kstep; + goto L40; + } +L70: + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dsytrf.cpp b/lib/linalg/dsytrf.cpp new file mode 100644 index 0000000000..6bfc84ab87 --- /dev/null +++ b/lib/linalg/dsytrf.cpp @@ -0,0 +1,123 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static integer c__1 = 1; +static integer c_n1 = -1; +static integer c__2 = 2; +int dsytrf_(char *uplo, integer *n, doublereal *a, integer *lda, integer *ipiv, doublereal *work, + integer *lwork, integer *info, ftnlen uplo_len) +{ + integer a_dim1, a_offset, i__1, i__2; + integer j, k, kb, nb, iws; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + integer nbmin, iinfo; + logical upper; + extern int dsytf2_(char *, integer *, doublereal *, integer *, integer *, integer *, ftnlen), + xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, + ftnlen, ftnlen); + extern int dlasyf_(char *, integer *, integer *, integer *, doublereal *, integer *, integer *, + doublereal *, integer *, integer *, ftnlen); + integer ldwork, lwkopt; + logical lquery; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + --work; + *info = 0; + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + lquery = *lwork == -1; + if (!upper && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + 
*info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1, *n)) { + *info = -4; + } else if (*lwork < 1 && !lquery) { + *info = -7; + } + if (*info == 0) { + nb = ilaenv_(&c__1, (char *)"DSYTRF", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + lwkopt = *n * nb; + work[1] = (doublereal)lwkopt; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DSYTRF", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + nbmin = 2; + ldwork = *n; + if (nb > 1 && nb < *n) { + iws = ldwork * nb; + if (*lwork < iws) { + i__1 = *lwork / ldwork; + nb = max(i__1, 1); + i__1 = 2, + i__2 = ilaenv_(&c__2, (char *)"DSYTRF", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + nbmin = max(i__1, i__2); + } + } else { + iws = 1; + } + if (nb < nbmin) { + nb = *n; + } + if (upper) { + k = *n; + L10: + if (k < 1) { + goto L40; + } + if (k > nb) { + dlasyf_(uplo, &k, &nb, &kb, &a[a_offset], lda, &ipiv[1], &work[1], &ldwork, &iinfo, + (ftnlen)1); + } else { + dsytf2_(uplo, &k, &a[a_offset], lda, &ipiv[1], &iinfo, (ftnlen)1); + kb = k; + } + if (*info == 0 && iinfo > 0) { + *info = iinfo; + } + k -= kb; + goto L10; + } else { + k = 1; + L20: + if (k > *n) { + goto L40; + } + if (k <= *n - nb) { + i__1 = *n - k + 1; + dlasyf_(uplo, &i__1, &nb, &kb, &a[k + k * a_dim1], lda, &ipiv[k], &work[1], &ldwork, + &iinfo, (ftnlen)1); + } else { + i__1 = *n - k + 1; + dsytf2_(uplo, &i__1, &a[k + k * a_dim1], lda, &ipiv[k], &iinfo, (ftnlen)1); + kb = *n - k + 1; + } + if (*info == 0 && iinfo > 0) { + *info = iinfo + k - 1; + } + i__1 = k + kb - 1; + for (j = k; j <= i__1; ++j) { + if (ipiv[j] > 0) { + ipiv[j] = ipiv[j] + k - 1; + } else { + ipiv[j] = ipiv[j] - k + 1; + } + } + k += kb; + goto L20; + } +L40: + work[1] = (doublereal)lwkopt; + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dsytrs.cpp b/lib/linalg/dsytrs.cpp new file mode 100644 index 0000000000..c9f849879b --- /dev/null +++ b/lib/linalg/dsytrs.cpp @@ -0,0 +1,214 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static doublereal c_b7 = -1.; +static integer c__1 = 1; +static doublereal c_b19 = 1.; +int dsytrs_(char *uplo, integer *n, integer *nrhs, doublereal *a, integer *lda, integer *ipiv, + doublereal *b, integer *ldb, integer *info, ftnlen uplo_len) +{ + integer a_dim1, a_offset, b_dim1, b_offset, i__1; + doublereal d__1; + integer j, k; + doublereal ak, bk; + integer kp; + doublereal akm1, bkm1; + extern int dger_(integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, integer *); + doublereal akm1k; + extern int dscal_(integer *, doublereal *, doublereal *, integer *); + extern logical lsame_(char *, char *, ftnlen, ftnlen); + doublereal denom; + extern int dgemv_(char *, integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, ftnlen), + dswap_(integer *, doublereal *, integer *, doublereal *, integer *); + logical upper; + extern int xerbla_(char *, integer *, ftnlen); + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + *info = 0; + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + if (!upper && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*lda < max(1, *n)) { + *info = -5; + } else if (*ldb < max(1, *n)) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + 
xerbla_((char *)"DSYTRS", &i__1, (ftnlen)6); + return 0; + } + if (*n == 0 || *nrhs == 0) { + return 0; + } + if (upper) { + k = *n; + L10: + if (k < 1) { + goto L30; + } + if (ipiv[k] > 0) { + kp = ipiv[k]; + if (kp != k) { + dswap_(nrhs, &b[k + b_dim1], ldb, &b[kp + b_dim1], ldb); + } + i__1 = k - 1; + dger_(&i__1, nrhs, &c_b7, &a[k * a_dim1 + 1], &c__1, &b[k + b_dim1], ldb, + &b[b_dim1 + 1], ldb); + d__1 = 1. / a[k + k * a_dim1]; + dscal_(nrhs, &d__1, &b[k + b_dim1], ldb); + --k; + } else { + kp = -ipiv[k]; + if (kp != k - 1) { + dswap_(nrhs, &b[k - 1 + b_dim1], ldb, &b[kp + b_dim1], ldb); + } + i__1 = k - 2; + dger_(&i__1, nrhs, &c_b7, &a[k * a_dim1 + 1], &c__1, &b[k + b_dim1], ldb, + &b[b_dim1 + 1], ldb); + i__1 = k - 2; + dger_(&i__1, nrhs, &c_b7, &a[(k - 1) * a_dim1 + 1], &c__1, &b[k - 1 + b_dim1], ldb, + &b[b_dim1 + 1], ldb); + akm1k = a[k - 1 + k * a_dim1]; + akm1 = a[k - 1 + (k - 1) * a_dim1] / akm1k; + ak = a[k + k * a_dim1] / akm1k; + denom = akm1 * ak - 1.; + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + bkm1 = b[k - 1 + j * b_dim1] / akm1k; + bk = b[k + j * b_dim1] / akm1k; + b[k - 1 + j * b_dim1] = (ak * bkm1 - bk) / denom; + b[k + j * b_dim1] = (akm1 * bk - bkm1) / denom; + } + k += -2; + } + goto L10; + L30: + k = 1; + L40: + if (k > *n) { + goto L50; + } + if (ipiv[k] > 0) { + i__1 = k - 1; + dgemv_((char *)"Transpose", &i__1, nrhs, &c_b7, &b[b_offset], ldb, &a[k * a_dim1 + 1], &c__1, + &c_b19, &b[k + b_dim1], ldb, (ftnlen)9); + kp = ipiv[k]; + if (kp != k) { + dswap_(nrhs, &b[k + b_dim1], ldb, &b[kp + b_dim1], ldb); + } + ++k; + } else { + i__1 = k - 1; + dgemv_((char *)"Transpose", &i__1, nrhs, &c_b7, &b[b_offset], ldb, &a[k * a_dim1 + 1], &c__1, + &c_b19, &b[k + b_dim1], ldb, (ftnlen)9); + i__1 = k - 1; + dgemv_((char *)"Transpose", &i__1, nrhs, &c_b7, &b[b_offset], ldb, &a[(k + 1) * a_dim1 + 1], + &c__1, &c_b19, &b[k + 1 + b_dim1], ldb, (ftnlen)9); + kp = -ipiv[k]; + if (kp != k) { + dswap_(nrhs, &b[k + b_dim1], ldb, &b[kp + b_dim1], ldb); + } + k += 2; + } + goto L40; + L50:; + } else { + k = 1; + L60: + if (k > *n) { + goto L80; + } + if (ipiv[k] > 0) { + kp = ipiv[k]; + if (kp != k) { + dswap_(nrhs, &b[k + b_dim1], ldb, &b[kp + b_dim1], ldb); + } + if (k < *n) { + i__1 = *n - k; + dger_(&i__1, nrhs, &c_b7, &a[k + 1 + k * a_dim1], &c__1, &b[k + b_dim1], ldb, + &b[k + 1 + b_dim1], ldb); + } + d__1 = 1. 
/ a[k + k * a_dim1]; + dscal_(nrhs, &d__1, &b[k + b_dim1], ldb); + ++k; + } else { + kp = -ipiv[k]; + if (kp != k + 1) { + dswap_(nrhs, &b[k + 1 + b_dim1], ldb, &b[kp + b_dim1], ldb); + } + if (k < *n - 1) { + i__1 = *n - k - 1; + dger_(&i__1, nrhs, &c_b7, &a[k + 2 + k * a_dim1], &c__1, &b[k + b_dim1], ldb, + &b[k + 2 + b_dim1], ldb); + i__1 = *n - k - 1; + dger_(&i__1, nrhs, &c_b7, &a[k + 2 + (k + 1) * a_dim1], &c__1, &b[k + 1 + b_dim1], + ldb, &b[k + 2 + b_dim1], ldb); + } + akm1k = a[k + 1 + k * a_dim1]; + akm1 = a[k + k * a_dim1] / akm1k; + ak = a[k + 1 + (k + 1) * a_dim1] / akm1k; + denom = akm1 * ak - 1.; + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + bkm1 = b[k + j * b_dim1] / akm1k; + bk = b[k + 1 + j * b_dim1] / akm1k; + b[k + j * b_dim1] = (ak * bkm1 - bk) / denom; + b[k + 1 + j * b_dim1] = (akm1 * bk - bkm1) / denom; + } + k += 2; + } + goto L60; + L80: + k = *n; + L90: + if (k < 1) { + goto L100; + } + if (ipiv[k] > 0) { + if (k < *n) { + i__1 = *n - k; + dgemv_((char *)"Transpose", &i__1, nrhs, &c_b7, &b[k + 1 + b_dim1], ldb, + &a[k + 1 + k * a_dim1], &c__1, &c_b19, &b[k + b_dim1], ldb, (ftnlen)9); + } + kp = ipiv[k]; + if (kp != k) { + dswap_(nrhs, &b[k + b_dim1], ldb, &b[kp + b_dim1], ldb); + } + --k; + } else { + if (k < *n) { + i__1 = *n - k; + dgemv_((char *)"Transpose", &i__1, nrhs, &c_b7, &b[k + 1 + b_dim1], ldb, + &a[k + 1 + k * a_dim1], &c__1, &c_b19, &b[k + b_dim1], ldb, (ftnlen)9); + i__1 = *n - k; + dgemv_((char *)"Transpose", &i__1, nrhs, &c_b7, &b[k + 1 + b_dim1], ldb, + &a[k + 1 + (k - 1) * a_dim1], &c__1, &c_b19, &b[k - 1 + b_dim1], ldb, + (ftnlen)9); + } + kp = -ipiv[k]; + if (kp != k) { + dswap_(nrhs, &b[k + b_dim1], ldb, &b[kp + b_dim1], ldb); + } + k += -2; + } + goto L90; + L100:; + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dsytrs2.cpp b/lib/linalg/dsytrs2.cpp new file mode 100644 index 0000000000..2d2bc90525 --- /dev/null +++ b/lib/linalg/dsytrs2.cpp @@ -0,0 +1,180 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static doublereal c_b10 = 1.; +int dsytrs2_(char *uplo, integer *n, integer *nrhs, doublereal *a, integer *lda, integer *ipiv, + doublereal *b, integer *ldb, doublereal *work, integer *info, ftnlen uplo_len) +{ + integer a_dim1, a_offset, b_dim1, b_offset, i__1; + doublereal d__1; + integer i__, j, k; + doublereal ak, bk; + integer kp; + doublereal akm1, bkm1, akm1k; + extern int dscal_(integer *, doublereal *, doublereal *, integer *); + extern logical lsame_(char *, char *, ftnlen, ftnlen); + doublereal denom; + integer iinfo; + extern int dswap_(integer *, doublereal *, integer *, doublereal *, integer *), + dtrsm_(char *, char *, char *, char *, integer *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, ftnlen, ftnlen, ftnlen, ftnlen); + logical upper; + extern int xerbla_(char *, integer *, ftnlen), + dsyconv_(char *, char *, integer *, doublereal *, integer *, integer *, doublereal *, + integer *, ftnlen, ftnlen); + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + --work; + *info = 0; + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + if (!upper && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*lda < max(1, *n)) { + *info = -5; + } else if (*ldb < max(1, *n)) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DSYTRS2", &i__1, (ftnlen)7); + 
return 0; + } + if (*n == 0 || *nrhs == 0) { + return 0; + } + dsyconv_(uplo, (char *)"C", n, &a[a_offset], lda, &ipiv[1], &work[1], &iinfo, (ftnlen)1, (ftnlen)1); + if (upper) { + k = *n; + while (k >= 1) { + if (ipiv[k] > 0) { + kp = ipiv[k]; + if (kp != k) { + dswap_(nrhs, &b[k + b_dim1], ldb, &b[kp + b_dim1], ldb); + } + --k; + } else { + kp = -ipiv[k]; + if (kp == -ipiv[k - 1]) { + dswap_(nrhs, &b[k - 1 + b_dim1], ldb, &b[kp + b_dim1], ldb); + } + k += -2; + } + } + dtrsm_((char *)"L", (char *)"U", (char *)"N", (char *)"U", n, nrhs, &c_b10, &a[a_offset], lda, &b[b_offset], ldb, (ftnlen)1, + (ftnlen)1, (ftnlen)1, (ftnlen)1); + i__ = *n; + while (i__ >= 1) { + if (ipiv[i__] > 0) { + d__1 = 1. / a[i__ + i__ * a_dim1]; + dscal_(nrhs, &d__1, &b[i__ + b_dim1], ldb); + } else if (i__ > 1) { + if (ipiv[i__ - 1] == ipiv[i__]) { + akm1k = work[i__]; + akm1 = a[i__ - 1 + (i__ - 1) * a_dim1] / akm1k; + ak = a[i__ + i__ * a_dim1] / akm1k; + denom = akm1 * ak - 1.; + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + bkm1 = b[i__ - 1 + j * b_dim1] / akm1k; + bk = b[i__ + j * b_dim1] / akm1k; + b[i__ - 1 + j * b_dim1] = (ak * bkm1 - bk) / denom; + b[i__ + j * b_dim1] = (akm1 * bk - bkm1) / denom; + } + --i__; + } + } + --i__; + } + dtrsm_((char *)"L", (char *)"U", (char *)"T", (char *)"U", n, nrhs, &c_b10, &a[a_offset], lda, &b[b_offset], ldb, (ftnlen)1, + (ftnlen)1, (ftnlen)1, (ftnlen)1); + k = 1; + while (k <= *n) { + if (ipiv[k] > 0) { + kp = ipiv[k]; + if (kp != k) { + dswap_(nrhs, &b[k + b_dim1], ldb, &b[kp + b_dim1], ldb); + } + ++k; + } else { + kp = -ipiv[k]; + if (k < *n && kp == -ipiv[k + 1]) { + dswap_(nrhs, &b[k + b_dim1], ldb, &b[kp + b_dim1], ldb); + } + k += 2; + } + } + } else { + k = 1; + while (k <= *n) { + if (ipiv[k] > 0) { + kp = ipiv[k]; + if (kp != k) { + dswap_(nrhs, &b[k + b_dim1], ldb, &b[kp + b_dim1], ldb); + } + ++k; + } else { + kp = -ipiv[k + 1]; + if (kp == -ipiv[k]) { + dswap_(nrhs, &b[k + 1 + b_dim1], ldb, &b[kp + b_dim1], ldb); + } + k += 2; + } + } + dtrsm_((char *)"L", (char *)"L", (char *)"N", (char *)"U", n, nrhs, &c_b10, &a[a_offset], lda, &b[b_offset], ldb, (ftnlen)1, + (ftnlen)1, (ftnlen)1, (ftnlen)1); + i__ = 1; + while (i__ <= *n) { + if (ipiv[i__] > 0) { + d__1 = 1. 
/ a[i__ + i__ * a_dim1]; + dscal_(nrhs, &d__1, &b[i__ + b_dim1], ldb); + } else { + akm1k = work[i__]; + akm1 = a[i__ + i__ * a_dim1] / akm1k; + ak = a[i__ + 1 + (i__ + 1) * a_dim1] / akm1k; + denom = akm1 * ak - 1.; + i__1 = *nrhs; + for (j = 1; j <= i__1; ++j) { + bkm1 = b[i__ + j * b_dim1] / akm1k; + bk = b[i__ + 1 + j * b_dim1] / akm1k; + b[i__ + j * b_dim1] = (ak * bkm1 - bk) / denom; + b[i__ + 1 + j * b_dim1] = (akm1 * bk - bkm1) / denom; + } + ++i__; + } + ++i__; + } + dtrsm_((char *)"L", (char *)"L", (char *)"T", (char *)"U", n, nrhs, &c_b10, &a[a_offset], lda, &b[b_offset], ldb, (ftnlen)1, + (ftnlen)1, (ftnlen)1, (ftnlen)1); + k = *n; + while (k >= 1) { + if (ipiv[k] > 0) { + kp = ipiv[k]; + if (kp != k) { + dswap_(nrhs, &b[k + b_dim1], ldb, &b[kp + b_dim1], ldb); + } + --k; + } else { + kp = -ipiv[k]; + if (k > 1 && kp == -ipiv[k - 1]) { + dswap_(nrhs, &b[k + b_dim1], ldb, &b[kp + b_dim1], ldb); + } + k += -2; + } + } + } + dsyconv_(uplo, (char *)"R", n, &a[a_offset], lda, &ipiv[1], &work[1], &iinfo, (ftnlen)1, (ftnlen)1); + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dtrevc3.cpp b/lib/linalg/dtrevc3.cpp new file mode 100644 index 0000000000..bd1a0a379e --- /dev/null +++ b/lib/linalg/dtrevc3.cpp @@ -0,0 +1,858 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static integer c__1 = 1; +static integer c_n1 = -1; +static integer c__2 = 2; +static doublereal c_b17 = 0.; +static logical c_false = FALSE_; +static doublereal c_b29 = 1.; +static logical c_true = TRUE_; +int dtrevc3_(char *side, char *howmny, logical *select, integer *n, doublereal *t, integer *ldt, + doublereal *vl, integer *ldvl, doublereal *vr, integer *ldvr, integer *mm, integer *m, + doublereal *work, integer *lwork, integer *info, ftnlen side_len, ftnlen howmny_len) +{ + address a__1[2]; + integer t_dim1, t_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1[2], i__2, i__3, i__4; + doublereal d__1, d__2, d__3, d__4; + char ch__1[2]; + int s_lmp_cat(char *, char **, integer *, integer *, ftnlen); + double sqrt(doublereal); + integer i__, j, k; + doublereal x[4]; + integer j1, j2, iscomplex[128], nb, ii, ki, ip, is, iv; + doublereal wi, wr; + integer ki2; + doublereal rec, ulp, beta, emax; + logical pair; + extern doublereal ddot_(integer *, doublereal *, integer *, doublereal *, integer *); + logical allv; + integer ierr; + doublereal unfl, ovfl, smin; + logical over; + doublereal vmax; + integer jnxt; + extern int dscal_(integer *, doublereal *, doublereal *, integer *); + doublereal scale; + extern int dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, + ftnlen, ftnlen); + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern int dgemv_(char *, integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, ftnlen); + doublereal remax; + extern int dcopy_(integer *, doublereal *, integer *, doublereal *, integer *); + logical leftv, bothv; + extern int daxpy_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *); + doublereal vcrit; + logical somev; + doublereal xnorm; + extern int dlaln2_(logical *, integer *, integer *, doublereal *, doublereal *, doublereal *, + integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, + doublereal *, doublereal *, integer *, doublereal *, doublereal *, + integer *), + dlabad_(doublereal *, doublereal *); + extern 
doublereal dlamch_(char *, ftnlen); + extern integer idamax_(integer *, doublereal *, integer *); + extern int dlaset_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, + integer *, ftnlen), + xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, + ftnlen, ftnlen); + extern int dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, + integer *, ftnlen); + doublereal bignum; + logical rightv; + integer maxwrk; + doublereal smlnum; + logical lquery; + --select; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + vl_dim1 = *ldvl; + vl_offset = 1 + vl_dim1; + vl -= vl_offset; + vr_dim1 = *ldvr; + vr_offset = 1 + vr_dim1; + vr -= vr_offset; + --work; + bothv = lsame_(side, (char *)"B", (ftnlen)1, (ftnlen)1); + rightv = lsame_(side, (char *)"R", (ftnlen)1, (ftnlen)1) || bothv; + leftv = lsame_(side, (char *)"L", (ftnlen)1, (ftnlen)1) || bothv; + allv = lsame_(howmny, (char *)"A", (ftnlen)1, (ftnlen)1); + over = lsame_(howmny, (char *)"B", (ftnlen)1, (ftnlen)1); + somev = lsame_(howmny, (char *)"S", (ftnlen)1, (ftnlen)1); + *info = 0; + i__1[0] = 1, a__1[0] = side; + i__1[1] = 1, a__1[1] = howmny; + s_lmp_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + nb = ilaenv_(&c__1, (char *)"DTREVC", ch__1, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, (ftnlen)2); + maxwrk = *n + (*n << 1) * nb; + work[1] = (doublereal)maxwrk; + lquery = *lwork == -1; + if (!rightv && !leftv) { + *info = -1; + } else if (!allv && !over && !somev) { + *info = -2; + } else if (*n < 0) { + *info = -4; + } else if (*ldt < max(1, *n)) { + *info = -6; + } else if (*ldvl < 1 || leftv && *ldvl < *n) { + *info = -8; + } else if (*ldvr < 1 || rightv && *ldvr < *n) { + *info = -10; + } else { + i__2 = 1, i__3 = *n * 3; + if (*lwork < max(i__2, i__3) && !lquery) { + *info = -14; + } else { + if (somev) { + *m = 0; + pair = FALSE_; + i__2 = *n; + for (j = 1; j <= i__2; ++j) { + if (pair) { + pair = FALSE_; + select[j] = FALSE_; + } else { + if (j < *n) { + if (t[j + 1 + j * t_dim1] == 0.) { + if (select[j]) { + ++(*m); + } + } else { + pair = TRUE_; + if (select[j] || select[j + 1]) { + select[j] = TRUE_; + *m += 2; + } + } + } else { + if (select[*n]) { + ++(*m); + } + } + } + } + } else { + *m = *n; + } + if (*mm < *m) { + *info = -11; + } + } + } + if (*info != 0) { + i__2 = -(*info); + xerbla_((char *)"DTREVC3", &i__2, (ftnlen)7); + return 0; + } else if (lquery) { + return 0; + } + if (*n == 0) { + return 0; + } + if (over && *lwork >= *n + (*n << 4)) { + nb = (*lwork - *n) / (*n << 1); + nb = min(nb, 128); + i__2 = (nb << 1) + 1; + dlaset_((char *)"F", n, &i__2, &c_b17, &c_b17, &work[1], n, (ftnlen)1); + } else { + nb = 1; + } + unfl = dlamch_((char *)"Safe minimum", (ftnlen)12); + ovfl = 1. / unfl; + dlabad_(&unfl, &ovfl); + ulp = dlamch_((char *)"Precision", (ftnlen)9); + smlnum = unfl * (*n / ulp); + bignum = (1. - ulp) / smlnum; + work[1] = 0.; + i__2 = *n; + for (j = 2; j <= i__2; ++j) { + work[j] = 0.; + i__3 = j - 1; + for (i__ = 1; i__ <= i__3; ++i__) { + work[j] += (d__1 = t[i__ + j * t_dim1], abs(d__1)); + } + } + if (rightv) { + iv = 2; + if (nb > 2) { + iv = nb; + } + ip = 0; + is = *m; + for (ki = *n; ki >= 1; --ki) { + if (ip == -1) { + ip = 1; + goto L140; + } else if (ki == 1) { + ip = 0; + } else if (t[ki + (ki - 1) * t_dim1] == 0.) 
{ + ip = 0; + } else { + ip = -1; + } + if (somev) { + if (ip == 0) { + if (!select[ki]) { + goto L140; + } + } else { + if (!select[ki - 1]) { + goto L140; + } + } + } + wr = t[ki + ki * t_dim1]; + wi = 0.; + if (ip != 0) { + wi = sqrt((d__1 = t[ki + (ki - 1) * t_dim1], abs(d__1))) * + sqrt((d__2 = t[ki - 1 + ki * t_dim1], abs(d__2))); + } + d__1 = ulp * (abs(wr) + abs(wi)); + smin = max(d__1, smlnum); + if (ip == 0) { + work[ki + iv * *n] = 1.; + i__2 = ki - 1; + for (k = 1; k <= i__2; ++k) { + work[k + iv * *n] = -t[k + ki * t_dim1]; + } + jnxt = ki - 1; + for (j = ki - 1; j >= 1; --j) { + if (j > jnxt) { + goto L60; + } + j1 = j; + j2 = j; + jnxt = j - 1; + if (j > 1) { + if (t[j + (j - 1) * t_dim1] != 0.) { + j1 = j - 1; + jnxt = j - 2; + } + } + if (j1 == j2) { + dlaln2_(&c_false, &c__1, &c__1, &smin, &c_b29, &t[j + j * t_dim1], ldt, + &c_b29, &c_b29, &work[j + iv * *n], n, &wr, &c_b17, x, &c__2, + &scale, &xnorm, &ierr); + if (xnorm > 1.) { + if (work[j] > bignum / xnorm) { + x[0] /= xnorm; + scale /= xnorm; + } + } + if (scale != 1.) { + dscal_(&ki, &scale, &work[iv * *n + 1], &c__1); + } + work[j + iv * *n] = x[0]; + i__2 = j - 1; + d__1 = -x[0]; + daxpy_(&i__2, &d__1, &t[j * t_dim1 + 1], &c__1, &work[iv * *n + 1], &c__1); + } else { + dlaln2_(&c_false, &c__2, &c__1, &smin, &c_b29, &t[j - 1 + (j - 1) * t_dim1], + ldt, &c_b29, &c_b29, &work[j - 1 + iv * *n], n, &wr, &c_b17, x, + &c__2, &scale, &xnorm, &ierr); + if (xnorm > 1.) { + d__1 = work[j - 1], d__2 = work[j]; + beta = max(d__1, d__2); + if (beta > bignum / xnorm) { + x[0] /= xnorm; + x[1] /= xnorm; + scale /= xnorm; + } + } + if (scale != 1.) { + dscal_(&ki, &scale, &work[iv * *n + 1], &c__1); + } + work[j - 1 + iv * *n] = x[0]; + work[j + iv * *n] = x[1]; + i__2 = j - 2; + d__1 = -x[0]; + daxpy_(&i__2, &d__1, &t[(j - 1) * t_dim1 + 1], &c__1, &work[iv * *n + 1], + &c__1); + i__2 = j - 2; + d__1 = -x[1]; + daxpy_(&i__2, &d__1, &t[j * t_dim1 + 1], &c__1, &work[iv * *n + 1], &c__1); + } + L60:; + } + if (!over) { + dcopy_(&ki, &work[iv * *n + 1], &c__1, &vr[is * vr_dim1 + 1], &c__1); + ii = idamax_(&ki, &vr[is * vr_dim1 + 1], &c__1); + remax = 1. / (d__1 = vr[ii + is * vr_dim1], abs(d__1)); + dscal_(&ki, &remax, &vr[is * vr_dim1 + 1], &c__1); + i__2 = *n; + for (k = ki + 1; k <= i__2; ++k) { + vr[k + is * vr_dim1] = 0.; + } + } else if (nb == 1) { + if (ki > 1) { + i__2 = ki - 1; + dgemv_((char *)"N", n, &i__2, &c_b29, &vr[vr_offset], ldvr, &work[iv * *n + 1], + &c__1, &work[ki + iv * *n], &vr[ki * vr_dim1 + 1], &c__1, (ftnlen)1); + } + ii = idamax_(n, &vr[ki * vr_dim1 + 1], &c__1); + remax = 1. 
/ (d__1 = vr[ii + ki * vr_dim1], abs(d__1)); + dscal_(n, &remax, &vr[ki * vr_dim1 + 1], &c__1); + } else { + i__2 = *n; + for (k = ki + 1; k <= i__2; ++k) { + work[k + iv * *n] = 0.; + } + iscomplex[iv - 1] = ip; + } + } else { + if ((d__1 = t[ki - 1 + ki * t_dim1], abs(d__1)) >= + (d__2 = t[ki + (ki - 1) * t_dim1], abs(d__2))) { + work[ki - 1 + (iv - 1) * *n] = 1.; + work[ki + iv * *n] = wi / t[ki - 1 + ki * t_dim1]; + } else { + work[ki - 1 + (iv - 1) * *n] = -wi / t[ki + (ki - 1) * t_dim1]; + work[ki + iv * *n] = 1.; + } + work[ki + (iv - 1) * *n] = 0.; + work[ki - 1 + iv * *n] = 0.; + i__2 = ki - 2; + for (k = 1; k <= i__2; ++k) { + work[k + (iv - 1) * *n] = + -work[ki - 1 + (iv - 1) * *n] * t[k + (ki - 1) * t_dim1]; + work[k + iv * *n] = -work[ki + iv * *n] * t[k + ki * t_dim1]; + } + jnxt = ki - 2; + for (j = ki - 2; j >= 1; --j) { + if (j > jnxt) { + goto L90; + } + j1 = j; + j2 = j; + jnxt = j - 1; + if (j > 1) { + if (t[j + (j - 1) * t_dim1] != 0.) { + j1 = j - 1; + jnxt = j - 2; + } + } + if (j1 == j2) { + dlaln2_(&c_false, &c__1, &c__2, &smin, &c_b29, &t[j + j * t_dim1], ldt, + &c_b29, &c_b29, &work[j + (iv - 1) * *n], n, &wr, &wi, x, &c__2, + &scale, &xnorm, &ierr); + if (xnorm > 1.) { + if (work[j] > bignum / xnorm) { + x[0] /= xnorm; + x[2] /= xnorm; + scale /= xnorm; + } + } + if (scale != 1.) { + dscal_(&ki, &scale, &work[(iv - 1) * *n + 1], &c__1); + dscal_(&ki, &scale, &work[iv * *n + 1], &c__1); + } + work[j + (iv - 1) * *n] = x[0]; + work[j + iv * *n] = x[2]; + i__2 = j - 1; + d__1 = -x[0]; + daxpy_(&i__2, &d__1, &t[j * t_dim1 + 1], &c__1, &work[(iv - 1) * *n + 1], + &c__1); + i__2 = j - 1; + d__1 = -x[2]; + daxpy_(&i__2, &d__1, &t[j * t_dim1 + 1], &c__1, &work[iv * *n + 1], &c__1); + } else { + dlaln2_(&c_false, &c__2, &c__2, &smin, &c_b29, &t[j - 1 + (j - 1) * t_dim1], + ldt, &c_b29, &c_b29, &work[j - 1 + (iv - 1) * *n], n, &wr, &wi, x, + &c__2, &scale, &xnorm, &ierr); + if (xnorm > 1.) { + d__1 = work[j - 1], d__2 = work[j]; + beta = max(d__1, d__2); + if (beta > bignum / xnorm) { + rec = 1. / xnorm; + x[0] *= rec; + x[2] *= rec; + x[1] *= rec; + x[3] *= rec; + scale *= rec; + } + } + if (scale != 1.) { + dscal_(&ki, &scale, &work[(iv - 1) * *n + 1], &c__1); + dscal_(&ki, &scale, &work[iv * *n + 1], &c__1); + } + work[j - 1 + (iv - 1) * *n] = x[0]; + work[j + (iv - 1) * *n] = x[1]; + work[j - 1 + iv * *n] = x[2]; + work[j + iv * *n] = x[3]; + i__2 = j - 2; + d__1 = -x[0]; + daxpy_(&i__2, &d__1, &t[(j - 1) * t_dim1 + 1], &c__1, + &work[(iv - 1) * *n + 1], &c__1); + i__2 = j - 2; + d__1 = -x[1]; + daxpy_(&i__2, &d__1, &t[j * t_dim1 + 1], &c__1, &work[(iv - 1) * *n + 1], + &c__1); + i__2 = j - 2; + d__1 = -x[2]; + daxpy_(&i__2, &d__1, &t[(j - 1) * t_dim1 + 1], &c__1, &work[iv * *n + 1], + &c__1); + i__2 = j - 2; + d__1 = -x[3]; + daxpy_(&i__2, &d__1, &t[j * t_dim1 + 1], &c__1, &work[iv * *n + 1], &c__1); + } + L90:; + } + if (!over) { + dcopy_(&ki, &work[(iv - 1) * *n + 1], &c__1, &vr[(is - 1) * vr_dim1 + 1], + &c__1); + dcopy_(&ki, &work[iv * *n + 1], &c__1, &vr[is * vr_dim1 + 1], &c__1); + emax = 0.; + i__2 = ki; + for (k = 1; k <= i__2; ++k) { + d__3 = emax, d__4 = (d__1 = vr[k + (is - 1) * vr_dim1], abs(d__1)) + + (d__2 = vr[k + is * vr_dim1], abs(d__2)); + emax = max(d__3, d__4); + } + remax = 1. 
/ emax; + dscal_(&ki, &remax, &vr[(is - 1) * vr_dim1 + 1], &c__1); + dscal_(&ki, &remax, &vr[is * vr_dim1 + 1], &c__1); + i__2 = *n; + for (k = ki + 1; k <= i__2; ++k) { + vr[k + (is - 1) * vr_dim1] = 0.; + vr[k + is * vr_dim1] = 0.; + } + } else if (nb == 1) { + if (ki > 2) { + i__2 = ki - 2; + dgemv_((char *)"N", n, &i__2, &c_b29, &vr[vr_offset], ldvr, + &work[(iv - 1) * *n + 1], &c__1, &work[ki - 1 + (iv - 1) * *n], + &vr[(ki - 1) * vr_dim1 + 1], &c__1, (ftnlen)1); + i__2 = ki - 2; + dgemv_((char *)"N", n, &i__2, &c_b29, &vr[vr_offset], ldvr, &work[iv * *n + 1], + &c__1, &work[ki + iv * *n], &vr[ki * vr_dim1 + 1], &c__1, (ftnlen)1); + } else { + dscal_(n, &work[ki - 1 + (iv - 1) * *n], &vr[(ki - 1) * vr_dim1 + 1], + &c__1); + dscal_(n, &work[ki + iv * *n], &vr[ki * vr_dim1 + 1], &c__1); + } + emax = 0.; + i__2 = *n; + for (k = 1; k <= i__2; ++k) { + d__3 = emax, d__4 = (d__1 = vr[k + (ki - 1) * vr_dim1], abs(d__1)) + + (d__2 = vr[k + ki * vr_dim1], abs(d__2)); + emax = max(d__3, d__4); + } + remax = 1. / emax; + dscal_(n, &remax, &vr[(ki - 1) * vr_dim1 + 1], &c__1); + dscal_(n, &remax, &vr[ki * vr_dim1 + 1], &c__1); + } else { + i__2 = *n; + for (k = ki + 1; k <= i__2; ++k) { + work[k + (iv - 1) * *n] = 0.; + work[k + iv * *n] = 0.; + } + iscomplex[iv - 2] = -ip; + iscomplex[iv - 1] = ip; + --iv; + } + } + if (nb > 1) { + if (ip == 0) { + ki2 = ki; + } else { + ki2 = ki - 1; + } + if (iv <= 2 || ki2 == 1) { + i__2 = nb - iv + 1; + i__3 = ki2 + nb - iv; + dgemm_((char *)"N", (char *)"N", n, &i__2, &i__3, &c_b29, &vr[vr_offset], ldvr, + &work[iv * *n + 1], n, &c_b17, &work[(nb + iv) * *n + 1], n, (ftnlen)1, + (ftnlen)1); + i__2 = nb; + for (k = iv; k <= i__2; ++k) { + if (iscomplex[k - 1] == 0) { + ii = idamax_(n, &work[(nb + k) * *n + 1], &c__1); + remax = 1. / (d__1 = work[ii + (nb + k) * *n], abs(d__1)); + } else if (iscomplex[k - 1] == 1) { + emax = 0.; + i__3 = *n; + for (ii = 1; ii <= i__3; ++ii) { + d__3 = emax, + d__4 = (d__1 = work[ii + (nb + k) * *n], abs(d__1)) + + (d__2 = work[ii + (nb + k + 1) * *n], abs(d__2)); + emax = max(d__3, d__4); + } + remax = 1. / emax; + } + dscal_(n, &remax, &work[(nb + k) * *n + 1], &c__1); + } + i__2 = nb - iv + 1; + dlacpy_((char *)"F", n, &i__2, &work[(nb + iv) * *n + 1], n, &vr[ki2 * vr_dim1 + 1], + ldvr, (ftnlen)1); + iv = nb; + } else { + --iv; + } + } + --is; + if (ip != 0) { + --is; + } + L140:; + } + } + if (leftv) { + iv = 1; + ip = 0; + is = 1; + i__2 = *n; + for (ki = 1; ki <= i__2; ++ki) { + if (ip == 1) { + ip = -1; + goto L260; + } else if (ki == *n) { + ip = 0; + } else if (t[ki + 1 + ki * t_dim1] == 0.) { + ip = 0; + } else { + ip = 1; + } + if (somev) { + if (!select[ki]) { + goto L260; + } + } + wr = t[ki + ki * t_dim1]; + wi = 0.; + if (ip != 0) { + wi = sqrt((d__1 = t[ki + (ki + 1) * t_dim1], abs(d__1))) * + sqrt((d__2 = t[ki + 1 + ki * t_dim1], abs(d__2))); + } + d__1 = ulp * (abs(wr) + abs(wi)); + smin = max(d__1, smlnum); + if (ip == 0) { + work[ki + iv * *n] = 1.; + i__3 = *n; + for (k = ki + 1; k <= i__3; ++k) { + work[k + iv * *n] = -t[ki + k * t_dim1]; + } + vmax = 1.; + vcrit = bignum; + jnxt = ki + 1; + i__3 = *n; + for (j = ki + 1; j <= i__3; ++j) { + if (j < jnxt) { + goto L170; + } + j1 = j; + j2 = j; + jnxt = j + 1; + if (j < *n) { + if (t[j + 1 + j * t_dim1] != 0.) { + j2 = j + 1; + jnxt = j + 2; + } + } + if (j1 == j2) { + if (work[j] > vcrit) { + rec = 1. 
/ vmax; + i__4 = *n - ki + 1; + dscal_(&i__4, &rec, &work[ki + iv * *n], &c__1); + vmax = 1.; + vcrit = bignum; + } + i__4 = j - ki - 1; + work[j + iv * *n] -= ddot_(&i__4, &t[ki + 1 + j * t_dim1], &c__1, + &work[ki + 1 + iv * *n], &c__1); + dlaln2_(&c_false, &c__1, &c__1, &smin, &c_b29, &t[j + j * t_dim1], ldt, + &c_b29, &c_b29, &work[j + iv * *n], n, &wr, &c_b17, x, &c__2, + &scale, &xnorm, &ierr); + if (scale != 1.) { + i__4 = *n - ki + 1; + dscal_(&i__4, &scale, &work[ki + iv * *n], &c__1); + } + work[j + iv * *n] = x[0]; + d__2 = (d__1 = work[j + iv * *n], abs(d__1)); + vmax = max(d__2, vmax); + vcrit = bignum / vmax; + } else { + d__1 = work[j], d__2 = work[j + 1]; + beta = max(d__1, d__2); + if (beta > vcrit) { + rec = 1. / vmax; + i__4 = *n - ki + 1; + dscal_(&i__4, &rec, &work[ki + iv * *n], &c__1); + vmax = 1.; + vcrit = bignum; + } + i__4 = j - ki - 1; + work[j + iv * *n] -= ddot_(&i__4, &t[ki + 1 + j * t_dim1], &c__1, + &work[ki + 1 + iv * *n], &c__1); + i__4 = j - ki - 1; + work[j + 1 + iv * *n] -= ddot_(&i__4, &t[ki + 1 + (j + 1) * t_dim1], &c__1, + &work[ki + 1 + iv * *n], &c__1); + dlaln2_(&c_true, &c__2, &c__1, &smin, &c_b29, &t[j + j * t_dim1], ldt, + &c_b29, &c_b29, &work[j + iv * *n], n, &wr, &c_b17, x, &c__2, + &scale, &xnorm, &ierr); + if (scale != 1.) { + i__4 = *n - ki + 1; + dscal_(&i__4, &scale, &work[ki + iv * *n], &c__1); + } + work[j + iv * *n] = x[0]; + work[j + 1 + iv * *n] = x[1]; + d__3 = (d__1 = work[j + iv * *n], abs(d__1)), + d__4 = (d__2 = work[j + 1 + iv * *n], abs(d__2)), d__3 = max(d__3, d__4); + vmax = max(d__3, vmax); + vcrit = bignum / vmax; + } + L170:; + } + if (!over) { + i__3 = *n - ki + 1; + dcopy_(&i__3, &work[ki + iv * *n], &c__1, &vl[ki + is * vl_dim1], &c__1); + i__3 = *n - ki + 1; + ii = idamax_(&i__3, &vl[ki + is * vl_dim1], &c__1) + ki - 1; + remax = 1. / (d__1 = vl[ii + is * vl_dim1], abs(d__1)); + i__3 = *n - ki + 1; + dscal_(&i__3, &remax, &vl[ki + is * vl_dim1], &c__1); + i__3 = ki - 1; + for (k = 1; k <= i__3; ++k) { + vl[k + is * vl_dim1] = 0.; + } + } else if (nb == 1) { + if (ki < *n) { + i__3 = *n - ki; + dgemv_((char *)"N", n, &i__3, &c_b29, &vl[(ki + 1) * vl_dim1 + 1], ldvl, + &work[ki + 1 + iv * *n], &c__1, &work[ki + iv * *n], + &vl[ki * vl_dim1 + 1], &c__1, (ftnlen)1); + } + ii = idamax_(n, &vl[ki * vl_dim1 + 1], &c__1); + remax = 1. / (d__1 = vl[ii + ki * vl_dim1], abs(d__1)); + dscal_(n, &remax, &vl[ki * vl_dim1 + 1], &c__1); + } else { + i__3 = ki - 1; + for (k = 1; k <= i__3; ++k) { + work[k + iv * *n] = 0.; + } + iscomplex[iv - 1] = ip; + } + } else { + if ((d__1 = t[ki + (ki + 1) * t_dim1], abs(d__1)) >= + (d__2 = t[ki + 1 + ki * t_dim1], abs(d__2))) { + work[ki + iv * *n] = wi / t[ki + (ki + 1) * t_dim1]; + work[ki + 1 + (iv + 1) * *n] = 1.; + } else { + work[ki + iv * *n] = 1.; + work[ki + 1 + (iv + 1) * *n] = -wi / t[ki + 1 + ki * t_dim1]; + } + work[ki + 1 + iv * *n] = 0.; + work[ki + (iv + 1) * *n] = 0.; + i__3 = *n; + for (k = ki + 2; k <= i__3; ++k) { + work[k + iv * *n] = -work[ki + iv * *n] * t[ki + k * t_dim1]; + work[k + (iv + 1) * *n] = + -work[ki + 1 + (iv + 1) * *n] * t[ki + 1 + k * t_dim1]; + } + vmax = 1.; + vcrit = bignum; + jnxt = ki + 2; + i__3 = *n; + for (j = ki + 2; j <= i__3; ++j) { + if (j < jnxt) { + goto L200; + } + j1 = j; + j2 = j; + jnxt = j + 1; + if (j < *n) { + if (t[j + 1 + j * t_dim1] != 0.) { + j2 = j + 1; + jnxt = j + 2; + } + } + if (j1 == j2) { + if (work[j] > vcrit) { + rec = 1. 
/ vmax; + i__4 = *n - ki + 1; + dscal_(&i__4, &rec, &work[ki + iv * *n], &c__1); + i__4 = *n - ki + 1; + dscal_(&i__4, &rec, &work[ki + (iv + 1) * *n], &c__1); + vmax = 1.; + vcrit = bignum; + } + i__4 = j - ki - 2; + work[j + iv * *n] -= ddot_(&i__4, &t[ki + 2 + j * t_dim1], &c__1, + &work[ki + 2 + iv * *n], &c__1); + i__4 = j - ki - 2; + work[j + (iv + 1) * *n] -= ddot_(&i__4, &t[ki + 2 + j * t_dim1], &c__1, + &work[ki + 2 + (iv + 1) * *n], &c__1); + d__1 = -wi; + dlaln2_(&c_false, &c__1, &c__2, &smin, &c_b29, &t[j + j * t_dim1], ldt, + &c_b29, &c_b29, &work[j + iv * *n], n, &wr, &d__1, x, &c__2, &scale, + &xnorm, &ierr); + if (scale != 1.) { + i__4 = *n - ki + 1; + dscal_(&i__4, &scale, &work[ki + iv * *n], &c__1); + i__4 = *n - ki + 1; + dscal_(&i__4, &scale, &work[ki + (iv + 1) * *n], &c__1); + } + work[j + iv * *n] = x[0]; + work[j + (iv + 1) * *n] = x[2]; + d__3 = (d__1 = work[j + iv * *n], abs(d__1)), + d__4 = (d__2 = work[j + (iv + 1) * *n], abs(d__2)), d__3 = max(d__3, d__4); + vmax = max(d__3, vmax); + vcrit = bignum / vmax; + } else { + d__1 = work[j], d__2 = work[j + 1]; + beta = max(d__1, d__2); + if (beta > vcrit) { + rec = 1. / vmax; + i__4 = *n - ki + 1; + dscal_(&i__4, &rec, &work[ki + iv * *n], &c__1); + i__4 = *n - ki + 1; + dscal_(&i__4, &rec, &work[ki + (iv + 1) * *n], &c__1); + vmax = 1.; + vcrit = bignum; + } + i__4 = j - ki - 2; + work[j + iv * *n] -= ddot_(&i__4, &t[ki + 2 + j * t_dim1], &c__1, + &work[ki + 2 + iv * *n], &c__1); + i__4 = j - ki - 2; + work[j + (iv + 1) * *n] -= ddot_(&i__4, &t[ki + 2 + j * t_dim1], &c__1, + &work[ki + 2 + (iv + 1) * *n], &c__1); + i__4 = j - ki - 2; + work[j + 1 + iv * *n] -= ddot_(&i__4, &t[ki + 2 + (j + 1) * t_dim1], &c__1, + &work[ki + 2 + iv * *n], &c__1); + i__4 = j - ki - 2; + work[j + 1 + (iv + 1) * *n] -= + ddot_(&i__4, &t[ki + 2 + (j + 1) * t_dim1], &c__1, + &work[ki + 2 + (iv + 1) * *n], &c__1); + d__1 = -wi; + dlaln2_(&c_true, &c__2, &c__2, &smin, &c_b29, &t[j + j * t_dim1], ldt, + &c_b29, &c_b29, &work[j + iv * *n], n, &wr, &d__1, x, &c__2, &scale, + &xnorm, &ierr); + if (scale != 1.) { + i__4 = *n - ki + 1; + dscal_(&i__4, &scale, &work[ki + iv * *n], &c__1); + i__4 = *n - ki + 1; + dscal_(&i__4, &scale, &work[ki + (iv + 1) * *n], &c__1); + } + work[j + iv * *n] = x[0]; + work[j + (iv + 1) * *n] = x[2]; + work[j + 1 + iv * *n] = x[1]; + work[j + 1 + (iv + 1) * *n] = x[3]; + d__1 = abs(x[0]), d__2 = abs(x[2]), d__1 = max(d__1, d__2), + d__2 = abs(x[1]), d__1 = max(d__1, d__2), d__2 = abs(x[3]), + d__1 = max(d__1, d__2); + vmax = max(d__1, vmax); + vcrit = bignum / vmax; + } + L200:; + } + if (!over) { + i__3 = *n - ki + 1; + dcopy_(&i__3, &work[ki + iv * *n], &c__1, &vl[ki + is * vl_dim1], &c__1); + i__3 = *n - ki + 1; + dcopy_(&i__3, &work[ki + (iv + 1) * *n], &c__1, &vl[ki + (is + 1) * vl_dim1], + &c__1); + emax = 0.; + i__3 = *n; + for (k = ki; k <= i__3; ++k) { + d__3 = emax, d__4 = (d__1 = vl[k + is * vl_dim1], abs(d__1)) + + (d__2 = vl[k + (is + 1) * vl_dim1], abs(d__2)); + emax = max(d__3, d__4); + } + remax = 1. 
/ emax; + i__3 = *n - ki + 1; + dscal_(&i__3, &remax, &vl[ki + is * vl_dim1], &c__1); + i__3 = *n - ki + 1; + dscal_(&i__3, &remax, &vl[ki + (is + 1) * vl_dim1], &c__1); + i__3 = ki - 1; + for (k = 1; k <= i__3; ++k) { + vl[k + is * vl_dim1] = 0.; + vl[k + (is + 1) * vl_dim1] = 0.; + } + } else if (nb == 1) { + if (ki < *n - 1) { + i__3 = *n - ki - 1; + dgemv_((char *)"N", n, &i__3, &c_b29, &vl[(ki + 2) * vl_dim1 + 1], ldvl, + &work[ki + 2 + iv * *n], &c__1, &work[ki + iv * *n], + &vl[ki * vl_dim1 + 1], &c__1, (ftnlen)1); + i__3 = *n - ki - 1; + dgemv_((char *)"N", n, &i__3, &c_b29, &vl[(ki + 2) * vl_dim1 + 1], ldvl, + &work[ki + 2 + (iv + 1) * *n], &c__1, &work[ki + 1 + (iv + 1) * *n], + &vl[(ki + 1) * vl_dim1 + 1], &c__1, (ftnlen)1); + } else { + dscal_(n, &work[ki + iv * *n], &vl[ki * vl_dim1 + 1], &c__1); + dscal_(n, &work[ki + 1 + (iv + 1) * *n], &vl[(ki + 1) * vl_dim1 + 1], + &c__1); + } + emax = 0.; + i__3 = *n; + for (k = 1; k <= i__3; ++k) { + d__3 = emax, d__4 = (d__1 = vl[k + ki * vl_dim1], abs(d__1)) + + (d__2 = vl[k + (ki + 1) * vl_dim1], abs(d__2)); + emax = max(d__3, d__4); + } + remax = 1. / emax; + dscal_(n, &remax, &vl[ki * vl_dim1 + 1], &c__1); + dscal_(n, &remax, &vl[(ki + 1) * vl_dim1 + 1], &c__1); + } else { + i__3 = ki - 1; + for (k = 1; k <= i__3; ++k) { + work[k + iv * *n] = 0.; + work[k + (iv + 1) * *n] = 0.; + } + iscomplex[iv - 1] = ip; + iscomplex[iv] = -ip; + ++iv; + } + } + if (nb > 1) { + if (ip == 0) { + ki2 = ki; + } else { + ki2 = ki + 1; + } + if (iv >= nb - 1 || ki2 == *n) { + i__3 = *n - ki2 + iv; + dgemm_((char *)"N", (char *)"N", n, &iv, &i__3, &c_b29, &vl[(ki2 - iv + 1) * vl_dim1 + 1], ldvl, + &work[ki2 - iv + 1 + *n], n, &c_b17, &work[(nb + 1) * *n + 1], n, + (ftnlen)1, (ftnlen)1); + i__3 = iv; + for (k = 1; k <= i__3; ++k) { + if (iscomplex[k - 1] == 0) { + ii = idamax_(n, &work[(nb + k) * *n + 1], &c__1); + remax = 1. / (d__1 = work[ii + (nb + k) * *n], abs(d__1)); + } else if (iscomplex[k - 1] == 1) { + emax = 0.; + i__4 = *n; + for (ii = 1; ii <= i__4; ++ii) { + d__3 = emax, + d__4 = (d__1 = work[ii + (nb + k) * *n], abs(d__1)) + + (d__2 = work[ii + (nb + k + 1) * *n], abs(d__2)); + emax = max(d__3, d__4); + } + remax = 1. 
/ emax; + } + dscal_(n, &remax, &work[(nb + k) * *n + 1], &c__1); + } + dlacpy_((char *)"F", n, &iv, &work[(nb + 1) * *n + 1], n, + &vl[(ki2 - iv + 1) * vl_dim1 + 1], ldvl, (ftnlen)1); + iv = 1; + } else { + ++iv; + } + } + ++is; + if (ip != 0) { + ++is; + } + L260:; + } + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dtrexc.cpp b/lib/linalg/dtrexc.cpp new file mode 100644 index 0000000000..07568d6ed2 --- /dev/null +++ b/lib/linalg/dtrexc.cpp @@ -0,0 +1,217 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static integer c__1 = 1; +static integer c__2 = 2; +int dtrexc_(char *compq, integer *n, doublereal *t, integer *ldt, doublereal *q, integer *ldq, + integer *ifst, integer *ilst, doublereal *work, integer *info, ftnlen compq_len) +{ + integer q_dim1, q_offset, t_dim1, t_offset, i__1; + integer nbf, nbl, here; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + logical wantq; + extern int dlaexc_(logical *, integer *, doublereal *, integer *, doublereal *, integer *, + integer *, integer *, integer *, doublereal *, integer *), + xerbla_(char *, integer *, ftnlen); + integer nbnext; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --work; + *info = 0; + wantq = lsame_(compq, (char *)"V", (ftnlen)1, (ftnlen)1); + if (!wantq && !lsame_(compq, (char *)"N", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*ldt < max(1, *n)) { + *info = -4; + } else if (*ldq < 1 || wantq && *ldq < max(1, *n)) { + *info = -6; + } else if ((*ifst < 1 || *ifst > *n) && *n > 0) { + *info = -7; + } else if ((*ilst < 1 || *ilst > *n) && *n > 0) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DTREXC", &i__1, (ftnlen)6); + return 0; + } + if (*n <= 1) { + return 0; + } + if (*ifst > 1) { + if (t[*ifst + (*ifst - 1) * t_dim1] != 0.) { + --(*ifst); + } + } + nbf = 1; + if (*ifst < *n) { + if (t[*ifst + 1 + *ifst * t_dim1] != 0.) { + nbf = 2; + } + } + if (*ilst > 1) { + if (t[*ilst + (*ilst - 1) * t_dim1] != 0.) { + --(*ilst); + } + } + nbl = 1; + if (*ilst < *n) { + if (t[*ilst + 1 + *ilst * t_dim1] != 0.) { + nbl = 2; + } + } + if (*ifst == *ilst) { + return 0; + } + if (*ifst < *ilst) { + if (nbf == 2 && nbl == 1) { + --(*ilst); + } + if (nbf == 1 && nbl == 2) { + ++(*ilst); + } + here = *ifst; + L10: + if (nbf == 1 || nbf == 2) { + nbnext = 1; + if (here + nbf + 1 <= *n) { + if (t[here + nbf + 1 + (here + nbf) * t_dim1] != 0.) { + nbnext = 2; + } + } + dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, &here, &nbf, &nbnext, &work[1], + info); + if (*info != 0) { + *ilst = here; + return 0; + } + here += nbnext; + if (nbf == 2) { + if (t[here + 1 + here * t_dim1] == 0.) { + nbf = 3; + } + } + } else { + nbnext = 1; + if (here + 3 <= *n) { + if (t[here + 3 + (here + 2) * t_dim1] != 0.) { + nbnext = 2; + } + } + i__1 = here + 1; + dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, &i__1, &c__1, &nbnext, + &work[1], info); + if (*info != 0) { + *ilst = here; + return 0; + } + if (nbnext == 1) { + dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, &here, &c__1, &nbnext, + &work[1], info); + ++here; + } else { + if (t[here + 2 + (here + 1) * t_dim1] == 0.) 
{ + nbnext = 1; + } + if (nbnext == 2) { + dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, &here, &c__1, &nbnext, + &work[1], info); + if (*info != 0) { + *ilst = here; + return 0; + } + here += 2; + } else { + dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, &here, &c__1, &c__1, + &work[1], info); + i__1 = here + 1; + dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, &i__1, &c__1, &c__1, + &work[1], info); + here += 2; + } + } + } + if (here < *ilst) { + goto L10; + } + } else { + here = *ifst; + L20: + if (nbf == 1 || nbf == 2) { + nbnext = 1; + if (here >= 3) { + if (t[here - 1 + (here - 2) * t_dim1] != 0.) { + nbnext = 2; + } + } + i__1 = here - nbnext; + dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, &i__1, &nbnext, &nbf, &work[1], + info); + if (*info != 0) { + *ilst = here; + return 0; + } + here -= nbnext; + if (nbf == 2) { + if (t[here + 1 + here * t_dim1] == 0.) { + nbf = 3; + } + } + } else { + nbnext = 1; + if (here >= 3) { + if (t[here - 1 + (here - 2) * t_dim1] != 0.) { + nbnext = 2; + } + } + i__1 = here - nbnext; + dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, &i__1, &nbnext, &c__1, + &work[1], info); + if (*info != 0) { + *ilst = here; + return 0; + } + if (nbnext == 1) { + dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, &here, &nbnext, &c__1, + &work[1], info); + --here; + } else { + if (t[here + (here - 1) * t_dim1] == 0.) { + nbnext = 1; + } + if (nbnext == 2) { + i__1 = here - 1; + dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, &i__1, &c__2, &c__1, + &work[1], info); + if (*info != 0) { + *ilst = here; + return 0; + } + here += -2; + } else { + dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, &here, &c__1, &c__1, + &work[1], info); + i__1 = here - 1; + dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, &i__1, &c__1, &c__1, + &work[1], info); + here += -2; + } + } + } + if (here > *ilst) { + goto L20; + } + } + *ilst = here; + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dtrtrs.cpp b/lib/linalg/dtrtrs.cpp new file mode 100644 index 0000000000..3ef3eac882 --- /dev/null +++ b/lib/linalg/dtrtrs.cpp @@ -0,0 +1,65 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static doublereal c_b12 = 1.; +int dtrtrs_(char *uplo, char *trans, char *diag, integer *n, integer *nrhs, doublereal *a, + integer *lda, doublereal *b, integer *ldb, integer *info, ftnlen uplo_len, + ftnlen trans_len, ftnlen diag_len) +{ + integer a_dim1, a_offset, b_dim1, b_offset, i__1; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern int dtrsm_(char *, char *, char *, char *, integer *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, ftnlen, ftnlen, ftnlen, + ftnlen), + xerbla_(char *, integer *, ftnlen); + logical nounit; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + *info = 0; + nounit = lsame_(diag, (char *)"N", (ftnlen)1, (ftnlen)1); + if (!lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1) && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (!lsame_(trans, (char *)"N", (ftnlen)1, (ftnlen)1) && + !lsame_(trans, (char *)"T", (ftnlen)1, (ftnlen)1) && + !lsame_(trans, (char *)"C", (ftnlen)1, (ftnlen)1)) { + *info = -2; + } else if (!nounit && !lsame_(diag, (char *)"U", (ftnlen)1, (ftnlen)1)) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*nrhs < 0) { + *info = -5; + } else if (*lda < max(1, *n)) { + *info = -7; + } else if (*ldb < 
max(1, *n)) { + *info = -9; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DTRTRS", &i__1, (ftnlen)6); + return 0; + } + if (*n == 0) { + return 0; + } + if (nounit) { + i__1 = *n; + for (*info = 1; *info <= i__1; ++(*info)) { + if (a[*info + *info * a_dim1] == 0.) { + return 0; + } + } + } + *info = 0; + dtrsm_((char *)"Left", uplo, trans, diag, n, nrhs, &c_b12, &a[a_offset], lda, &b[b_offset], ldb, + (ftnlen)4, (ftnlen)1, (ftnlen)1, (ftnlen)1); + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/izamax.cpp b/lib/linalg/izamax.cpp new file mode 100644 index 0000000000..1aebf6ac52 --- /dev/null +++ b/lib/linalg/izamax.cpp @@ -0,0 +1,46 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +integer izamax_(integer *n, doublecomplex *zx, integer *incx) +{ + integer ret_val, i__1; + integer i__, ix; + doublereal dmax__; + extern doublereal dcabs1_(doublecomplex *); + --zx; + ret_val = 0; + if (*n < 1 || *incx <= 0) { + return ret_val; + } + ret_val = 1; + if (*n == 1) { + return ret_val; + } + if (*incx == 1) { + dmax__ = dcabs1_(&zx[1]); + i__1 = *n; + for (i__ = 2; i__ <= i__1; ++i__) { + if (dcabs1_(&zx[i__]) > dmax__) { + ret_val = i__; + dmax__ = dcabs1_(&zx[i__]); + } + } + } else { + ix = 1; + dmax__ = dcabs1_(&zx[1]); + ix += *incx; + i__1 = *n; + for (i__ = 2; i__ <= i__1; ++i__) { + if (dcabs1_(&zx[ix]) > dmax__) { + ret_val = i__; + dmax__ = dcabs1_(&zx[ix]); + } + ix += *incx; + } + } + return ret_val; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/zcop.cpp b/lib/linalg/zcop.cpp new file mode 100644 index 0000000000..4ec6ae0b78 --- /dev/null +++ b/lib/linalg/zcop.cpp @@ -0,0 +1,43 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +int zcopy_(integer *n, doublecomplex *zx, integer *incx, doublecomplex *zy, integer *incy) +{ + integer i__1, i__2, i__3; + integer i__, ix, iy; + --zy; + --zx; + if (*n <= 0) { + return 0; + } + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + i__3 = i__; + zy[i__2].r = zx[i__3].r, zy[i__2].i = zx[i__3].i; + } + } else { + ix = 1; + iy = 1; + if (*incx < 0) { + ix = (-(*n) + 1) * *incx + 1; + } + if (*incy < 0) { + iy = (-(*n) + 1) * *incy + 1; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = iy; + i__3 = ix; + zy[i__2].r = zx[i__3].r, zy[i__2].i = zx[i__3].i; + ix += *incx; + iy += *incy; + } + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/zdotu.cpp b/lib/linalg/zdotu.cpp new file mode 100644 index 0000000000..1b284d12c6 --- /dev/null +++ b/lib/linalg/zdotu.cpp @@ -0,0 +1,55 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +VOID zdotu_(doublecomplex *ret_val, integer *n, doublecomplex *zx, integer *incx, doublecomplex *zy, + integer *incy) +{ + integer i__1, i__2, i__3; + doublecomplex z__1, z__2; + integer i__, ix, iy; + doublecomplex ztemp; + --zy; + --zx; + ztemp.r = 0., ztemp.i = 0.; + ret_val->r = 0., ret_val->i = 0.; + if (*n <= 0) { + return; + } + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + i__3 = i__; + z__2.r = zx[i__2].r * zy[i__3].r - zx[i__2].i * zy[i__3].i, + z__2.i = zx[i__2].r * zy[i__3].i + zx[i__2].i * zy[i__3].r; + z__1.r = ztemp.r + z__2.r, z__1.i = ztemp.i + z__2.i; + ztemp.r = z__1.r, ztemp.i = z__1.i; + } + } else { + ix = 1; + iy = 1; + if (*incx < 0) { + ix = (-(*n) + 1) * *incx + 1; + } + if (*incy < 0) { + iy = (-(*n) + 1) * *incy + 1; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + 
i__2 = ix; + i__3 = iy; + z__2.r = zx[i__2].r * zy[i__3].r - zx[i__2].i * zy[i__3].i, + z__2.i = zx[i__2].r * zy[i__3].i + zx[i__2].i * zy[i__3].r; + z__1.r = ztemp.r + z__2.r, z__1.i = ztemp.i + z__2.i; + ztemp.r = z__1.r, ztemp.i = z__1.i; + ix += *incx; + iy += *incy; + } + } + ret_val->r = ztemp.r, ret_val->i = ztemp.i; + return; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/zgetrf.cpp b/lib/linalg/zgetrf.cpp new file mode 100644 index 0000000000..5fb9182b87 --- /dev/null +++ b/lib/linalg/zgetrf.cpp @@ -0,0 +1,90 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static doublecomplex c_b1 = {1., 0.}; +static integer c__1 = 1; +static integer c_n1 = -1; +int zgetrf_(integer *m, integer *n, doublecomplex *a, integer *lda, integer *ipiv, integer *info) +{ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + doublecomplex z__1; + integer i__, j, jb, nb, iinfo; + extern int zgemm_(char *, char *, integer *, integer *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, integer *, ftnlen, ftnlen), + ztrsm_(char *, char *, char *, char *, integer *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, ftnlen, ftnlen, ftnlen, + ftnlen), + xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, + ftnlen, ftnlen); + extern int zlaswp_(integer *, doublecomplex *, integer *, integer *, integer *, integer *, + integer *), + zgetrf2_(integer *, integer *, doublecomplex *, integer *, integer *, integer *); + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1, *m)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"ZGETRF", &i__1, (ftnlen)6); + return 0; + } + if (*m == 0 || *n == 0) { + return 0; + } + nb = ilaenv_(&c__1, (char *)"ZGETRF", (char *)" ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + if (nb <= 1 || nb >= min(*m, *n)) { + zgetrf2_(m, n, &a[a_offset], lda, &ipiv[1], info); + } else { + i__1 = min(*m, *n); + i__2 = nb; + for (j = 1; i__2 < 0 ? 
j >= i__1 : j <= i__1; j += i__2) { + i__3 = min(*m, *n) - j + 1; + jb = min(i__3, nb); + i__3 = *m - j + 1; + zgetrf2_(&i__3, &jb, &a[j + j * a_dim1], lda, &ipiv[j], &iinfo); + if (*info == 0 && iinfo > 0) { + *info = iinfo + j - 1; + } + i__4 = *m, i__5 = j + jb - 1; + i__3 = min(i__4, i__5); + for (i__ = j; i__ <= i__3; ++i__) { + ipiv[i__] = j - 1 + ipiv[i__]; + } + i__3 = j - 1; + i__4 = j + jb - 1; + zlaswp_(&i__3, &a[a_offset], lda, &j, &i__4, &ipiv[1], &c__1); + if (j + jb <= *n) { + i__3 = *n - j - jb + 1; + i__4 = j + jb - 1; + zlaswp_(&i__3, &a[(j + jb) * a_dim1 + 1], lda, &j, &i__4, &ipiv[1], &c__1); + i__3 = *n - j - jb + 1; + ztrsm_((char *)"Left", (char *)"Lower", (char *)"No transpose", (char *)"Unit", &jb, &i__3, &c_b1, + &a[j + j * a_dim1], lda, &a[j + (j + jb) * a_dim1], lda, (ftnlen)4, + (ftnlen)5, (ftnlen)12, (ftnlen)4); + if (j + jb <= *m) { + i__3 = *m - j - jb + 1; + i__4 = *n - j - jb + 1; + z__1.r = -1., z__1.i = -0.; + zgemm_((char *)"No transpose", (char *)"No transpose", &i__3, &i__4, &jb, &z__1, + &a[j + jb + j * a_dim1], lda, &a[j + (j + jb) * a_dim1], lda, &c_b1, + &a[j + jb + (j + jb) * a_dim1], lda, (ftnlen)12, (ftnlen)12); + } + } + } + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/zgetrf2.cpp b/lib/linalg/zgetrf2.cpp new file mode 100644 index 0000000000..805b5810bc --- /dev/null +++ b/lib/linalg/zgetrf2.cpp @@ -0,0 +1,117 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static doublecomplex c_b1 = {1., 0.}; +static integer c__1 = 1; +int zgetrf2_(integer *m, integer *n, doublecomplex *a, integer *lda, integer *ipiv, integer *info) +{ + integer a_dim1, a_offset, i__1, i__2; + doublecomplex z__1; + double z_lmp_abs(doublecomplex *); + void z_lmp_div(doublecomplex *, doublecomplex *, doublecomplex *); + integer i__, n1, n2; + doublecomplex temp; + integer iinfo; + doublereal sfmin; + extern int zscal_(integer *, doublecomplex *, doublecomplex *, integer *), + zgemm_(char *, char *, integer *, integer *, integer *, doublecomplex *, doublecomplex *, + integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, + ftnlen, ftnlen), + ztrsm_(char *, char *, char *, char *, integer *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, ftnlen, ftnlen, ftnlen, + ftnlen); + extern doublereal dlamch_(char *, ftnlen); + extern int xerbla_(char *, integer *, ftnlen); + extern integer izamax_(integer *, doublecomplex *, integer *); + extern int zlaswp_(integer *, doublecomplex *, integer *, integer *, integer *, integer *, + integer *); + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1, *m)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"ZGETRF2", &i__1, (ftnlen)7); + return 0; + } + if (*m == 0 || *n == 0) { + return 0; + } + if (*m == 1) { + ipiv[1] = 1; + i__1 = a_dim1 + 1; + if (a[i__1].r == 0. && a[i__1].i == 0.) { + *info = 1; + } + } else if (*n == 1) { + sfmin = dlamch_((char *)"S", (ftnlen)1); + i__ = izamax_(m, &a[a_dim1 + 1], &c__1); + ipiv[1] = i__; + i__1 = i__ + a_dim1; + if (a[i__1].r != 0. || a[i__1].i != 0.) 
{ + if (i__ != 1) { + i__1 = a_dim1 + 1; + temp.r = a[i__1].r, temp.i = a[i__1].i; + i__1 = a_dim1 + 1; + i__2 = i__ + a_dim1; + a[i__1].r = a[i__2].r, a[i__1].i = a[i__2].i; + i__1 = i__ + a_dim1; + a[i__1].r = temp.r, a[i__1].i = temp.i; + } + if (z_lmp_abs(&a[a_dim1 + 1]) >= sfmin) { + i__1 = *m - 1; + z_lmp_div(&z__1, &c_b1, &a[a_dim1 + 1]); + zscal_(&i__1, &z__1, &a[a_dim1 + 2], &c__1); + } else { + i__1 = *m - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + 1 + a_dim1; + z_lmp_div(&z__1, &a[i__ + 1 + a_dim1], &a[a_dim1 + 1]); + a[i__2].r = z__1.r, a[i__2].i = z__1.i; + } + } + } else { + *info = 1; + } + } else { + n1 = min(*m, *n) / 2; + n2 = *n - n1; + zgetrf2_(m, &n1, &a[a_offset], lda, &ipiv[1], &iinfo); + if (*info == 0 && iinfo > 0) { + *info = iinfo; + } + zlaswp_(&n2, &a[(n1 + 1) * a_dim1 + 1], lda, &c__1, &n1, &ipiv[1], &c__1); + ztrsm_((char *)"L", (char *)"L", (char *)"N", (char *)"U", &n1, &n2, &c_b1, &a[a_offset], lda, &a[(n1 + 1) * a_dim1 + 1], + lda, (ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); + i__1 = *m - n1; + z__1.r = -1., z__1.i = -0.; + zgemm_((char *)"N", (char *)"N", &i__1, &n2, &n1, &z__1, &a[n1 + 1 + a_dim1], lda, + &a[(n1 + 1) * a_dim1 + 1], lda, &c_b1, &a[n1 + 1 + (n1 + 1) * a_dim1], lda, + (ftnlen)1, (ftnlen)1); + i__1 = *m - n1; + zgetrf2_(&i__1, &n2, &a[n1 + 1 + (n1 + 1) * a_dim1], lda, &ipiv[n1 + 1], &iinfo); + if (*info == 0 && iinfo > 0) { + *info = iinfo + n1; + } + i__1 = min(*m, *n); + for (i__ = n1 + 1; i__ <= i__1; ++i__) { + ipiv[i__] += n1; + } + i__1 = n1 + 1; + i__2 = min(*m, *n); + zlaswp_(&n1, &a[a_dim1 + 1], lda, &i__1, &i__2, &ipiv[1], &c__1); + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/zgetri.cpp b/lib/linalg/zgetri.cpp new file mode 100644 index 0000000000..a61e931cb4 --- /dev/null +++ b/lib/linalg/zgetri.cpp @@ -0,0 +1,132 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static doublecomplex c_b2 = {1., 0.}; +static integer c__1 = 1; +static integer c_n1 = -1; +static integer c__2 = 2; +int zgetri_(integer *n, doublecomplex *a, integer *lda, integer *ipiv, doublecomplex *work, + integer *lwork, integer *info) +{ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + doublecomplex z__1; + integer i__, j, jb, nb, jj, jp, nn, iws, nbmin; + extern int zgemm_(char *, char *, integer *, integer *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, integer *, ftnlen, ftnlen), + zgemv_(char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, ftnlen), + zswap_(integer *, doublecomplex *, integer *, doublecomplex *, integer *), + ztrsm_(char *, char *, char *, char *, integer *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, ftnlen, ftnlen, ftnlen, + ftnlen), + xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, + ftnlen, ftnlen); + integer ldwork, lwkopt; + logical lquery; + extern int ztrtri_(char *, char *, integer *, doublecomplex *, integer *, integer *, ftnlen, + ftnlen); + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + --work; + *info = 0; + nb = ilaenv_(&c__1, (char *)"ZGETRI", (char *)" ", n, &c_n1, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + lwkopt = *n * nb; + work[1].r = (doublereal)lwkopt, work[1].i = 0.; + lquery = *lwork == -1; + if (*n < 0) { + *info = -1; + } else if (*lda 
< max(1, *n)) { + *info = -3; + } else if (*lwork < max(1, *n) && !lquery) { + *info = -6; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"ZGETRI", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + if (*n == 0) { + return 0; + } + ztrtri_((char *)"Upper", (char *)"Non-unit", n, &a[a_offset], lda, info, (ftnlen)5, (ftnlen)8); + if (*info > 0) { + return 0; + } + nbmin = 2; + ldwork = *n; + if (nb > 1 && nb < *n) { + i__1 = ldwork * nb; + iws = max(i__1, 1); + if (*lwork < iws) { + nb = *lwork / ldwork; + i__1 = 2, + i__2 = ilaenv_(&c__2, (char *)"ZGETRI", (char *)" ", n, &c_n1, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + nbmin = max(i__1, i__2); + } + } else { + iws = *n; + } + if (nb < nbmin || nb >= *n) { + for (j = *n; j >= 1; --j) { + i__1 = *n; + for (i__ = j + 1; i__ <= i__1; ++i__) { + i__2 = i__; + i__3 = i__ + j * a_dim1; + work[i__2].r = a[i__3].r, work[i__2].i = a[i__3].i; + i__2 = i__ + j * a_dim1; + a[i__2].r = 0., a[i__2].i = 0.; + } + if (j < *n) { + i__1 = *n - j; + z__1.r = -1., z__1.i = -0.; + zgemv_((char *)"No transpose", n, &i__1, &z__1, &a[(j + 1) * a_dim1 + 1], lda, &work[j + 1], + &c__1, &c_b2, &a[j * a_dim1 + 1], &c__1, (ftnlen)12); + } + } + } else { + nn = (*n - 1) / nb * nb + 1; + i__1 = -nb; + for (j = nn; i__1 < 0 ? j >= 1 : j <= 1; j += i__1) { + i__2 = nb, i__3 = *n - j + 1; + jb = min(i__2, i__3); + i__2 = j + jb - 1; + for (jj = j; jj <= i__2; ++jj) { + i__3 = *n; + for (i__ = jj + 1; i__ <= i__3; ++i__) { + i__4 = i__ + (jj - j) * ldwork; + i__5 = i__ + jj * a_dim1; + work[i__4].r = a[i__5].r, work[i__4].i = a[i__5].i; + i__4 = i__ + jj * a_dim1; + a[i__4].r = 0., a[i__4].i = 0.; + } + } + if (j + jb <= *n) { + i__2 = *n - j - jb + 1; + z__1.r = -1., z__1.i = -0.; + zgemm_((char *)"No transpose", (char *)"No transpose", n, &jb, &i__2, &z__1, + &a[(j + jb) * a_dim1 + 1], lda, &work[j + jb], &ldwork, &c_b2, + &a[j * a_dim1 + 1], lda, (ftnlen)12, (ftnlen)12); + } + ztrsm_((char *)"Right", (char *)"Lower", (char *)"No transpose", (char *)"Unit", n, &jb, &c_b2, &work[j], &ldwork, + &a[j * a_dim1 + 1], lda, (ftnlen)5, (ftnlen)5, (ftnlen)12, (ftnlen)4); + } + } + for (j = *n - 1; j >= 1; --j) { + jp = ipiv[j]; + if (jp != j) { + zswap_(n, &a[j * a_dim1 + 1], &c__1, &a[jp * a_dim1 + 1], &c__1); + } + } + work[1].r = (doublereal)iws, work[1].i = 0.; + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/zhegs2.cpp b/lib/linalg/zhegs2.cpp new file mode 100644 index 0000000000..685f548c61 --- /dev/null +++ b/lib/linalg/zhegs2.cpp @@ -0,0 +1,197 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static doublecomplex c_b1 = {1., 0.}; +static integer c__1 = 1; +int zhegs2_(integer *itype, char *uplo, integer *n, doublecomplex *a, integer *lda, + doublecomplex *b, integer *ldb, integer *info, ftnlen uplo_len) +{ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2; + doublereal d__1, d__2; + doublecomplex z__1; + integer k; + doublecomplex ct; + doublereal akk, bkk; + extern int zher2_(char *, integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *, ftnlen); + extern logical lsame_(char *, char *, ftnlen, ftnlen); + logical upper; + extern int zaxpy_(integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, + integer *), + ztrmv_(char *, char *, char *, integer *, doublecomplex *, integer *, doublecomplex *, + integer *, ftnlen, ftnlen, ftnlen), + ztrsv_(char *, char *, char *, integer *, doublecomplex *, integer *, doublecomplex *, + 
integer *, ftnlen, ftnlen, ftnlen), + xerbla_(char *, integer *, ftnlen), + zdscal_(integer *, doublereal *, doublecomplex *, integer *), + zlacgv_(integer *, doublecomplex *, integer *); + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + *info = 0; + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + if (*itype < 1 || *itype > 3) { + *info = -1; + } else if (!upper && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1, *n)) { + *info = -5; + } else if (*ldb < max(1, *n)) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"ZHEGS2", &i__1, (ftnlen)6); + return 0; + } + if (*itype == 1) { + if (upper) { + i__1 = *n; + for (k = 1; k <= i__1; ++k) { + i__2 = k + k * a_dim1; + akk = a[i__2].r; + i__2 = k + k * b_dim1; + bkk = b[i__2].r; + d__1 = bkk; + akk /= d__1 * d__1; + i__2 = k + k * a_dim1; + a[i__2].r = akk, a[i__2].i = 0.; + if (k < *n) { + i__2 = *n - k; + d__1 = 1. / bkk; + zdscal_(&i__2, &d__1, &a[k + (k + 1) * a_dim1], lda); + d__1 = akk * -.5; + ct.r = d__1, ct.i = 0.; + i__2 = *n - k; + zlacgv_(&i__2, &a[k + (k + 1) * a_dim1], lda); + i__2 = *n - k; + zlacgv_(&i__2, &b[k + (k + 1) * b_dim1], ldb); + i__2 = *n - k; + zaxpy_(&i__2, &ct, &b[k + (k + 1) * b_dim1], ldb, &a[k + (k + 1) * a_dim1], + lda); + i__2 = *n - k; + z__1.r = -1., z__1.i = -0.; + zher2_(uplo, &i__2, &z__1, &a[k + (k + 1) * a_dim1], lda, + &b[k + (k + 1) * b_dim1], ldb, &a[k + 1 + (k + 1) * a_dim1], lda, + (ftnlen)1); + i__2 = *n - k; + zaxpy_(&i__2, &ct, &b[k + (k + 1) * b_dim1], ldb, &a[k + (k + 1) * a_dim1], + lda); + i__2 = *n - k; + zlacgv_(&i__2, &b[k + (k + 1) * b_dim1], ldb); + i__2 = *n - k; + ztrsv_(uplo, (char *)"Conjugate transpose", (char *)"Non-unit", &i__2, + &b[k + 1 + (k + 1) * b_dim1], ldb, &a[k + (k + 1) * a_dim1], lda, + (ftnlen)1, (ftnlen)19, (ftnlen)8); + i__2 = *n - k; + zlacgv_(&i__2, &a[k + (k + 1) * a_dim1], lda); + } + } + } else { + i__1 = *n; + for (k = 1; k <= i__1; ++k) { + i__2 = k + k * a_dim1; + akk = a[i__2].r; + i__2 = k + k * b_dim1; + bkk = b[i__2].r; + d__1 = bkk; + akk /= d__1 * d__1; + i__2 = k + k * a_dim1; + a[i__2].r = akk, a[i__2].i = 0.; + if (k < *n) { + i__2 = *n - k; + d__1 = 1. 
/ bkk; + zdscal_(&i__2, &d__1, &a[k + 1 + k * a_dim1], &c__1); + d__1 = akk * -.5; + ct.r = d__1, ct.i = 0.; + i__2 = *n - k; + zaxpy_(&i__2, &ct, &b[k + 1 + k * b_dim1], &c__1, &a[k + 1 + k * a_dim1], + &c__1); + i__2 = *n - k; + z__1.r = -1., z__1.i = -0.; + zher2_(uplo, &i__2, &z__1, &a[k + 1 + k * a_dim1], &c__1, + &b[k + 1 + k * b_dim1], &c__1, &a[k + 1 + (k + 1) * a_dim1], lda, + (ftnlen)1); + i__2 = *n - k; + zaxpy_(&i__2, &ct, &b[k + 1 + k * b_dim1], &c__1, &a[k + 1 + k * a_dim1], + &c__1); + i__2 = *n - k; + ztrsv_(uplo, (char *)"No transpose", (char *)"Non-unit", &i__2, &b[k + 1 + (k + 1) * b_dim1], + ldb, &a[k + 1 + k * a_dim1], &c__1, (ftnlen)1, (ftnlen)12, (ftnlen)8); + } + } + } + } else { + if (upper) { + i__1 = *n; + for (k = 1; k <= i__1; ++k) { + i__2 = k + k * a_dim1; + akk = a[i__2].r; + i__2 = k + k * b_dim1; + bkk = b[i__2].r; + i__2 = k - 1; + ztrmv_(uplo, (char *)"No transpose", (char *)"Non-unit", &i__2, &b[b_offset], ldb, + &a[k * a_dim1 + 1], &c__1, (ftnlen)1, (ftnlen)12, (ftnlen)8); + d__1 = akk * .5; + ct.r = d__1, ct.i = 0.; + i__2 = k - 1; + zaxpy_(&i__2, &ct, &b[k * b_dim1 + 1], &c__1, &a[k * a_dim1 + 1], &c__1); + i__2 = k - 1; + zher2_(uplo, &i__2, &c_b1, &a[k * a_dim1 + 1], &c__1, &b[k * b_dim1 + 1], &c__1, + &a[a_offset], lda, (ftnlen)1); + i__2 = k - 1; + zaxpy_(&i__2, &ct, &b[k * b_dim1 + 1], &c__1, &a[k * a_dim1 + 1], &c__1); + i__2 = k - 1; + zdscal_(&i__2, &bkk, &a[k * a_dim1 + 1], &c__1); + i__2 = k + k * a_dim1; + d__2 = bkk; + d__1 = akk * (d__2 * d__2); + a[i__2].r = d__1, a[i__2].i = 0.; + } + } else { + i__1 = *n; + for (k = 1; k <= i__1; ++k) { + i__2 = k + k * a_dim1; + akk = a[i__2].r; + i__2 = k + k * b_dim1; + bkk = b[i__2].r; + i__2 = k - 1; + zlacgv_(&i__2, &a[k + a_dim1], lda); + i__2 = k - 1; + ztrmv_(uplo, (char *)"Conjugate transpose", (char *)"Non-unit", &i__2, &b[b_offset], ldb, + &a[k + a_dim1], lda, (ftnlen)1, (ftnlen)19, (ftnlen)8); + d__1 = akk * .5; + ct.r = d__1, ct.i = 0.; + i__2 = k - 1; + zlacgv_(&i__2, &b[k + b_dim1], ldb); + i__2 = k - 1; + zaxpy_(&i__2, &ct, &b[k + b_dim1], ldb, &a[k + a_dim1], lda); + i__2 = k - 1; + zher2_(uplo, &i__2, &c_b1, &a[k + a_dim1], lda, &b[k + b_dim1], ldb, &a[a_offset], + lda, (ftnlen)1); + i__2 = k - 1; + zaxpy_(&i__2, &ct, &b[k + b_dim1], ldb, &a[k + a_dim1], lda); + i__2 = k - 1; + zlacgv_(&i__2, &b[k + b_dim1], ldb); + i__2 = k - 1; + zdscal_(&i__2, &bkk, &a[k + a_dim1], lda); + i__2 = k - 1; + zlacgv_(&i__2, &a[k + a_dim1], lda); + i__2 = k + k * a_dim1; + d__2 = bkk; + d__1 = akk * (d__2 * d__2); + a[i__2].r = d__1, a[i__2].i = 0.; + } + } + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/zhegst.cpp b/lib/linalg/zhegst.cpp new file mode 100644 index 0000000000..8c9d9434cb --- /dev/null +++ b/lib/linalg/zhegst.cpp @@ -0,0 +1,195 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static doublecomplex c_b1 = {1., 0.}; +static doublecomplex c_b2 = {.5, 0.}; +static integer c__1 = 1; +static integer c_n1 = -1; +static doublereal c_b18 = 1.; +int zhegst_(integer *itype, char *uplo, integer *n, doublecomplex *a, integer *lda, + doublecomplex *b, integer *ldb, integer *info, ftnlen uplo_len) +{ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3; + doublecomplex z__1; + integer k, kb, nb; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern int zhemm_(char *, char *, integer *, integer *, doublecomplex *, doublecomplex *, + integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *, ftnlen, ftnlen); + 
logical upper; + extern int ztrmm_(char *, char *, char *, char *, integer *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, ftnlen, ftnlen, + ftnlen, ftnlen), + ztrsm_(char *, char *, char *, char *, integer *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, ftnlen, ftnlen, ftnlen, + ftnlen), + zhegs2_(integer *, char *, integer *, doublecomplex *, integer *, doublecomplex *, + integer *, integer *, ftnlen), + zher2k_(char *, char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *, doublereal *, doublecomplex *, integer *, ftnlen, + ftnlen), + xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, + ftnlen, ftnlen); + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + *info = 0; + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + if (*itype < 1 || *itype > 3) { + *info = -1; + } else if (!upper && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1, *n)) { + *info = -5; + } else if (*ldb < max(1, *n)) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"ZHEGST", &i__1, (ftnlen)6); + return 0; + } + if (*n == 0) { + return 0; + } + nb = ilaenv_(&c__1, (char *)"ZHEGST", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + if (nb <= 1 || nb >= *n) { + zhegs2_(itype, uplo, n, &a[a_offset], lda, &b[b_offset], ldb, info, (ftnlen)1); + } else { + if (*itype == 1) { + if (upper) { + i__1 = *n; + i__2 = nb; + for (k = 1; i__2 < 0 ? k >= i__1 : k <= i__1; k += i__2) { + i__3 = *n - k + 1; + kb = min(i__3, nb); + zhegs2_(itype, uplo, &kb, &a[k + k * a_dim1], lda, &b[k + k * b_dim1], ldb, + info, (ftnlen)1); + if (k + kb <= *n) { + i__3 = *n - k - kb + 1; + ztrsm_((char *)"L", uplo, (char *)"C", (char *)"N", &kb, &i__3, &c_b1, &b[k + k * b_dim1], ldb, + &a[k + (k + kb) * a_dim1], lda, (ftnlen)1, (ftnlen)1, (ftnlen)1, + (ftnlen)1); + i__3 = *n - k - kb + 1; + z__1.r = -.5, z__1.i = -0.; + zhemm_((char *)"L", uplo, &kb, &i__3, &z__1, &a[k + k * a_dim1], lda, + &b[k + (k + kb) * b_dim1], ldb, &c_b1, &a[k + (k + kb) * a_dim1], + lda, (ftnlen)1, (ftnlen)1); + i__3 = *n - k - kb + 1; + z__1.r = -1., z__1.i = -0.; + zher2k_(uplo, (char *)"C", &i__3, &kb, &z__1, &a[k + (k + kb) * a_dim1], lda, + &b[k + (k + kb) * b_dim1], ldb, &c_b18, + &a[k + kb + (k + kb) * a_dim1], lda, (ftnlen)1, (ftnlen)1); + i__3 = *n - k - kb + 1; + z__1.r = -.5, z__1.i = -0.; + zhemm_((char *)"L", uplo, &kb, &i__3, &z__1, &a[k + k * a_dim1], lda, + &b[k + (k + kb) * b_dim1], ldb, &c_b1, &a[k + (k + kb) * a_dim1], + lda, (ftnlen)1, (ftnlen)1); + i__3 = *n - k - kb + 1; + ztrsm_((char *)"R", uplo, (char *)"N", (char *)"N", &kb, &i__3, &c_b1, + &b[k + kb + (k + kb) * b_dim1], ldb, &a[k + (k + kb) * a_dim1], lda, + (ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); + } + } + } else { + i__2 = *n; + i__1 = nb; + for (k = 1; i__1 < 0 ? 
k >= i__2 : k <= i__2; k += i__1) { + i__3 = *n - k + 1; + kb = min(i__3, nb); + zhegs2_(itype, uplo, &kb, &a[k + k * a_dim1], lda, &b[k + k * b_dim1], ldb, + info, (ftnlen)1); + if (k + kb <= *n) { + i__3 = *n - k - kb + 1; + ztrsm_((char *)"R", uplo, (char *)"C", (char *)"N", &i__3, &kb, &c_b1, &b[k + k * b_dim1], ldb, + &a[k + kb + k * a_dim1], lda, (ftnlen)1, (ftnlen)1, (ftnlen)1, + (ftnlen)1); + i__3 = *n - k - kb + 1; + z__1.r = -.5, z__1.i = -0.; + zhemm_((char *)"R", uplo, &i__3, &kb, &z__1, &a[k + k * a_dim1], lda, + &b[k + kb + k * b_dim1], ldb, &c_b1, &a[k + kb + k * a_dim1], lda, + (ftnlen)1, (ftnlen)1); + i__3 = *n - k - kb + 1; + z__1.r = -1., z__1.i = -0.; + zher2k_(uplo, (char *)"N", &i__3, &kb, &z__1, &a[k + kb + k * a_dim1], lda, + &b[k + kb + k * b_dim1], ldb, &c_b18, + &a[k + kb + (k + kb) * a_dim1], lda, (ftnlen)1, (ftnlen)1); + i__3 = *n - k - kb + 1; + z__1.r = -.5, z__1.i = -0.; + zhemm_((char *)"R", uplo, &i__3, &kb, &z__1, &a[k + k * a_dim1], lda, + &b[k + kb + k * b_dim1], ldb, &c_b1, &a[k + kb + k * a_dim1], lda, + (ftnlen)1, (ftnlen)1); + i__3 = *n - k - kb + 1; + ztrsm_((char *)"L", uplo, (char *)"N", (char *)"N", &i__3, &kb, &c_b1, + &b[k + kb + (k + kb) * b_dim1], ldb, &a[k + kb + k * a_dim1], lda, + (ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); + } + } + } + } else { + if (upper) { + i__1 = *n; + i__2 = nb; + for (k = 1; i__2 < 0 ? k >= i__1 : k <= i__1; k += i__2) { + i__3 = *n - k + 1; + kb = min(i__3, nb); + i__3 = k - 1; + ztrmm_((char *)"L", uplo, (char *)"N", (char *)"N", &i__3, &kb, &c_b1, &b[b_offset], ldb, + &a[k * a_dim1 + 1], lda, (ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); + i__3 = k - 1; + zhemm_((char *)"R", uplo, &i__3, &kb, &c_b2, &a[k + k * a_dim1], lda, + &b[k * b_dim1 + 1], ldb, &c_b1, &a[k * a_dim1 + 1], lda, (ftnlen)1, + (ftnlen)1); + i__3 = k - 1; + zher2k_(uplo, (char *)"N", &i__3, &kb, &c_b1, &a[k * a_dim1 + 1], lda, + &b[k * b_dim1 + 1], ldb, &c_b18, &a[a_offset], lda, (ftnlen)1, + (ftnlen)1); + i__3 = k - 1; + zhemm_((char *)"R", uplo, &i__3, &kb, &c_b2, &a[k + k * a_dim1], lda, + &b[k * b_dim1 + 1], ldb, &c_b1, &a[k * a_dim1 + 1], lda, (ftnlen)1, + (ftnlen)1); + i__3 = k - 1; + ztrmm_((char *)"R", uplo, (char *)"C", (char *)"N", &i__3, &kb, &c_b1, &b[k + k * b_dim1], ldb, + &a[k * a_dim1 + 1], lda, (ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); + zhegs2_(itype, uplo, &kb, &a[k + k * a_dim1], lda, &b[k + k * b_dim1], ldb, + info, (ftnlen)1); + } + } else { + i__2 = *n; + i__1 = nb; + for (k = 1; i__1 < 0 ? 
k >= i__2 : k <= i__2; k += i__1) { + i__3 = *n - k + 1; + kb = min(i__3, nb); + i__3 = k - 1; + ztrmm_((char *)"R", uplo, (char *)"N", (char *)"N", &kb, &i__3, &c_b1, &b[b_offset], ldb, + &a[k + a_dim1], lda, (ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); + i__3 = k - 1; + zhemm_((char *)"L", uplo, &kb, &i__3, &c_b2, &a[k + k * a_dim1], lda, &b[k + b_dim1], + ldb, &c_b1, &a[k + a_dim1], lda, (ftnlen)1, (ftnlen)1); + i__3 = k - 1; + zher2k_(uplo, (char *)"C", &i__3, &kb, &c_b1, &a[k + a_dim1], lda, &b[k + b_dim1], ldb, + &c_b18, &a[a_offset], lda, (ftnlen)1, (ftnlen)1); + i__3 = k - 1; + zhemm_((char *)"L", uplo, &kb, &i__3, &c_b2, &a[k + k * a_dim1], lda, &b[k + b_dim1], + ldb, &c_b1, &a[k + a_dim1], lda, (ftnlen)1, (ftnlen)1); + i__3 = k - 1; + ztrmm_((char *)"L", uplo, (char *)"C", (char *)"N", &kb, &i__3, &c_b1, &b[k + k * b_dim1], ldb, + &a[k + a_dim1], lda, (ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); + zhegs2_(itype, uplo, &kb, &a[k + k * a_dim1], lda, &b[k + k * b_dim1], ldb, + info, (ftnlen)1); + } + } + } + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/zhegv.cpp b/lib/linalg/zhegv.cpp new file mode 100644 index 0000000000..9d85be5132 --- /dev/null +++ b/lib/linalg/zhegv.cpp @@ -0,0 +1,115 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static doublecomplex c_b1 = {1., 0.}; +static integer c__1 = 1; +static integer c_n1 = -1; +int zhegv_(integer *itype, char *jobz, char *uplo, integer *n, doublecomplex *a, integer *lda, + doublecomplex *b, integer *ldb, doublereal *w, doublecomplex *work, integer *lwork, + doublereal *rwork, integer *info, ftnlen jobz_len, ftnlen uplo_len) +{ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2; + integer nb, neig; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern int zheev_(char *, char *, integer *, doublecomplex *, integer *, doublereal *, + doublecomplex *, integer *, doublereal *, integer *, ftnlen, ftnlen); + char trans[1]; + logical upper, wantz; + extern int ztrmm_(char *, char *, char *, char *, integer *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, ftnlen, ftnlen, + ftnlen, ftnlen), + ztrsm_(char *, char *, char *, char *, integer *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, ftnlen, ftnlen, ftnlen, + ftnlen), + xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, + ftnlen, ftnlen); + extern int zhegst_(integer *, char *, integer *, doublecomplex *, integer *, doublecomplex *, + integer *, integer *, ftnlen); + integer lwkopt; + logical lquery; + extern int zpotrf_(char *, integer *, doublecomplex *, integer *, integer *, ftnlen); + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + --w; + --work; + --rwork; + wantz = lsame_(jobz, (char *)"V", (ftnlen)1, (ftnlen)1); + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + lquery = *lwork == -1; + *info = 0; + if (*itype < 1 || *itype > 3) { + *info = -1; + } else if (!(wantz || lsame_(jobz, (char *)"N", (ftnlen)1, (ftnlen)1))) { + *info = -2; + } else if (!(upper || lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1))) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*lda < max(1, *n)) { + *info = -6; + } else if (*ldb < max(1, *n)) { + *info = -8; + } + if (*info == 0) { + nb = ilaenv_(&c__1, (char *)"ZHETRD", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + i__1 = 1, i__2 = 
(nb + 1) * *n; + lwkopt = max(i__1, i__2); + work[1].r = (doublereal)lwkopt, work[1].i = 0.; + i__1 = 1, i__2 = (*n << 1) - 1; + if (*lwork < max(i__1, i__2) && !lquery) { + *info = -11; + } + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"ZHEGV ", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + if (*n == 0) { + return 0; + } + zpotrf_(uplo, n, &b[b_offset], ldb, info, (ftnlen)1); + if (*info != 0) { + *info = *n + *info; + return 0; + } + zhegst_(itype, uplo, n, &a[a_offset], lda, &b[b_offset], ldb, info, (ftnlen)1); + zheev_(jobz, uplo, n, &a[a_offset], lda, &w[1], &work[1], lwork, &rwork[1], info, (ftnlen)1, + (ftnlen)1); + if (wantz) { + neig = *n; + if (*info > 0) { + neig = *info - 1; + } + if (*itype == 1 || *itype == 2) { + if (upper) { + *(unsigned char *)trans = 'N'; + } else { + *(unsigned char *)trans = 'C'; + } + ztrsm_((char *)"Left", uplo, trans, (char *)"Non-unit", n, &neig, &c_b1, &b[b_offset], ldb, + &a[a_offset], lda, (ftnlen)4, (ftnlen)1, (ftnlen)1, (ftnlen)8); + } else if (*itype == 3) { + if (upper) { + *(unsigned char *)trans = 'C'; + } else { + *(unsigned char *)trans = 'N'; + } + ztrmm_((char *)"Left", uplo, trans, (char *)"Non-unit", n, &neig, &c_b1, &b[b_offset], ldb, + &a[a_offset], lda, (ftnlen)4, (ftnlen)1, (ftnlen)1, (ftnlen)8); + } + } + work[1].r = (doublereal)lwkopt, work[1].i = 0.; + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/zhemm.cpp b/lib/linalg/zhemm.cpp new file mode 100644 index 0000000000..3237e16c2c --- /dev/null +++ b/lib/linalg/zhemm.cpp @@ -0,0 +1,271 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +int zhemm_(char *side, char *uplo, integer *m, integer *n, doublecomplex *alpha, doublecomplex *a, + integer *lda, doublecomplex *b, integer *ldb, doublecomplex *beta, doublecomplex *c__, + integer *ldc, ftnlen side_len, ftnlen uplo_len) +{ + integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5, + i__6; + doublereal d__1; + doublecomplex z__1, z__2, z__3, z__4, z__5; + void d_lmp_cnjg(doublecomplex *, doublecomplex *); + integer i__, j, k, info; + doublecomplex temp1, temp2; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + integer nrowa; + logical upper; + extern int xerbla_(char *, integer *, ftnlen); + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + if (lsame_(side, (char *)"L", (ftnlen)1, (ftnlen)1)) { + nrowa = *m; + } else { + nrowa = *n; + } + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + info = 0; + if (!lsame_(side, (char *)"L", (ftnlen)1, (ftnlen)1) && !lsame_(side, (char *)"R", (ftnlen)1, (ftnlen)1)) { + info = 1; + } else if (!upper && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + info = 2; + } else if (*m < 0) { + info = 3; + } else if (*n < 0) { + info = 4; + } else if (*lda < max(1, nrowa)) { + info = 7; + } else if (*ldb < max(1, *m)) { + info = 9; + } else if (*ldc < max(1, *m)) { + info = 12; + } + if (info != 0) { + xerbla_((char *)"ZHEMM ", &info, (ftnlen)6); + return 0; + } + if (*m == 0 || *n == 0 || + alpha->r == 0. && alpha->i == 0. && (beta->r == 1. && beta->i == 0.)) { + return 0; + } + if (alpha->r == 0. && alpha->i == 0.) { + if (beta->r == 0. && beta->i == 0.) 
{ + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + c__[i__3].r = 0., c__[i__3].i = 0.; + } + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + z__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4].i, + z__1.i = beta->r * c__[i__4].i + beta->i * c__[i__4].r; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } + } + } + return 0; + } + if (lsame_(side, (char *)"L", (ftnlen)1, (ftnlen)1)) { + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + z__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3].i, + z__1.i = alpha->r * b[i__3].i + alpha->i * b[i__3].r; + temp1.r = z__1.r, temp1.i = z__1.i; + temp2.r = 0., temp2.i = 0.; + i__3 = i__ - 1; + for (k = 1; k <= i__3; ++k) { + i__4 = k + j * c_dim1; + i__5 = k + j * c_dim1; + i__6 = k + i__ * a_dim1; + z__2.r = temp1.r * a[i__6].r - temp1.i * a[i__6].i, + z__2.i = temp1.r * a[i__6].i + temp1.i * a[i__6].r; + z__1.r = c__[i__5].r + z__2.r, z__1.i = c__[i__5].i + z__2.i; + c__[i__4].r = z__1.r, c__[i__4].i = z__1.i; + i__4 = k + j * b_dim1; + d_lmp_cnjg(&z__3, &a[k + i__ * a_dim1]); + z__2.r = b[i__4].r * z__3.r - b[i__4].i * z__3.i, + z__2.i = b[i__4].r * z__3.i + b[i__4].i * z__3.r; + z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i; + temp2.r = z__1.r, temp2.i = z__1.i; + } + if (beta->r == 0. && beta->i == 0.) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + i__ * a_dim1; + d__1 = a[i__4].r; + z__2.r = d__1 * temp1.r, z__2.i = d__1 * temp1.i; + z__3.r = alpha->r * temp2.r - alpha->i * temp2.i, + z__3.i = alpha->r * temp2.i + alpha->i * temp2.r; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } else { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + z__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4].i, + z__3.i = beta->r * c__[i__4].i + beta->i * c__[i__4].r; + i__5 = i__ + i__ * a_dim1; + d__1 = a[i__5].r; + z__4.r = d__1 * temp1.r, z__4.i = d__1 * temp1.i; + z__2.r = z__3.r + z__4.r, z__2.i = z__3.i + z__4.i; + z__5.r = alpha->r * temp2.r - alpha->i * temp2.i, + z__5.i = alpha->r * temp2.i + alpha->i * temp2.r; + z__1.r = z__2.r + z__5.r, z__1.i = z__2.i + z__5.i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } + } + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + for (i__ = *m; i__ >= 1; --i__) { + i__2 = i__ + j * b_dim1; + z__1.r = alpha->r * b[i__2].r - alpha->i * b[i__2].i, + z__1.i = alpha->r * b[i__2].i + alpha->i * b[i__2].r; + temp1.r = z__1.r, temp1.i = z__1.i; + temp2.r = 0., temp2.i = 0.; + i__2 = *m; + for (k = i__ + 1; k <= i__2; ++k) { + i__3 = k + j * c_dim1; + i__4 = k + j * c_dim1; + i__5 = k + i__ * a_dim1; + z__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, + z__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5].r; + z__1.r = c__[i__4].r + z__2.r, z__1.i = c__[i__4].i + z__2.i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + i__3 = k + j * b_dim1; + d_lmp_cnjg(&z__3, &a[k + i__ * a_dim1]); + z__2.r = b[i__3].r * z__3.r - b[i__3].i * z__3.i, + z__2.i = b[i__3].r * z__3.i + b[i__3].i * z__3.r; + z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i; + temp2.r = z__1.r, temp2.i = z__1.i; + } + if (beta->r == 0. && beta->i == 0.) 
{ + i__2 = i__ + j * c_dim1; + i__3 = i__ + i__ * a_dim1; + d__1 = a[i__3].r; + z__2.r = d__1 * temp1.r, z__2.i = d__1 * temp1.i; + z__3.r = alpha->r * temp2.r - alpha->i * temp2.i, + z__3.i = alpha->r * temp2.i + alpha->i * temp2.r; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + } else { + i__2 = i__ + j * c_dim1; + i__3 = i__ + j * c_dim1; + z__3.r = beta->r * c__[i__3].r - beta->i * c__[i__3].i, + z__3.i = beta->r * c__[i__3].i + beta->i * c__[i__3].r; + i__4 = i__ + i__ * a_dim1; + d__1 = a[i__4].r; + z__4.r = d__1 * temp1.r, z__4.i = d__1 * temp1.i; + z__2.r = z__3.r + z__4.r, z__2.i = z__3.i + z__4.i; + z__5.r = alpha->r * temp2.r - alpha->i * temp2.i, + z__5.i = alpha->r * temp2.i + alpha->i * temp2.r; + z__1.r = z__2.r + z__5.r, z__1.i = z__2.i + z__5.i; + c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; + } + } + } + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j + j * a_dim1; + d__1 = a[i__2].r; + z__1.r = d__1 * alpha->r, z__1.i = d__1 * alpha->i; + temp1.r = z__1.r, temp1.i = z__1.i; + if (beta->r == 0. && beta->i == 0.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * b_dim1; + z__1.r = temp1.r * b[i__4].r - temp1.i * b[i__4].i, + z__1.i = temp1.r * b[i__4].i + temp1.i * b[i__4].r; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } + } else { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + z__2.r = beta->r * c__[i__4].r - beta->i * c__[i__4].i, + z__2.i = beta->r * c__[i__4].i + beta->i * c__[i__4].r; + i__5 = i__ + j * b_dim1; + z__3.r = temp1.r * b[i__5].r - temp1.i * b[i__5].i, + z__3.i = temp1.r * b[i__5].i + temp1.i * b[i__5].r; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } + } + i__2 = j - 1; + for (k = 1; k <= i__2; ++k) { + if (upper) { + i__3 = k + j * a_dim1; + z__1.r = alpha->r * a[i__3].r - alpha->i * a[i__3].i, + z__1.i = alpha->r * a[i__3].i + alpha->i * a[i__3].r; + temp1.r = z__1.r, temp1.i = z__1.i; + } else { + d_lmp_cnjg(&z__2, &a[j + k * a_dim1]); + z__1.r = alpha->r * z__2.r - alpha->i * z__2.i, + z__1.i = alpha->r * z__2.i + alpha->i * z__2.r; + temp1.r = z__1.r, temp1.i = z__1.i; + } + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + i__4 = i__ + j * c_dim1; + i__5 = i__ + j * c_dim1; + i__6 = i__ + k * b_dim1; + z__2.r = temp1.r * b[i__6].r - temp1.i * b[i__6].i, + z__2.i = temp1.r * b[i__6].i + temp1.i * b[i__6].r; + z__1.r = c__[i__5].r + z__2.r, z__1.i = c__[i__5].i + z__2.i; + c__[i__4].r = z__1.r, c__[i__4].i = z__1.i; + } + } + i__2 = *n; + for (k = j + 1; k <= i__2; ++k) { + if (upper) { + d_lmp_cnjg(&z__2, &a[j + k * a_dim1]); + z__1.r = alpha->r * z__2.r - alpha->i * z__2.i, + z__1.i = alpha->r * z__2.i + alpha->i * z__2.r; + temp1.r = z__1.r, temp1.i = z__1.i; + } else { + i__3 = k + j * a_dim1; + z__1.r = alpha->r * a[i__3].r - alpha->i * a[i__3].i, + z__1.i = alpha->r * a[i__3].i + alpha->i * a[i__3].r; + temp1.r = z__1.r, temp1.i = z__1.i; + } + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + i__4 = i__ + j * c_dim1; + i__5 = i__ + j * c_dim1; + i__6 = i__ + k * b_dim1; + z__2.r = temp1.r * b[i__6].r - temp1.i * b[i__6].i, + z__2.i = temp1.r * b[i__6].i + temp1.i * b[i__6].r; + z__1.r = c__[i__5].r + z__2.r, z__1.i = c__[i__5].i + z__2.i; + c__[i__4].r = z__1.r, c__[i__4].i = z__1.i; + } + } + } + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/zher.cpp b/lib/linalg/zher.cpp new 
file mode 100644 index 0000000000..6514a72f65 --- /dev/null +++ b/lib/linalg/zher.cpp @@ -0,0 +1,187 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +int zher_(char *uplo, integer *n, doublereal *alpha, doublecomplex *x, integer *incx, + doublecomplex *a, integer *lda, ftnlen uplo_len) +{ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + doublereal d__1; + doublecomplex z__1, z__2; + void d_lmp_cnjg(doublecomplex *, doublecomplex *); + integer i__, j, ix, jx, kx, info; + doublecomplex temp; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern int xerbla_(char *, integer *, ftnlen); + --x; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + info = 0; + if (!lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1) && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + info = 1; + } else if (*n < 0) { + info = 2; + } else if (*incx == 0) { + info = 5; + } else if (*lda < max(1, *n)) { + info = 7; + } + if (info != 0) { + xerbla_((char *)"ZHER ", &info, (ftnlen)6); + return 0; + } + if (*n == 0 || *alpha == 0.) { + return 0; + } + if (*incx <= 0) { + kx = 1 - (*n - 1) * *incx; + } else if (*incx != 1) { + kx = 1; + } + if (lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1)) { + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + if (x[i__2].r != 0. || x[i__2].i != 0.) { + d_lmp_cnjg(&z__2, &x[j]); + z__1.r = *alpha * z__2.r, z__1.i = *alpha * z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + i__5 = i__; + z__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, + z__2.i = x[i__5].r * temp.i + x[i__5].i * temp.r; + z__1.r = a[i__4].r + z__2.r, z__1.i = a[i__4].i + z__2.i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; + } + i__2 = j + j * a_dim1; + i__3 = j + j * a_dim1; + i__4 = j; + z__1.r = x[i__4].r * temp.r - x[i__4].i * temp.i, + z__1.i = x[i__4].r * temp.i + x[i__4].i * temp.r; + d__1 = a[i__3].r + z__1.r; + a[i__2].r = d__1, a[i__2].i = 0.; + } else { + i__2 = j + j * a_dim1; + i__3 = j + j * a_dim1; + d__1 = a[i__3].r; + a[i__2].r = d__1, a[i__2].i = 0.; + } + } + } else { + jx = kx; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jx; + if (x[i__2].r != 0. || x[i__2].i != 0.) { + d_lmp_cnjg(&z__2, &x[jx]); + z__1.r = *alpha * z__2.r, z__1.i = *alpha * z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + ix = kx; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + i__5 = ix; + z__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, + z__2.i = x[i__5].r * temp.i + x[i__5].i * temp.r; + z__1.r = a[i__4].r + z__2.r, z__1.i = a[i__4].i + z__2.i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; + ix += *incx; + } + i__2 = j + j * a_dim1; + i__3 = j + j * a_dim1; + i__4 = jx; + z__1.r = x[i__4].r * temp.r - x[i__4].i * temp.i, + z__1.i = x[i__4].r * temp.i + x[i__4].i * temp.r; + d__1 = a[i__3].r + z__1.r; + a[i__2].r = d__1, a[i__2].i = 0.; + } else { + i__2 = j + j * a_dim1; + i__3 = j + j * a_dim1; + d__1 = a[i__3].r; + a[i__2].r = d__1, a[i__2].i = 0.; + } + jx += *incx; + } + } + } else { + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + if (x[i__2].r != 0. || x[i__2].i != 0.) 
{ + d_lmp_cnjg(&z__2, &x[j]); + z__1.r = *alpha * z__2.r, z__1.i = *alpha * z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + i__2 = j + j * a_dim1; + i__3 = j + j * a_dim1; + i__4 = j; + z__1.r = temp.r * x[i__4].r - temp.i * x[i__4].i, + z__1.i = temp.r * x[i__4].i + temp.i * x[i__4].r; + d__1 = a[i__3].r + z__1.r; + a[i__2].r = d__1, a[i__2].i = 0.; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + i__5 = i__; + z__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, + z__2.i = x[i__5].r * temp.i + x[i__5].i * temp.r; + z__1.r = a[i__4].r + z__2.r, z__1.i = a[i__4].i + z__2.i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; + } + } else { + i__2 = j + j * a_dim1; + i__3 = j + j * a_dim1; + d__1 = a[i__3].r; + a[i__2].r = d__1, a[i__2].i = 0.; + } + } + } else { + jx = kx; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jx; + if (x[i__2].r != 0. || x[i__2].i != 0.) { + d_lmp_cnjg(&z__2, &x[jx]); + z__1.r = *alpha * z__2.r, z__1.i = *alpha * z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + i__2 = j + j * a_dim1; + i__3 = j + j * a_dim1; + i__4 = jx; + z__1.r = temp.r * x[i__4].r - temp.i * x[i__4].i, + z__1.i = temp.r * x[i__4].i + temp.i * x[i__4].r; + d__1 = a[i__3].r + z__1.r; + a[i__2].r = d__1, a[i__2].i = 0.; + ix = jx; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + ix += *incx; + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + i__5 = ix; + z__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, + z__2.i = x[i__5].r * temp.i + x[i__5].i * temp.r; + z__1.r = a[i__4].r + z__2.r, z__1.i = a[i__4].i + z__2.i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; + } + } else { + i__2 = j + j * a_dim1; + i__3 = j + j * a_dim1; + d__1 = a[i__3].r; + a[i__2].r = d__1, a[i__2].i = 0.; + } + jx += *incx; + } + } + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/zherk.cpp b/lib/linalg/zherk.cpp new file mode 100644 index 0000000000..efae201bfa --- /dev/null +++ b/lib/linalg/zherk.cpp @@ -0,0 +1,325 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +int zherk_(char *uplo, char *trans, integer *n, integer *k, doublereal *alpha, doublecomplex *a, + integer *lda, doublereal *beta, doublecomplex *c__, integer *ldc, ftnlen uplo_len, + ftnlen trans_len) +{ + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5, i__6; + doublereal d__1; + doublecomplex z__1, z__2, z__3; + void d_lmp_cnjg(doublecomplex *, doublecomplex *); + integer i__, j, l, info; + doublecomplex temp; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + integer nrowa; + doublereal rtemp; + logical upper; + extern int xerbla_(char *, integer *, ftnlen); + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + if (lsame_(trans, (char *)"N", (ftnlen)1, (ftnlen)1)) { + nrowa = *n; + } else { + nrowa = *k; + } + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + info = 0; + if (!upper && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + info = 1; + } else if (!lsame_(trans, (char *)"N", (ftnlen)1, (ftnlen)1) && + !lsame_(trans, (char *)"C", (ftnlen)1, (ftnlen)1)) { + info = 2; + } else if (*n < 0) { + info = 3; + } else if (*k < 0) { + info = 4; + } else if (*lda < max(1, nrowa)) { + info = 7; + } else if (*ldc < max(1, *n)) { + info = 10; + } + if (info != 0) { + xerbla_((char *)"ZHERK ", &info, (ftnlen)6); + return 0; + } + if (*n == 0 || (*alpha == 0. || *k == 0) && *beta == 1.) { + return 0; + } + if (*alpha == 0.) 
{ + if (upper) { + if (*beta == 0.) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + c__[i__3].r = 0., c__[i__3].i = 0.; + } + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + z__1.r = *beta * c__[i__4].r, z__1.i = *beta * c__[i__4].i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + d__1 = *beta * c__[i__3].r; + c__[i__2].r = d__1, c__[i__2].i = 0.; + } + } + } else { + if (*beta == 0.) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + c__[i__3].r = 0., c__[i__3].i = 0.; + } + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + d__1 = *beta * c__[i__3].r; + c__[i__2].r = d__1, c__[i__2].i = 0.; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + z__1.r = *beta * c__[i__4].r, z__1.i = *beta * c__[i__4].i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } + } + } + } + return 0; + } + if (lsame_(trans, (char *)"N", (ftnlen)1, (ftnlen)1)) { + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*beta == 0.) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + c__[i__3].r = 0., c__[i__3].i = 0.; + } + } else if (*beta != 1.) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + z__1.r = *beta * c__[i__4].r, z__1.i = *beta * c__[i__4].i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + d__1 = *beta * c__[i__3].r; + c__[i__2].r = d__1, c__[i__2].i = 0.; + } else { + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + d__1 = c__[i__3].r; + c__[i__2].r = d__1, c__[i__2].i = 0.; + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + i__3 = j + l * a_dim1; + if (a[i__3].r != 0. || a[i__3].i != 0.) { + d_lmp_cnjg(&z__2, &a[j + l * a_dim1]); + z__1.r = *alpha * z__2.r, z__1.i = *alpha * z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + i__3 = j - 1; + for (i__ = 1; i__ <= i__3; ++i__) { + i__4 = i__ + j * c_dim1; + i__5 = i__ + j * c_dim1; + i__6 = i__ + l * a_dim1; + z__2.r = temp.r * a[i__6].r - temp.i * a[i__6].i, + z__2.i = temp.r * a[i__6].i + temp.i * a[i__6].r; + z__1.r = c__[i__5].r + z__2.r, z__1.i = c__[i__5].i + z__2.i; + c__[i__4].r = z__1.r, c__[i__4].i = z__1.i; + } + i__3 = j + j * c_dim1; + i__4 = j + j * c_dim1; + i__5 = i__ + l * a_dim1; + z__1.r = temp.r * a[i__5].r - temp.i * a[i__5].i, + z__1.i = temp.r * a[i__5].i + temp.i * a[i__5].r; + d__1 = c__[i__4].r + z__1.r; + c__[i__3].r = d__1, c__[i__3].i = 0.; + } + } + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*beta == 0.) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + c__[i__3].r = 0., c__[i__3].i = 0.; + } + } else if (*beta != 1.) 
{ + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + d__1 = *beta * c__[i__3].r; + c__[i__2].r = d__1, c__[i__2].i = 0.; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + z__1.r = *beta * c__[i__4].r, z__1.i = *beta * c__[i__4].i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } + } else { + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + d__1 = c__[i__3].r; + c__[i__2].r = d__1, c__[i__2].i = 0.; + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + i__3 = j + l * a_dim1; + if (a[i__3].r != 0. || a[i__3].i != 0.) { + d_lmp_cnjg(&z__2, &a[j + l * a_dim1]); + z__1.r = *alpha * z__2.r, z__1.i = *alpha * z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + i__3 = j + j * c_dim1; + i__4 = j + j * c_dim1; + i__5 = j + l * a_dim1; + z__1.r = temp.r * a[i__5].r - temp.i * a[i__5].i, + z__1.i = temp.r * a[i__5].i + temp.i * a[i__5].r; + d__1 = c__[i__4].r + z__1.r; + c__[i__3].r = d__1, c__[i__3].i = 0.; + i__3 = *n; + for (i__ = j + 1; i__ <= i__3; ++i__) { + i__4 = i__ + j * c_dim1; + i__5 = i__ + j * c_dim1; + i__6 = i__ + l * a_dim1; + z__2.r = temp.r * a[i__6].r - temp.i * a[i__6].i, + z__2.i = temp.r * a[i__6].i + temp.i * a[i__6].r; + z__1.r = c__[i__5].r + z__2.r, z__1.i = c__[i__5].i + z__2.i; + c__[i__4].r = z__1.r, c__[i__4].i = z__1.i; + } + } + } + } + } + } else { + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + temp.r = 0., temp.i = 0.; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + d_lmp_cnjg(&z__3, &a[l + i__ * a_dim1]); + i__4 = l + j * a_dim1; + z__2.r = z__3.r * a[i__4].r - z__3.i * a[i__4].i, + z__2.i = z__3.r * a[i__4].i + z__3.i * a[i__4].r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + } + if (*beta == 0.) { + i__3 = i__ + j * c_dim1; + z__1.r = *alpha * temp.r, z__1.i = *alpha * temp.i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } else { + i__3 = i__ + j * c_dim1; + z__2.r = *alpha * temp.r, z__2.i = *alpha * temp.i; + i__4 = i__ + j * c_dim1; + z__3.r = *beta * c__[i__4].r, z__3.i = *beta * c__[i__4].i; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } + } + rtemp = 0.; + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + d_lmp_cnjg(&z__3, &a[l + j * a_dim1]); + i__3 = l + j * a_dim1; + z__2.r = z__3.r * a[i__3].r - z__3.i * a[i__3].i, + z__2.i = z__3.r * a[i__3].i + z__3.i * a[i__3].r; + z__1.r = rtemp + z__2.r, z__1.i = z__2.i; + rtemp = z__1.r; + } + if (*beta == 0.) { + i__2 = j + j * c_dim1; + d__1 = *alpha * rtemp; + c__[i__2].r = d__1, c__[i__2].i = 0.; + } else { + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + d__1 = *alpha * rtemp + *beta * c__[i__3].r; + c__[i__2].r = d__1, c__[i__2].i = 0.; + } + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + rtemp = 0.; + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + d_lmp_cnjg(&z__3, &a[l + j * a_dim1]); + i__3 = l + j * a_dim1; + z__2.r = z__3.r * a[i__3].r - z__3.i * a[i__3].i, + z__2.i = z__3.r * a[i__3].i + z__3.i * a[i__3].r; + z__1.r = rtemp + z__2.r, z__1.i = z__2.i; + rtemp = z__1.r; + } + if (*beta == 0.) 
{ + i__2 = j + j * c_dim1; + d__1 = *alpha * rtemp; + c__[i__2].r = d__1, c__[i__2].i = 0.; + } else { + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + d__1 = *alpha * rtemp + *beta * c__[i__3].r; + c__[i__2].r = d__1, c__[i__2].i = 0.; + } + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + temp.r = 0., temp.i = 0.; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + d_lmp_cnjg(&z__3, &a[l + i__ * a_dim1]); + i__4 = l + j * a_dim1; + z__2.r = z__3.r * a[i__4].r - z__3.i * a[i__4].i, + z__2.i = z__3.r * a[i__4].i + z__3.i * a[i__4].r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + } + if (*beta == 0.) { + i__3 = i__ + j * c_dim1; + z__1.r = *alpha * temp.r, z__1.i = *alpha * temp.i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } else { + i__3 = i__ + j * c_dim1; + z__2.r = *alpha * temp.r, z__2.i = *alpha * temp.i; + i__4 = i__ + j * c_dim1; + z__3.r = *beta * c__[i__4].r, z__3.i = *beta * c__[i__4].i; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; + c__[i__3].r = z__1.r, c__[i__3].i = z__1.i; + } + } + } + } + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/zhetf2.cpp b/lib/linalg/zhetf2.cpp new file mode 100644 index 0000000000..c960a63bc1 --- /dev/null +++ b/lib/linalg/zhetf2.cpp @@ -0,0 +1,439 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static integer c__1 = 1; +int zhetf2_(char *uplo, integer *n, doublecomplex *a, integer *lda, integer *ipiv, integer *info, + ftnlen uplo_len) +{ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6; + doublereal d__1, d__2, d__3, d__4; + doublecomplex z__1, z__2, z__3, z__4, z__5, z__6; + double sqrt(doublereal), d_lmp_imag(doublecomplex *); + void d_lmp_cnjg(doublecomplex *, doublecomplex *); + doublereal d__; + integer i__, j, k; + doublecomplex t; + doublereal r1, d11; + doublecomplex d12; + doublereal d22; + doublecomplex d21; + integer kk, kp; + doublecomplex wk; + doublereal tt; + doublecomplex wkm1, wkp1; + integer imax, jmax; + extern int zher_(char *, integer *, doublereal *, doublecomplex *, integer *, doublecomplex *, + integer *, ftnlen); + doublereal alpha; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + integer kstep; + logical upper; + extern int zswap_(integer *, doublecomplex *, integer *, doublecomplex *, integer *); + extern doublereal dlapy2_(doublereal *, doublereal *); + doublereal absakk; + extern logical disnan_(doublereal *); + extern int xerbla_(char *, integer *, ftnlen), + zdscal_(integer *, doublereal *, doublecomplex *, integer *); + doublereal colmax; + extern integer izamax_(integer *, doublecomplex *, integer *); + doublereal rowmax; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + *info = 0; + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + if (!upper && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1, *n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"ZHETF2", &i__1, (ftnlen)6); + return 0; + } + alpha = (sqrt(17.) + 1.) / 8.; + if (upper) { + k = *n; + L10: + if (k < 1) { + goto L90; + } + kstep = 1; + i__1 = k + k * a_dim1; + absakk = (d__1 = a[i__1].r, abs(d__1)); + if (k > 1) { + i__1 = k - 1; + imax = izamax_(&i__1, &a[k * a_dim1 + 1], &c__1); + i__1 = imax + k * a_dim1; + colmax = + (d__1 = a[i__1].r, abs(d__1)) + (d__2 = d_lmp_imag(&a[imax + k * a_dim1]), abs(d__2)); + } else { + colmax = 0.; + } + if (max(absakk, colmax) == 0. 
|| disnan_(&absakk)) { + if (*info == 0) { + *info = k; + } + kp = k; + i__1 = k + k * a_dim1; + i__2 = k + k * a_dim1; + d__1 = a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; + } else { + if (absakk >= alpha * colmax) { + kp = k; + } else { + i__1 = k - imax; + jmax = imax + izamax_(&i__1, &a[imax + (imax + 1) * a_dim1], lda); + i__1 = imax + jmax * a_dim1; + rowmax = (d__1 = a[i__1].r, abs(d__1)) + + (d__2 = d_lmp_imag(&a[imax + jmax * a_dim1]), abs(d__2)); + if (imax > 1) { + i__1 = imax - 1; + jmax = izamax_(&i__1, &a[imax * a_dim1 + 1], &c__1); + i__1 = jmax + imax * a_dim1; + d__3 = rowmax, d__4 = (d__1 = a[i__1].r, abs(d__1)) + + (d__2 = d_lmp_imag(&a[jmax + imax * a_dim1]), abs(d__2)); + rowmax = max(d__3, d__4); + } + if (absakk >= alpha * colmax * (colmax / rowmax)) { + kp = k; + } else { + i__1 = imax + imax * a_dim1; + if ((d__1 = a[i__1].r, abs(d__1)) >= alpha * rowmax) { + kp = imax; + } else { + kp = imax; + kstep = 2; + } + } + } + kk = k - kstep + 1; + if (kp != kk) { + i__1 = kp - 1; + zswap_(&i__1, &a[kk * a_dim1 + 1], &c__1, &a[kp * a_dim1 + 1], &c__1); + i__1 = kk - 1; + for (j = kp + 1; j <= i__1; ++j) { + d_lmp_cnjg(&z__1, &a[j + kk * a_dim1]); + t.r = z__1.r, t.i = z__1.i; + i__2 = j + kk * a_dim1; + d_lmp_cnjg(&z__1, &a[kp + j * a_dim1]); + a[i__2].r = z__1.r, a[i__2].i = z__1.i; + i__2 = kp + j * a_dim1; + a[i__2].r = t.r, a[i__2].i = t.i; + } + i__1 = kp + kk * a_dim1; + d_lmp_cnjg(&z__1, &a[kp + kk * a_dim1]); + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + i__1 = kk + kk * a_dim1; + r1 = a[i__1].r; + i__1 = kk + kk * a_dim1; + i__2 = kp + kp * a_dim1; + d__1 = a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; + i__1 = kp + kp * a_dim1; + a[i__1].r = r1, a[i__1].i = 0.; + if (kstep == 2) { + i__1 = k + k * a_dim1; + i__2 = k + k * a_dim1; + d__1 = a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; + i__1 = k - 1 + k * a_dim1; + t.r = a[i__1].r, t.i = a[i__1].i; + i__1 = k - 1 + k * a_dim1; + i__2 = kp + k * a_dim1; + a[i__1].r = a[i__2].r, a[i__1].i = a[i__2].i; + i__1 = kp + k * a_dim1; + a[i__1].r = t.r, a[i__1].i = t.i; + } + } else { + i__1 = k + k * a_dim1; + i__2 = k + k * a_dim1; + d__1 = a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; + if (kstep == 2) { + i__1 = k - 1 + (k - 1) * a_dim1; + i__2 = k - 1 + (k - 1) * a_dim1; + d__1 = a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; + } + } + if (kstep == 1) { + i__1 = k + k * a_dim1; + r1 = 1. / a[i__1].r; + i__1 = k - 1; + d__1 = -r1; + zher_(uplo, &i__1, &d__1, &a[k * a_dim1 + 1], &c__1, &a[a_offset], lda, (ftnlen)1); + i__1 = k - 1; + zdscal_(&i__1, &r1, &a[k * a_dim1 + 1], &c__1); + } else { + if (k > 2) { + i__1 = k - 1 + k * a_dim1; + d__1 = a[i__1].r; + d__2 = d_lmp_imag(&a[k - 1 + k * a_dim1]); + d__ = dlapy2_(&d__1, &d__2); + i__1 = k - 1 + (k - 1) * a_dim1; + d22 = a[i__1].r / d__; + i__1 = k + k * a_dim1; + d11 = a[i__1].r / d__; + tt = 1. 
/ (d11 * d22 - 1.); + i__1 = k - 1 + k * a_dim1; + z__1.r = a[i__1].r / d__, z__1.i = a[i__1].i / d__; + d12.r = z__1.r, d12.i = z__1.i; + d__ = tt / d__; + for (j = k - 2; j >= 1; --j) { + i__1 = j + (k - 1) * a_dim1; + z__3.r = d11 * a[i__1].r, z__3.i = d11 * a[i__1].i; + d_lmp_cnjg(&z__5, &d12); + i__2 = j + k * a_dim1; + z__4.r = z__5.r * a[i__2].r - z__5.i * a[i__2].i, + z__4.i = z__5.r * a[i__2].i + z__5.i * a[i__2].r; + z__2.r = z__3.r - z__4.r, z__2.i = z__3.i - z__4.i; + z__1.r = d__ * z__2.r, z__1.i = d__ * z__2.i; + wkm1.r = z__1.r, wkm1.i = z__1.i; + i__1 = j + k * a_dim1; + z__3.r = d22 * a[i__1].r, z__3.i = d22 * a[i__1].i; + i__2 = j + (k - 1) * a_dim1; + z__4.r = d12.r * a[i__2].r - d12.i * a[i__2].i, + z__4.i = d12.r * a[i__2].i + d12.i * a[i__2].r; + z__2.r = z__3.r - z__4.r, z__2.i = z__3.i - z__4.i; + z__1.r = d__ * z__2.r, z__1.i = d__ * z__2.i; + wk.r = z__1.r, wk.i = z__1.i; + for (i__ = j; i__ >= 1; --i__) { + i__1 = i__ + j * a_dim1; + i__2 = i__ + j * a_dim1; + i__3 = i__ + k * a_dim1; + d_lmp_cnjg(&z__4, &wk); + z__3.r = a[i__3].r * z__4.r - a[i__3].i * z__4.i, + z__3.i = a[i__3].r * z__4.i + a[i__3].i * z__4.r; + z__2.r = a[i__2].r - z__3.r, z__2.i = a[i__2].i - z__3.i; + i__4 = i__ + (k - 1) * a_dim1; + d_lmp_cnjg(&z__6, &wkm1); + z__5.r = a[i__4].r * z__6.r - a[i__4].i * z__6.i, + z__5.i = a[i__4].r * z__6.i + a[i__4].i * z__6.r; + z__1.r = z__2.r - z__5.r, z__1.i = z__2.i - z__5.i; + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + } + i__1 = j + k * a_dim1; + a[i__1].r = wk.r, a[i__1].i = wk.i; + i__1 = j + (k - 1) * a_dim1; + a[i__1].r = wkm1.r, a[i__1].i = wkm1.i; + i__1 = j + j * a_dim1; + i__2 = j + j * a_dim1; + d__1 = a[i__2].r; + z__1.r = d__1, z__1.i = 0.; + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + } + } + } + } + if (kstep == 1) { + ipiv[k] = kp; + } else { + ipiv[k] = -kp; + ipiv[k - 1] = -kp; + } + k -= kstep; + goto L10; + } else { + k = 1; + L50: + if (k > *n) { + goto L90; + } + kstep = 1; + i__1 = k + k * a_dim1; + absakk = (d__1 = a[i__1].r, abs(d__1)); + if (k < *n) { + i__1 = *n - k; + imax = k + izamax_(&i__1, &a[k + 1 + k * a_dim1], &c__1); + i__1 = imax + k * a_dim1; + colmax = + (d__1 = a[i__1].r, abs(d__1)) + (d__2 = d_lmp_imag(&a[imax + k * a_dim1]), abs(d__2)); + } else { + colmax = 0.; + } + if (max(absakk, colmax) == 0. 
|| disnan_(&absakk)) { + if (*info == 0) { + *info = k; + } + kp = k; + i__1 = k + k * a_dim1; + i__2 = k + k * a_dim1; + d__1 = a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; + } else { + if (absakk >= alpha * colmax) { + kp = k; + } else { + i__1 = imax - k; + jmax = k - 1 + izamax_(&i__1, &a[imax + k * a_dim1], lda); + i__1 = imax + jmax * a_dim1; + rowmax = (d__1 = a[i__1].r, abs(d__1)) + + (d__2 = d_lmp_imag(&a[imax + jmax * a_dim1]), abs(d__2)); + if (imax < *n) { + i__1 = *n - imax; + jmax = imax + izamax_(&i__1, &a[imax + 1 + imax * a_dim1], &c__1); + i__1 = jmax + imax * a_dim1; + d__3 = rowmax, d__4 = (d__1 = a[i__1].r, abs(d__1)) + + (d__2 = d_lmp_imag(&a[jmax + imax * a_dim1]), abs(d__2)); + rowmax = max(d__3, d__4); + } + if (absakk >= alpha * colmax * (colmax / rowmax)) { + kp = k; + } else { + i__1 = imax + imax * a_dim1; + if ((d__1 = a[i__1].r, abs(d__1)) >= alpha * rowmax) { + kp = imax; + } else { + kp = imax; + kstep = 2; + } + } + } + kk = k + kstep - 1; + if (kp != kk) { + if (kp < *n) { + i__1 = *n - kp; + zswap_(&i__1, &a[kp + 1 + kk * a_dim1], &c__1, &a[kp + 1 + kp * a_dim1], &c__1); + } + i__1 = kp - 1; + for (j = kk + 1; j <= i__1; ++j) { + d_lmp_cnjg(&z__1, &a[j + kk * a_dim1]); + t.r = z__1.r, t.i = z__1.i; + i__2 = j + kk * a_dim1; + d_lmp_cnjg(&z__1, &a[kp + j * a_dim1]); + a[i__2].r = z__1.r, a[i__2].i = z__1.i; + i__2 = kp + j * a_dim1; + a[i__2].r = t.r, a[i__2].i = t.i; + } + i__1 = kp + kk * a_dim1; + d_lmp_cnjg(&z__1, &a[kp + kk * a_dim1]); + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + i__1 = kk + kk * a_dim1; + r1 = a[i__1].r; + i__1 = kk + kk * a_dim1; + i__2 = kp + kp * a_dim1; + d__1 = a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; + i__1 = kp + kp * a_dim1; + a[i__1].r = r1, a[i__1].i = 0.; + if (kstep == 2) { + i__1 = k + k * a_dim1; + i__2 = k + k * a_dim1; + d__1 = a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; + i__1 = k + 1 + k * a_dim1; + t.r = a[i__1].r, t.i = a[i__1].i; + i__1 = k + 1 + k * a_dim1; + i__2 = kp + k * a_dim1; + a[i__1].r = a[i__2].r, a[i__1].i = a[i__2].i; + i__1 = kp + k * a_dim1; + a[i__1].r = t.r, a[i__1].i = t.i; + } + } else { + i__1 = k + k * a_dim1; + i__2 = k + k * a_dim1; + d__1 = a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; + if (kstep == 2) { + i__1 = k + 1 + (k + 1) * a_dim1; + i__2 = k + 1 + (k + 1) * a_dim1; + d__1 = a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; + } + } + if (kstep == 1) { + if (k < *n) { + i__1 = k + k * a_dim1; + r1 = 1. / a[i__1].r; + i__1 = *n - k; + d__1 = -r1; + zher_(uplo, &i__1, &d__1, &a[k + 1 + k * a_dim1], &c__1, + &a[k + 1 + (k + 1) * a_dim1], lda, (ftnlen)1); + i__1 = *n - k; + zdscal_(&i__1, &r1, &a[k + 1 + k * a_dim1], &c__1); + } + } else { + if (k < *n - 1) { + i__1 = k + 1 + k * a_dim1; + d__1 = a[i__1].r; + d__2 = d_lmp_imag(&a[k + 1 + k * a_dim1]); + d__ = dlapy2_(&d__1, &d__2); + i__1 = k + 1 + (k + 1) * a_dim1; + d11 = a[i__1].r / d__; + i__1 = k + k * a_dim1; + d22 = a[i__1].r / d__; + tt = 1. 
/ (d11 * d22 - 1.); + i__1 = k + 1 + k * a_dim1; + z__1.r = a[i__1].r / d__, z__1.i = a[i__1].i / d__; + d21.r = z__1.r, d21.i = z__1.i; + d__ = tt / d__; + i__1 = *n; + for (j = k + 2; j <= i__1; ++j) { + i__2 = j + k * a_dim1; + z__3.r = d11 * a[i__2].r, z__3.i = d11 * a[i__2].i; + i__3 = j + (k + 1) * a_dim1; + z__4.r = d21.r * a[i__3].r - d21.i * a[i__3].i, + z__4.i = d21.r * a[i__3].i + d21.i * a[i__3].r; + z__2.r = z__3.r - z__4.r, z__2.i = z__3.i - z__4.i; + z__1.r = d__ * z__2.r, z__1.i = d__ * z__2.i; + wk.r = z__1.r, wk.i = z__1.i; + i__2 = j + (k + 1) * a_dim1; + z__3.r = d22 * a[i__2].r, z__3.i = d22 * a[i__2].i; + d_lmp_cnjg(&z__5, &d21); + i__3 = j + k * a_dim1; + z__4.r = z__5.r * a[i__3].r - z__5.i * a[i__3].i, + z__4.i = z__5.r * a[i__3].i + z__5.i * a[i__3].r; + z__2.r = z__3.r - z__4.r, z__2.i = z__3.i - z__4.i; + z__1.r = d__ * z__2.r, z__1.i = d__ * z__2.i; + wkp1.r = z__1.r, wkp1.i = z__1.i; + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + i__5 = i__ + k * a_dim1; + d_lmp_cnjg(&z__4, &wk); + z__3.r = a[i__5].r * z__4.r - a[i__5].i * z__4.i, + z__3.i = a[i__5].r * z__4.i + a[i__5].i * z__4.r; + z__2.r = a[i__4].r - z__3.r, z__2.i = a[i__4].i - z__3.i; + i__6 = i__ + (k + 1) * a_dim1; + d_lmp_cnjg(&z__6, &wkp1); + z__5.r = a[i__6].r * z__6.r - a[i__6].i * z__6.i, + z__5.i = a[i__6].r * z__6.i + a[i__6].i * z__6.r; + z__1.r = z__2.r - z__5.r, z__1.i = z__2.i - z__5.i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; + } + i__2 = j + k * a_dim1; + a[i__2].r = wk.r, a[i__2].i = wk.i; + i__2 = j + (k + 1) * a_dim1; + a[i__2].r = wkp1.r, a[i__2].i = wkp1.i; + i__2 = j + j * a_dim1; + i__3 = j + j * a_dim1; + d__1 = a[i__3].r; + z__1.r = d__1, z__1.i = 0.; + a[i__2].r = z__1.r, a[i__2].i = z__1.i; + } + } + } + } + if (kstep == 1) { + ipiv[k] = kp; + } else { + ipiv[k] = -kp; + ipiv[k + 1] = -kp; + } + k += kstep; + goto L50; + } +L90: + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/zhetrf.cpp b/lib/linalg/zhetrf.cpp new file mode 100644 index 0000000000..cb60ff4b7b --- /dev/null +++ b/lib/linalg/zhetrf.cpp @@ -0,0 +1,123 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static integer c__1 = 1; +static integer c_n1 = -1; +static integer c__2 = 2; +int zhetrf_(char *uplo, integer *n, doublecomplex *a, integer *lda, integer *ipiv, + doublecomplex *work, integer *lwork, integer *info, ftnlen uplo_len) +{ + integer a_dim1, a_offset, i__1, i__2; + integer j, k, kb, nb, iws; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + integer nbmin, iinfo; + logical upper; + extern int zhetf2_(char *, integer *, doublecomplex *, integer *, integer *, integer *, ftnlen), + zlahef_(char *, integer *, integer *, integer *, doublecomplex *, integer *, integer *, + doublecomplex *, integer *, integer *, ftnlen), + xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, + ftnlen, ftnlen); + integer ldwork, lwkopt; + logical lquery; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + --work; + *info = 0; + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + lquery = *lwork == -1; + if (!upper && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1, *n)) { + *info = -4; + } else if (*lwork < 1 && !lquery) { + *info = -7; + } + if (*info == 0) { + nb = ilaenv_(&c__1, (char *)"ZHETRF", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, 
(ftnlen)1); + lwkopt = *n * nb; + work[1].r = (doublereal)lwkopt, work[1].i = 0.; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"ZHETRF", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + nbmin = 2; + ldwork = *n; + if (nb > 1 && nb < *n) { + iws = ldwork * nb; + if (*lwork < iws) { + i__1 = *lwork / ldwork; + nb = max(i__1, 1); + i__1 = 2, + i__2 = ilaenv_(&c__2, (char *)"ZHETRF", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + nbmin = max(i__1, i__2); + } + } else { + iws = 1; + } + if (nb < nbmin) { + nb = *n; + } + if (upper) { + k = *n; + L10: + if (k < 1) { + goto L40; + } + if (k > nb) { + zlahef_(uplo, &k, &nb, &kb, &a[a_offset], lda, &ipiv[1], &work[1], n, &iinfo, + (ftnlen)1); + } else { + zhetf2_(uplo, &k, &a[a_offset], lda, &ipiv[1], &iinfo, (ftnlen)1); + kb = k; + } + if (*info == 0 && iinfo > 0) { + *info = iinfo; + } + k -= kb; + goto L10; + } else { + k = 1; + L20: + if (k > *n) { + goto L40; + } + if (k <= *n - nb) { + i__1 = *n - k + 1; + zlahef_(uplo, &i__1, &nb, &kb, &a[k + k * a_dim1], lda, &ipiv[k], &work[1], n, &iinfo, + (ftnlen)1); + } else { + i__1 = *n - k + 1; + zhetf2_(uplo, &i__1, &a[k + k * a_dim1], lda, &ipiv[k], &iinfo, (ftnlen)1); + kb = *n - k + 1; + } + if (*info == 0 && iinfo > 0) { + *info = iinfo + k - 1; + } + i__1 = k + kb - 1; + for (j = k; j <= i__1; ++j) { + if (ipiv[j] > 0) { + ipiv[j] = ipiv[j] + k - 1; + } else { + ipiv[j] = ipiv[j] - k + 1; + } + } + k += kb; + goto L20; + } +L40: + work[1].r = (doublereal)lwkopt, work[1].i = 0.; + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/zhetri.cpp b/lib/linalg/zhetri.cpp new file mode 100644 index 0000000000..020b4ce52b --- /dev/null +++ b/lib/linalg/zhetri.cpp @@ -0,0 +1,319 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static doublecomplex c_b2 = {0., 0.}; +static integer c__1 = 1; +int zhetri_(char *uplo, integer *n, doublecomplex *a, integer *lda, integer *ipiv, + doublecomplex *work, integer *info, ftnlen uplo_len) +{ + integer a_dim1, a_offset, i__1, i__2, i__3; + doublereal d__1; + doublecomplex z__1, z__2; + double z_lmp_abs(doublecomplex *); + void d_lmp_cnjg(doublecomplex *, doublecomplex *); + doublereal d__; + integer j, k; + doublereal t, ak; + integer kp; + doublereal akp1; + doublecomplex temp, akkp1; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern VOID zdotc_(doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, + integer *); + integer kstep; + extern int zhemv_(char *, integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, + ftnlen); + logical upper; + extern int zcopy_(integer *, doublecomplex *, integer *, doublecomplex *, integer *), + zswap_(integer *, doublecomplex *, integer *, doublecomplex *, integer *), + xerbla_(char *, integer *, ftnlen); + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + --work; + *info = 0; + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + if (!upper && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1, *n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"ZHETRI", &i__1, (ftnlen)6); + return 0; + } + if (*n == 0) { + return 0; + } + if (upper) { + for (*info = *n; *info >= 1; --(*info)) { + i__1 = *info + *info * a_dim1; + if (ipiv[*info] > 0 && (a[i__1].r == 0. 
&& a[i__1].i == 0.)) { + return 0; + } + } + } else { + i__1 = *n; + for (*info = 1; *info <= i__1; ++(*info)) { + i__2 = *info + *info * a_dim1; + if (ipiv[*info] > 0 && (a[i__2].r == 0. && a[i__2].i == 0.)) { + return 0; + } + } + } + *info = 0; + if (upper) { + k = 1; + L30: + if (k > *n) { + goto L50; + } + if (ipiv[k] > 0) { + i__1 = k + k * a_dim1; + i__2 = k + k * a_dim1; + d__1 = 1. / a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; + if (k > 1) { + i__1 = k - 1; + zcopy_(&i__1, &a[k * a_dim1 + 1], &c__1, &work[1], &c__1); + i__1 = k - 1; + z__1.r = -1., z__1.i = -0.; + zhemv_(uplo, &i__1, &z__1, &a[a_offset], lda, &work[1], &c__1, &c_b2, + &a[k * a_dim1 + 1], &c__1, (ftnlen)1); + i__1 = k + k * a_dim1; + i__2 = k + k * a_dim1; + i__3 = k - 1; + zdotc_(&z__2, &i__3, &work[1], &c__1, &a[k * a_dim1 + 1], &c__1); + d__1 = z__2.r; + z__1.r = a[i__2].r - d__1, z__1.i = a[i__2].i; + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + } + kstep = 1; + } else { + t = z_lmp_abs(&a[k + (k + 1) * a_dim1]); + i__1 = k + k * a_dim1; + ak = a[i__1].r / t; + i__1 = k + 1 + (k + 1) * a_dim1; + akp1 = a[i__1].r / t; + i__1 = k + (k + 1) * a_dim1; + z__1.r = a[i__1].r / t, z__1.i = a[i__1].i / t; + akkp1.r = z__1.r, akkp1.i = z__1.i; + d__ = t * (ak * akp1 - 1.); + i__1 = k + k * a_dim1; + d__1 = akp1 / d__; + a[i__1].r = d__1, a[i__1].i = 0.; + i__1 = k + 1 + (k + 1) * a_dim1; + d__1 = ak / d__; + a[i__1].r = d__1, a[i__1].i = 0.; + i__1 = k + (k + 1) * a_dim1; + z__2.r = -akkp1.r, z__2.i = -akkp1.i; + z__1.r = z__2.r / d__, z__1.i = z__2.i / d__; + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + if (k > 1) { + i__1 = k - 1; + zcopy_(&i__1, &a[k * a_dim1 + 1], &c__1, &work[1], &c__1); + i__1 = k - 1; + z__1.r = -1., z__1.i = -0.; + zhemv_(uplo, &i__1, &z__1, &a[a_offset], lda, &work[1], &c__1, &c_b2, + &a[k * a_dim1 + 1], &c__1, (ftnlen)1); + i__1 = k + k * a_dim1; + i__2 = k + k * a_dim1; + i__3 = k - 1; + zdotc_(&z__2, &i__3, &work[1], &c__1, &a[k * a_dim1 + 1], &c__1); + d__1 = z__2.r; + z__1.r = a[i__2].r - d__1, z__1.i = a[i__2].i; + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + i__1 = k + (k + 1) * a_dim1; + i__2 = k + (k + 1) * a_dim1; + i__3 = k - 1; + zdotc_(&z__2, &i__3, &a[k * a_dim1 + 1], &c__1, &a[(k + 1) * a_dim1 + 1], &c__1); + z__1.r = a[i__2].r - z__2.r, z__1.i = a[i__2].i - z__2.i; + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + i__1 = k - 1; + zcopy_(&i__1, &a[(k + 1) * a_dim1 + 1], &c__1, &work[1], &c__1); + i__1 = k - 1; + z__1.r = -1., z__1.i = -0.; + zhemv_(uplo, &i__1, &z__1, &a[a_offset], lda, &work[1], &c__1, &c_b2, + &a[(k + 1) * a_dim1 + 1], &c__1, (ftnlen)1); + i__1 = k + 1 + (k + 1) * a_dim1; + i__2 = k + 1 + (k + 1) * a_dim1; + i__3 = k - 1; + zdotc_(&z__2, &i__3, &work[1], &c__1, &a[(k + 1) * a_dim1 + 1], &c__1); + d__1 = z__2.r; + z__1.r = a[i__2].r - d__1, z__1.i = a[i__2].i; + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + } + kstep = 2; + } + kp = (i__1 = ipiv[k], abs(i__1)); + if (kp != k) { + i__1 = kp - 1; + zswap_(&i__1, &a[k * a_dim1 + 1], &c__1, &a[kp * a_dim1 + 1], &c__1); + i__1 = k - 1; + for (j = kp + 1; j <= i__1; ++j) { + d_lmp_cnjg(&z__1, &a[j + k * a_dim1]); + temp.r = z__1.r, temp.i = z__1.i; + i__2 = j + k * a_dim1; + d_lmp_cnjg(&z__1, &a[kp + j * a_dim1]); + a[i__2].r = z__1.r, a[i__2].i = z__1.i; + i__2 = kp + j * a_dim1; + a[i__2].r = temp.r, a[i__2].i = temp.i; + } + i__1 = kp + k * a_dim1; + d_lmp_cnjg(&z__1, &a[kp + k * a_dim1]); + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + i__1 = k + k * a_dim1; + temp.r = a[i__1].r, temp.i = a[i__1].i; + i__1 = k + k * a_dim1; + i__2 = 
kp + kp * a_dim1; + a[i__1].r = a[i__2].r, a[i__1].i = a[i__2].i; + i__1 = kp + kp * a_dim1; + a[i__1].r = temp.r, a[i__1].i = temp.i; + if (kstep == 2) { + i__1 = k + (k + 1) * a_dim1; + temp.r = a[i__1].r, temp.i = a[i__1].i; + i__1 = k + (k + 1) * a_dim1; + i__2 = kp + (k + 1) * a_dim1; + a[i__1].r = a[i__2].r, a[i__1].i = a[i__2].i; + i__1 = kp + (k + 1) * a_dim1; + a[i__1].r = temp.r, a[i__1].i = temp.i; + } + } + k += kstep; + goto L30; + L50:; + } else { + k = *n; + L60: + if (k < 1) { + goto L80; + } + if (ipiv[k] > 0) { + i__1 = k + k * a_dim1; + i__2 = k + k * a_dim1; + d__1 = 1. / a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; + if (k < *n) { + i__1 = *n - k; + zcopy_(&i__1, &a[k + 1 + k * a_dim1], &c__1, &work[1], &c__1); + i__1 = *n - k; + z__1.r = -1., z__1.i = -0.; + zhemv_(uplo, &i__1, &z__1, &a[k + 1 + (k + 1) * a_dim1], lda, &work[1], &c__1, + &c_b2, &a[k + 1 + k * a_dim1], &c__1, (ftnlen)1); + i__1 = k + k * a_dim1; + i__2 = k + k * a_dim1; + i__3 = *n - k; + zdotc_(&z__2, &i__3, &work[1], &c__1, &a[k + 1 + k * a_dim1], &c__1); + d__1 = z__2.r; + z__1.r = a[i__2].r - d__1, z__1.i = a[i__2].i; + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + } + kstep = 1; + } else { + t = z_lmp_abs(&a[k + (k - 1) * a_dim1]); + i__1 = k - 1 + (k - 1) * a_dim1; + ak = a[i__1].r / t; + i__1 = k + k * a_dim1; + akp1 = a[i__1].r / t; + i__1 = k + (k - 1) * a_dim1; + z__1.r = a[i__1].r / t, z__1.i = a[i__1].i / t; + akkp1.r = z__1.r, akkp1.i = z__1.i; + d__ = t * (ak * akp1 - 1.); + i__1 = k - 1 + (k - 1) * a_dim1; + d__1 = akp1 / d__; + a[i__1].r = d__1, a[i__1].i = 0.; + i__1 = k + k * a_dim1; + d__1 = ak / d__; + a[i__1].r = d__1, a[i__1].i = 0.; + i__1 = k + (k - 1) * a_dim1; + z__2.r = -akkp1.r, z__2.i = -akkp1.i; + z__1.r = z__2.r / d__, z__1.i = z__2.i / d__; + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + if (k < *n) { + i__1 = *n - k; + zcopy_(&i__1, &a[k + 1 + k * a_dim1], &c__1, &work[1], &c__1); + i__1 = *n - k; + z__1.r = -1., z__1.i = -0.; + zhemv_(uplo, &i__1, &z__1, &a[k + 1 + (k + 1) * a_dim1], lda, &work[1], &c__1, + &c_b2, &a[k + 1 + k * a_dim1], &c__1, (ftnlen)1); + i__1 = k + k * a_dim1; + i__2 = k + k * a_dim1; + i__3 = *n - k; + zdotc_(&z__2, &i__3, &work[1], &c__1, &a[k + 1 + k * a_dim1], &c__1); + d__1 = z__2.r; + z__1.r = a[i__2].r - d__1, z__1.i = a[i__2].i; + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + i__1 = k + (k - 1) * a_dim1; + i__2 = k + (k - 1) * a_dim1; + i__3 = *n - k; + zdotc_(&z__2, &i__3, &a[k + 1 + k * a_dim1], &c__1, &a[k + 1 + (k - 1) * a_dim1], + &c__1); + z__1.r = a[i__2].r - z__2.r, z__1.i = a[i__2].i - z__2.i; + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + i__1 = *n - k; + zcopy_(&i__1, &a[k + 1 + (k - 1) * a_dim1], &c__1, &work[1], &c__1); + i__1 = *n - k; + z__1.r = -1., z__1.i = -0.; + zhemv_(uplo, &i__1, &z__1, &a[k + 1 + (k + 1) * a_dim1], lda, &work[1], &c__1, + &c_b2, &a[k + 1 + (k - 1) * a_dim1], &c__1, (ftnlen)1); + i__1 = k - 1 + (k - 1) * a_dim1; + i__2 = k - 1 + (k - 1) * a_dim1; + i__3 = *n - k; + zdotc_(&z__2, &i__3, &work[1], &c__1, &a[k + 1 + (k - 1) * a_dim1], &c__1); + d__1 = z__2.r; + z__1.r = a[i__2].r - d__1, z__1.i = a[i__2].i; + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + } + kstep = 2; + } + kp = (i__1 = ipiv[k], abs(i__1)); + if (kp != k) { + if (kp < *n) { + i__1 = *n - kp; + zswap_(&i__1, &a[kp + 1 + k * a_dim1], &c__1, &a[kp + 1 + kp * a_dim1], &c__1); + } + i__1 = kp - 1; + for (j = k + 1; j <= i__1; ++j) { + d_lmp_cnjg(&z__1, &a[j + k * a_dim1]); + temp.r = z__1.r, temp.i = z__1.i; + i__2 = j + k * a_dim1; + d_lmp_cnjg(&z__1, &a[kp + 
j * a_dim1]); + a[i__2].r = z__1.r, a[i__2].i = z__1.i; + i__2 = kp + j * a_dim1; + a[i__2].r = temp.r, a[i__2].i = temp.i; + } + i__1 = kp + k * a_dim1; + d_lmp_cnjg(&z__1, &a[kp + k * a_dim1]); + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + i__1 = k + k * a_dim1; + temp.r = a[i__1].r, temp.i = a[i__1].i; + i__1 = k + k * a_dim1; + i__2 = kp + kp * a_dim1; + a[i__1].r = a[i__2].r, a[i__1].i = a[i__2].i; + i__1 = kp + kp * a_dim1; + a[i__1].r = temp.r, a[i__1].i = temp.i; + if (kstep == 2) { + i__1 = k + (k - 1) * a_dim1; + temp.r = a[i__1].r, temp.i = a[i__1].i; + i__1 = k + (k - 1) * a_dim1; + i__2 = kp + (k - 1) * a_dim1; + a[i__1].r = a[i__2].r, a[i__1].i = a[i__2].i; + i__1 = kp + (k - 1) * a_dim1; + a[i__1].r = temp.r, a[i__1].i = temp.i; + } + } + k -= kstep; + goto L60; + L80:; + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/zlahef.cpp b/lib/linalg/zlahef.cpp new file mode 100644 index 0000000000..9a18a455ea --- /dev/null +++ b/lib/linalg/zlahef.cpp @@ -0,0 +1,520 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static doublecomplex c_b1 = {1., 0.}; +static integer c__1 = 1; +int zlahef_(char *uplo, integer *n, integer *nb, integer *kb, doublecomplex *a, integer *lda, + integer *ipiv, doublecomplex *w, integer *ldw, integer *info, ftnlen uplo_len) +{ + integer a_dim1, a_offset, w_dim1, w_offset, i__1, i__2, i__3, i__4, i__5; + doublereal d__1, d__2, d__3, d__4; + doublecomplex z__1, z__2, z__3, z__4; + double sqrt(doublereal), d_lmp_imag(doublecomplex *); + void d_lmp_cnjg(doublecomplex *, doublecomplex *), + z_lmp_div(doublecomplex *, doublecomplex *, doublecomplex *); + integer j, k; + doublereal t, r1; + doublecomplex d11, d21, d22; + integer jb, jj, kk, jp, kp, kw, kkw, imax, jmax; + doublereal alpha; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern int zgemm_(char *, char *, integer *, integer *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, integer *, ftnlen, ftnlen); + integer kstep; + extern int zgemv_(char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, + ftnlen), + zcopy_(integer *, doublecomplex *, integer *, doublecomplex *, integer *), + zswap_(integer *, doublecomplex *, integer *, doublecomplex *, integer *); + doublereal absakk; + extern int zdscal_(integer *, doublereal *, doublecomplex *, integer *); + doublereal colmax; + extern int zlacgv_(integer *, doublecomplex *, integer *); + extern integer izamax_(integer *, doublecomplex *, integer *); + doublereal rowmax; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + w_dim1 = *ldw; + w_offset = 1 + w_dim1; + w -= w_offset; + *info = 0; + alpha = (sqrt(17.) + 1.) 
/ 8.; + if (lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1)) { + k = *n; + L10: + kw = *nb + k - *n; + if (k <= *n - *nb + 1 && *nb < *n || k < 1) { + goto L30; + } + kstep = 1; + i__1 = k - 1; + zcopy_(&i__1, &a[k * a_dim1 + 1], &c__1, &w[kw * w_dim1 + 1], &c__1); + i__1 = k + kw * w_dim1; + i__2 = k + k * a_dim1; + d__1 = a[i__2].r; + w[i__1].r = d__1, w[i__1].i = 0.; + if (k < *n) { + i__1 = *n - k; + z__1.r = -1., z__1.i = -0.; + zgemv_((char *)"No transpose", &k, &i__1, &z__1, &a[(k + 1) * a_dim1 + 1], lda, + &w[k + (kw + 1) * w_dim1], ldw, &c_b1, &w[kw * w_dim1 + 1], &c__1, (ftnlen)12); + i__1 = k + kw * w_dim1; + i__2 = k + kw * w_dim1; + d__1 = w[i__2].r; + w[i__1].r = d__1, w[i__1].i = 0.; + } + i__1 = k + kw * w_dim1; + absakk = (d__1 = w[i__1].r, abs(d__1)); + if (k > 1) { + i__1 = k - 1; + imax = izamax_(&i__1, &w[kw * w_dim1 + 1], &c__1); + i__1 = imax + kw * w_dim1; + colmax = + (d__1 = w[i__1].r, abs(d__1)) + (d__2 = d_lmp_imag(&w[imax + kw * w_dim1]), abs(d__2)); + } else { + colmax = 0.; + } + if (max(absakk, colmax) == 0.) { + if (*info == 0) { + *info = k; + } + kp = k; + i__1 = k + k * a_dim1; + i__2 = k + k * a_dim1; + d__1 = a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; + } else { + if (absakk >= alpha * colmax) { + kp = k; + } else { + i__1 = imax - 1; + zcopy_(&i__1, &a[imax * a_dim1 + 1], &c__1, &w[(kw - 1) * w_dim1 + 1], &c__1); + i__1 = imax + (kw - 1) * w_dim1; + i__2 = imax + imax * a_dim1; + d__1 = a[i__2].r; + w[i__1].r = d__1, w[i__1].i = 0.; + i__1 = k - imax; + zcopy_(&i__1, &a[imax + (imax + 1) * a_dim1], lda, &w[imax + 1 + (kw - 1) * w_dim1], + &c__1); + i__1 = k - imax; + zlacgv_(&i__1, &w[imax + 1 + (kw - 1) * w_dim1], &c__1); + if (k < *n) { + i__1 = *n - k; + z__1.r = -1., z__1.i = -0.; + zgemv_((char *)"No transpose", &k, &i__1, &z__1, &a[(k + 1) * a_dim1 + 1], lda, + &w[imax + (kw + 1) * w_dim1], ldw, &c_b1, &w[(kw - 1) * w_dim1 + 1], + &c__1, (ftnlen)12); + i__1 = imax + (kw - 1) * w_dim1; + i__2 = imax + (kw - 1) * w_dim1; + d__1 = w[i__2].r; + w[i__1].r = d__1, w[i__1].i = 0.; + } + i__1 = k - imax; + jmax = imax + izamax_(&i__1, &w[imax + 1 + (kw - 1) * w_dim1], &c__1); + i__1 = jmax + (kw - 1) * w_dim1; + rowmax = (d__1 = w[i__1].r, abs(d__1)) + + (d__2 = d_lmp_imag(&w[jmax + (kw - 1) * w_dim1]), abs(d__2)); + if (imax > 1) { + i__1 = imax - 1; + jmax = izamax_(&i__1, &w[(kw - 1) * w_dim1 + 1], &c__1); + i__1 = jmax + (kw - 1) * w_dim1; + d__3 = rowmax, d__4 = (d__1 = w[i__1].r, abs(d__1)) + + (d__2 = d_lmp_imag(&w[jmax + (kw - 1) * w_dim1]), abs(d__2)); + rowmax = max(d__3, d__4); + } + if (absakk >= alpha * colmax * (colmax / rowmax)) { + kp = k; + } else { + i__1 = imax + (kw - 1) * w_dim1; + if ((d__1 = w[i__1].r, abs(d__1)) >= alpha * rowmax) { + kp = imax; + zcopy_(&k, &w[(kw - 1) * w_dim1 + 1], &c__1, &w[kw * w_dim1 + 1], &c__1); + } else { + kp = imax; + kstep = 2; + } + } + } + kk = k - kstep + 1; + kkw = *nb + kk - *n; + if (kp != kk) { + i__1 = kp + kp * a_dim1; + i__2 = kk + kk * a_dim1; + d__1 = a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; + i__1 = kk - 1 - kp; + zcopy_(&i__1, &a[kp + 1 + kk * a_dim1], &c__1, &a[kp + (kp + 1) * a_dim1], lda); + i__1 = kk - 1 - kp; + zlacgv_(&i__1, &a[kp + (kp + 1) * a_dim1], lda); + if (kp > 1) { + i__1 = kp - 1; + zcopy_(&i__1, &a[kk * a_dim1 + 1], &c__1, &a[kp * a_dim1 + 1], &c__1); + } + if (k < *n) { + i__1 = *n - k; + zswap_(&i__1, &a[kk + (k + 1) * a_dim1], lda, &a[kp + (k + 1) * a_dim1], lda); + } + i__1 = *n - kk + 1; + zswap_(&i__1, &w[kk + kkw * w_dim1], ldw, &w[kp + kkw * w_dim1], 
ldw); + } + if (kstep == 1) { + zcopy_(&k, &w[kw * w_dim1 + 1], &c__1, &a[k * a_dim1 + 1], &c__1); + if (k > 1) { + i__1 = k + k * a_dim1; + r1 = 1. / a[i__1].r; + i__1 = k - 1; + zdscal_(&i__1, &r1, &a[k * a_dim1 + 1], &c__1); + i__1 = k - 1; + zlacgv_(&i__1, &w[kw * w_dim1 + 1], &c__1); + } + } else { + if (k > 2) { + i__1 = k - 1 + kw * w_dim1; + d21.r = w[i__1].r, d21.i = w[i__1].i; + d_lmp_cnjg(&z__2, &d21); + z_lmp_div(&z__1, &w[k + kw * w_dim1], &z__2); + d11.r = z__1.r, d11.i = z__1.i; + z_lmp_div(&z__1, &w[k - 1 + (kw - 1) * w_dim1], &d21); + d22.r = z__1.r, d22.i = z__1.i; + z__1.r = d11.r * d22.r - d11.i * d22.i, z__1.i = d11.r * d22.i + d11.i * d22.r; + t = 1. / (z__1.r - 1.); + z__2.r = t, z__2.i = 0.; + z_lmp_div(&z__1, &z__2, &d21); + d21.r = z__1.r, d21.i = z__1.i; + i__1 = k - 2; + for (j = 1; j <= i__1; ++j) { + i__2 = j + (k - 1) * a_dim1; + i__3 = j + (kw - 1) * w_dim1; + z__3.r = d11.r * w[i__3].r - d11.i * w[i__3].i, + z__3.i = d11.r * w[i__3].i + d11.i * w[i__3].r; + i__4 = j + kw * w_dim1; + z__2.r = z__3.r - w[i__4].r, z__2.i = z__3.i - w[i__4].i; + z__1.r = d21.r * z__2.r - d21.i * z__2.i, + z__1.i = d21.r * z__2.i + d21.i * z__2.r; + a[i__2].r = z__1.r, a[i__2].i = z__1.i; + i__2 = j + k * a_dim1; + d_lmp_cnjg(&z__2, &d21); + i__3 = j + kw * w_dim1; + z__4.r = d22.r * w[i__3].r - d22.i * w[i__3].i, + z__4.i = d22.r * w[i__3].i + d22.i * w[i__3].r; + i__4 = j + (kw - 1) * w_dim1; + z__3.r = z__4.r - w[i__4].r, z__3.i = z__4.i - w[i__4].i; + z__1.r = z__2.r * z__3.r - z__2.i * z__3.i, + z__1.i = z__2.r * z__3.i + z__2.i * z__3.r; + a[i__2].r = z__1.r, a[i__2].i = z__1.i; + } + } + i__1 = k - 1 + (k - 1) * a_dim1; + i__2 = k - 1 + (kw - 1) * w_dim1; + a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i; + i__1 = k - 1 + k * a_dim1; + i__2 = k - 1 + kw * w_dim1; + a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i; + i__1 = k + k * a_dim1; + i__2 = k + kw * w_dim1; + a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i; + i__1 = k - 1; + zlacgv_(&i__1, &w[kw * w_dim1 + 1], &c__1); + i__1 = k - 2; + zlacgv_(&i__1, &w[(kw - 1) * w_dim1 + 1], &c__1); + } + } + if (kstep == 1) { + ipiv[k] = kp; + } else { + ipiv[k] = -kp; + ipiv[k - 1] = -kp; + } + k -= kstep; + goto L10; + L30: + i__1 = -(*nb); + for (j = (k - 1) / *nb * *nb + 1; i__1 < 0 ? 
j >= 1 : j <= 1; j += i__1) { + i__2 = *nb, i__3 = k - j + 1; + jb = min(i__2, i__3); + i__2 = j + jb - 1; + for (jj = j; jj <= i__2; ++jj) { + i__3 = jj + jj * a_dim1; + i__4 = jj + jj * a_dim1; + d__1 = a[i__4].r; + a[i__3].r = d__1, a[i__3].i = 0.; + i__3 = jj - j + 1; + i__4 = *n - k; + z__1.r = -1., z__1.i = -0.; + zgemv_((char *)"No transpose", &i__3, &i__4, &z__1, &a[j + (k + 1) * a_dim1], lda, + &w[jj + (kw + 1) * w_dim1], ldw, &c_b1, &a[j + jj * a_dim1], &c__1, + (ftnlen)12); + i__3 = jj + jj * a_dim1; + i__4 = jj + jj * a_dim1; + d__1 = a[i__4].r; + a[i__3].r = d__1, a[i__3].i = 0.; + } + i__2 = j - 1; + i__3 = *n - k; + z__1.r = -1., z__1.i = -0.; + zgemm_((char *)"No transpose", (char *)"Transpose", &i__2, &jb, &i__3, &z__1, &a[(k + 1) * a_dim1 + 1], + lda, &w[j + (kw + 1) * w_dim1], ldw, &c_b1, &a[j * a_dim1 + 1], lda, (ftnlen)12, + (ftnlen)9); + } + j = k + 1; + L60: + jj = j; + jp = ipiv[j]; + if (jp < 0) { + jp = -jp; + ++j; + } + ++j; + if (jp != jj && j <= *n) { + i__1 = *n - j + 1; + zswap_(&i__1, &a[jp + j * a_dim1], lda, &a[jj + j * a_dim1], lda); + } + if (j < *n) { + goto L60; + } + *kb = *n - k; + } else { + k = 1; + L70: + if (k >= *nb && *nb < *n || k > *n) { + goto L90; + } + kstep = 1; + i__1 = k + k * w_dim1; + i__2 = k + k * a_dim1; + d__1 = a[i__2].r; + w[i__1].r = d__1, w[i__1].i = 0.; + if (k < *n) { + i__1 = *n - k; + zcopy_(&i__1, &a[k + 1 + k * a_dim1], &c__1, &w[k + 1 + k * w_dim1], &c__1); + } + i__1 = *n - k + 1; + i__2 = k - 1; + z__1.r = -1., z__1.i = -0.; + zgemv_((char *)"No transpose", &i__1, &i__2, &z__1, &a[k + a_dim1], lda, &w[k + w_dim1], ldw, &c_b1, + &w[k + k * w_dim1], &c__1, (ftnlen)12); + i__1 = k + k * w_dim1; + i__2 = k + k * w_dim1; + d__1 = w[i__2].r; + w[i__1].r = d__1, w[i__1].i = 0.; + i__1 = k + k * w_dim1; + absakk = (d__1 = w[i__1].r, abs(d__1)); + if (k < *n) { + i__1 = *n - k; + imax = k + izamax_(&i__1, &w[k + 1 + k * w_dim1], &c__1); + i__1 = imax + k * w_dim1; + colmax = + (d__1 = w[i__1].r, abs(d__1)) + (d__2 = d_lmp_imag(&w[imax + k * w_dim1]), abs(d__2)); + } else { + colmax = 0.; + } + if (max(absakk, colmax) == 0.) 
{ + if (*info == 0) { + *info = k; + } + kp = k; + i__1 = k + k * a_dim1; + i__2 = k + k * a_dim1; + d__1 = a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; + } else { + if (absakk >= alpha * colmax) { + kp = k; + } else { + i__1 = imax - k; + zcopy_(&i__1, &a[imax + k * a_dim1], lda, &w[k + (k + 1) * w_dim1], &c__1); + i__1 = imax - k; + zlacgv_(&i__1, &w[k + (k + 1) * w_dim1], &c__1); + i__1 = imax + (k + 1) * w_dim1; + i__2 = imax + imax * a_dim1; + d__1 = a[i__2].r; + w[i__1].r = d__1, w[i__1].i = 0.; + if (imax < *n) { + i__1 = *n - imax; + zcopy_(&i__1, &a[imax + 1 + imax * a_dim1], &c__1, + &w[imax + 1 + (k + 1) * w_dim1], &c__1); + } + i__1 = *n - k + 1; + i__2 = k - 1; + z__1.r = -1., z__1.i = -0.; + zgemv_((char *)"No transpose", &i__1, &i__2, &z__1, &a[k + a_dim1], lda, &w[imax + w_dim1], + ldw, &c_b1, &w[k + (k + 1) * w_dim1], &c__1, (ftnlen)12); + i__1 = imax + (k + 1) * w_dim1; + i__2 = imax + (k + 1) * w_dim1; + d__1 = w[i__2].r; + w[i__1].r = d__1, w[i__1].i = 0.; + i__1 = imax - k; + jmax = k - 1 + izamax_(&i__1, &w[k + (k + 1) * w_dim1], &c__1); + i__1 = jmax + (k + 1) * w_dim1; + rowmax = (d__1 = w[i__1].r, abs(d__1)) + + (d__2 = d_lmp_imag(&w[jmax + (k + 1) * w_dim1]), abs(d__2)); + if (imax < *n) { + i__1 = *n - imax; + jmax = imax + izamax_(&i__1, &w[imax + 1 + (k + 1) * w_dim1], &c__1); + i__1 = jmax + (k + 1) * w_dim1; + d__3 = rowmax, d__4 = (d__1 = w[i__1].r, abs(d__1)) + + (d__2 = d_lmp_imag(&w[jmax + (k + 1) * w_dim1]), abs(d__2)); + rowmax = max(d__3, d__4); + } + if (absakk >= alpha * colmax * (colmax / rowmax)) { + kp = k; + } else { + i__1 = imax + (k + 1) * w_dim1; + if ((d__1 = w[i__1].r, abs(d__1)) >= alpha * rowmax) { + kp = imax; + i__1 = *n - k + 1; + zcopy_(&i__1, &w[k + (k + 1) * w_dim1], &c__1, &w[k + k * w_dim1], &c__1); + } else { + kp = imax; + kstep = 2; + } + } + } + kk = k + kstep - 1; + if (kp != kk) { + i__1 = kp + kp * a_dim1; + i__2 = kk + kk * a_dim1; + d__1 = a[i__2].r; + a[i__1].r = d__1, a[i__1].i = 0.; + i__1 = kp - kk - 1; + zcopy_(&i__1, &a[kk + 1 + kk * a_dim1], &c__1, &a[kp + (kk + 1) * a_dim1], lda); + i__1 = kp - kk - 1; + zlacgv_(&i__1, &a[kp + (kk + 1) * a_dim1], lda); + if (kp < *n) { + i__1 = *n - kp; + zcopy_(&i__1, &a[kp + 1 + kk * a_dim1], &c__1, &a[kp + 1 + kp * a_dim1], &c__1); + } + if (k > 1) { + i__1 = k - 1; + zswap_(&i__1, &a[kk + a_dim1], lda, &a[kp + a_dim1], lda); + } + zswap_(&kk, &w[kk + w_dim1], ldw, &w[kp + w_dim1], ldw); + } + if (kstep == 1) { + i__1 = *n - k + 1; + zcopy_(&i__1, &w[k + k * w_dim1], &c__1, &a[k + k * a_dim1], &c__1); + if (k < *n) { + i__1 = k + k * a_dim1; + r1 = 1. / a[i__1].r; + i__1 = *n - k; + zdscal_(&i__1, &r1, &a[k + 1 + k * a_dim1], &c__1); + i__1 = *n - k; + zlacgv_(&i__1, &w[k + 1 + k * w_dim1], &c__1); + } + } else { + if (k < *n - 1) { + i__1 = k + 1 + k * w_dim1; + d21.r = w[i__1].r, d21.i = w[i__1].i; + z_lmp_div(&z__1, &w[k + 1 + (k + 1) * w_dim1], &d21); + d11.r = z__1.r, d11.i = z__1.i; + d_lmp_cnjg(&z__2, &d21); + z_lmp_div(&z__1, &w[k + k * w_dim1], &z__2); + d22.r = z__1.r, d22.i = z__1.i; + z__1.r = d11.r * d22.r - d11.i * d22.i, z__1.i = d11.r * d22.i + d11.i * d22.r; + t = 1. 
/ (z__1.r - 1.); + z__2.r = t, z__2.i = 0.; + z_lmp_div(&z__1, &z__2, &d21); + d21.r = z__1.r, d21.i = z__1.i; + i__1 = *n; + for (j = k + 2; j <= i__1; ++j) { + i__2 = j + k * a_dim1; + d_lmp_cnjg(&z__2, &d21); + i__3 = j + k * w_dim1; + z__4.r = d11.r * w[i__3].r - d11.i * w[i__3].i, + z__4.i = d11.r * w[i__3].i + d11.i * w[i__3].r; + i__4 = j + (k + 1) * w_dim1; + z__3.r = z__4.r - w[i__4].r, z__3.i = z__4.i - w[i__4].i; + z__1.r = z__2.r * z__3.r - z__2.i * z__3.i, + z__1.i = z__2.r * z__3.i + z__2.i * z__3.r; + a[i__2].r = z__1.r, a[i__2].i = z__1.i; + i__2 = j + (k + 1) * a_dim1; + i__3 = j + (k + 1) * w_dim1; + z__3.r = d22.r * w[i__3].r - d22.i * w[i__3].i, + z__3.i = d22.r * w[i__3].i + d22.i * w[i__3].r; + i__4 = j + k * w_dim1; + z__2.r = z__3.r - w[i__4].r, z__2.i = z__3.i - w[i__4].i; + z__1.r = d21.r * z__2.r - d21.i * z__2.i, + z__1.i = d21.r * z__2.i + d21.i * z__2.r; + a[i__2].r = z__1.r, a[i__2].i = z__1.i; + } + } + i__1 = k + k * a_dim1; + i__2 = k + k * w_dim1; + a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i; + i__1 = k + 1 + k * a_dim1; + i__2 = k + 1 + k * w_dim1; + a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i; + i__1 = k + 1 + (k + 1) * a_dim1; + i__2 = k + 1 + (k + 1) * w_dim1; + a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i; + i__1 = *n - k; + zlacgv_(&i__1, &w[k + 1 + k * w_dim1], &c__1); + i__1 = *n - k - 1; + zlacgv_(&i__1, &w[k + 2 + (k + 1) * w_dim1], &c__1); + } + } + if (kstep == 1) { + ipiv[k] = kp; + } else { + ipiv[k] = -kp; + ipiv[k + 1] = -kp; + } + k += kstep; + goto L70; + L90: + i__1 = *n; + i__2 = *nb; + for (j = k; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { + i__3 = *nb, i__4 = *n - j + 1; + jb = min(i__3, i__4); + i__3 = j + jb - 1; + for (jj = j; jj <= i__3; ++jj) { + i__4 = jj + jj * a_dim1; + i__5 = jj + jj * a_dim1; + d__1 = a[i__5].r; + a[i__4].r = d__1, a[i__4].i = 0.; + i__4 = j + jb - jj; + i__5 = k - 1; + z__1.r = -1., z__1.i = -0.; + zgemv_((char *)"No transpose", &i__4, &i__5, &z__1, &a[jj + a_dim1], lda, &w[jj + w_dim1], + ldw, &c_b1, &a[jj + jj * a_dim1], &c__1, (ftnlen)12); + i__4 = jj + jj * a_dim1; + i__5 = jj + jj * a_dim1; + d__1 = a[i__5].r; + a[i__4].r = d__1, a[i__4].i = 0.; + } + if (j + jb <= *n) { + i__3 = *n - j - jb + 1; + i__4 = k - 1; + z__1.r = -1., z__1.i = -0.; + zgemm_((char *)"No transpose", (char *)"Transpose", &i__3, &jb, &i__4, &z__1, &a[j + jb + a_dim1], + lda, &w[j + w_dim1], ldw, &c_b1, &a[j + jb + j * a_dim1], lda, (ftnlen)12, + (ftnlen)9); + } + } + j = k - 1; + L120: + jj = j; + jp = ipiv[j]; + if (jp < 0) { + jp = -jp; + --j; + } + --j; + if (jp != jj && j >= 1) { + zswap_(&j, &a[jp + a_dim1], lda, &a[jj + a_dim1], lda); + } + if (j > 1) { + goto L120; + } + *kb = k - 1; + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/zlaswp.cpp b/lib/linalg/zlaswp.cpp new file mode 100644 index 0000000000..40e941ffa5 --- /dev/null +++ b/lib/linalg/zlaswp.cpp @@ -0,0 +1,79 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +int zlaswp_(integer *n, doublecomplex *a, integer *lda, integer *k1, integer *k2, integer *ipiv, + integer *incx) +{ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6; + integer i__, j, k, i1, i2, n32, ip, ix, ix0, inc; + doublecomplex temp; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + if (*incx > 0) { + ix0 = *k1; + i1 = *k1; + i2 = *k2; + inc = 1; + } else if (*incx < 0) { + ix0 = *k1 + (*k1 - *k2) * *incx; + i1 = *k2; + i2 = *k1; + inc = -1; + } else { + return 0; + } + n32 = *n / 32 << 5; + if (n32 != 0) { + i__1 = 
n32; + for (j = 1; j <= i__1; j += 32) { + ix = ix0; + i__2 = i2; + i__3 = inc; + for (i__ = i1; i__3 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__3) { + ip = ipiv[ix]; + if (ip != i__) { + i__4 = j + 31; + for (k = j; k <= i__4; ++k) { + i__5 = i__ + k * a_dim1; + temp.r = a[i__5].r, temp.i = a[i__5].i; + i__5 = i__ + k * a_dim1; + i__6 = ip + k * a_dim1; + a[i__5].r = a[i__6].r, a[i__5].i = a[i__6].i; + i__5 = ip + k * a_dim1; + a[i__5].r = temp.r, a[i__5].i = temp.i; + } + } + ix += *incx; + } + } + } + if (n32 != *n) { + ++n32; + ix = ix0; + i__1 = i2; + i__3 = inc; + for (i__ = i1; i__3 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__3) { + ip = ipiv[ix]; + if (ip != i__) { + i__2 = *n; + for (k = n32; k <= i__2; ++k) { + i__4 = i__ + k * a_dim1; + temp.r = a[i__4].r, temp.i = a[i__4].i; + i__4 = i__ + k * a_dim1; + i__5 = ip + k * a_dim1; + a[i__4].r = a[i__5].r, a[i__4].i = a[i__5].i; + i__4 = ip + k * a_dim1; + a[i__4].r = temp.r, a[i__4].i = temp.i; + } + } + ix += *incx; + } + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/zlasyf.cpp b/lib/linalg/zlasyf.cpp new file mode 100644 index 0000000000..2823d173de --- /dev/null +++ b/lib/linalg/zlasyf.cpp @@ -0,0 +1,431 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static doublecomplex c_b1 = {1., 0.}; +static integer c__1 = 1; +int zlasyf_(char *uplo, integer *n, integer *nb, integer *kb, doublecomplex *a, integer *lda, + integer *ipiv, doublecomplex *w, integer *ldw, integer *info, ftnlen uplo_len) +{ + integer a_dim1, a_offset, w_dim1, w_offset, i__1, i__2, i__3, i__4, i__5; + doublereal d__1, d__2, d__3, d__4; + doublecomplex z__1, z__2, z__3; + double sqrt(doublereal), d_lmp_imag(doublecomplex *); + void z_lmp_div(doublecomplex *, doublecomplex *, doublecomplex *); + integer j, k; + doublecomplex t, r1, d11, d21, d22; + integer jb, jj, kk, jp, kp, kw, kkw, imax, jmax; + doublereal alpha; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern int zscal_(integer *, doublecomplex *, doublecomplex *, integer *), + zgemm_(char *, char *, integer *, integer *, integer *, doublecomplex *, doublecomplex *, + integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, + ftnlen, ftnlen); + integer kstep; + extern int zgemv_(char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, + ftnlen), + zcopy_(integer *, doublecomplex *, integer *, doublecomplex *, integer *), + zswap_(integer *, doublecomplex *, integer *, doublecomplex *, integer *); + doublereal absakk, colmax; + extern integer izamax_(integer *, doublecomplex *, integer *); + doublereal rowmax; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + w_dim1 = *ldw; + w_offset = 1 + w_dim1; + w -= w_offset; + *info = 0; + alpha = (sqrt(17.) + 1.) 
/ 8.; + if (lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1)) { + k = *n; + L10: + kw = *nb + k - *n; + if (k <= *n - *nb + 1 && *nb < *n || k < 1) { + goto L30; + } + zcopy_(&k, &a[k * a_dim1 + 1], &c__1, &w[kw * w_dim1 + 1], &c__1); + if (k < *n) { + i__1 = *n - k; + z__1.r = -1., z__1.i = -0.; + zgemv_((char *)"No transpose", &k, &i__1, &z__1, &a[(k + 1) * a_dim1 + 1], lda, + &w[k + (kw + 1) * w_dim1], ldw, &c_b1, &w[kw * w_dim1 + 1], &c__1, (ftnlen)12); + } + kstep = 1; + i__1 = k + kw * w_dim1; + absakk = (d__1 = w[i__1].r, abs(d__1)) + (d__2 = d_lmp_imag(&w[k + kw * w_dim1]), abs(d__2)); + if (k > 1) { + i__1 = k - 1; + imax = izamax_(&i__1, &w[kw * w_dim1 + 1], &c__1); + i__1 = imax + kw * w_dim1; + colmax = + (d__1 = w[i__1].r, abs(d__1)) + (d__2 = d_lmp_imag(&w[imax + kw * w_dim1]), abs(d__2)); + } else { + colmax = 0.; + } + if (max(absakk, colmax) == 0.) { + if (*info == 0) { + *info = k; + } + kp = k; + } else { + if (absakk >= alpha * colmax) { + kp = k; + } else { + zcopy_(&imax, &a[imax * a_dim1 + 1], &c__1, &w[(kw - 1) * w_dim1 + 1], &c__1); + i__1 = k - imax; + zcopy_(&i__1, &a[imax + (imax + 1) * a_dim1], lda, &w[imax + 1 + (kw - 1) * w_dim1], + &c__1); + if (k < *n) { + i__1 = *n - k; + z__1.r = -1., z__1.i = -0.; + zgemv_((char *)"No transpose", &k, &i__1, &z__1, &a[(k + 1) * a_dim1 + 1], lda, + &w[imax + (kw + 1) * w_dim1], ldw, &c_b1, &w[(kw - 1) * w_dim1 + 1], + &c__1, (ftnlen)12); + } + i__1 = k - imax; + jmax = imax + izamax_(&i__1, &w[imax + 1 + (kw - 1) * w_dim1], &c__1); + i__1 = jmax + (kw - 1) * w_dim1; + rowmax = (d__1 = w[i__1].r, abs(d__1)) + + (d__2 = d_lmp_imag(&w[jmax + (kw - 1) * w_dim1]), abs(d__2)); + if (imax > 1) { + i__1 = imax - 1; + jmax = izamax_(&i__1, &w[(kw - 1) * w_dim1 + 1], &c__1); + i__1 = jmax + (kw - 1) * w_dim1; + d__3 = rowmax, d__4 = (d__1 = w[i__1].r, abs(d__1)) + + (d__2 = d_lmp_imag(&w[jmax + (kw - 1) * w_dim1]), abs(d__2)); + rowmax = max(d__3, d__4); + } + if (absakk >= alpha * colmax * (colmax / rowmax)) { + kp = k; + } else { + i__1 = imax + (kw - 1) * w_dim1; + if ((d__1 = w[i__1].r, abs(d__1)) + + (d__2 = d_lmp_imag(&w[imax + (kw - 1) * w_dim1]), abs(d__2)) >= + alpha * rowmax) { + kp = imax; + zcopy_(&k, &w[(kw - 1) * w_dim1 + 1], &c__1, &w[kw * w_dim1 + 1], &c__1); + } else { + kp = imax; + kstep = 2; + } + } + } + kk = k - kstep + 1; + kkw = *nb + kk - *n; + if (kp != kk) { + i__1 = kp + kp * a_dim1; + i__2 = kk + kk * a_dim1; + a[i__1].r = a[i__2].r, a[i__1].i = a[i__2].i; + i__1 = kk - 1 - kp; + zcopy_(&i__1, &a[kp + 1 + kk * a_dim1], &c__1, &a[kp + (kp + 1) * a_dim1], lda); + if (kp > 1) { + i__1 = kp - 1; + zcopy_(&i__1, &a[kk * a_dim1 + 1], &c__1, &a[kp * a_dim1 + 1], &c__1); + } + if (k < *n) { + i__1 = *n - k; + zswap_(&i__1, &a[kk + (k + 1) * a_dim1], lda, &a[kp + (k + 1) * a_dim1], lda); + } + i__1 = *n - kk + 1; + zswap_(&i__1, &w[kk + kkw * w_dim1], ldw, &w[kp + kkw * w_dim1], ldw); + } + if (kstep == 1) { + zcopy_(&k, &w[kw * w_dim1 + 1], &c__1, &a[k * a_dim1 + 1], &c__1); + z_lmp_div(&z__1, &c_b1, &a[k + k * a_dim1]); + r1.r = z__1.r, r1.i = z__1.i; + i__1 = k - 1; + zscal_(&i__1, &r1, &a[k * a_dim1 + 1], &c__1); + } else { + if (k > 2) { + i__1 = k - 1 + kw * w_dim1; + d21.r = w[i__1].r, d21.i = w[i__1].i; + z_lmp_div(&z__1, &w[k + kw * w_dim1], &d21); + d11.r = z__1.r, d11.i = z__1.i; + z_lmp_div(&z__1, &w[k - 1 + (kw - 1) * w_dim1], &d21); + d22.r = z__1.r, d22.i = z__1.i; + z__3.r = d11.r * d22.r - d11.i * d22.i, z__3.i = d11.r * d22.i + d11.i * d22.r; + z__2.r = z__3.r - 1., z__2.i = z__3.i - 0.; + 
z_lmp_div(&z__1, &c_b1, &z__2); + t.r = z__1.r, t.i = z__1.i; + z_lmp_div(&z__1, &t, &d21); + d21.r = z__1.r, d21.i = z__1.i; + i__1 = k - 2; + for (j = 1; j <= i__1; ++j) { + i__2 = j + (k - 1) * a_dim1; + i__3 = j + (kw - 1) * w_dim1; + z__3.r = d11.r * w[i__3].r - d11.i * w[i__3].i, + z__3.i = d11.r * w[i__3].i + d11.i * w[i__3].r; + i__4 = j + kw * w_dim1; + z__2.r = z__3.r - w[i__4].r, z__2.i = z__3.i - w[i__4].i; + z__1.r = d21.r * z__2.r - d21.i * z__2.i, + z__1.i = d21.r * z__2.i + d21.i * z__2.r; + a[i__2].r = z__1.r, a[i__2].i = z__1.i; + i__2 = j + k * a_dim1; + i__3 = j + kw * w_dim1; + z__3.r = d22.r * w[i__3].r - d22.i * w[i__3].i, + z__3.i = d22.r * w[i__3].i + d22.i * w[i__3].r; + i__4 = j + (kw - 1) * w_dim1; + z__2.r = z__3.r - w[i__4].r, z__2.i = z__3.i - w[i__4].i; + z__1.r = d21.r * z__2.r - d21.i * z__2.i, + z__1.i = d21.r * z__2.i + d21.i * z__2.r; + a[i__2].r = z__1.r, a[i__2].i = z__1.i; + } + } + i__1 = k - 1 + (k - 1) * a_dim1; + i__2 = k - 1 + (kw - 1) * w_dim1; + a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i; + i__1 = k - 1 + k * a_dim1; + i__2 = k - 1 + kw * w_dim1; + a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i; + i__1 = k + k * a_dim1; + i__2 = k + kw * w_dim1; + a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i; + } + } + if (kstep == 1) { + ipiv[k] = kp; + } else { + ipiv[k] = -kp; + ipiv[k - 1] = -kp; + } + k -= kstep; + goto L10; + L30: + i__1 = -(*nb); + for (j = (k - 1) / *nb * *nb + 1; i__1 < 0 ? j >= 1 : j <= 1; j += i__1) { + i__2 = *nb, i__3 = k - j + 1; + jb = min(i__2, i__3); + i__2 = j + jb - 1; + for (jj = j; jj <= i__2; ++jj) { + i__3 = jj - j + 1; + i__4 = *n - k; + z__1.r = -1., z__1.i = -0.; + zgemv_((char *)"No transpose", &i__3, &i__4, &z__1, &a[j + (k + 1) * a_dim1], lda, + &w[jj + (kw + 1) * w_dim1], ldw, &c_b1, &a[j + jj * a_dim1], &c__1, + (ftnlen)12); + } + i__2 = j - 1; + i__3 = *n - k; + z__1.r = -1., z__1.i = -0.; + zgemm_((char *)"No transpose", (char *)"Transpose", &i__2, &jb, &i__3, &z__1, &a[(k + 1) * a_dim1 + 1], + lda, &w[j + (kw + 1) * w_dim1], ldw, &c_b1, &a[j * a_dim1 + 1], lda, (ftnlen)12, + (ftnlen)9); + } + j = k + 1; + L60: + jj = j; + jp = ipiv[j]; + if (jp < 0) { + jp = -jp; + ++j; + } + ++j; + if (jp != jj && j <= *n) { + i__1 = *n - j + 1; + zswap_(&i__1, &a[jp + j * a_dim1], lda, &a[jj + j * a_dim1], lda); + } + if (j < *n) { + goto L60; + } + *kb = *n - k; + } else { + k = 1; + L70: + if (k >= *nb && *nb < *n || k > *n) { + goto L90; + } + i__1 = *n - k + 1; + zcopy_(&i__1, &a[k + k * a_dim1], &c__1, &w[k + k * w_dim1], &c__1); + i__1 = *n - k + 1; + i__2 = k - 1; + z__1.r = -1., z__1.i = -0.; + zgemv_((char *)"No transpose", &i__1, &i__2, &z__1, &a[k + a_dim1], lda, &w[k + w_dim1], ldw, &c_b1, + &w[k + k * w_dim1], &c__1, (ftnlen)12); + kstep = 1; + i__1 = k + k * w_dim1; + absakk = (d__1 = w[i__1].r, abs(d__1)) + (d__2 = d_lmp_imag(&w[k + k * w_dim1]), abs(d__2)); + if (k < *n) { + i__1 = *n - k; + imax = k + izamax_(&i__1, &w[k + 1 + k * w_dim1], &c__1); + i__1 = imax + k * w_dim1; + colmax = + (d__1 = w[i__1].r, abs(d__1)) + (d__2 = d_lmp_imag(&w[imax + k * w_dim1]), abs(d__2)); + } else { + colmax = 0.; + } + if (max(absakk, colmax) == 0.) 
{ + if (*info == 0) { + *info = k; + } + kp = k; + } else { + if (absakk >= alpha * colmax) { + kp = k; + } else { + i__1 = imax - k; + zcopy_(&i__1, &a[imax + k * a_dim1], lda, &w[k + (k + 1) * w_dim1], &c__1); + i__1 = *n - imax + 1; + zcopy_(&i__1, &a[imax + imax * a_dim1], &c__1, &w[imax + (k + 1) * w_dim1], &c__1); + i__1 = *n - k + 1; + i__2 = k - 1; + z__1.r = -1., z__1.i = -0.; + zgemv_((char *)"No transpose", &i__1, &i__2, &z__1, &a[k + a_dim1], lda, &w[imax + w_dim1], + ldw, &c_b1, &w[k + (k + 1) * w_dim1], &c__1, (ftnlen)12); + i__1 = imax - k; + jmax = k - 1 + izamax_(&i__1, &w[k + (k + 1) * w_dim1], &c__1); + i__1 = jmax + (k + 1) * w_dim1; + rowmax = (d__1 = w[i__1].r, abs(d__1)) + + (d__2 = d_lmp_imag(&w[jmax + (k + 1) * w_dim1]), abs(d__2)); + if (imax < *n) { + i__1 = *n - imax; + jmax = imax + izamax_(&i__1, &w[imax + 1 + (k + 1) * w_dim1], &c__1); + i__1 = jmax + (k + 1) * w_dim1; + d__3 = rowmax, d__4 = (d__1 = w[i__1].r, abs(d__1)) + + (d__2 = d_lmp_imag(&w[jmax + (k + 1) * w_dim1]), abs(d__2)); + rowmax = max(d__3, d__4); + } + if (absakk >= alpha * colmax * (colmax / rowmax)) { + kp = k; + } else { + i__1 = imax + (k + 1) * w_dim1; + if ((d__1 = w[i__1].r, abs(d__1)) + + (d__2 = d_lmp_imag(&w[imax + (k + 1) * w_dim1]), abs(d__2)) >= + alpha * rowmax) { + kp = imax; + i__1 = *n - k + 1; + zcopy_(&i__1, &w[k + (k + 1) * w_dim1], &c__1, &w[k + k * w_dim1], &c__1); + } else { + kp = imax; + kstep = 2; + } + } + } + kk = k + kstep - 1; + if (kp != kk) { + i__1 = kp + kp * a_dim1; + i__2 = kk + kk * a_dim1; + a[i__1].r = a[i__2].r, a[i__1].i = a[i__2].i; + i__1 = kp - kk - 1; + zcopy_(&i__1, &a[kk + 1 + kk * a_dim1], &c__1, &a[kp + (kk + 1) * a_dim1], lda); + if (kp < *n) { + i__1 = *n - kp; + zcopy_(&i__1, &a[kp + 1 + kk * a_dim1], &c__1, &a[kp + 1 + kp * a_dim1], &c__1); + } + if (k > 1) { + i__1 = k - 1; + zswap_(&i__1, &a[kk + a_dim1], lda, &a[kp + a_dim1], lda); + } + zswap_(&kk, &w[kk + w_dim1], ldw, &w[kp + w_dim1], ldw); + } + if (kstep == 1) { + i__1 = *n - k + 1; + zcopy_(&i__1, &w[k + k * w_dim1], &c__1, &a[k + k * a_dim1], &c__1); + if (k < *n) { + z_lmp_div(&z__1, &c_b1, &a[k + k * a_dim1]); + r1.r = z__1.r, r1.i = z__1.i; + i__1 = *n - k; + zscal_(&i__1, &r1, &a[k + 1 + k * a_dim1], &c__1); + } + } else { + if (k < *n - 1) { + i__1 = k + 1 + k * w_dim1; + d21.r = w[i__1].r, d21.i = w[i__1].i; + z_lmp_div(&z__1, &w[k + 1 + (k + 1) * w_dim1], &d21); + d11.r = z__1.r, d11.i = z__1.i; + z_lmp_div(&z__1, &w[k + k * w_dim1], &d21); + d22.r = z__1.r, d22.i = z__1.i; + z__3.r = d11.r * d22.r - d11.i * d22.i, z__3.i = d11.r * d22.i + d11.i * d22.r; + z__2.r = z__3.r - 1., z__2.i = z__3.i - 0.; + z_lmp_div(&z__1, &c_b1, &z__2); + t.r = z__1.r, t.i = z__1.i; + z_lmp_div(&z__1, &t, &d21); + d21.r = z__1.r, d21.i = z__1.i; + i__1 = *n; + for (j = k + 2; j <= i__1; ++j) { + i__2 = j + k * a_dim1; + i__3 = j + k * w_dim1; + z__3.r = d11.r * w[i__3].r - d11.i * w[i__3].i, + z__3.i = d11.r * w[i__3].i + d11.i * w[i__3].r; + i__4 = j + (k + 1) * w_dim1; + z__2.r = z__3.r - w[i__4].r, z__2.i = z__3.i - w[i__4].i; + z__1.r = d21.r * z__2.r - d21.i * z__2.i, + z__1.i = d21.r * z__2.i + d21.i * z__2.r; + a[i__2].r = z__1.r, a[i__2].i = z__1.i; + i__2 = j + (k + 1) * a_dim1; + i__3 = j + (k + 1) * w_dim1; + z__3.r = d22.r * w[i__3].r - d22.i * w[i__3].i, + z__3.i = d22.r * w[i__3].i + d22.i * w[i__3].r; + i__4 = j + k * w_dim1; + z__2.r = z__3.r - w[i__4].r, z__2.i = z__3.i - w[i__4].i; + z__1.r = d21.r * z__2.r - d21.i * z__2.i, + z__1.i = d21.r * z__2.i + d21.i * z__2.r; + 
a[i__2].r = z__1.r, a[i__2].i = z__1.i; + } + } + i__1 = k + k * a_dim1; + i__2 = k + k * w_dim1; + a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i; + i__1 = k + 1 + k * a_dim1; + i__2 = k + 1 + k * w_dim1; + a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i; + i__1 = k + 1 + (k + 1) * a_dim1; + i__2 = k + 1 + (k + 1) * w_dim1; + a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i; + } + } + if (kstep == 1) { + ipiv[k] = kp; + } else { + ipiv[k] = -kp; + ipiv[k + 1] = -kp; + } + k += kstep; + goto L70; + L90: + i__1 = *n; + i__2 = *nb; + for (j = k; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { + i__3 = *nb, i__4 = *n - j + 1; + jb = min(i__3, i__4); + i__3 = j + jb - 1; + for (jj = j; jj <= i__3; ++jj) { + i__4 = j + jb - jj; + i__5 = k - 1; + z__1.r = -1., z__1.i = -0.; + zgemv_((char *)"No transpose", &i__4, &i__5, &z__1, &a[jj + a_dim1], lda, &w[jj + w_dim1], + ldw, &c_b1, &a[jj + jj * a_dim1], &c__1, (ftnlen)12); + } + if (j + jb <= *n) { + i__3 = *n - j - jb + 1; + i__4 = k - 1; + z__1.r = -1., z__1.i = -0.; + zgemm_((char *)"No transpose", (char *)"Transpose", &i__3, &jb, &i__4, &z__1, &a[j + jb + a_dim1], + lda, &w[j + w_dim1], ldw, &c_b1, &a[j + jb + j * a_dim1], lda, (ftnlen)12, + (ftnlen)9); + } + } + j = k - 1; + L120: + jj = j; + jp = ipiv[j]; + if (jp < 0) { + jp = -jp; + --j; + } + --j; + if (jp != jj && j >= 1) { + zswap_(&j, &a[jp + a_dim1], lda, &a[jj + a_dim1], lda); + } + if (j > 1) { + goto L120; + } + *kb = k - 1; + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/zlauu2.cpp b/lib/linalg/zlauu2.cpp new file mode 100644 index 0000000000..2e92542d49 --- /dev/null +++ b/lib/linalg/zlauu2.cpp @@ -0,0 +1,100 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static doublecomplex c_b1 = {1., 0.}; +static integer c__1 = 1; +int zlauu2_(char *uplo, integer *n, doublecomplex *a, integer *lda, integer *info, ftnlen uplo_len) +{ + integer a_dim1, a_offset, i__1, i__2, i__3; + doublereal d__1; + doublecomplex z__1; + integer i__; + doublereal aii; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern VOID zdotc_(doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, + integer *); + extern int zgemv_(char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, + ftnlen); + logical upper; + extern int xerbla_(char *, integer *, ftnlen), + zdscal_(integer *, doublereal *, doublecomplex *, integer *), + zlacgv_(integer *, doublecomplex *, integer *); + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + *info = 0; + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + if (!upper && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1, *n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"ZLAUU2", &i__1, (ftnlen)6); + return 0; + } + if (*n == 0) { + return 0; + } + if (upper) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + i__ * a_dim1; + aii = a[i__2].r; + if (i__ < *n) { + i__2 = i__ + i__ * a_dim1; + i__3 = *n - i__; + zdotc_(&z__1, &i__3, &a[i__ + (i__ + 1) * a_dim1], lda, + &a[i__ + (i__ + 1) * a_dim1], lda); + d__1 = aii * aii + z__1.r; + a[i__2].r = d__1, a[i__2].i = 0.; + i__2 = *n - i__; + zlacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda); + i__2 = i__ - 1; + i__3 = *n - i__; + z__1.r = aii, z__1.i = 0.; + zgemv_((char *)"No transpose", &i__2, &i__3, &c_b1, &a[(i__ + 1) * a_dim1 + 1], lda, + 
&a[i__ + (i__ + 1) * a_dim1], lda, &z__1, &a[i__ * a_dim1 + 1], &c__1, + (ftnlen)12); + i__2 = *n - i__; + zlacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda); + } else { + zdscal_(&i__, &aii, &a[i__ * a_dim1 + 1], &c__1); + } + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + i__ * a_dim1; + aii = a[i__2].r; + if (i__ < *n) { + i__2 = i__ + i__ * a_dim1; + i__3 = *n - i__; + zdotc_(&z__1, &i__3, &a[i__ + 1 + i__ * a_dim1], &c__1, &a[i__ + 1 + i__ * a_dim1], + &c__1); + d__1 = aii * aii + z__1.r; + a[i__2].r = d__1, a[i__2].i = 0.; + i__2 = i__ - 1; + zlacgv_(&i__2, &a[i__ + a_dim1], lda); + i__2 = *n - i__; + i__3 = i__ - 1; + z__1.r = aii, z__1.i = 0.; + zgemv_((char *)"Conjugate transpose", &i__2, &i__3, &c_b1, &a[i__ + 1 + a_dim1], lda, + &a[i__ + 1 + i__ * a_dim1], &c__1, &z__1, &a[i__ + a_dim1], lda, (ftnlen)19); + i__2 = i__ - 1; + zlacgv_(&i__2, &a[i__ + a_dim1], lda); + } else { + zdscal_(&i__, &aii, &a[i__ + a_dim1], lda); + } + } + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/zlauum.cpp b/lib/linalg/zlauum.cpp new file mode 100644 index 0000000000..e61268ec3d --- /dev/null +++ b/lib/linalg/zlauum.cpp @@ -0,0 +1,103 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static doublecomplex c_b1 = {1., 0.}; +static integer c__1 = 1; +static integer c_n1 = -1; +static doublereal c_b21 = 1.; +int zlauum_(char *uplo, integer *n, doublecomplex *a, integer *lda, integer *info, ftnlen uplo_len) +{ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + integer i__, ib, nb; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern int zgemm_(char *, char *, integer *, integer *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, integer *, ftnlen, ftnlen), + zherk_(char *, char *, integer *, integer *, doublereal *, doublecomplex *, integer *, + doublereal *, doublecomplex *, integer *, ftnlen, ftnlen); + logical upper; + extern int ztrmm_(char *, char *, char *, char *, integer *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, ftnlen, ftnlen, + ftnlen, ftnlen), + zlauu2_(char *, integer *, doublecomplex *, integer *, integer *, ftnlen), + xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, + ftnlen, ftnlen); + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + *info = 0; + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + if (!upper && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1, *n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"ZLAUUM", &i__1, (ftnlen)6); + return 0; + } + if (*n == 0) { + return 0; + } + nb = ilaenv_(&c__1, (char *)"ZLAUUM", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + if (nb <= 1 || nb >= *n) { + zlauu2_(uplo, n, &a[a_offset], lda, info, (ftnlen)1); + } else { + if (upper) { + i__1 = *n; + i__2 = nb; + for (i__ = 1; i__2 < 0 ? 
i__ >= i__1 : i__ <= i__1; i__ += i__2) { + i__3 = nb, i__4 = *n - i__ + 1; + ib = min(i__3, i__4); + i__3 = i__ - 1; + ztrmm_((char *)"Right", (char *)"Upper", (char *)"Conjugate transpose", (char *)"Non-unit", &i__3, &ib, &c_b1, + &a[i__ + i__ * a_dim1], lda, &a[i__ * a_dim1 + 1], lda, (ftnlen)5, (ftnlen)5, + (ftnlen)19, (ftnlen)8); + zlauu2_((char *)"Upper", &ib, &a[i__ + i__ * a_dim1], lda, info, (ftnlen)5); + if (i__ + ib <= *n) { + i__3 = i__ - 1; + i__4 = *n - i__ - ib + 1; + zgemm_((char *)"No transpose", (char *)"Conjugate transpose", &i__3, &ib, &i__4, &c_b1, + &a[(i__ + ib) * a_dim1 + 1], lda, &a[i__ + (i__ + ib) * a_dim1], lda, + &c_b1, &a[i__ * a_dim1 + 1], lda, (ftnlen)12, (ftnlen)19); + i__3 = *n - i__ - ib + 1; + zherk_((char *)"Upper", (char *)"No transpose", &ib, &i__3, &c_b21, + &a[i__ + (i__ + ib) * a_dim1], lda, &c_b21, &a[i__ + i__ * a_dim1], lda, + (ftnlen)5, (ftnlen)12); + } + } + } else { + i__2 = *n; + i__1 = nb; + for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) { + i__3 = nb, i__4 = *n - i__ + 1; + ib = min(i__3, i__4); + i__3 = i__ - 1; + ztrmm_((char *)"Left", (char *)"Lower", (char *)"Conjugate transpose", (char *)"Non-unit", &ib, &i__3, &c_b1, + &a[i__ + i__ * a_dim1], lda, &a[i__ + a_dim1], lda, (ftnlen)4, (ftnlen)5, + (ftnlen)19, (ftnlen)8); + zlauu2_((char *)"Lower", &ib, &a[i__ + i__ * a_dim1], lda, info, (ftnlen)5); + if (i__ + ib <= *n) { + i__3 = i__ - 1; + i__4 = *n - i__ - ib + 1; + zgemm_((char *)"Conjugate transpose", (char *)"No transpose", &ib, &i__3, &i__4, &c_b1, + &a[i__ + ib + i__ * a_dim1], lda, &a[i__ + ib + a_dim1], lda, &c_b1, + &a[i__ + a_dim1], lda, (ftnlen)19, (ftnlen)12); + i__3 = *n - i__ - ib + 1; + zherk_((char *)"Lower", (char *)"Conjugate transpose", &ib, &i__3, &c_b21, + &a[i__ + ib + i__ * a_dim1], lda, &c_b21, &a[i__ + i__ * a_dim1], lda, + (ftnlen)5, (ftnlen)19); + } + } + } + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/zpotrf.cpp b/lib/linalg/zpotrf.cpp new file mode 100644 index 0000000000..5679af9ca9 --- /dev/null +++ b/lib/linalg/zpotrf.cpp @@ -0,0 +1,115 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static doublecomplex c_b1 = {1., 0.}; +static integer c__1 = 1; +static integer c_n1 = -1; +static doublereal c_b14 = -1.; +static doublereal c_b15 = 1.; +int zpotrf_(char *uplo, integer *n, doublecomplex *a, integer *lda, integer *info, ftnlen uplo_len) +{ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + doublecomplex z__1; + integer j, jb, nb; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern int zgemm_(char *, char *, integer *, integer *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, integer *, ftnlen, ftnlen), + zherk_(char *, char *, integer *, integer *, doublereal *, doublecomplex *, integer *, + doublereal *, doublecomplex *, integer *, ftnlen, ftnlen); + logical upper; + extern int ztrsm_(char *, char *, char *, char *, integer *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, ftnlen, ftnlen, + ftnlen, ftnlen), + xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, + ftnlen, ftnlen); + extern int zpotrf2_(char *, integer *, doublecomplex *, integer *, integer *, ftnlen); + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + *info = 0; + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + if (!upper && !lsame_(uplo, (char 
*)"L", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1, *n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"ZPOTRF", &i__1, (ftnlen)6); + return 0; + } + if (*n == 0) { + return 0; + } + nb = ilaenv_(&c__1, (char *)"ZPOTRF", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + if (nb <= 1 || nb >= *n) { + zpotrf2_(uplo, n, &a[a_offset], lda, info, (ftnlen)1); + } else { + if (upper) { + i__1 = *n; + i__2 = nb; + for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { + i__3 = nb, i__4 = *n - j + 1; + jb = min(i__3, i__4); + i__3 = j - 1; + zherk_((char *)"Upper", (char *)"Conjugate transpose", &jb, &i__3, &c_b14, &a[j * a_dim1 + 1], lda, + &c_b15, &a[j + j * a_dim1], lda, (ftnlen)5, (ftnlen)19); + zpotrf2_((char *)"Upper", &jb, &a[j + j * a_dim1], lda, info, (ftnlen)5); + if (*info != 0) { + goto L30; + } + if (j + jb <= *n) { + i__3 = *n - j - jb + 1; + i__4 = j - 1; + z__1.r = -1., z__1.i = -0.; + zgemm_((char *)"Conjugate transpose", (char *)"No transpose", &jb, &i__3, &i__4, &z__1, + &a[j * a_dim1 + 1], lda, &a[(j + jb) * a_dim1 + 1], lda, &c_b1, + &a[j + (j + jb) * a_dim1], lda, (ftnlen)19, (ftnlen)12); + i__3 = *n - j - jb + 1; + ztrsm_((char *)"Left", (char *)"Upper", (char *)"Conjugate transpose", (char *)"Non-unit", &jb, &i__3, &c_b1, + &a[j + j * a_dim1], lda, &a[j + (j + jb) * a_dim1], lda, (ftnlen)4, + (ftnlen)5, (ftnlen)19, (ftnlen)8); + } + } + } else { + i__2 = *n; + i__1 = nb; + for (j = 1; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) { + i__3 = nb, i__4 = *n - j + 1; + jb = min(i__3, i__4); + i__3 = j - 1; + zherk_((char *)"Lower", (char *)"No transpose", &jb, &i__3, &c_b14, &a[j + a_dim1], lda, &c_b15, + &a[j + j * a_dim1], lda, (ftnlen)5, (ftnlen)12); + zpotrf2_((char *)"Lower", &jb, &a[j + j * a_dim1], lda, info, (ftnlen)5); + if (*info != 0) { + goto L30; + } + if (j + jb <= *n) { + i__3 = *n - j - jb + 1; + i__4 = j - 1; + z__1.r = -1., z__1.i = -0.; + zgemm_((char *)"No transpose", (char *)"Conjugate transpose", &i__3, &jb, &i__4, &z__1, + &a[j + jb + a_dim1], lda, &a[j + a_dim1], lda, &c_b1, + &a[j + jb + j * a_dim1], lda, (ftnlen)12, (ftnlen)19); + i__3 = *n - j - jb + 1; + ztrsm_((char *)"Right", (char *)"Lower", (char *)"Conjugate transpose", (char *)"Non-unit", &i__3, &jb, &c_b1, + &a[j + j * a_dim1], lda, &a[j + jb + j * a_dim1], lda, (ftnlen)5, + (ftnlen)5, (ftnlen)19, (ftnlen)8); + } + } + } + } + goto L40; +L30: + *info = *info + j - 1; +L40: + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/zpotrf2.cpp b/lib/linalg/zpotrf2.cpp new file mode 100644 index 0000000000..262ea15497 --- /dev/null +++ b/lib/linalg/zpotrf2.cpp @@ -0,0 +1,89 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static doublecomplex c_b1 = {1., 0.}; +static doublereal c_b11 = -1.; +static doublereal c_b12 = 1.; +int zpotrf2_(char *uplo, integer *n, doublecomplex *a, integer *lda, integer *info, ftnlen uplo_len) +{ + integer a_dim1, a_offset, i__1; + doublereal d__1; + double sqrt(doublereal); + integer n1, n2; + doublereal ajj; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + integer iinfo; + extern int zherk_(char *, char *, integer *, integer *, doublereal *, doublecomplex *, + integer *, doublereal *, doublecomplex *, integer *, ftnlen, ftnlen); + logical upper; + extern int ztrsm_(char *, char *, char *, char *, integer *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, ftnlen, ftnlen, + ftnlen, ftnlen); + 
extern logical disnan_(doublereal *); + extern int xerbla_(char *, integer *, ftnlen); + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + *info = 0; + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + if (!upper && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1, *n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"ZPOTRF2", &i__1, (ftnlen)7); + return 0; + } + if (*n == 0) { + return 0; + } + if (*n == 1) { + i__1 = a_dim1 + 1; + ajj = a[i__1].r; + if (ajj <= 0. || disnan_(&ajj)) { + *info = 1; + return 0; + } + i__1 = a_dim1 + 1; + d__1 = sqrt(ajj); + a[i__1].r = d__1, a[i__1].i = 0.; + } else { + n1 = *n / 2; + n2 = *n - n1; + zpotrf2_(uplo, &n1, &a[a_dim1 + 1], lda, &iinfo, (ftnlen)1); + if (iinfo != 0) { + *info = iinfo; + return 0; + } + if (upper) { + ztrsm_((char *)"L", (char *)"U", (char *)"C", (char *)"N", &n1, &n2, &c_b1, &a[a_dim1 + 1], lda, + &a[(n1 + 1) * a_dim1 + 1], lda, (ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); + zherk_(uplo, (char *)"C", &n2, &n1, &c_b11, &a[(n1 + 1) * a_dim1 + 1], lda, &c_b12, + &a[n1 + 1 + (n1 + 1) * a_dim1], lda, (ftnlen)1, (ftnlen)1); + zpotrf2_(uplo, &n2, &a[n1 + 1 + (n1 + 1) * a_dim1], lda, &iinfo, (ftnlen)1); + if (iinfo != 0) { + *info = iinfo + n1; + return 0; + } + } else { + ztrsm_((char *)"R", (char *)"L", (char *)"C", (char *)"N", &n2, &n1, &c_b1, &a[a_dim1 + 1], lda, &a[n1 + 1 + a_dim1], + lda, (ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); + zherk_(uplo, (char *)"N", &n2, &n1, &c_b11, &a[n1 + 1 + a_dim1], lda, &c_b12, + &a[n1 + 1 + (n1 + 1) * a_dim1], lda, (ftnlen)1, (ftnlen)1); + zpotrf2_(uplo, &n2, &a[n1 + 1 + (n1 + 1) * a_dim1], lda, &iinfo, (ftnlen)1); + if (iinfo != 0) { + *info = iinfo + n1; + return 0; + } + } + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/zpotri.cpp b/lib/linalg/zpotri.cpp new file mode 100644 index 0000000000..a13f6fde5c --- /dev/null +++ b/lib/linalg/zpotri.cpp @@ -0,0 +1,40 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +int zpotri_(char *uplo, integer *n, doublecomplex *a, integer *lda, integer *info, ftnlen uplo_len) +{ + integer a_dim1, a_offset, i__1; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern int xerbla_(char *, integer *, ftnlen), + zlauum_(char *, integer *, doublecomplex *, integer *, integer *, ftnlen), + ztrtri_(char *, char *, integer *, doublecomplex *, integer *, integer *, ftnlen, ftnlen); + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + *info = 0; + if (!lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1) && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1, *n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"ZPOTRI", &i__1, (ftnlen)6); + return 0; + } + if (*n == 0) { + return 0; + } + ztrtri_(uplo, (char *)"Non-unit", n, &a[a_offset], lda, info, (ftnlen)1, (ftnlen)8); + if (*info > 0) { + return 0; + } + zlauum_(uplo, n, &a[a_offset], lda, info, (ftnlen)1); + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/zsymv.cpp b/lib/linalg/zsymv.cpp new file mode 100644 index 0000000000..73e956493b --- /dev/null +++ b/lib/linalg/zsymv.cpp @@ -0,0 +1,263 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +int zsymv_(char *uplo, integer *n, doublecomplex *alpha, doublecomplex *a, integer *lda, + doublecomplex *x, integer *incx, doublecomplex *beta, 
doublecomplex *y, integer *incy, + ftnlen uplo_len) +{ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + doublecomplex z__1, z__2, z__3, z__4; + integer i__, j, ix, iy, jx, jy, kx, ky, info; + doublecomplex temp1, temp2; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern int xerbla_(char *, integer *, ftnlen); + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --x; + --y; + info = 0; + if (!lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1) && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + info = 1; + } else if (*n < 0) { + info = 2; + } else if (*lda < max(1, *n)) { + info = 5; + } else if (*incx == 0) { + info = 7; + } else if (*incy == 0) { + info = 10; + } + if (info != 0) { + xerbla_((char *)"ZSYMV ", &info, (ftnlen)6); + return 0; + } + if (*n == 0 || alpha->r == 0. && alpha->i == 0. && (beta->r == 1. && beta->i == 0.)) { + return 0; + } + if (*incx > 0) { + kx = 1; + } else { + kx = 1 - (*n - 1) * *incx; + } + if (*incy > 0) { + ky = 1; + } else { + ky = 1 - (*n - 1) * *incy; + } + if (beta->r != 1. || beta->i != 0.) { + if (*incy == 1) { + if (beta->r == 0. && beta->i == 0.) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + y[i__2].r = 0., y[i__2].i = 0.; + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + i__3 = i__; + z__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, + z__1.i = beta->r * y[i__3].i + beta->i * y[i__3].r; + y[i__2].r = z__1.r, y[i__2].i = z__1.i; + } + } + } else { + iy = ky; + if (beta->r == 0. && beta->i == 0.) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = iy; + y[i__2].r = 0., y[i__2].i = 0.; + iy += *incy; + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = iy; + i__3 = iy; + z__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, + z__1.i = beta->r * y[i__3].i + beta->i * y[i__3].r; + y[i__2].r = z__1.r, y[i__2].i = z__1.i; + iy += *incy; + } + } + } + } + if (alpha->r == 0. && alpha->i == 0.) 
{ + return 0; + } + if (lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1)) { + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, + z__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2].r; + temp1.r = z__1.r, temp1.i = z__1.i; + temp2.r = 0., temp2.i = 0.; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__; + i__4 = i__; + i__5 = i__ + j * a_dim1; + z__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, + z__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5].r; + z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i; + y[i__3].r = z__1.r, y[i__3].i = z__1.i; + i__3 = i__ + j * a_dim1; + i__4 = i__; + z__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[i__4].i, + z__2.i = a[i__3].r * x[i__4].i + a[i__3].i * x[i__4].r; + z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i; + temp2.r = z__1.r, temp2.i = z__1.i; + } + i__2 = j; + i__3 = j; + i__4 = j + j * a_dim1; + z__3.r = temp1.r * a[i__4].r - temp1.i * a[i__4].i, + z__3.i = temp1.r * a[i__4].i + temp1.i * a[i__4].r; + z__2.r = y[i__3].r + z__3.r, z__2.i = y[i__3].i + z__3.i; + z__4.r = alpha->r * temp2.r - alpha->i * temp2.i, + z__4.i = alpha->r * temp2.i + alpha->i * temp2.r; + z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i; + y[i__2].r = z__1.r, y[i__2].i = z__1.i; + } + } else { + jx = kx; + jy = ky; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jx; + z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, + z__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2].r; + temp1.r = z__1.r, temp1.i = z__1.i; + temp2.r = 0., temp2.i = 0.; + ix = kx; + iy = ky; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = iy; + i__4 = iy; + i__5 = i__ + j * a_dim1; + z__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, + z__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5].r; + z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i; + y[i__3].r = z__1.r, y[i__3].i = z__1.i; + i__3 = i__ + j * a_dim1; + i__4 = ix; + z__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[i__4].i, + z__2.i = a[i__3].r * x[i__4].i + a[i__3].i * x[i__4].r; + z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i; + temp2.r = z__1.r, temp2.i = z__1.i; + ix += *incx; + iy += *incy; + } + i__2 = jy; + i__3 = jy; + i__4 = j + j * a_dim1; + z__3.r = temp1.r * a[i__4].r - temp1.i * a[i__4].i, + z__3.i = temp1.r * a[i__4].i + temp1.i * a[i__4].r; + z__2.r = y[i__3].r + z__3.r, z__2.i = y[i__3].i + z__3.i; + z__4.r = alpha->r * temp2.r - alpha->i * temp2.i, + z__4.i = alpha->r * temp2.i + alpha->i * temp2.r; + z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i; + y[i__2].r = z__1.r, y[i__2].i = z__1.i; + jx += *incx; + jy += *incy; + } + } + } else { + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, + z__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2].r; + temp1.r = z__1.r, temp1.i = z__1.i; + temp2.r = 0., temp2.i = 0.; + i__2 = j; + i__3 = j; + i__4 = j + j * a_dim1; + z__2.r = temp1.r * a[i__4].r - temp1.i * a[i__4].i, + z__2.i = temp1.r * a[i__4].i + temp1.i * a[i__4].r; + z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i; + y[i__2].r = z__1.r, y[i__2].i = z__1.i; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + i__3 = i__; + i__4 = i__; + i__5 = i__ + j * a_dim1; + z__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, + z__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5].r; + z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i; + y[i__3].r = z__1.r, y[i__3].i = z__1.i; + 
i__3 = i__ + j * a_dim1; + i__4 = i__; + z__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[i__4].i, + z__2.i = a[i__3].r * x[i__4].i + a[i__3].i * x[i__4].r; + z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i; + temp2.r = z__1.r, temp2.i = z__1.i; + } + i__2 = j; + i__3 = j; + z__2.r = alpha->r * temp2.r - alpha->i * temp2.i, + z__2.i = alpha->r * temp2.i + alpha->i * temp2.r; + z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i; + y[i__2].r = z__1.r, y[i__2].i = z__1.i; + } + } else { + jx = kx; + jy = ky; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jx; + z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, + z__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2].r; + temp1.r = z__1.r, temp1.i = z__1.i; + temp2.r = 0., temp2.i = 0.; + i__2 = jy; + i__3 = jy; + i__4 = j + j * a_dim1; + z__2.r = temp1.r * a[i__4].r - temp1.i * a[i__4].i, + z__2.i = temp1.r * a[i__4].i + temp1.i * a[i__4].r; + z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i; + y[i__2].r = z__1.r, y[i__2].i = z__1.i; + ix = jx; + iy = jy; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + ix += *incx; + iy += *incy; + i__3 = iy; + i__4 = iy; + i__5 = i__ + j * a_dim1; + z__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, + z__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5].r; + z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i; + y[i__3].r = z__1.r, y[i__3].i = z__1.i; + i__3 = i__ + j * a_dim1; + i__4 = ix; + z__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[i__4].i, + z__2.i = a[i__3].r * x[i__4].i + a[i__3].i * x[i__4].r; + z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i; + temp2.r = z__1.r, temp2.i = z__1.i; + } + i__2 = jy; + i__3 = jy; + z__2.r = alpha->r * temp2.r - alpha->i * temp2.i, + z__2.i = alpha->r * temp2.i + alpha->i * temp2.r; + z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i; + y[i__2].r = z__1.r, y[i__2].i = z__1.i; + jx += *incx; + jy += *incy; + } + } + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/zsyr.cpp b/lib/linalg/zsyr.cpp new file mode 100644 index 0000000000..5e79f28d94 --- /dev/null +++ b/lib/linalg/zsyr.cpp @@ -0,0 +1,141 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +int zsyr_(char *uplo, integer *n, doublecomplex *alpha, doublecomplex *x, integer *incx, + doublecomplex *a, integer *lda, ftnlen uplo_len) +{ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + doublecomplex z__1, z__2; + integer i__, j, ix, jx, kx, info; + doublecomplex temp; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern int xerbla_(char *, integer *, ftnlen); + --x; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + info = 0; + if (!lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1) && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + info = 1; + } else if (*n < 0) { + info = 2; + } else if (*incx == 0) { + info = 5; + } else if (*lda < max(1, *n)) { + info = 7; + } + if (info != 0) { + xerbla_((char *)"ZSYR ", &info, (ftnlen)6); + return 0; + } + if (*n == 0 || alpha->r == 0. && alpha->i == 0.) { + return 0; + } + if (*incx <= 0) { + kx = 1 - (*n - 1) * *incx; + } else if (*incx != 1) { + kx = 1; + } + if (lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1)) { + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + if (x[i__2].r != 0. || x[i__2].i != 0.) 
{ + i__2 = j; + z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, + z__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2].r; + temp.r = z__1.r, temp.i = z__1.i; + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + i__5 = i__; + z__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, + z__2.i = x[i__5].r * temp.i + x[i__5].i * temp.r; + z__1.r = a[i__4].r + z__2.r, z__1.i = a[i__4].i + z__2.i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; + } + } + } + } else { + jx = kx; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jx; + if (x[i__2].r != 0. || x[i__2].i != 0.) { + i__2 = jx; + z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, + z__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2].r; + temp.r = z__1.r, temp.i = z__1.i; + ix = kx; + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + i__5 = ix; + z__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, + z__2.i = x[i__5].r * temp.i + x[i__5].i * temp.r; + z__1.r = a[i__4].r + z__2.r, z__1.i = a[i__4].i + z__2.i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; + ix += *incx; + } + } + jx += *incx; + } + } + } else { + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + if (x[i__2].r != 0. || x[i__2].i != 0.) { + i__2 = j; + z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, + z__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2].r; + temp.r = z__1.r, temp.i = z__1.i; + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + i__5 = i__; + z__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, + z__2.i = x[i__5].r * temp.i + x[i__5].i * temp.r; + z__1.r = a[i__4].r + z__2.r, z__1.i = a[i__4].i + z__2.i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; + } + } + } + } else { + jx = kx; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jx; + if (x[i__2].r != 0. || x[i__2].i != 0.) 
{ + i__2 = jx; + z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, + z__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2].r; + temp.r = z__1.r, temp.i = z__1.i; + ix = jx; + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + i__5 = ix; + z__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, + z__2.i = x[i__5].r * temp.i + x[i__5].i * temp.r; + z__1.r = a[i__4].r + z__2.r, z__1.i = a[i__4].i + z__2.i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; + ix += *incx; + } + } + jx += *incx; + } + } + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/zsytf2.cpp b/lib/linalg/zsytf2.cpp new file mode 100644 index 0000000000..bce7b51f1d --- /dev/null +++ b/lib/linalg/zsytf2.cpp @@ -0,0 +1,356 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static doublecomplex c_b1 = {1., 0.}; +static integer c__1 = 1; +int zsytf2_(char *uplo, integer *n, doublecomplex *a, integer *lda, integer *ipiv, integer *info, + ftnlen uplo_len) +{ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6; + doublereal d__1, d__2, d__3, d__4; + doublecomplex z__1, z__2, z__3, z__4; + double sqrt(doublereal), d_lmp_imag(doublecomplex *); + void z_lmp_div(doublecomplex *, doublecomplex *, doublecomplex *); + integer i__, j, k; + doublecomplex t, r1, d11, d12, d21, d22; + integer kk, kp; + doublecomplex wk, wkm1, wkp1; + integer imax, jmax; + extern int zsyr_(char *, integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *, ftnlen); + doublereal alpha; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern int zscal_(integer *, doublecomplex *, doublecomplex *, integer *); + integer kstep; + logical upper; + extern int zswap_(integer *, doublecomplex *, integer *, doublecomplex *, integer *); + doublereal absakk; + extern logical disnan_(doublereal *); + extern int xerbla_(char *, integer *, ftnlen); + doublereal colmax; + extern integer izamax_(integer *, doublecomplex *, integer *); + doublereal rowmax; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + *info = 0; + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + if (!upper && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1, *n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"ZSYTF2", &i__1, (ftnlen)6); + return 0; + } + alpha = (sqrt(17.) + 1.) / 8.; + if (upper) { + k = *n; + L10: + if (k < 1) { + goto L70; + } + kstep = 1; + i__1 = k + k * a_dim1; + absakk = (d__1 = a[i__1].r, abs(d__1)) + (d__2 = d_lmp_imag(&a[k + k * a_dim1]), abs(d__2)); + if (k > 1) { + i__1 = k - 1; + imax = izamax_(&i__1, &a[k * a_dim1 + 1], &c__1); + i__1 = imax + k * a_dim1; + colmax = + (d__1 = a[i__1].r, abs(d__1)) + (d__2 = d_lmp_imag(&a[imax + k * a_dim1]), abs(d__2)); + } else { + colmax = 0.; + } + if (max(absakk, colmax) == 0. 
|| disnan_(&absakk)) { + if (*info == 0) { + *info = k; + } + kp = k; + } else { + if (absakk >= alpha * colmax) { + kp = k; + } else { + i__1 = k - imax; + jmax = imax + izamax_(&i__1, &a[imax + (imax + 1) * a_dim1], lda); + i__1 = imax + jmax * a_dim1; + rowmax = (d__1 = a[i__1].r, abs(d__1)) + + (d__2 = d_lmp_imag(&a[imax + jmax * a_dim1]), abs(d__2)); + if (imax > 1) { + i__1 = imax - 1; + jmax = izamax_(&i__1, &a[imax * a_dim1 + 1], &c__1); + i__1 = jmax + imax * a_dim1; + d__3 = rowmax, d__4 = (d__1 = a[i__1].r, abs(d__1)) + + (d__2 = d_lmp_imag(&a[jmax + imax * a_dim1]), abs(d__2)); + rowmax = max(d__3, d__4); + } + if (absakk >= alpha * colmax * (colmax / rowmax)) { + kp = k; + } else { + i__1 = imax + imax * a_dim1; + if ((d__1 = a[i__1].r, abs(d__1)) + + (d__2 = d_lmp_imag(&a[imax + imax * a_dim1]), abs(d__2)) >= + alpha * rowmax) { + kp = imax; + } else { + kp = imax; + kstep = 2; + } + } + } + kk = k - kstep + 1; + if (kp != kk) { + i__1 = kp - 1; + zswap_(&i__1, &a[kk * a_dim1 + 1], &c__1, &a[kp * a_dim1 + 1], &c__1); + i__1 = kk - kp - 1; + zswap_(&i__1, &a[kp + 1 + kk * a_dim1], &c__1, &a[kp + (kp + 1) * a_dim1], lda); + i__1 = kk + kk * a_dim1; + t.r = a[i__1].r, t.i = a[i__1].i; + i__1 = kk + kk * a_dim1; + i__2 = kp + kp * a_dim1; + a[i__1].r = a[i__2].r, a[i__1].i = a[i__2].i; + i__1 = kp + kp * a_dim1; + a[i__1].r = t.r, a[i__1].i = t.i; + if (kstep == 2) { + i__1 = k - 1 + k * a_dim1; + t.r = a[i__1].r, t.i = a[i__1].i; + i__1 = k - 1 + k * a_dim1; + i__2 = kp + k * a_dim1; + a[i__1].r = a[i__2].r, a[i__1].i = a[i__2].i; + i__1 = kp + k * a_dim1; + a[i__1].r = t.r, a[i__1].i = t.i; + } + } + if (kstep == 1) { + z_lmp_div(&z__1, &c_b1, &a[k + k * a_dim1]); + r1.r = z__1.r, r1.i = z__1.i; + i__1 = k - 1; + z__1.r = -r1.r, z__1.i = -r1.i; + zsyr_(uplo, &i__1, &z__1, &a[k * a_dim1 + 1], &c__1, &a[a_offset], lda, (ftnlen)1); + i__1 = k - 1; + zscal_(&i__1, &r1, &a[k * a_dim1 + 1], &c__1); + } else { + if (k > 2) { + i__1 = k - 1 + k * a_dim1; + d12.r = a[i__1].r, d12.i = a[i__1].i; + z_lmp_div(&z__1, &a[k - 1 + (k - 1) * a_dim1], &d12); + d22.r = z__1.r, d22.i = z__1.i; + z_lmp_div(&z__1, &a[k + k * a_dim1], &d12); + d11.r = z__1.r, d11.i = z__1.i; + z__3.r = d11.r * d22.r - d11.i * d22.i, z__3.i = d11.r * d22.i + d11.i * d22.r; + z__2.r = z__3.r - 1., z__2.i = z__3.i - 0.; + z_lmp_div(&z__1, &c_b1, &z__2); + t.r = z__1.r, t.i = z__1.i; + z_lmp_div(&z__1, &t, &d12); + d12.r = z__1.r, d12.i = z__1.i; + for (j = k - 2; j >= 1; --j) { + i__1 = j + (k - 1) * a_dim1; + z__3.r = d11.r * a[i__1].r - d11.i * a[i__1].i, + z__3.i = d11.r * a[i__1].i + d11.i * a[i__1].r; + i__2 = j + k * a_dim1; + z__2.r = z__3.r - a[i__2].r, z__2.i = z__3.i - a[i__2].i; + z__1.r = d12.r * z__2.r - d12.i * z__2.i, + z__1.i = d12.r * z__2.i + d12.i * z__2.r; + wkm1.r = z__1.r, wkm1.i = z__1.i; + i__1 = j + k * a_dim1; + z__3.r = d22.r * a[i__1].r - d22.i * a[i__1].i, + z__3.i = d22.r * a[i__1].i + d22.i * a[i__1].r; + i__2 = j + (k - 1) * a_dim1; + z__2.r = z__3.r - a[i__2].r, z__2.i = z__3.i - a[i__2].i; + z__1.r = d12.r * z__2.r - d12.i * z__2.i, + z__1.i = d12.r * z__2.i + d12.i * z__2.r; + wk.r = z__1.r, wk.i = z__1.i; + for (i__ = j; i__ >= 1; --i__) { + i__1 = i__ + j * a_dim1; + i__2 = i__ + j * a_dim1; + i__3 = i__ + k * a_dim1; + z__3.r = a[i__3].r * wk.r - a[i__3].i * wk.i, + z__3.i = a[i__3].r * wk.i + a[i__3].i * wk.r; + z__2.r = a[i__2].r - z__3.r, z__2.i = a[i__2].i - z__3.i; + i__4 = i__ + (k - 1) * a_dim1; + z__4.r = a[i__4].r * wkm1.r - a[i__4].i * wkm1.i, + z__4.i = a[i__4].r * wkm1.i 
+ a[i__4].i * wkm1.r; + z__1.r = z__2.r - z__4.r, z__1.i = z__2.i - z__4.i; + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + } + i__1 = j + k * a_dim1; + a[i__1].r = wk.r, a[i__1].i = wk.i; + i__1 = j + (k - 1) * a_dim1; + a[i__1].r = wkm1.r, a[i__1].i = wkm1.i; + } + } + } + } + if (kstep == 1) { + ipiv[k] = kp; + } else { + ipiv[k] = -kp; + ipiv[k - 1] = -kp; + } + k -= kstep; + goto L10; + } else { + k = 1; + L40: + if (k > *n) { + goto L70; + } + kstep = 1; + i__1 = k + k * a_dim1; + absakk = (d__1 = a[i__1].r, abs(d__1)) + (d__2 = d_lmp_imag(&a[k + k * a_dim1]), abs(d__2)); + if (k < *n) { + i__1 = *n - k; + imax = k + izamax_(&i__1, &a[k + 1 + k * a_dim1], &c__1); + i__1 = imax + k * a_dim1; + colmax = + (d__1 = a[i__1].r, abs(d__1)) + (d__2 = d_lmp_imag(&a[imax + k * a_dim1]), abs(d__2)); + } else { + colmax = 0.; + } + if (max(absakk, colmax) == 0. || disnan_(&absakk)) { + if (*info == 0) { + *info = k; + } + kp = k; + } else { + if (absakk >= alpha * colmax) { + kp = k; + } else { + i__1 = imax - k; + jmax = k - 1 + izamax_(&i__1, &a[imax + k * a_dim1], lda); + i__1 = imax + jmax * a_dim1; + rowmax = (d__1 = a[i__1].r, abs(d__1)) + + (d__2 = d_lmp_imag(&a[imax + jmax * a_dim1]), abs(d__2)); + if (imax < *n) { + i__1 = *n - imax; + jmax = imax + izamax_(&i__1, &a[imax + 1 + imax * a_dim1], &c__1); + i__1 = jmax + imax * a_dim1; + d__3 = rowmax, d__4 = (d__1 = a[i__1].r, abs(d__1)) + + (d__2 = d_lmp_imag(&a[jmax + imax * a_dim1]), abs(d__2)); + rowmax = max(d__3, d__4); + } + if (absakk >= alpha * colmax * (colmax / rowmax)) { + kp = k; + } else { + i__1 = imax + imax * a_dim1; + if ((d__1 = a[i__1].r, abs(d__1)) + + (d__2 = d_lmp_imag(&a[imax + imax * a_dim1]), abs(d__2)) >= + alpha * rowmax) { + kp = imax; + } else { + kp = imax; + kstep = 2; + } + } + } + kk = k + kstep - 1; + if (kp != kk) { + if (kp < *n) { + i__1 = *n - kp; + zswap_(&i__1, &a[kp + 1 + kk * a_dim1], &c__1, &a[kp + 1 + kp * a_dim1], &c__1); + } + i__1 = kp - kk - 1; + zswap_(&i__1, &a[kk + 1 + kk * a_dim1], &c__1, &a[kp + (kk + 1) * a_dim1], lda); + i__1 = kk + kk * a_dim1; + t.r = a[i__1].r, t.i = a[i__1].i; + i__1 = kk + kk * a_dim1; + i__2 = kp + kp * a_dim1; + a[i__1].r = a[i__2].r, a[i__1].i = a[i__2].i; + i__1 = kp + kp * a_dim1; + a[i__1].r = t.r, a[i__1].i = t.i; + if (kstep == 2) { + i__1 = k + 1 + k * a_dim1; + t.r = a[i__1].r, t.i = a[i__1].i; + i__1 = k + 1 + k * a_dim1; + i__2 = kp + k * a_dim1; + a[i__1].r = a[i__2].r, a[i__1].i = a[i__2].i; + i__1 = kp + k * a_dim1; + a[i__1].r = t.r, a[i__1].i = t.i; + } + } + if (kstep == 1) { + if (k < *n) { + z_lmp_div(&z__1, &c_b1, &a[k + k * a_dim1]); + r1.r = z__1.r, r1.i = z__1.i; + i__1 = *n - k; + z__1.r = -r1.r, z__1.i = -r1.i; + zsyr_(uplo, &i__1, &z__1, &a[k + 1 + k * a_dim1], &c__1, + &a[k + 1 + (k + 1) * a_dim1], lda, (ftnlen)1); + i__1 = *n - k; + zscal_(&i__1, &r1, &a[k + 1 + k * a_dim1], &c__1); + } + } else { + if (k < *n - 1) { + i__1 = k + 1 + k * a_dim1; + d21.r = a[i__1].r, d21.i = a[i__1].i; + z_lmp_div(&z__1, &a[k + 1 + (k + 1) * a_dim1], &d21); + d11.r = z__1.r, d11.i = z__1.i; + z_lmp_div(&z__1, &a[k + k * a_dim1], &d21); + d22.r = z__1.r, d22.i = z__1.i; + z__3.r = d11.r * d22.r - d11.i * d22.i, z__3.i = d11.r * d22.i + d11.i * d22.r; + z__2.r = z__3.r - 1., z__2.i = z__3.i - 0.; + z_lmp_div(&z__1, &c_b1, &z__2); + t.r = z__1.r, t.i = z__1.i; + z_lmp_div(&z__1, &t, &d21); + d21.r = z__1.r, d21.i = z__1.i; + i__1 = *n; + for (j = k + 2; j <= i__1; ++j) { + i__2 = j + k * a_dim1; + z__3.r = d11.r * a[i__2].r - d11.i * a[i__2].i, + z__3.i = 
d11.r * a[i__2].i + d11.i * a[i__2].r; + i__3 = j + (k + 1) * a_dim1; + z__2.r = z__3.r - a[i__3].r, z__2.i = z__3.i - a[i__3].i; + z__1.r = d21.r * z__2.r - d21.i * z__2.i, + z__1.i = d21.r * z__2.i + d21.i * z__2.r; + wk.r = z__1.r, wk.i = z__1.i; + i__2 = j + (k + 1) * a_dim1; + z__3.r = d22.r * a[i__2].r - d22.i * a[i__2].i, + z__3.i = d22.r * a[i__2].i + d22.i * a[i__2].r; + i__3 = j + k * a_dim1; + z__2.r = z__3.r - a[i__3].r, z__2.i = z__3.i - a[i__3].i; + z__1.r = d21.r * z__2.r - d21.i * z__2.i, + z__1.i = d21.r * z__2.i + d21.i * z__2.r; + wkp1.r = z__1.r, wkp1.i = z__1.i; + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + i__5 = i__ + k * a_dim1; + z__3.r = a[i__5].r * wk.r - a[i__5].i * wk.i, + z__3.i = a[i__5].r * wk.i + a[i__5].i * wk.r; + z__2.r = a[i__4].r - z__3.r, z__2.i = a[i__4].i - z__3.i; + i__6 = i__ + (k + 1) * a_dim1; + z__4.r = a[i__6].r * wkp1.r - a[i__6].i * wkp1.i, + z__4.i = a[i__6].r * wkp1.i + a[i__6].i * wkp1.r; + z__1.r = z__2.r - z__4.r, z__1.i = z__2.i - z__4.i; + a[i__3].r = z__1.r, a[i__3].i = z__1.i; + } + i__2 = j + k * a_dim1; + a[i__2].r = wk.r, a[i__2].i = wk.i; + i__2 = j + (k + 1) * a_dim1; + a[i__2].r = wkp1.r, a[i__2].i = wkp1.i; + } + } + } + } + if (kstep == 1) { + ipiv[k] = kp; + } else { + ipiv[k] = -kp; + ipiv[k + 1] = -kp; + } + k += kstep; + goto L40; + } +L70: + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/zsytrf.cpp b/lib/linalg/zsytrf.cpp new file mode 100644 index 0000000000..178193fbaa --- /dev/null +++ b/lib/linalg/zsytrf.cpp @@ -0,0 +1,124 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static integer c__1 = 1; +static integer c_n1 = -1; +static integer c__2 = 2; +int zsytrf_(char *uplo, integer *n, doublecomplex *a, integer *lda, integer *ipiv, + doublecomplex *work, integer *lwork, integer *info, ftnlen uplo_len) +{ + integer a_dim1, a_offset, i__1, i__2; + integer j, k, kb, nb, iws; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + integer nbmin, iinfo; + logical upper; + extern int zsytf2_(char *, integer *, doublecomplex *, integer *, integer *, integer *, ftnlen), + xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, + ftnlen, ftnlen); + integer ldwork; + extern int zlasyf_(char *, integer *, integer *, integer *, doublecomplex *, integer *, + integer *, doublecomplex *, integer *, integer *, ftnlen); + integer lwkopt; + logical lquery; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + --work; + *info = 0; + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + lquery = *lwork == -1; + if (!upper && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1, *n)) { + *info = -4; + } else if (*lwork < 1 && !lquery) { + *info = -7; + } + if (*info == 0) { + nb = ilaenv_(&c__1, (char *)"ZSYTRF", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + lwkopt = *n * nb; + work[1].r = (doublereal)lwkopt, work[1].i = 0.; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"ZSYTRF", &i__1, (ftnlen)6); + return 0; + } else if (lquery) { + return 0; + } + nbmin = 2; + ldwork = *n; + if (nb > 1 && nb < *n) { + iws = ldwork * nb; + if (*lwork < iws) { + i__1 = *lwork / ldwork; + nb = max(i__1, 1); + i__1 = 2, + i__2 = ilaenv_(&c__2, (char *)"ZSYTRF", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + nbmin = max(i__1, i__2); + } + } else { + iws 
= 1; + } + if (nb < nbmin) { + nb = *n; + } + if (upper) { + k = *n; + L10: + if (k < 1) { + goto L40; + } + if (k > nb) { + zlasyf_(uplo, &k, &nb, &kb, &a[a_offset], lda, &ipiv[1], &work[1], n, &iinfo, + (ftnlen)1); + } else { + zsytf2_(uplo, &k, &a[a_offset], lda, &ipiv[1], &iinfo, (ftnlen)1); + kb = k; + } + if (*info == 0 && iinfo > 0) { + *info = iinfo; + } + k -= kb; + goto L10; + } else { + k = 1; + L20: + if (k > *n) { + goto L40; + } + if (k <= *n - nb) { + i__1 = *n - k + 1; + zlasyf_(uplo, &i__1, &nb, &kb, &a[k + k * a_dim1], lda, &ipiv[k], &work[1], n, &iinfo, + (ftnlen)1); + } else { + i__1 = *n - k + 1; + zsytf2_(uplo, &i__1, &a[k + k * a_dim1], lda, &ipiv[k], &iinfo, (ftnlen)1); + kb = *n - k + 1; + } + if (*info == 0 && iinfo > 0) { + *info = iinfo + k - 1; + } + i__1 = k + kb - 1; + for (j = k; j <= i__1; ++j) { + if (ipiv[j] > 0) { + ipiv[j] = ipiv[j] + k - 1; + } else { + ipiv[j] = ipiv[j] - k + 1; + } + } + k += kb; + goto L20; + } +L40: + work[1].r = (doublereal)lwkopt, work[1].i = 0.; + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/zsytri.cpp b/lib/linalg/zsytri.cpp new file mode 100644 index 0000000000..3f7d4dea0f --- /dev/null +++ b/lib/linalg/zsytri.cpp @@ -0,0 +1,292 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static doublecomplex c_b1 = {1., 0.}; +static doublecomplex c_b2 = {0., 0.}; +static integer c__1 = 1; +int zsytri_(char *uplo, integer *n, doublecomplex *a, integer *lda, integer *ipiv, + doublecomplex *work, integer *info, ftnlen uplo_len) +{ + integer a_dim1, a_offset, i__1, i__2, i__3; + doublecomplex z__1, z__2, z__3; + void z_lmp_div(doublecomplex *, doublecomplex *, doublecomplex *); + doublecomplex d__; + integer k; + doublecomplex t, ak; + integer kp; + doublecomplex akp1, temp, akkp1; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + integer kstep; + logical upper; + extern int zcopy_(integer *, doublecomplex *, integer *, doublecomplex *, integer *); + extern VOID zdotu_(doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, + integer *); + extern int zswap_(integer *, doublecomplex *, integer *, doublecomplex *, integer *), + zsymv_(char *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, + integer *, doublecomplex *, doublecomplex *, integer *, ftnlen), + xerbla_(char *, integer *, ftnlen); + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + --work; + *info = 0; + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + if (!upper && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1, *n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"ZSYTRI", &i__1, (ftnlen)6); + return 0; + } + if (*n == 0) { + return 0; + } + if (upper) { + for (*info = *n; *info >= 1; --(*info)) { + i__1 = *info + *info * a_dim1; + if (ipiv[*info] > 0 && (a[i__1].r == 0. && a[i__1].i == 0.)) { + return 0; + } + } + } else { + i__1 = *n; + for (*info = 1; *info <= i__1; ++(*info)) { + i__2 = *info + *info * a_dim1; + if (ipiv[*info] > 0 && (a[i__2].r == 0. 
&& a[i__2].i == 0.)) { + return 0; + } + } + } + *info = 0; + if (upper) { + k = 1; + L30: + if (k > *n) { + goto L40; + } + if (ipiv[k] > 0) { + i__1 = k + k * a_dim1; + z_lmp_div(&z__1, &c_b1, &a[k + k * a_dim1]); + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + if (k > 1) { + i__1 = k - 1; + zcopy_(&i__1, &a[k * a_dim1 + 1], &c__1, &work[1], &c__1); + i__1 = k - 1; + z__1.r = -1., z__1.i = -0.; + zsymv_(uplo, &i__1, &z__1, &a[a_offset], lda, &work[1], &c__1, &c_b2, + &a[k * a_dim1 + 1], &c__1, (ftnlen)1); + i__1 = k + k * a_dim1; + i__2 = k + k * a_dim1; + i__3 = k - 1; + zdotu_(&z__2, &i__3, &work[1], &c__1, &a[k * a_dim1 + 1], &c__1); + z__1.r = a[i__2].r - z__2.r, z__1.i = a[i__2].i - z__2.i; + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + } + kstep = 1; + } else { + i__1 = k + (k + 1) * a_dim1; + t.r = a[i__1].r, t.i = a[i__1].i; + z_lmp_div(&z__1, &a[k + k * a_dim1], &t); + ak.r = z__1.r, ak.i = z__1.i; + z_lmp_div(&z__1, &a[k + 1 + (k + 1) * a_dim1], &t); + akp1.r = z__1.r, akp1.i = z__1.i; + z_lmp_div(&z__1, &a[k + (k + 1) * a_dim1], &t); + akkp1.r = z__1.r, akkp1.i = z__1.i; + z__3.r = ak.r * akp1.r - ak.i * akp1.i, z__3.i = ak.r * akp1.i + ak.i * akp1.r; + z__2.r = z__3.r - 1., z__2.i = z__3.i - 0.; + z__1.r = t.r * z__2.r - t.i * z__2.i, z__1.i = t.r * z__2.i + t.i * z__2.r; + d__.r = z__1.r, d__.i = z__1.i; + i__1 = k + k * a_dim1; + z_lmp_div(&z__1, &akp1, &d__); + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + i__1 = k + 1 + (k + 1) * a_dim1; + z_lmp_div(&z__1, &ak, &d__); + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + i__1 = k + (k + 1) * a_dim1; + z__2.r = -akkp1.r, z__2.i = -akkp1.i; + z_lmp_div(&z__1, &z__2, &d__); + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + if (k > 1) { + i__1 = k - 1; + zcopy_(&i__1, &a[k * a_dim1 + 1], &c__1, &work[1], &c__1); + i__1 = k - 1; + z__1.r = -1., z__1.i = -0.; + zsymv_(uplo, &i__1, &z__1, &a[a_offset], lda, &work[1], &c__1, &c_b2, + &a[k * a_dim1 + 1], &c__1, (ftnlen)1); + i__1 = k + k * a_dim1; + i__2 = k + k * a_dim1; + i__3 = k - 1; + zdotu_(&z__2, &i__3, &work[1], &c__1, &a[k * a_dim1 + 1], &c__1); + z__1.r = a[i__2].r - z__2.r, z__1.i = a[i__2].i - z__2.i; + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + i__1 = k + (k + 1) * a_dim1; + i__2 = k + (k + 1) * a_dim1; + i__3 = k - 1; + zdotu_(&z__2, &i__3, &a[k * a_dim1 + 1], &c__1, &a[(k + 1) * a_dim1 + 1], &c__1); + z__1.r = a[i__2].r - z__2.r, z__1.i = a[i__2].i - z__2.i; + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + i__1 = k - 1; + zcopy_(&i__1, &a[(k + 1) * a_dim1 + 1], &c__1, &work[1], &c__1); + i__1 = k - 1; + z__1.r = -1., z__1.i = -0.; + zsymv_(uplo, &i__1, &z__1, &a[a_offset], lda, &work[1], &c__1, &c_b2, + &a[(k + 1) * a_dim1 + 1], &c__1, (ftnlen)1); + i__1 = k + 1 + (k + 1) * a_dim1; + i__2 = k + 1 + (k + 1) * a_dim1; + i__3 = k - 1; + zdotu_(&z__2, &i__3, &work[1], &c__1, &a[(k + 1) * a_dim1 + 1], &c__1); + z__1.r = a[i__2].r - z__2.r, z__1.i = a[i__2].i - z__2.i; + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + } + kstep = 2; + } + kp = (i__1 = ipiv[k], abs(i__1)); + if (kp != k) { + i__1 = kp - 1; + zswap_(&i__1, &a[k * a_dim1 + 1], &c__1, &a[kp * a_dim1 + 1], &c__1); + i__1 = k - kp - 1; + zswap_(&i__1, &a[kp + 1 + k * a_dim1], &c__1, &a[kp + (kp + 1) * a_dim1], lda); + i__1 = k + k * a_dim1; + temp.r = a[i__1].r, temp.i = a[i__1].i; + i__1 = k + k * a_dim1; + i__2 = kp + kp * a_dim1; + a[i__1].r = a[i__2].r, a[i__1].i = a[i__2].i; + i__1 = kp + kp * a_dim1; + a[i__1].r = temp.r, a[i__1].i = temp.i; + if (kstep == 2) { + i__1 = k + (k + 1) * a_dim1; + temp.r = a[i__1].r, temp.i = a[i__1].i; + i__1 = k 
+ (k + 1) * a_dim1; + i__2 = kp + (k + 1) * a_dim1; + a[i__1].r = a[i__2].r, a[i__1].i = a[i__2].i; + i__1 = kp + (k + 1) * a_dim1; + a[i__1].r = temp.r, a[i__1].i = temp.i; + } + } + k += kstep; + goto L30; + L40:; + } else { + k = *n; + L50: + if (k < 1) { + goto L60; + } + if (ipiv[k] > 0) { + i__1 = k + k * a_dim1; + z_lmp_div(&z__1, &c_b1, &a[k + k * a_dim1]); + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + if (k < *n) { + i__1 = *n - k; + zcopy_(&i__1, &a[k + 1 + k * a_dim1], &c__1, &work[1], &c__1); + i__1 = *n - k; + z__1.r = -1., z__1.i = -0.; + zsymv_(uplo, &i__1, &z__1, &a[k + 1 + (k + 1) * a_dim1], lda, &work[1], &c__1, + &c_b2, &a[k + 1 + k * a_dim1], &c__1, (ftnlen)1); + i__1 = k + k * a_dim1; + i__2 = k + k * a_dim1; + i__3 = *n - k; + zdotu_(&z__2, &i__3, &work[1], &c__1, &a[k + 1 + k * a_dim1], &c__1); + z__1.r = a[i__2].r - z__2.r, z__1.i = a[i__2].i - z__2.i; + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + } + kstep = 1; + } else { + i__1 = k + (k - 1) * a_dim1; + t.r = a[i__1].r, t.i = a[i__1].i; + z_lmp_div(&z__1, &a[k - 1 + (k - 1) * a_dim1], &t); + ak.r = z__1.r, ak.i = z__1.i; + z_lmp_div(&z__1, &a[k + k * a_dim1], &t); + akp1.r = z__1.r, akp1.i = z__1.i; + z_lmp_div(&z__1, &a[k + (k - 1) * a_dim1], &t); + akkp1.r = z__1.r, akkp1.i = z__1.i; + z__3.r = ak.r * akp1.r - ak.i * akp1.i, z__3.i = ak.r * akp1.i + ak.i * akp1.r; + z__2.r = z__3.r - 1., z__2.i = z__3.i - 0.; + z__1.r = t.r * z__2.r - t.i * z__2.i, z__1.i = t.r * z__2.i + t.i * z__2.r; + d__.r = z__1.r, d__.i = z__1.i; + i__1 = k - 1 + (k - 1) * a_dim1; + z_lmp_div(&z__1, &akp1, &d__); + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + i__1 = k + k * a_dim1; + z_lmp_div(&z__1, &ak, &d__); + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + i__1 = k + (k - 1) * a_dim1; + z__2.r = -akkp1.r, z__2.i = -akkp1.i; + z_lmp_div(&z__1, &z__2, &d__); + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + if (k < *n) { + i__1 = *n - k; + zcopy_(&i__1, &a[k + 1 + k * a_dim1], &c__1, &work[1], &c__1); + i__1 = *n - k; + z__1.r = -1., z__1.i = -0.; + zsymv_(uplo, &i__1, &z__1, &a[k + 1 + (k + 1) * a_dim1], lda, &work[1], &c__1, + &c_b2, &a[k + 1 + k * a_dim1], &c__1, (ftnlen)1); + i__1 = k + k * a_dim1; + i__2 = k + k * a_dim1; + i__3 = *n - k; + zdotu_(&z__2, &i__3, &work[1], &c__1, &a[k + 1 + k * a_dim1], &c__1); + z__1.r = a[i__2].r - z__2.r, z__1.i = a[i__2].i - z__2.i; + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + i__1 = k + (k - 1) * a_dim1; + i__2 = k + (k - 1) * a_dim1; + i__3 = *n - k; + zdotu_(&z__2, &i__3, &a[k + 1 + k * a_dim1], &c__1, &a[k + 1 + (k - 1) * a_dim1], + &c__1); + z__1.r = a[i__2].r - z__2.r, z__1.i = a[i__2].i - z__2.i; + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + i__1 = *n - k; + zcopy_(&i__1, &a[k + 1 + (k - 1) * a_dim1], &c__1, &work[1], &c__1); + i__1 = *n - k; + z__1.r = -1., z__1.i = -0.; + zsymv_(uplo, &i__1, &z__1, &a[k + 1 + (k + 1) * a_dim1], lda, &work[1], &c__1, + &c_b2, &a[k + 1 + (k - 1) * a_dim1], &c__1, (ftnlen)1); + i__1 = k - 1 + (k - 1) * a_dim1; + i__2 = k - 1 + (k - 1) * a_dim1; + i__3 = *n - k; + zdotu_(&z__2, &i__3, &work[1], &c__1, &a[k + 1 + (k - 1) * a_dim1], &c__1); + z__1.r = a[i__2].r - z__2.r, z__1.i = a[i__2].i - z__2.i; + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + } + kstep = 2; + } + kp = (i__1 = ipiv[k], abs(i__1)); + if (kp != k) { + if (kp < *n) { + i__1 = *n - kp; + zswap_(&i__1, &a[kp + 1 + k * a_dim1], &c__1, &a[kp + 1 + kp * a_dim1], &c__1); + } + i__1 = kp - k - 1; + zswap_(&i__1, &a[k + 1 + k * a_dim1], &c__1, &a[kp + (k + 1) * a_dim1], lda); + i__1 = k + k * a_dim1; + temp.r = a[i__1].r, 
temp.i = a[i__1].i; + i__1 = k + k * a_dim1; + i__2 = kp + kp * a_dim1; + a[i__1].r = a[i__2].r, a[i__1].i = a[i__2].i; + i__1 = kp + kp * a_dim1; + a[i__1].r = temp.r, a[i__1].i = temp.i; + if (kstep == 2) { + i__1 = k + (k - 1) * a_dim1; + temp.r = a[i__1].r, temp.i = a[i__1].i; + i__1 = k + (k - 1) * a_dim1; + i__2 = kp + (k - 1) * a_dim1; + a[i__1].r = a[i__2].r, a[i__1].i = a[i__2].i; + i__1 = kp + (k - 1) * a_dim1; + a[i__1].r = temp.r, a[i__1].i = temp.i; + } + } + k -= kstep; + goto L50; + L60:; + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/ztrsm.cpp b/lib/linalg/ztrsm.cpp new file mode 100644 index 0000000000..160b65974a --- /dev/null +++ b/lib/linalg/ztrsm.cpp @@ -0,0 +1,443 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static doublecomplex c_b1 = {1., 0.}; +int ztrsm_(char *side, char *uplo, char *transa, char *diag, integer *m, integer *n, + doublecomplex *alpha, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, + ftnlen side_len, ftnlen uplo_len, ftnlen transa_len, ftnlen diag_len) +{ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7; + doublecomplex z__1, z__2, z__3; + void z_lmp_div(doublecomplex *, doublecomplex *, doublecomplex *), + d_lmp_cnjg(doublecomplex *, doublecomplex *); + integer i__, j, k, info; + doublecomplex temp; + logical lside; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + integer nrowa; + logical upper; + extern int xerbla_(char *, integer *, ftnlen); + logical noconj, nounit; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + lside = lsame_(side, (char *)"L", (ftnlen)1, (ftnlen)1); + if (lside) { + nrowa = *m; + } else { + nrowa = *n; + } + noconj = lsame_(transa, (char *)"T", (ftnlen)1, (ftnlen)1); + nounit = lsame_(diag, (char *)"N", (ftnlen)1, (ftnlen)1); + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + info = 0; + if (!lside && !lsame_(side, (char *)"R", (ftnlen)1, (ftnlen)1)) { + info = 1; + } else if (!upper && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + info = 2; + } else if (!lsame_(transa, (char *)"N", (ftnlen)1, (ftnlen)1) && + !lsame_(transa, (char *)"T", (ftnlen)1, (ftnlen)1) && + !lsame_(transa, (char *)"C", (ftnlen)1, (ftnlen)1)) { + info = 3; + } else if (!lsame_(diag, (char *)"U", (ftnlen)1, (ftnlen)1) && + !lsame_(diag, (char *)"N", (ftnlen)1, (ftnlen)1)) { + info = 4; + } else if (*m < 0) { + info = 5; + } else if (*n < 0) { + info = 6; + } else if (*lda < max(1, nrowa)) { + info = 9; + } else if (*ldb < max(1, *m)) { + info = 11; + } + if (info != 0) { + xerbla_((char *)"ZTRSM ", &info, (ftnlen)6); + return 0; + } + if (*m == 0 || *n == 0) { + return 0; + } + if (alpha->r == 0. && alpha->i == 0.) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + b[i__3].r = 0., b[i__3].i = 0.; + } + } + return 0; + } + if (lside) { + if (lsame_(transa, (char *)"N", (ftnlen)1, (ftnlen)1)) { + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (alpha->r != 1. || alpha->i != 0.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * b_dim1; + z__1.r = alpha->r * b[i__4].r - alpha->i * b[i__4].i, + z__1.i = alpha->r * b[i__4].i + alpha->i * b[i__4].r; + b[i__3].r = z__1.r, b[i__3].i = z__1.i; + } + } + for (k = *m; k >= 1; --k) { + i__2 = k + j * b_dim1; + if (b[i__2].r != 0. || b[i__2].i != 0.) 
{ + if (nounit) { + i__2 = k + j * b_dim1; + z_lmp_div(&z__1, &b[k + j * b_dim1], &a[k + k * a_dim1]); + b[i__2].r = z__1.r, b[i__2].i = z__1.i; + } + i__2 = k - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * b_dim1; + i__5 = k + j * b_dim1; + i__6 = i__ + k * a_dim1; + z__2.r = b[i__5].r * a[i__6].r - b[i__5].i * a[i__6].i, + z__2.i = b[i__5].r * a[i__6].i + b[i__5].i * a[i__6].r; + z__1.r = b[i__4].r - z__2.r, z__1.i = b[i__4].i - z__2.i; + b[i__3].r = z__1.r, b[i__3].i = z__1.i; + } + } + } + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (alpha->r != 1. || alpha->i != 0.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * b_dim1; + z__1.r = alpha->r * b[i__4].r - alpha->i * b[i__4].i, + z__1.i = alpha->r * b[i__4].i + alpha->i * b[i__4].r; + b[i__3].r = z__1.r, b[i__3].i = z__1.i; + } + } + i__2 = *m; + for (k = 1; k <= i__2; ++k) { + i__3 = k + j * b_dim1; + if (b[i__3].r != 0. || b[i__3].i != 0.) { + if (nounit) { + i__3 = k + j * b_dim1; + z_lmp_div(&z__1, &b[k + j * b_dim1], &a[k + k * a_dim1]); + b[i__3].r = z__1.r, b[i__3].i = z__1.i; + } + i__3 = *m; + for (i__ = k + 1; i__ <= i__3; ++i__) { + i__4 = i__ + j * b_dim1; + i__5 = i__ + j * b_dim1; + i__6 = k + j * b_dim1; + i__7 = i__ + k * a_dim1; + z__2.r = b[i__6].r * a[i__7].r - b[i__6].i * a[i__7].i, + z__2.i = b[i__6].r * a[i__7].i + b[i__6].i * a[i__7].r; + z__1.r = b[i__5].r - z__2.r, z__1.i = b[i__5].i - z__2.i; + b[i__4].r = z__1.r, b[i__4].i = z__1.i; + } + } + } + } + } + } else { + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + z__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3].i, + z__1.i = alpha->r * b[i__3].i + alpha->i * b[i__3].r; + temp.r = z__1.r, temp.i = z__1.i; + if (noconj) { + i__3 = i__ - 1; + for (k = 1; k <= i__3; ++k) { + i__4 = k + i__ * a_dim1; + i__5 = k + j * b_dim1; + z__2.r = a[i__4].r * b[i__5].r - a[i__4].i * b[i__5].i, + z__2.i = a[i__4].r * b[i__5].i + a[i__4].i * b[i__5].r; + z__1.r = temp.r - z__2.r, z__1.i = temp.i - z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + } + if (nounit) { + z_lmp_div(&z__1, &temp, &a[i__ + i__ * a_dim1]); + temp.r = z__1.r, temp.i = z__1.i; + } + } else { + i__3 = i__ - 1; + for (k = 1; k <= i__3; ++k) { + d_lmp_cnjg(&z__3, &a[k + i__ * a_dim1]); + i__4 = k + j * b_dim1; + z__2.r = z__3.r * b[i__4].r - z__3.i * b[i__4].i, + z__2.i = z__3.r * b[i__4].i + z__3.i * b[i__4].r; + z__1.r = temp.r - z__2.r, z__1.i = temp.i - z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + } + if (nounit) { + d_lmp_cnjg(&z__2, &a[i__ + i__ * a_dim1]); + z_lmp_div(&z__1, &temp, &z__2); + temp.r = z__1.r, temp.i = z__1.i; + } + } + i__3 = i__ + j * b_dim1; + b[i__3].r = temp.r, b[i__3].i = temp.i; + } + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + for (i__ = *m; i__ >= 1; --i__) { + i__2 = i__ + j * b_dim1; + z__1.r = alpha->r * b[i__2].r - alpha->i * b[i__2].i, + z__1.i = alpha->r * b[i__2].i + alpha->i * b[i__2].r; + temp.r = z__1.r, temp.i = z__1.i; + if (noconj) { + i__2 = *m; + for (k = i__ + 1; k <= i__2; ++k) { + i__3 = k + i__ * a_dim1; + i__4 = k + j * b_dim1; + z__2.r = a[i__3].r * b[i__4].r - a[i__3].i * b[i__4].i, + z__2.i = a[i__3].r * b[i__4].i + a[i__3].i * b[i__4].r; + z__1.r = temp.r - z__2.r, z__1.i = temp.i - z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + } + if (nounit) { + z_lmp_div(&z__1, &temp, &a[i__ + i__ * a_dim1]); + temp.r = z__1.r, temp.i = z__1.i; + } + } else 
{ + i__2 = *m; + for (k = i__ + 1; k <= i__2; ++k) { + d_lmp_cnjg(&z__3, &a[k + i__ * a_dim1]); + i__3 = k + j * b_dim1; + z__2.r = z__3.r * b[i__3].r - z__3.i * b[i__3].i, + z__2.i = z__3.r * b[i__3].i + z__3.i * b[i__3].r; + z__1.r = temp.r - z__2.r, z__1.i = temp.i - z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + } + if (nounit) { + d_lmp_cnjg(&z__2, &a[i__ + i__ * a_dim1]); + z_lmp_div(&z__1, &temp, &z__2); + temp.r = z__1.r, temp.i = z__1.i; + } + } + i__2 = i__ + j * b_dim1; + b[i__2].r = temp.r, b[i__2].i = temp.i; + } + } + } + } + } else { + if (lsame_(transa, (char *)"N", (ftnlen)1, (ftnlen)1)) { + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (alpha->r != 1. || alpha->i != 0.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * b_dim1; + z__1.r = alpha->r * b[i__4].r - alpha->i * b[i__4].i, + z__1.i = alpha->r * b[i__4].i + alpha->i * b[i__4].r; + b[i__3].r = z__1.r, b[i__3].i = z__1.i; + } + } + i__2 = j - 1; + for (k = 1; k <= i__2; ++k) { + i__3 = k + j * a_dim1; + if (a[i__3].r != 0. || a[i__3].i != 0.) { + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + i__4 = i__ + j * b_dim1; + i__5 = i__ + j * b_dim1; + i__6 = k + j * a_dim1; + i__7 = i__ + k * b_dim1; + z__2.r = a[i__6].r * b[i__7].r - a[i__6].i * b[i__7].i, + z__2.i = a[i__6].r * b[i__7].i + a[i__6].i * b[i__7].r; + z__1.r = b[i__5].r - z__2.r, z__1.i = b[i__5].i - z__2.i; + b[i__4].r = z__1.r, b[i__4].i = z__1.i; + } + } + } + if (nounit) { + z_lmp_div(&z__1, &c_b1, &a[j + j * a_dim1]); + temp.r = z__1.r, temp.i = z__1.i; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * b_dim1; + z__1.r = temp.r * b[i__4].r - temp.i * b[i__4].i, + z__1.i = temp.r * b[i__4].i + temp.i * b[i__4].r; + b[i__3].r = z__1.r, b[i__3].i = z__1.i; + } + } + } + } else { + for (j = *n; j >= 1; --j) { + if (alpha->r != 1. || alpha->i != 0.) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + j * b_dim1; + i__3 = i__ + j * b_dim1; + z__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3].i, + z__1.i = alpha->r * b[i__3].i + alpha->i * b[i__3].r; + b[i__2].r = z__1.r, b[i__2].i = z__1.i; + } + } + i__1 = *n; + for (k = j + 1; k <= i__1; ++k) { + i__2 = k + j * a_dim1; + if (a[i__2].r != 0. || a[i__2].i != 0.) 
{ + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * b_dim1; + i__5 = k + j * a_dim1; + i__6 = i__ + k * b_dim1; + z__2.r = a[i__5].r * b[i__6].r - a[i__5].i * b[i__6].i, + z__2.i = a[i__5].r * b[i__6].i + a[i__5].i * b[i__6].r; + z__1.r = b[i__4].r - z__2.r, z__1.i = b[i__4].i - z__2.i; + b[i__3].r = z__1.r, b[i__3].i = z__1.i; + } + } + } + if (nounit) { + z_lmp_div(&z__1, &c_b1, &a[j + j * a_dim1]); + temp.r = z__1.r, temp.i = z__1.i; + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + j * b_dim1; + i__3 = i__ + j * b_dim1; + z__1.r = temp.r * b[i__3].r - temp.i * b[i__3].i, + z__1.i = temp.r * b[i__3].i + temp.i * b[i__3].r; + b[i__2].r = z__1.r, b[i__2].i = z__1.i; + } + } + } + } + } else { + if (upper) { + for (k = *n; k >= 1; --k) { + if (nounit) { + if (noconj) { + z_lmp_div(&z__1, &c_b1, &a[k + k * a_dim1]); + temp.r = z__1.r, temp.i = z__1.i; + } else { + d_lmp_cnjg(&z__2, &a[k + k * a_dim1]); + z_lmp_div(&z__1, &c_b1, &z__2); + temp.r = z__1.r, temp.i = z__1.i; + } + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + k * b_dim1; + i__3 = i__ + k * b_dim1; + z__1.r = temp.r * b[i__3].r - temp.i * b[i__3].i, + z__1.i = temp.r * b[i__3].i + temp.i * b[i__3].r; + b[i__2].r = z__1.r, b[i__2].i = z__1.i; + } + } + i__1 = k - 1; + for (j = 1; j <= i__1; ++j) { + i__2 = j + k * a_dim1; + if (a[i__2].r != 0. || a[i__2].i != 0.) { + if (noconj) { + i__2 = j + k * a_dim1; + temp.r = a[i__2].r, temp.i = a[i__2].i; + } else { + d_lmp_cnjg(&z__1, &a[j + k * a_dim1]); + temp.r = z__1.r, temp.i = z__1.i; + } + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * b_dim1; + i__5 = i__ + k * b_dim1; + z__2.r = temp.r * b[i__5].r - temp.i * b[i__5].i, + z__2.i = temp.r * b[i__5].i + temp.i * b[i__5].r; + z__1.r = b[i__4].r - z__2.r, z__1.i = b[i__4].i - z__2.i; + b[i__3].r = z__1.r, b[i__3].i = z__1.i; + } + } + } + if (alpha->r != 1. || alpha->i != 0.) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + k * b_dim1; + i__3 = i__ + k * b_dim1; + z__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3].i, + z__1.i = alpha->r * b[i__3].i + alpha->i * b[i__3].r; + b[i__2].r = z__1.r, b[i__2].i = z__1.i; + } + } + } + } else { + i__1 = *n; + for (k = 1; k <= i__1; ++k) { + if (nounit) { + if (noconj) { + z_lmp_div(&z__1, &c_b1, &a[k + k * a_dim1]); + temp.r = z__1.r, temp.i = z__1.i; + } else { + d_lmp_cnjg(&z__2, &a[k + k * a_dim1]); + z_lmp_div(&z__1, &c_b1, &z__2); + temp.r = z__1.r, temp.i = z__1.i; + } + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + k * b_dim1; + i__4 = i__ + k * b_dim1; + z__1.r = temp.r * b[i__4].r - temp.i * b[i__4].i, + z__1.i = temp.r * b[i__4].i + temp.i * b[i__4].r; + b[i__3].r = z__1.r, b[i__3].i = z__1.i; + } + } + i__2 = *n; + for (j = k + 1; j <= i__2; ++j) { + i__3 = j + k * a_dim1; + if (a[i__3].r != 0. || a[i__3].i != 0.) { + if (noconj) { + i__3 = j + k * a_dim1; + temp.r = a[i__3].r, temp.i = a[i__3].i; + } else { + d_lmp_cnjg(&z__1, &a[j + k * a_dim1]); + temp.r = z__1.r, temp.i = z__1.i; + } + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + i__4 = i__ + j * b_dim1; + i__5 = i__ + j * b_dim1; + i__6 = i__ + k * b_dim1; + z__2.r = temp.r * b[i__6].r - temp.i * b[i__6].i, + z__2.i = temp.r * b[i__6].i + temp.i * b[i__6].r; + z__1.r = b[i__5].r - z__2.r, z__1.i = b[i__5].i - z__2.i; + b[i__4].r = z__1.r, b[i__4].i = z__1.i; + } + } + } + if (alpha->r != 1. || alpha->i != 0.) 
{ + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + k * b_dim1; + i__4 = i__ + k * b_dim1; + z__1.r = alpha->r * b[i__4].r - alpha->i * b[i__4].i, + z__1.i = alpha->r * b[i__4].i + alpha->i * b[i__4].r; + b[i__3].r = z__1.r, b[i__3].i = z__1.i; + } + } + } + } + } + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/ztrsv.cpp b/lib/linalg/ztrsv.cpp new file mode 100644 index 0000000000..324416d9e3 --- /dev/null +++ b/lib/linalg/ztrsv.cpp @@ -0,0 +1,330 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +int ztrsv_(char *uplo, char *trans, char *diag, integer *n, doublecomplex *a, integer *lda, + doublecomplex *x, integer *incx, ftnlen uplo_len, ftnlen trans_len, ftnlen diag_len) +{ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + doublecomplex z__1, z__2, z__3; + void z_lmp_div(doublecomplex *, doublecomplex *, doublecomplex *), + d_lmp_cnjg(doublecomplex *, doublecomplex *); + integer i__, j, ix, jx, kx, info; + doublecomplex temp; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern int xerbla_(char *, integer *, ftnlen); + logical noconj, nounit; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --x; + info = 0; + if (!lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1) && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + info = 1; + } else if (!lsame_(trans, (char *)"N", (ftnlen)1, (ftnlen)1) && + !lsame_(trans, (char *)"T", (ftnlen)1, (ftnlen)1) && + !lsame_(trans, (char *)"C", (ftnlen)1, (ftnlen)1)) { + info = 2; + } else if (!lsame_(diag, (char *)"U", (ftnlen)1, (ftnlen)1) && + !lsame_(diag, (char *)"N", (ftnlen)1, (ftnlen)1)) { + info = 3; + } else if (*n < 0) { + info = 4; + } else if (*lda < max(1, *n)) { + info = 6; + } else if (*incx == 0) { + info = 8; + } + if (info != 0) { + xerbla_((char *)"ZTRSV ", &info, (ftnlen)6); + return 0; + } + if (*n == 0) { + return 0; + } + noconj = lsame_(trans, (char *)"T", (ftnlen)1, (ftnlen)1); + nounit = lsame_(diag, (char *)"N", (ftnlen)1, (ftnlen)1); + if (*incx <= 0) { + kx = 1 - (*n - 1) * *incx; + } else if (*incx != 1) { + kx = 1; + } + if (lsame_(trans, (char *)"N", (ftnlen)1, (ftnlen)1)) { + if (lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1)) { + if (*incx == 1) { + for (j = *n; j >= 1; --j) { + i__1 = j; + if (x[i__1].r != 0. || x[i__1].i != 0.) { + if (nounit) { + i__1 = j; + z_lmp_div(&z__1, &x[j], &a[j + j * a_dim1]); + x[i__1].r = z__1.r, x[i__1].i = z__1.i; + } + i__1 = j; + temp.r = x[i__1].r, temp.i = x[i__1].i; + for (i__ = j - 1; i__ >= 1; --i__) { + i__1 = i__; + i__2 = i__; + i__3 = i__ + j * a_dim1; + z__2.r = temp.r * a[i__3].r - temp.i * a[i__3].i, + z__2.i = temp.r * a[i__3].i + temp.i * a[i__3].r; + z__1.r = x[i__2].r - z__2.r, z__1.i = x[i__2].i - z__2.i; + x[i__1].r = z__1.r, x[i__1].i = z__1.i; + } + } + } + } else { + jx = kx + (*n - 1) * *incx; + for (j = *n; j >= 1; --j) { + i__1 = jx; + if (x[i__1].r != 0. || x[i__1].i != 0.) 
{ + if (nounit) { + i__1 = jx; + z_lmp_div(&z__1, &x[jx], &a[j + j * a_dim1]); + x[i__1].r = z__1.r, x[i__1].i = z__1.i; + } + i__1 = jx; + temp.r = x[i__1].r, temp.i = x[i__1].i; + ix = jx; + for (i__ = j - 1; i__ >= 1; --i__) { + ix -= *incx; + i__1 = ix; + i__2 = ix; + i__3 = i__ + j * a_dim1; + z__2.r = temp.r * a[i__3].r - temp.i * a[i__3].i, + z__2.i = temp.r * a[i__3].i + temp.i * a[i__3].r; + z__1.r = x[i__2].r - z__2.r, z__1.i = x[i__2].i - z__2.i; + x[i__1].r = z__1.r, x[i__1].i = z__1.i; + } + } + jx -= *incx; + } + } + } else { + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + if (x[i__2].r != 0. || x[i__2].i != 0.) { + if (nounit) { + i__2 = j; + z_lmp_div(&z__1, &x[j], &a[j + j * a_dim1]); + x[i__2].r = z__1.r, x[i__2].i = z__1.i; + } + i__2 = j; + temp.r = x[i__2].r, temp.i = x[i__2].i; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + i__3 = i__; + i__4 = i__; + i__5 = i__ + j * a_dim1; + z__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, + z__2.i = temp.r * a[i__5].i + temp.i * a[i__5].r; + z__1.r = x[i__4].r - z__2.r, z__1.i = x[i__4].i - z__2.i; + x[i__3].r = z__1.r, x[i__3].i = z__1.i; + } + } + } + } else { + jx = kx; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jx; + if (x[i__2].r != 0. || x[i__2].i != 0.) { + if (nounit) { + i__2 = jx; + z_lmp_div(&z__1, &x[jx], &a[j + j * a_dim1]); + x[i__2].r = z__1.r, x[i__2].i = z__1.i; + } + i__2 = jx; + temp.r = x[i__2].r, temp.i = x[i__2].i; + ix = jx; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + ix += *incx; + i__3 = ix; + i__4 = ix; + i__5 = i__ + j * a_dim1; + z__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, + z__2.i = temp.r * a[i__5].i + temp.i * a[i__5].r; + z__1.r = x[i__4].r - z__2.r, z__1.i = x[i__4].i - z__2.i; + x[i__3].r = z__1.r, x[i__3].i = z__1.i; + } + } + jx += *incx; + } + } + } + } else { + if (lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1)) { + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + temp.r = x[i__2].r, temp.i = x[i__2].i; + if (noconj) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__; + z__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[i__4].i, + z__2.i = a[i__3].r * x[i__4].i + a[i__3].i * x[i__4].r; + z__1.r = temp.r - z__2.r, z__1.i = temp.i - z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + } + if (nounit) { + z_lmp_div(&z__1, &temp, &a[j + j * a_dim1]); + temp.r = z__1.r, temp.i = z__1.i; + } + } else { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + d_lmp_cnjg(&z__3, &a[i__ + j * a_dim1]); + i__3 = i__; + z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, + z__2.i = z__3.r * x[i__3].i + z__3.i * x[i__3].r; + z__1.r = temp.r - z__2.r, z__1.i = temp.i - z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + } + if (nounit) { + d_lmp_cnjg(&z__2, &a[j + j * a_dim1]); + z_lmp_div(&z__1, &temp, &z__2); + temp.r = z__1.r, temp.i = z__1.i; + } + } + i__2 = j; + x[i__2].r = temp.r, x[i__2].i = temp.i; + } + } else { + jx = kx; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + ix = kx; + i__2 = jx; + temp.r = x[i__2].r, temp.i = x[i__2].i; + if (noconj) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = ix; + z__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[i__4].i, + z__2.i = a[i__3].r * x[i__4].i + a[i__3].i * x[i__4].r; + z__1.r = temp.r - z__2.r, z__1.i = temp.i - z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + ix += *incx; + } + if (nounit) { + z_lmp_div(&z__1, &temp, &a[j + j * a_dim1]); + temp.r = z__1.r, temp.i = z__1.i; + } + } else { + 
i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + d_lmp_cnjg(&z__3, &a[i__ + j * a_dim1]); + i__3 = ix; + z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, + z__2.i = z__3.r * x[i__3].i + z__3.i * x[i__3].r; + z__1.r = temp.r - z__2.r, z__1.i = temp.i - z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + ix += *incx; + } + if (nounit) { + d_lmp_cnjg(&z__2, &a[j + j * a_dim1]); + z_lmp_div(&z__1, &temp, &z__2); + temp.r = z__1.r, temp.i = z__1.i; + } + } + i__2 = jx; + x[i__2].r = temp.r, x[i__2].i = temp.i; + jx += *incx; + } + } + } else { + if (*incx == 1) { + for (j = *n; j >= 1; --j) { + i__1 = j; + temp.r = x[i__1].r, temp.i = x[i__1].i; + if (noconj) { + i__1 = j + 1; + for (i__ = *n; i__ >= i__1; --i__) { + i__2 = i__ + j * a_dim1; + i__3 = i__; + z__2.r = a[i__2].r * x[i__3].r - a[i__2].i * x[i__3].i, + z__2.i = a[i__2].r * x[i__3].i + a[i__2].i * x[i__3].r; + z__1.r = temp.r - z__2.r, z__1.i = temp.i - z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + } + if (nounit) { + z_lmp_div(&z__1, &temp, &a[j + j * a_dim1]); + temp.r = z__1.r, temp.i = z__1.i; + } + } else { + i__1 = j + 1; + for (i__ = *n; i__ >= i__1; --i__) { + d_lmp_cnjg(&z__3, &a[i__ + j * a_dim1]); + i__2 = i__; + z__2.r = z__3.r * x[i__2].r - z__3.i * x[i__2].i, + z__2.i = z__3.r * x[i__2].i + z__3.i * x[i__2].r; + z__1.r = temp.r - z__2.r, z__1.i = temp.i - z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + } + if (nounit) { + d_lmp_cnjg(&z__2, &a[j + j * a_dim1]); + z_lmp_div(&z__1, &temp, &z__2); + temp.r = z__1.r, temp.i = z__1.i; + } + } + i__1 = j; + x[i__1].r = temp.r, x[i__1].i = temp.i; + } + } else { + kx += (*n - 1) * *incx; + jx = kx; + for (j = *n; j >= 1; --j) { + ix = kx; + i__1 = jx; + temp.r = x[i__1].r, temp.i = x[i__1].i; + if (noconj) { + i__1 = j + 1; + for (i__ = *n; i__ >= i__1; --i__) { + i__2 = i__ + j * a_dim1; + i__3 = ix; + z__2.r = a[i__2].r * x[i__3].r - a[i__2].i * x[i__3].i, + z__2.i = a[i__2].r * x[i__3].i + a[i__2].i * x[i__3].r; + z__1.r = temp.r - z__2.r, z__1.i = temp.i - z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + ix -= *incx; + } + if (nounit) { + z_lmp_div(&z__1, &temp, &a[j + j * a_dim1]); + temp.r = z__1.r, temp.i = z__1.i; + } + } else { + i__1 = j + 1; + for (i__ = *n; i__ >= i__1; --i__) { + d_lmp_cnjg(&z__3, &a[i__ + j * a_dim1]); + i__2 = ix; + z__2.r = z__3.r * x[i__2].r - z__3.i * x[i__2].i, + z__2.i = z__3.r * x[i__2].i + z__3.i * x[i__2].r; + z__1.r = temp.r - z__2.r, z__1.i = temp.i - z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + ix -= *incx; + } + if (nounit) { + d_lmp_cnjg(&z__2, &a[j + j * a_dim1]); + z_lmp_div(&z__1, &temp, &z__2); + temp.r = z__1.r, temp.i = z__1.i; + } + } + i__1 = jx; + x[i__1].r = temp.r, x[i__1].i = temp.i; + jx -= *incx; + } + } + } + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/ztrti2.cpp b/lib/linalg/ztrti2.cpp new file mode 100644 index 0000000000..00cb4154b3 --- /dev/null +++ b/lib/linalg/ztrti2.cpp @@ -0,0 +1,88 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static doublecomplex c_b1 = {1., 0.}; +static integer c__1 = 1; +int ztrti2_(char *uplo, char *diag, integer *n, doublecomplex *a, integer *lda, integer *info, + ftnlen uplo_len, ftnlen diag_len) +{ + integer a_dim1, a_offset, i__1, i__2; + doublecomplex z__1; + void z_lmp_div(doublecomplex *, doublecomplex *, doublecomplex *); + integer j; + doublecomplex ajj; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern int zscal_(integer *, doublecomplex *, doublecomplex *, integer *); + logical upper; + extern int ztrmv_(char *, char 
*, char *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *, ftnlen, ftnlen, ftnlen), + xerbla_(char *, integer *, ftnlen); + logical nounit; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + *info = 0; + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + nounit = lsame_(diag, (char *)"N", (ftnlen)1, (ftnlen)1); + if (!upper && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (!nounit && !lsame_(diag, (char *)"U", (ftnlen)1, (ftnlen)1)) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1, *n)) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"ZTRTI2", &i__1, (ftnlen)6); + return 0; + } + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (nounit) { + i__2 = j + j * a_dim1; + z_lmp_div(&z__1, &c_b1, &a[j + j * a_dim1]); + a[i__2].r = z__1.r, a[i__2].i = z__1.i; + i__2 = j + j * a_dim1; + z__1.r = -a[i__2].r, z__1.i = -a[i__2].i; + ajj.r = z__1.r, ajj.i = z__1.i; + } else { + z__1.r = -1., z__1.i = -0.; + ajj.r = z__1.r, ajj.i = z__1.i; + } + i__2 = j - 1; + ztrmv_((char *)"Upper", (char *)"No transpose", diag, &i__2, &a[a_offset], lda, &a[j * a_dim1 + 1], + &c__1, (ftnlen)5, (ftnlen)12, (ftnlen)1); + i__2 = j - 1; + zscal_(&i__2, &ajj, &a[j * a_dim1 + 1], &c__1); + } + } else { + for (j = *n; j >= 1; --j) { + if (nounit) { + i__1 = j + j * a_dim1; + z_lmp_div(&z__1, &c_b1, &a[j + j * a_dim1]); + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + i__1 = j + j * a_dim1; + z__1.r = -a[i__1].r, z__1.i = -a[i__1].i; + ajj.r = z__1.r, ajj.i = z__1.i; + } else { + z__1.r = -1., z__1.i = -0.; + ajj.r = z__1.r, ajj.i = z__1.i; + } + if (j < *n) { + i__1 = *n - j; + ztrmv_((char *)"Lower", (char *)"No transpose", diag, &i__1, &a[j + 1 + (j + 1) * a_dim1], lda, + &a[j + 1 + j * a_dim1], &c__1, (ftnlen)5, (ftnlen)12, (ftnlen)1); + i__1 = *n - j; + zscal_(&i__1, &ajj, &a[j + 1 + j * a_dim1], &c__1); + } + } + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/ztrtri.cpp b/lib/linalg/ztrtri.cpp new file mode 100644 index 0000000000..771d54adb7 --- /dev/null +++ b/lib/linalg/ztrtri.cpp @@ -0,0 +1,112 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static doublecomplex c_b1 = {1., 0.}; +static integer c__1 = 1; +static integer c_n1 = -1; +static integer c__2 = 2; +int ztrtri_(char *uplo, char *diag, integer *n, doublecomplex *a, integer *lda, integer *info, + ftnlen uplo_len, ftnlen diag_len) +{ + address a__1[2]; + integer a_dim1, a_offset, i__1, i__2, i__3[2], i__4, i__5; + doublecomplex z__1; + char ch__1[2]; + int s_lmp_cat(char *, char **, integer *, integer *, ftnlen); + integer j, jb, nb, nn; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + logical upper; + extern int ztrmm_(char *, char *, char *, char *, integer *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, ftnlen, ftnlen, + ftnlen, ftnlen), + ztrsm_(char *, char *, char *, char *, integer *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, ftnlen, ftnlen, ftnlen, + ftnlen), + ztrti2_(char *, char *, integer *, doublecomplex *, integer *, integer *, ftnlen, ftnlen), + xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, + ftnlen, ftnlen); + logical nounit; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + *info = 0; + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + nounit = lsame_(diag, (char 
*)"N", (ftnlen)1, (ftnlen)1); + if (!upper && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (!nounit && !lsame_(diag, (char *)"U", (ftnlen)1, (ftnlen)1)) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1, *n)) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"ZTRTRI", &i__1, (ftnlen)6); + return 0; + } + if (*n == 0) { + return 0; + } + if (nounit) { + i__1 = *n; + for (*info = 1; *info <= i__1; ++(*info)) { + i__2 = *info + *info * a_dim1; + if (a[i__2].r == 0. && a[i__2].i == 0.) { + return 0; + } + } + *info = 0; + } + i__3[0] = 1, a__1[0] = uplo; + i__3[1] = 1, a__1[1] = diag; + s_lmp_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + nb = ilaenv_(&c__1, (char *)"ZTRTRI", ch__1, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, (ftnlen)2); + if (nb <= 1 || nb >= *n) { + ztrti2_(uplo, diag, n, &a[a_offset], lda, info, (ftnlen)1, (ftnlen)1); + } else { + if (upper) { + i__1 = *n; + i__2 = nb; + for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { + i__4 = nb, i__5 = *n - j + 1; + jb = min(i__4, i__5); + i__4 = j - 1; + ztrmm_((char *)"Left", (char *)"Upper", (char *)"No transpose", diag, &i__4, &jb, &c_b1, &a[a_offset], lda, + &a[j * a_dim1 + 1], lda, (ftnlen)4, (ftnlen)5, (ftnlen)12, (ftnlen)1); + i__4 = j - 1; + z__1.r = -1., z__1.i = -0.; + ztrsm_((char *)"Right", (char *)"Upper", (char *)"No transpose", diag, &i__4, &jb, &z__1, + &a[j + j * a_dim1], lda, &a[j * a_dim1 + 1], lda, (ftnlen)5, (ftnlen)5, + (ftnlen)12, (ftnlen)1); + ztrti2_((char *)"Upper", diag, &jb, &a[j + j * a_dim1], lda, info, (ftnlen)5, (ftnlen)1); + } + } else { + nn = (*n - 1) / nb * nb + 1; + i__2 = -nb; + for (j = nn; i__2 < 0 ? j >= 1 : j <= 1; j += i__2) { + i__1 = nb, i__4 = *n - j + 1; + jb = min(i__1, i__4); + if (j + jb <= *n) { + i__1 = *n - j - jb + 1; + ztrmm_((char *)"Left", (char *)"Lower", (char *)"No transpose", diag, &i__1, &jb, &c_b1, + &a[j + jb + (j + jb) * a_dim1], lda, &a[j + jb + j * a_dim1], lda, + (ftnlen)4, (ftnlen)5, (ftnlen)12, (ftnlen)1); + i__1 = *n - j - jb + 1; + z__1.r = -1., z__1.i = -0.; + ztrsm_((char *)"Right", (char *)"Lower", (char *)"No transpose", diag, &i__1, &jb, &z__1, + &a[j + j * a_dim1], lda, &a[j + jb + j * a_dim1], lda, (ftnlen)5, + (ftnlen)5, (ftnlen)12, (ftnlen)1); + } + ztrti2_((char *)"Lower", diag, &jb, &a[j + j * a_dim1], lda, info, (ftnlen)5, (ftnlen)1); + } + } + } + return 0; +} +#ifdef __cplusplus +} +#endif