k3s/vendor/gonum.org/v1/gonum/lapack/gonum/dlaqr23.go

416 lines
12 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

// Copyright ©2016 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package gonum
import (
"math"
"gonum.org/v1/gonum/blas"
"gonum.org/v1/gonum/blas/blas64"
"gonum.org/v1/gonum/lapack"
)
// Dlaqr23 performs the orthogonal similarity transformation of an n×n upper
// Hessenberg matrix to detect and deflate fully converged eigenvalues from a
// trailing principal submatrix using aggressive early deflation [1].
//
// On return, H will be overwritten by a new Hessenberg matrix that is a
// perturbation of an orthogonal similarity transformation of H. It is hoped
// that on output H will have many zero subdiagonal entries.
//
// If wantt is true, the matrix H will be fully updated so that the
// quasi-triangular Schur factor can be computed. If wantt is false, then only
// enough of H will be updated to preserve the eigenvalues.
//
// If wantz is true, the orthogonal similarity transformation will be
// accumulated into Z[iloz:ihiz+1,ktop:kbot+1], otherwise Z is not referenced.
//
// ktop and kbot determine a block [ktop:kbot+1,ktop:kbot+1] along the diagonal
// of H. It must hold that
// 0 <= ilo <= ihi < n if n > 0,
// ilo == 0 and ihi == -1 if n == 0,
// and the block must be isolated, that is, it must hold that
// ktop == 0 or H[ktop,ktop-1] == 0,
// kbot == n-1 or H[kbot+1,kbot] == 0,
// otherwise Dlaqr23 will panic.
//
// nw is the deflation window size. It must hold that
// 0 <= nw <= kbot-ktop+1,
// otherwise Dlaqr23 will panic.
//
// iloz and ihiz specify the rows of the n×n matrix Z to which transformations
// will be applied if wantz is true. It must hold that
// 0 <= iloz <= ktop, and kbot <= ihiz < n,
// otherwise Dlaqr23 will panic.
//
// sr and si must have length kbot+1, otherwise Dlaqr23 will panic.
//
// v and ldv represent an nw×nw work matrix.
// t and ldt represent an nw×nh work matrix, and nh must be at least nw.
// wv and ldwv represent an nv×nw work matrix.
//
// work must have length at least lwork and lwork must be at least max(1,2*nw),
// otherwise Dlaqr23 will panic. Larger values of lwork may result in greater
// efficiency. On return, work[0] will contain the optimal value of lwork.
//
// If lwork is -1, instead of performing Dlaqr23, the function only estimates the
// optimal workspace size and stores it into work[0]. Neither h nor z are
// accessed.
//
// recur is the non-negative recursion depth. For recur > 0, Dlaqr23 behaves
// as DLAQR3, for recur == 0 it behaves as DLAQR2.
//
// On return, ns and nd will contain respectively the number of unconverged
// (i.e., approximate) eigenvalues and converged eigenvalues that are stored in
// sr and si.
//
// On return, the real and imaginary parts of approximate eigenvalues that may
// be used for shifts will be stored respectively in sr[kbot-nd-ns+1:kbot-nd+1]
// and si[kbot-nd-ns+1:kbot-nd+1].
//
// On return, the real and imaginary parts of converged eigenvalues will be
// stored respectively in sr[kbot-nd+1:kbot+1] and si[kbot-nd+1:kbot+1].
//
// References:
// [1] K. Braman, R. Byers, R. Mathias. The Multishift QR Algorithm. Part II:
// Aggressive Early Deflation. SIAM J. Matrix Anal. Appl 23(4) (2002), pp. 948—973
// URL: http://dx.doi.org/10.1137/S0895479801384585
//
func (impl Implementation) Dlaqr23(wantt, wantz bool, n, ktop, kbot, nw int, h []float64, ldh int, iloz, ihiz int, z []float64, ldz int, sr, si []float64, v []float64, ldv int, nh int, t []float64, ldt int, nv int, wv []float64, ldwv int, work []float64, lwork int, recur int) (ns, nd int) {
switch {
case n < 0:
panic(nLT0)
case ktop < 0 || max(0, n-1) < ktop:
panic(badKtop)
case kbot < min(ktop, n-1) || n <= kbot:
panic(badKbot)
case nw < 0 || kbot-ktop+1+1 < nw:
panic(badNw)
case ldh < max(1, n):
panic(badLdH)
case wantz && (iloz < 0 || ktop < iloz):
panic(badIloz)
case wantz && (ihiz < kbot || n <= ihiz):
panic(badIhiz)
case ldz < 1, wantz && ldz < n:
panic(badLdZ)
case ldv < max(1, nw):
panic(badLdV)
case nh < nw:
panic(badNh)
case ldt < max(1, nh):
panic(badLdT)
case nv < 0:
panic(nvLT0)
case ldwv < max(1, nw):
panic(badLdWV)
case lwork < max(1, 2*nw) && lwork != -1:
panic(badLWork)
case len(work) < max(1, lwork):
panic(shortWork)
case recur < 0:
panic(recurLT0)
}
// Quick return for zero window size.
if nw == 0 {
work[0] = 1
return 0, 0
}
// LAPACK code does not enforce the documented behavior
// nw <= kbot-ktop+1
// but we do (we panic above).
jw := nw
lwkopt := max(1, 2*nw)
if jw > 2 {
// Workspace query call to Dgehrd.
impl.Dgehrd(jw, 0, jw-2, t, ldt, work, work, -1)
lwk1 := int(work[0])
// Workspace query call to Dormhr.
impl.Dormhr(blas.Right, blas.NoTrans, jw, jw, 0, jw-2, t, ldt, work, v, ldv, work, -1)
lwk2 := int(work[0])
if recur > 0 {
// Workspace query call to Dlaqr04.
impl.Dlaqr04(true, true, jw, 0, jw-1, t, ldt, sr, si, 0, jw-1, v, ldv, work, -1, recur-1)
lwk3 := int(work[0])
// Optimal workspace.
lwkopt = max(jw+max(lwk1, lwk2), lwk3)
} else {
// Optimal workspace.
lwkopt = jw + max(lwk1, lwk2)
}
}
// Quick return in case of workspace query.
if lwork == -1 {
work[0] = float64(lwkopt)
return 0, 0
}
// Check input slices only if not doing workspace query.
switch {
case len(h) < (n-1)*ldh+n:
panic(shortH)
case len(v) < (nw-1)*ldv+nw:
panic(shortV)
case len(t) < (nw-1)*ldt+nh:
panic(shortT)
case len(wv) < (nv-1)*ldwv+nw:
panic(shortWV)
case wantz && len(z) < (n-1)*ldz+n:
panic(shortZ)
case len(sr) != kbot+1:
panic(badLenSr)
case len(si) != kbot+1:
panic(badLenSi)
case ktop > 0 && h[ktop*ldh+ktop-1] != 0:
panic(notIsolated)
case kbot+1 < n && h[(kbot+1)*ldh+kbot] != 0:
panic(notIsolated)
}
// Machine constants.
ulp := dlamchP
smlnum := float64(n) / ulp * dlamchS
// Setup deflation window.
var s float64
kwtop := kbot - jw + 1
if kwtop != ktop {
s = h[kwtop*ldh+kwtop-1]
}
if kwtop == kbot {
// 1×1 deflation window.
sr[kwtop] = h[kwtop*ldh+kwtop]
si[kwtop] = 0
ns = 1
nd = 0
if math.Abs(s) <= math.Max(smlnum, ulp*math.Abs(h[kwtop*ldh+kwtop])) {
ns = 0
nd = 1
if kwtop > ktop {
h[kwtop*ldh+kwtop-1] = 0
}
}
work[0] = 1
return ns, nd
}
// Convert to spike-triangular form. In case of a rare QR failure, this
// routine continues to do aggressive early deflation using that part of
// the deflation window that converged using infqr here and there to
// keep track.
impl.Dlacpy(blas.Upper, jw, jw, h[kwtop*ldh+kwtop:], ldh, t, ldt)
bi := blas64.Implementation()
bi.Dcopy(jw-1, h[(kwtop+1)*ldh+kwtop:], ldh+1, t[ldt:], ldt+1)
impl.Dlaset(blas.All, jw, jw, 0, 1, v, ldv)
nmin := impl.Ilaenv(12, "DLAQR3", "SV", jw, 0, jw-1, lwork)
var infqr int
if recur > 0 && jw > nmin {
infqr = impl.Dlaqr04(true, true, jw, 0, jw-1, t, ldt, sr[kwtop:], si[kwtop:], 0, jw-1, v, ldv, work, lwork, recur-1)
} else {
infqr = impl.Dlahqr(true, true, jw, 0, jw-1, t, ldt, sr[kwtop:], si[kwtop:], 0, jw-1, v, ldv)
}
// Note that ilo == 0 which conveniently coincides with the success
// value of infqr, that is, infqr as an index always points to the first
// converged eigenvalue.
// Dtrexc needs a clean margin near the diagonal.
for j := 0; j < jw-3; j++ {
t[(j+2)*ldt+j] = 0
t[(j+3)*ldt+j] = 0
}
if jw >= 3 {
t[(jw-1)*ldt+jw-3] = 0
}
ns = jw
ilst := infqr
// Deflation detection loop.
for ilst < ns {
bulge := false
if ns >= 2 {
bulge = t[(ns-1)*ldt+ns-2] != 0
}
if !bulge {
// Real eigenvalue.
abst := math.Abs(t[(ns-1)*ldt+ns-1])
if abst == 0 {
abst = math.Abs(s)
}
if math.Abs(s*v[ns-1]) <= math.Max(smlnum, ulp*abst) {
// Deflatable.
ns--
} else {
// Undeflatable, move it up out of the way.
// Dtrexc can not fail in this case.
_, ilst, _ = impl.Dtrexc(lapack.UpdateSchur, jw, t, ldt, v, ldv, ns-1, ilst, work)
ilst++
}
continue
}
// Complex conjugate pair.
abst := math.Abs(t[(ns-1)*ldt+ns-1]) + math.Sqrt(math.Abs(t[(ns-1)*ldt+ns-2]))*math.Sqrt(math.Abs(t[(ns-2)*ldt+ns-1]))
if abst == 0 {
abst = math.Abs(s)
}
if math.Max(math.Abs(s*v[ns-1]), math.Abs(s*v[ns-2])) <= math.Max(smlnum, ulp*abst) {
// Deflatable.
ns -= 2
} else {
// Undeflatable, move them up out of the way.
// Dtrexc does the right thing with ilst in case of a
// rare exchange failure.
_, ilst, _ = impl.Dtrexc(lapack.UpdateSchur, jw, t, ldt, v, ldv, ns-1, ilst, work)
ilst += 2
}
}
// Return to Hessenberg form.
if ns == 0 {
s = 0
}
if ns < jw {
// Sorting diagonal blocks of T improves accuracy for graded
// matrices. Bubble sort deals well with exchange failures.
sorted := false
i := ns
for !sorted {
sorted = true
kend := i - 1
i = infqr
var k int
if i == ns-1 || t[(i+1)*ldt+i] == 0 {
k = i + 1
} else {
k = i + 2
}
for k <= kend {
var evi float64
if k == i+1 {
evi = math.Abs(t[i*ldt+i])
} else {
evi = math.Abs(t[i*ldt+i]) + math.Sqrt(math.Abs(t[(i+1)*ldt+i]))*math.Sqrt(math.Abs(t[i*ldt+i+1]))
}
var evk float64
if k == kend || t[(k+1)*ldt+k] == 0 {
evk = math.Abs(t[k*ldt+k])
} else {
evk = math.Abs(t[k*ldt+k]) + math.Sqrt(math.Abs(t[(k+1)*ldt+k]))*math.Sqrt(math.Abs(t[k*ldt+k+1]))
}
if evi >= evk {
i = k
} else {
sorted = false
_, ilst, ok := impl.Dtrexc(lapack.UpdateSchur, jw, t, ldt, v, ldv, i, k, work)
if ok {
i = ilst
} else {
i = k
}
}
if i == kend || t[(i+1)*ldt+i] == 0 {
k = i + 1
} else {
k = i + 2
}
}
}
}
// Restore shift/eigenvalue array from T.
for i := jw - 1; i >= infqr; {
if i == infqr || t[i*ldt+i-1] == 0 {
sr[kwtop+i] = t[i*ldt+i]
si[kwtop+i] = 0
i--
continue
}
aa := t[(i-1)*ldt+i-1]
bb := t[(i-1)*ldt+i]
cc := t[i*ldt+i-1]
dd := t[i*ldt+i]
_, _, _, _, sr[kwtop+i-1], si[kwtop+i-1], sr[kwtop+i], si[kwtop+i], _, _ = impl.Dlanv2(aa, bb, cc, dd)
i -= 2
}
if ns < jw || s == 0 {
if ns > 1 && s != 0 {
// Reflect spike back into lower triangle.
bi.Dcopy(ns, v[:ns], 1, work[:ns], 1)
_, tau := impl.Dlarfg(ns, work[0], work[1:ns], 1)
work[0] = 1
impl.Dlaset(blas.Lower, jw-2, jw-2, 0, 0, t[2*ldt:], ldt)
impl.Dlarf(blas.Left, ns, jw, work[:ns], 1, tau, t, ldt, work[jw:])
impl.Dlarf(blas.Right, ns, ns, work[:ns], 1, tau, t, ldt, work[jw:])
impl.Dlarf(blas.Right, jw, ns, work[:ns], 1, tau, v, ldv, work[jw:])
impl.Dgehrd(jw, 0, ns-1, t, ldt, work[:jw-1], work[jw:], lwork-jw)
}
// Copy updated reduced window into place.
if kwtop > 0 {
h[kwtop*ldh+kwtop-1] = s * v[0]
}
impl.Dlacpy(blas.Upper, jw, jw, t, ldt, h[kwtop*ldh+kwtop:], ldh)
bi.Dcopy(jw-1, t[ldt:], ldt+1, h[(kwtop+1)*ldh+kwtop:], ldh+1)
// Accumulate orthogonal matrix in order to update H and Z, if
// requested.
if ns > 1 && s != 0 {
// work[:ns-1] contains the elementary reflectors stored
// by a call to Dgehrd above.
impl.Dormhr(blas.Right, blas.NoTrans, jw, ns, 0, ns-1,
t, ldt, work[:ns-1], v, ldv, work[jw:], lwork-jw)
}
// Update vertical slab in H.
var ltop int
if !wantt {
ltop = ktop
}
for krow := ltop; krow < kwtop; krow += nv {
kln := min(nv, kwtop-krow)
bi.Dgemm(blas.NoTrans, blas.NoTrans, kln, jw, jw,
1, h[krow*ldh+kwtop:], ldh, v, ldv,
0, wv, ldwv)
impl.Dlacpy(blas.All, kln, jw, wv, ldwv, h[krow*ldh+kwtop:], ldh)
}
// Update horizontal slab in H.
if wantt {
for kcol := kbot + 1; kcol < n; kcol += nh {
kln := min(nh, n-kcol)
bi.Dgemm(blas.Trans, blas.NoTrans, jw, kln, jw,
1, v, ldv, h[kwtop*ldh+kcol:], ldh,
0, t, ldt)
impl.Dlacpy(blas.All, jw, kln, t, ldt, h[kwtop*ldh+kcol:], ldh)
}
}
// Update vertical slab in Z.
if wantz {
for krow := iloz; krow <= ihiz; krow += nv {
kln := min(nv, ihiz-krow+1)
bi.Dgemm(blas.NoTrans, blas.NoTrans, kln, jw, jw,
1, z[krow*ldz+kwtop:], ldz, v, ldv,
0, wv, ldwv)
impl.Dlacpy(blas.All, kln, jw, wv, ldwv, z[krow*ldz+kwtop:], ldz)
}
}
}
// The number of deflations.
nd = jw - ns
// Shifts are converged eigenvalues that could not be deflated.
// Subtracting infqr from the spike length takes care of the case of a
// rare QR failure while calculating eigenvalues of the deflation
// window.
ns -= infqr
work[0] = float64(lwkopt)
return ns, nd
}