// Copyright ©2013 The Gonum Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package mat import ( "math" "gonum.org/v1/gonum/blas" "gonum.org/v1/gonum/blas/blas64" "gonum.org/v1/gonum/lapack/lapack64" ) // Add adds a and b element-wise, placing the result in the receiver. Add // will panic if the two matrices do not have the same shape. func (m *Dense) Add(a, b Matrix) { ar, ac := a.Dims() br, bc := b.Dims() if ar != br || ac != bc { panic(ErrShape) } aU, _ := untransposeExtract(a) bU, _ := untransposeExtract(b) m.reuseAsNonZeroed(ar, ac) if arm, ok := a.(*Dense); ok { if brm, ok := b.(*Dense); ok { amat, bmat := arm.mat, brm.mat if m != aU { m.checkOverlap(amat) } if m != bU { m.checkOverlap(bmat) } for ja, jb, jm := 0, 0, 0; ja < ar*amat.Stride; ja, jb, jm = ja+amat.Stride, jb+bmat.Stride, jm+m.mat.Stride { for i, v := range amat.Data[ja : ja+ac] { m.mat.Data[i+jm] = v + bmat.Data[i+jb] } } return } } m.checkOverlapMatrix(aU) m.checkOverlapMatrix(bU) var restore func() if m == aU { m, restore = m.isolatedWorkspace(aU) defer restore() } else if m == bU { m, restore = m.isolatedWorkspace(bU) defer restore() } for r := 0; r < ar; r++ { for c := 0; c < ac; c++ { m.set(r, c, a.At(r, c)+b.At(r, c)) } } } // Sub subtracts the matrix b from a, placing the result in the receiver. Sub // will panic if the two matrices do not have the same shape. func (m *Dense) Sub(a, b Matrix) { ar, ac := a.Dims() br, bc := b.Dims() if ar != br || ac != bc { panic(ErrShape) } aU, _ := untransposeExtract(a) bU, _ := untransposeExtract(b) m.reuseAsNonZeroed(ar, ac) if arm, ok := a.(*Dense); ok { if brm, ok := b.(*Dense); ok { amat, bmat := arm.mat, brm.mat if m != aU { m.checkOverlap(amat) } if m != bU { m.checkOverlap(bmat) } for ja, jb, jm := 0, 0, 0; ja < ar*amat.Stride; ja, jb, jm = ja+amat.Stride, jb+bmat.Stride, jm+m.mat.Stride { for i, v := range amat.Data[ja : ja+ac] { m.mat.Data[i+jm] = v - bmat.Data[i+jb] } } return } } m.checkOverlapMatrix(aU) m.checkOverlapMatrix(bU) var restore func() if m == aU { m, restore = m.isolatedWorkspace(aU) defer restore() } else if m == bU { m, restore = m.isolatedWorkspace(bU) defer restore() } for r := 0; r < ar; r++ { for c := 0; c < ac; c++ { m.set(r, c, a.At(r, c)-b.At(r, c)) } } } // MulElem performs element-wise multiplication of a and b, placing the result // in the receiver. MulElem will panic if the two matrices do not have the same // shape. func (m *Dense) MulElem(a, b Matrix) { ar, ac := a.Dims() br, bc := b.Dims() if ar != br || ac != bc { panic(ErrShape) } aU, _ := untransposeExtract(a) bU, _ := untransposeExtract(b) m.reuseAsNonZeroed(ar, ac) if arm, ok := a.(*Dense); ok { if brm, ok := b.(*Dense); ok { amat, bmat := arm.mat, brm.mat if m != aU { m.checkOverlap(amat) } if m != bU { m.checkOverlap(bmat) } for ja, jb, jm := 0, 0, 0; ja < ar*amat.Stride; ja, jb, jm = ja+amat.Stride, jb+bmat.Stride, jm+m.mat.Stride { for i, v := range amat.Data[ja : ja+ac] { m.mat.Data[i+jm] = v * bmat.Data[i+jb] } } return } } m.checkOverlapMatrix(aU) m.checkOverlapMatrix(bU) var restore func() if m == aU { m, restore = m.isolatedWorkspace(aU) defer restore() } else if m == bU { m, restore = m.isolatedWorkspace(bU) defer restore() } for r := 0; r < ar; r++ { for c := 0; c < ac; c++ { m.set(r, c, a.At(r, c)*b.At(r, c)) } } } // DivElem performs element-wise division of a by b, placing the result // in the receiver. DivElem will panic if the two matrices do not have the same // shape. func (m *Dense) DivElem(a, b Matrix) { ar, ac := a.Dims() br, bc := b.Dims() if ar != br || ac != bc { panic(ErrShape) } aU, _ := untransposeExtract(a) bU, _ := untransposeExtract(b) m.reuseAsNonZeroed(ar, ac) if arm, ok := a.(*Dense); ok { if brm, ok := b.(*Dense); ok { amat, bmat := arm.mat, brm.mat if m != aU { m.checkOverlap(amat) } if m != bU { m.checkOverlap(bmat) } for ja, jb, jm := 0, 0, 0; ja < ar*amat.Stride; ja, jb, jm = ja+amat.Stride, jb+bmat.Stride, jm+m.mat.Stride { for i, v := range amat.Data[ja : ja+ac] { m.mat.Data[i+jm] = v / bmat.Data[i+jb] } } return } } m.checkOverlapMatrix(aU) m.checkOverlapMatrix(bU) var restore func() if m == aU { m, restore = m.isolatedWorkspace(aU) defer restore() } else if m == bU { m, restore = m.isolatedWorkspace(bU) defer restore() } for r := 0; r < ar; r++ { for c := 0; c < ac; c++ { m.set(r, c, a.At(r, c)/b.At(r, c)) } } } // Inverse computes the inverse of the matrix a, storing the result into the // receiver. If a is ill-conditioned, a Condition error will be returned. // Note that matrix inversion is numerically unstable, and should generally // be avoided where possible, for example by using the Solve routines. func (m *Dense) Inverse(a Matrix) error { // TODO(btracey): Special case for RawTriangular, etc. r, c := a.Dims() if r != c { panic(ErrSquare) } m.reuseAsNonZeroed(a.Dims()) aU, aTrans := untransposeExtract(a) switch rm := aU.(type) { case *Dense: if m != aU || aTrans { if m == aU || m.checkOverlap(rm.mat) { tmp := getWorkspace(r, c, false) tmp.Copy(a) m.Copy(tmp) putWorkspace(tmp) break } m.Copy(a) } default: m.Copy(a) } ipiv := getInts(r, false) defer putInts(ipiv) ok := lapack64.Getrf(m.mat, ipiv) if !ok { return Condition(math.Inf(1)) } work := getFloats(4*r, false) // must be at least 4*r for cond. lapack64.Getri(m.mat, ipiv, work, -1) if int(work[0]) > 4*r { l := int(work[0]) putFloats(work) work = getFloats(l, false) } else { work = work[:4*r] } defer putFloats(work) lapack64.Getri(m.mat, ipiv, work, len(work)) norm := lapack64.Lange(CondNorm, m.mat, work) rcond := lapack64.Gecon(CondNorm, m.mat, norm, work, ipiv) // reuse ipiv if rcond == 0 { return Condition(math.Inf(1)) } cond := 1 / rcond if cond > ConditionTolerance { return Condition(cond) } return nil } // Mul takes the matrix product of a and b, placing the result in the receiver. // If the number of columns in a does not equal the number of rows in b, Mul will panic. func (m *Dense) Mul(a, b Matrix) { ar, ac := a.Dims() br, bc := b.Dims() if ac != br { panic(ErrShape) } aU, aTrans := untransposeExtract(a) bU, bTrans := untransposeExtract(b) m.reuseAsNonZeroed(ar, bc) var restore func() if m == aU { m, restore = m.isolatedWorkspace(aU) defer restore() } else if m == bU { m, restore = m.isolatedWorkspace(bU) defer restore() } aT := blas.NoTrans if aTrans { aT = blas.Trans } bT := blas.NoTrans if bTrans { bT = blas.Trans } // Some of the cases do not have a transpose option, so create // temporary memory. // C = Aᵀ * B = (Bᵀ * A)ᵀ // Cᵀ = Bᵀ * A. if aU, ok := aU.(*Dense); ok { if restore == nil { m.checkOverlap(aU.mat) } switch bU := bU.(type) { case *Dense: if restore == nil { m.checkOverlap(bU.mat) } blas64.Gemm(aT, bT, 1, aU.mat, bU.mat, 0, m.mat) return case *SymDense: if aTrans { c := getWorkspace(ac, ar, false) blas64.Symm(blas.Left, 1, bU.mat, aU.mat, 0, c.mat) strictCopy(m, c.T()) putWorkspace(c) return } blas64.Symm(blas.Right, 1, bU.mat, aU.mat, 0, m.mat) return case *TriDense: // Trmm updates in place, so copy aU first. if aTrans { c := getWorkspace(ac, ar, false) var tmp Dense tmp.SetRawMatrix(aU.mat) c.Copy(&tmp) bT := blas.Trans if bTrans { bT = blas.NoTrans } blas64.Trmm(blas.Left, bT, 1, bU.mat, c.mat) strictCopy(m, c.T()) putWorkspace(c) return } m.Copy(a) blas64.Trmm(blas.Right, bT, 1, bU.mat, m.mat) return case *VecDense: m.checkOverlap(bU.asGeneral()) bvec := bU.RawVector() if bTrans { // {ar,1} x {1,bc}, which is not a vector. // Instead, construct B as a General. bmat := blas64.General{ Rows: bc, Cols: 1, Stride: bvec.Inc, Data: bvec.Data, } blas64.Gemm(aT, bT, 1, aU.mat, bmat, 0, m.mat) return } cvec := blas64.Vector{ Inc: m.mat.Stride, Data: m.mat.Data, } blas64.Gemv(aT, 1, aU.mat, bvec, 0, cvec) return } } if bU, ok := bU.(*Dense); ok { if restore == nil { m.checkOverlap(bU.mat) } switch aU := aU.(type) { case *SymDense: if bTrans { c := getWorkspace(bc, br, false) blas64.Symm(blas.Right, 1, aU.mat, bU.mat, 0, c.mat) strictCopy(m, c.T()) putWorkspace(c) return } blas64.Symm(blas.Left, 1, aU.mat, bU.mat, 0, m.mat) return case *TriDense: // Trmm updates in place, so copy bU first. if bTrans { c := getWorkspace(bc, br, false) var tmp Dense tmp.SetRawMatrix(bU.mat) c.Copy(&tmp) aT := blas.Trans if aTrans { aT = blas.NoTrans } blas64.Trmm(blas.Right, aT, 1, aU.mat, c.mat) strictCopy(m, c.T()) putWorkspace(c) return } m.Copy(b) blas64.Trmm(blas.Left, aT, 1, aU.mat, m.mat) return case *VecDense: m.checkOverlap(aU.asGeneral()) avec := aU.RawVector() if aTrans { // {1,ac} x {ac, bc} // Transpose B so that the vector is on the right. cvec := blas64.Vector{ Inc: 1, Data: m.mat.Data, } bT := blas.Trans if bTrans { bT = blas.NoTrans } blas64.Gemv(bT, 1, bU.mat, avec, 0, cvec) return } // {ar,1} x {1,bc} which is not a vector result. // Instead, construct A as a General. amat := blas64.General{ Rows: ar, Cols: 1, Stride: avec.Inc, Data: avec.Data, } blas64.Gemm(aT, bT, 1, amat, bU.mat, 0, m.mat) return } } m.checkOverlapMatrix(aU) m.checkOverlapMatrix(bU) row := getFloats(ac, false) defer putFloats(row) for r := 0; r < ar; r++ { for i := range row { row[i] = a.At(r, i) } for c := 0; c < bc; c++ { var v float64 for i, e := range row { v += e * b.At(i, c) } m.mat.Data[r*m.mat.Stride+c] = v } } } // strictCopy copies a into m panicking if the shape of a and m differ. func strictCopy(m *Dense, a Matrix) { r, c := m.Copy(a) if r != m.mat.Rows || c != m.mat.Cols { // Panic with a string since this // is not a user-facing panic. panic(ErrShape.Error()) } } // Exp calculates the exponential of the matrix a, e^a, placing the result // in the receiver. Exp will panic with matrix.ErrShape if a is not square. func (m *Dense) Exp(a Matrix) { // The implementation used here is from Functions of Matrices: Theory and Computation // Chapter 10, Algorithm 10.20. https://doi.org/10.1137/1.9780898717778.ch10 r, c := a.Dims() if r != c { panic(ErrShape) } m.reuseAsNonZeroed(r, r) if r == 1 { m.mat.Data[0] = math.Exp(a.At(0, 0)) return } pade := []struct { theta float64 b []float64 }{ {theta: 0.015, b: []float64{ 120, 60, 12, 1, }}, {theta: 0.25, b: []float64{ 30240, 15120, 3360, 420, 30, 1, }}, {theta: 0.95, b: []float64{ 17297280, 8648640, 1995840, 277200, 25200, 1512, 56, 1, }}, {theta: 2.1, b: []float64{ 17643225600, 8821612800, 2075673600, 302702400, 30270240, 2162160, 110880, 3960, 90, 1, }}, } a1 := m a1.Copy(a) v := getWorkspace(r, r, true) vraw := v.RawMatrix() n := r * r vvec := blas64.Vector{N: n, Inc: 1, Data: vraw.Data} defer putWorkspace(v) u := getWorkspace(r, r, true) uraw := u.RawMatrix() uvec := blas64.Vector{N: n, Inc: 1, Data: uraw.Data} defer putWorkspace(u) a2 := getWorkspace(r, r, false) defer putWorkspace(a2) n1 := Norm(a, 1) for i, t := range pade { if n1 > t.theta { continue } // This loop only executes once, so // this is not as horrible as it looks. p := getWorkspace(r, r, true) praw := p.RawMatrix() pvec := blas64.Vector{N: n, Inc: 1, Data: praw.Data} defer putWorkspace(p) for k := 0; k < r; k++ { p.set(k, k, 1) v.set(k, k, t.b[0]) u.set(k, k, t.b[1]) } a2.Mul(a1, a1) for j := 0; j <= i; j++ { p.Mul(p, a2) blas64.Axpy(t.b[2*j+2], pvec, vvec) blas64.Axpy(t.b[2*j+3], pvec, uvec) } u.Mul(a1, u) // Use p as a workspace here and // rename u for the second call's // receiver. vmu, vpu := u, p vpu.Add(v, u) vmu.Sub(v, u) m.Solve(vmu, vpu) return } // Remaining Padé table line. const theta13 = 5.4 b := [...]float64{ 64764752532480000, 32382376266240000, 7771770303897600, 1187353796428800, 129060195264000, 10559470521600, 670442572800, 33522128640, 1323241920, 40840800, 960960, 16380, 182, 1, } s := math.Log2(n1 / theta13) if s >= 0 { s = math.Ceil(s) a1.Scale(1/math.Pow(2, s), a1) } a2.Mul(a1, a1) i := getWorkspace(r, r, true) for j := 0; j < r; j++ { i.set(j, j, 1) } iraw := i.RawMatrix() ivec := blas64.Vector{N: n, Inc: 1, Data: iraw.Data} defer putWorkspace(i) a2raw := a2.RawMatrix() a2vec := blas64.Vector{N: n, Inc: 1, Data: a2raw.Data} a4 := getWorkspace(r, r, false) a4raw := a4.RawMatrix() a4vec := blas64.Vector{N: n, Inc: 1, Data: a4raw.Data} defer putWorkspace(a4) a4.Mul(a2, a2) a6 := getWorkspace(r, r, false) a6raw := a6.RawMatrix() a6vec := blas64.Vector{N: n, Inc: 1, Data: a6raw.Data} defer putWorkspace(a6) a6.Mul(a2, a4) // V = A_6(b_12*A_6 + b_10*A_4 + b_8*A_2) + b_6*A_6 + b_4*A_4 + b_2*A_2 +b_0*I blas64.Axpy(b[12], a6vec, vvec) blas64.Axpy(b[10], a4vec, vvec) blas64.Axpy(b[8], a2vec, vvec) v.Mul(v, a6) blas64.Axpy(b[6], a6vec, vvec) blas64.Axpy(b[4], a4vec, vvec) blas64.Axpy(b[2], a2vec, vvec) blas64.Axpy(b[0], ivec, vvec) // U = A(A_6(b_13*A_6 + b_11*A_4 + b_9*A_2) + b_7*A_6 + b_5*A_4 + b_2*A_3 +b_1*I) blas64.Axpy(b[13], a6vec, uvec) blas64.Axpy(b[11], a4vec, uvec) blas64.Axpy(b[9], a2vec, uvec) u.Mul(u, a6) blas64.Axpy(b[7], a6vec, uvec) blas64.Axpy(b[5], a4vec, uvec) blas64.Axpy(b[3], a2vec, uvec) blas64.Axpy(b[1], ivec, uvec) u.Mul(u, a1) // Use i as a workspace here and // rename u for the second call's // receiver. vmu, vpu := u, i vpu.Add(v, u) vmu.Sub(v, u) m.Solve(vmu, vpu) for ; s > 0; s-- { m.Mul(m, m) } } // Pow calculates the integral power of the matrix a to n, placing the result // in the receiver. Pow will panic if n is negative or if a is not square. func (m *Dense) Pow(a Matrix, n int) { if n < 0 { panic("mat: illegal power") } r, c := a.Dims() if r != c { panic(ErrShape) } m.reuseAsNonZeroed(r, c) // Take possible fast paths. switch n { case 0: for i := 0; i < r; i++ { zero(m.mat.Data[i*m.mat.Stride : i*m.mat.Stride+c]) m.mat.Data[i*m.mat.Stride+i] = 1 } return case 1: m.Copy(a) return case 2: m.Mul(a, a) return } // Perform iterative exponentiation by squaring in work space. w := getWorkspace(r, r, false) w.Copy(a) s := getWorkspace(r, r, false) s.Copy(a) x := getWorkspace(r, r, false) for n--; n > 0; n >>= 1 { if n&1 != 0 { x.Mul(w, s) w, x = x, w } if n != 1 { x.Mul(s, s) s, x = x, s } } m.Copy(w) putWorkspace(w) putWorkspace(s) putWorkspace(x) } // Scale multiplies the elements of a by f, placing the result in the receiver. // // See the Scaler interface for more information. func (m *Dense) Scale(f float64, a Matrix) { ar, ac := a.Dims() m.reuseAsNonZeroed(ar, ac) aU, aTrans := untransposeExtract(a) if rm, ok := aU.(*Dense); ok { amat := rm.mat if m == aU || m.checkOverlap(amat) { var restore func() m, restore = m.isolatedWorkspace(a) defer restore() } if !aTrans { for ja, jm := 0, 0; ja < ar*amat.Stride; ja, jm = ja+amat.Stride, jm+m.mat.Stride { for i, v := range amat.Data[ja : ja+ac] { m.mat.Data[i+jm] = v * f } } } else { for ja, jm := 0, 0; ja < ac*amat.Stride; ja, jm = ja+amat.Stride, jm+1 { for i, v := range amat.Data[ja : ja+ar] { m.mat.Data[i*m.mat.Stride+jm] = v * f } } } return } m.checkOverlapMatrix(a) for r := 0; r < ar; r++ { for c := 0; c < ac; c++ { m.set(r, c, f*a.At(r, c)) } } } // Apply applies the function fn to each of the elements of a, placing the // resulting matrix in the receiver. The function fn takes a row/column // index and element value and returns some function of that tuple. func (m *Dense) Apply(fn func(i, j int, v float64) float64, a Matrix) { ar, ac := a.Dims() m.reuseAsNonZeroed(ar, ac) aU, aTrans := untransposeExtract(a) if rm, ok := aU.(*Dense); ok { amat := rm.mat if m == aU || m.checkOverlap(amat) { var restore func() m, restore = m.isolatedWorkspace(a) defer restore() } if !aTrans { for j, ja, jm := 0, 0, 0; ja < ar*amat.Stride; j, ja, jm = j+1, ja+amat.Stride, jm+m.mat.Stride { for i, v := range amat.Data[ja : ja+ac] { m.mat.Data[i+jm] = fn(j, i, v) } } } else { for j, ja, jm := 0, 0, 0; ja < ac*amat.Stride; j, ja, jm = j+1, ja+amat.Stride, jm+1 { for i, v := range amat.Data[ja : ja+ar] { m.mat.Data[i*m.mat.Stride+jm] = fn(i, j, v) } } } return } m.checkOverlapMatrix(a) for r := 0; r < ar; r++ { for c := 0; c < ac; c++ { m.set(r, c, fn(r, c, a.At(r, c))) } } } // RankOne performs a rank-one update to the matrix a with the vectors x and // y, where x and y are treated as column vectors. The result is stored in the // receiver. The Outer method can be used instead of RankOne if a is not needed. // m = a + alpha * x * yᵀ func (m *Dense) RankOne(a Matrix, alpha float64, x, y Vector) { ar, ac := a.Dims() if x.Len() != ar { panic(ErrShape) } if y.Len() != ac { panic(ErrShape) } if a != m { aU, _ := untransposeExtract(a) if rm, ok := aU.(*Dense); ok { m.checkOverlap(rm.RawMatrix()) } } var xmat, ymat blas64.Vector fast := true xU, _ := untransposeExtract(x) if rv, ok := xU.(*VecDense); ok { r, c := xU.Dims() xmat = rv.mat m.checkOverlap(generalFromVector(xmat, r, c)) } else { fast = false } yU, _ := untransposeExtract(y) if rv, ok := yU.(*VecDense); ok { r, c := yU.Dims() ymat = rv.mat m.checkOverlap(generalFromVector(ymat, r, c)) } else { fast = false } if fast { if m != a { m.reuseAsNonZeroed(ar, ac) m.Copy(a) } blas64.Ger(alpha, xmat, ymat, m.mat) return } m.reuseAsNonZeroed(ar, ac) for i := 0; i < ar; i++ { for j := 0; j < ac; j++ { m.set(i, j, a.At(i, j)+alpha*x.AtVec(i)*y.AtVec(j)) } } } // Outer calculates the outer product of the vectors x and y, where x and y // are treated as column vectors, and stores the result in the receiver. // m = alpha * x * yᵀ // In order to update an existing matrix, see RankOne. func (m *Dense) Outer(alpha float64, x, y Vector) { r, c := x.Len(), y.Len() m.reuseAsZeroed(r, c) var xmat, ymat blas64.Vector fast := true xU, _ := untransposeExtract(x) if rv, ok := xU.(*VecDense); ok { r, c := xU.Dims() xmat = rv.mat m.checkOverlap(generalFromVector(xmat, r, c)) } else { fast = false } yU, _ := untransposeExtract(y) if rv, ok := yU.(*VecDense); ok { r, c := yU.Dims() ymat = rv.mat m.checkOverlap(generalFromVector(ymat, r, c)) } else { fast = false } if fast { for i := 0; i < r; i++ { zero(m.mat.Data[i*m.mat.Stride : i*m.mat.Stride+c]) } blas64.Ger(alpha, xmat, ymat, m.mat) return } for i := 0; i < r; i++ { for j := 0; j < c; j++ { m.set(i, j, alpha*x.AtVec(i)*y.AtVec(j)) } } }