k3s/vendor/gonum.org/v1/gonum/blas/gonum/level2float32.go

2297 lines
43 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

// Code generated by "go generate gonum.org/v1/gonum/blas/gonum”; DO NOT EDIT.
// Copyright ©2014 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package gonum
import (
"gonum.org/v1/gonum/blas"
"gonum.org/v1/gonum/internal/asm/f32"
)
var _ blas.Float32Level2 = Implementation{}
// Sger performs the rank-one operation
// A += alpha * x * y^T
// where A is an m×n dense matrix, x and y are vectors, and alpha is a scalar.
//
// Float32 implementations are autogenerated and not directly tested.
func (Implementation) Sger(m, n int, alpha float32, x []float32, incX int, y []float32, incY int, a []float32, lda int) {
if m < 0 {
panic(mLT0)
}
if n < 0 {
panic(nLT0)
}
if lda < max(1, n) {
panic(badLdA)
}
if incX == 0 {
panic(zeroIncX)
}
if incY == 0 {
panic(zeroIncY)
}
// Quick return if possible.
if m == 0 || n == 0 {
return
}
// For zero matrix size the following slice length checks are trivially satisfied.
if (incX > 0 && len(x) <= (m-1)*incX) || (incX < 0 && len(x) <= (1-m)*incX) {
panic(shortX)
}
if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) {
panic(shortY)
}
if len(a) < lda*(m-1)+n {
panic(shortA)
}
// Quick return if possible.
if alpha == 0 {
return
}
f32.Ger(uintptr(m), uintptr(n),
alpha,
x, uintptr(incX),
y, uintptr(incY),
a, uintptr(lda))
}
// Sgbmv performs one of the matrix-vector operations
// y = alpha * A * x + beta * y if tA == blas.NoTrans
// y = alpha * A^T * x + beta * y if tA == blas.Trans or blas.ConjTrans
// where A is an m×n band matrix with kL sub-diagonals and kU super-diagonals,
// x and y are vectors, and alpha and beta are scalars.
//
// Float32 implementations are autogenerated and not directly tested.
func (Implementation) Sgbmv(tA blas.Transpose, m, n, kL, kU int, alpha float32, a []float32, lda int, x []float32, incX int, beta float32, y []float32, incY int) {
if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans {
panic(badTranspose)
}
if m < 0 {
panic(mLT0)
}
if n < 0 {
panic(nLT0)
}
if kL < 0 {
panic(kLLT0)
}
if kU < 0 {
panic(kULT0)
}
if lda < kL+kU+1 {
panic(badLdA)
}
if incX == 0 {
panic(zeroIncX)
}
if incY == 0 {
panic(zeroIncY)
}
// Quick return if possible.
if m == 0 || n == 0 {
return
}
// For zero matrix size the following slice length checks are trivially satisfied.
if len(a) < lda*(min(m, n+kL)-1)+kL+kU+1 {
panic(shortA)
}
lenX := m
lenY := n
if tA == blas.NoTrans {
lenX = n
lenY = m
}
if (incX > 0 && len(x) <= (lenX-1)*incX) || (incX < 0 && len(x) <= (1-lenX)*incX) {
panic(shortX)
}
if (incY > 0 && len(y) <= (lenY-1)*incY) || (incY < 0 && len(y) <= (1-lenY)*incY) {
panic(shortY)
}
// Quick return if possible.
if alpha == 0 && beta == 1 {
return
}
var kx, ky int
if incX < 0 {
kx = -(lenX - 1) * incX
}
if incY < 0 {
ky = -(lenY - 1) * incY
}
// Form y = beta * y.
if beta != 1 {
if incY == 1 {
if beta == 0 {
for i := range y[:lenY] {
y[i] = 0
}
} else {
f32.ScalUnitary(beta, y[:lenY])
}
} else {
iy := ky
if beta == 0 {
for i := 0; i < lenY; i++ {
y[iy] = 0
iy += incY
}
} else {
if incY > 0 {
f32.ScalInc(beta, y, uintptr(lenY), uintptr(incY))
} else {
f32.ScalInc(beta, y, uintptr(lenY), uintptr(-incY))
}
}
}
}
if alpha == 0 {
return
}
// i and j are indices of the compacted banded matrix.
// off is the offset into the dense matrix (off + j = densej)
nCol := kU + 1 + kL
if tA == blas.NoTrans {
iy := ky
if incX == 1 {
for i := 0; i < min(m, n+kL); i++ {
l := max(0, kL-i)
u := min(nCol, n+kL-i)
off := max(0, i-kL)
atmp := a[i*lda+l : i*lda+u]
xtmp := x[off : off+u-l]
var sum float32
for j, v := range atmp {
sum += xtmp[j] * v
}
y[iy] += sum * alpha
iy += incY
}
return
}
for i := 0; i < min(m, n+kL); i++ {
l := max(0, kL-i)
u := min(nCol, n+kL-i)
off := max(0, i-kL)
atmp := a[i*lda+l : i*lda+u]
jx := kx
var sum float32
for _, v := range atmp {
sum += x[off*incX+jx] * v
jx += incX
}
y[iy] += sum * alpha
iy += incY
}
return
}
if incX == 1 {
for i := 0; i < min(m, n+kL); i++ {
l := max(0, kL-i)
u := min(nCol, n+kL-i)
off := max(0, i-kL)
atmp := a[i*lda+l : i*lda+u]
tmp := alpha * x[i]
jy := ky
for _, v := range atmp {
y[jy+off*incY] += tmp * v
jy += incY
}
}
return
}
ix := kx
for i := 0; i < min(m, n+kL); i++ {
l := max(0, kL-i)
u := min(nCol, n+kL-i)
off := max(0, i-kL)
atmp := a[i*lda+l : i*lda+u]
tmp := alpha * x[ix]
jy := ky
for _, v := range atmp {
y[jy+off*incY] += tmp * v
jy += incY
}
ix += incX
}
}
// Strmv performs one of the matrix-vector operations
// x = A * x if tA == blas.NoTrans
// x = A^T * x if tA == blas.Trans or blas.ConjTrans
// where A is an n×n triangular matrix, and x is a vector.
//
// Float32 implementations are autogenerated and not directly tested.
func (Implementation) Strmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, a []float32, lda int, x []float32, incX int) {
if ul != blas.Lower && ul != blas.Upper {
panic(badUplo)
}
if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans {
panic(badTranspose)
}
if d != blas.NonUnit && d != blas.Unit {
panic(badDiag)
}
if n < 0 {
panic(nLT0)
}
if lda < max(1, n) {
panic(badLdA)
}
if incX == 0 {
panic(zeroIncX)
}
// Quick return if possible.
if n == 0 {
return
}
// For zero matrix size the following slice length checks are trivially satisfied.
if len(a) < lda*(n-1)+n {
panic(shortA)
}
if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) {
panic(shortX)
}
nonUnit := d != blas.Unit
if n == 1 {
if nonUnit {
x[0] *= a[0]
}
return
}
var kx int
if incX <= 0 {
kx = -(n - 1) * incX
}
if tA == blas.NoTrans {
if ul == blas.Upper {
if incX == 1 {
for i := 0; i < n; i++ {
ilda := i * lda
var tmp float32
if nonUnit {
tmp = a[ilda+i] * x[i]
} else {
tmp = x[i]
}
x[i] = tmp + f32.DotUnitary(a[ilda+i+1:ilda+n], x[i+1:n])
}
return
}
ix := kx
for i := 0; i < n; i++ {
ilda := i * lda
var tmp float32
if nonUnit {
tmp = a[ilda+i] * x[ix]
} else {
tmp = x[ix]
}
x[ix] = tmp + f32.DotInc(x, a[ilda+i+1:ilda+n], uintptr(n-i-1), uintptr(incX), 1, uintptr(ix+incX), 0)
ix += incX
}
return
}
if incX == 1 {
for i := n - 1; i >= 0; i-- {
ilda := i * lda
var tmp float32
if nonUnit {
tmp += a[ilda+i] * x[i]
} else {
tmp = x[i]
}
x[i] = tmp + f32.DotUnitary(a[ilda:ilda+i], x[:i])
}
return
}
ix := kx + (n-1)*incX
for i := n - 1; i >= 0; i-- {
ilda := i * lda
var tmp float32
if nonUnit {
tmp = a[ilda+i] * x[ix]
} else {
tmp = x[ix]
}
x[ix] = tmp + f32.DotInc(x, a[ilda:ilda+i], uintptr(i), uintptr(incX), 1, uintptr(kx), 0)
ix -= incX
}
return
}
// Cases where a is transposed.
if ul == blas.Upper {
if incX == 1 {
for i := n - 1; i >= 0; i-- {
ilda := i * lda
xi := x[i]
f32.AxpyUnitary(xi, a[ilda+i+1:ilda+n], x[i+1:n])
if nonUnit {
x[i] *= a[ilda+i]
}
}
return
}
ix := kx + (n-1)*incX
for i := n - 1; i >= 0; i-- {
ilda := i * lda
xi := x[ix]
f32.AxpyInc(xi, a[ilda+i+1:ilda+n], x, uintptr(n-i-1), 1, uintptr(incX), 0, uintptr(kx+(i+1)*incX))
if nonUnit {
x[ix] *= a[ilda+i]
}
ix -= incX
}
return
}
if incX == 1 {
for i := 0; i < n; i++ {
ilda := i * lda
xi := x[i]
f32.AxpyUnitary(xi, a[ilda:ilda+i], x[:i])
if nonUnit {
x[i] *= a[i*lda+i]
}
}
return
}
ix := kx
for i := 0; i < n; i++ {
ilda := i * lda
xi := x[ix]
f32.AxpyInc(xi, a[ilda:ilda+i], x, uintptr(i), 1, uintptr(incX), 0, uintptr(kx))
if nonUnit {
x[ix] *= a[ilda+i]
}
ix += incX
}
}
// Strsv solves one of the systems of equations
// A * x = b if tA == blas.NoTrans
// A^T * x = b if tA == blas.Trans or blas.ConjTrans
// where A is an n×n triangular matrix, and x and b are vectors.
//
// At entry to the function, x contains the values of b, and the result is
// stored in-place into x.
//
// No test for singularity or near-singularity is included in this
// routine. Such tests must be performed before calling this routine.
//
// Float32 implementations are autogenerated and not directly tested.
func (Implementation) Strsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, a []float32, lda int, x []float32, incX int) {
if ul != blas.Lower && ul != blas.Upper {
panic(badUplo)
}
if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans {
panic(badTranspose)
}
if d != blas.NonUnit && d != blas.Unit {
panic(badDiag)
}
if n < 0 {
panic(nLT0)
}
if lda < max(1, n) {
panic(badLdA)
}
if incX == 0 {
panic(zeroIncX)
}
// Quick return if possible.
if n == 0 {
return
}
// For zero matrix size the following slice length checks are trivially satisfied.
if len(a) < lda*(n-1)+n {
panic(shortA)
}
if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) {
panic(shortX)
}
if n == 1 {
if d == blas.NonUnit {
x[0] /= a[0]
}
return
}
var kx int
if incX < 0 {
kx = -(n - 1) * incX
}
nonUnit := d == blas.NonUnit
if tA == blas.NoTrans {
if ul == blas.Upper {
if incX == 1 {
for i := n - 1; i >= 0; i-- {
var sum float32
atmp := a[i*lda+i+1 : i*lda+n]
for j, v := range atmp {
jv := i + j + 1
sum += x[jv] * v
}
x[i] -= sum
if nonUnit {
x[i] /= a[i*lda+i]
}
}
return
}
ix := kx + (n-1)*incX
for i := n - 1; i >= 0; i-- {
var sum float32
jx := ix + incX
atmp := a[i*lda+i+1 : i*lda+n]
for _, v := range atmp {
sum += x[jx] * v
jx += incX
}
x[ix] -= sum
if nonUnit {
x[ix] /= a[i*lda+i]
}
ix -= incX
}
return
}
if incX == 1 {
for i := 0; i < n; i++ {
var sum float32
atmp := a[i*lda : i*lda+i]
for j, v := range atmp {
sum += x[j] * v
}
x[i] -= sum
if nonUnit {
x[i] /= a[i*lda+i]
}
}
return
}
ix := kx
for i := 0; i < n; i++ {
jx := kx
var sum float32
atmp := a[i*lda : i*lda+i]
for _, v := range atmp {
sum += x[jx] * v
jx += incX
}
x[ix] -= sum
if nonUnit {
x[ix] /= a[i*lda+i]
}
ix += incX
}
return
}
// Cases where a is transposed.
if ul == blas.Upper {
if incX == 1 {
for i := 0; i < n; i++ {
if nonUnit {
x[i] /= a[i*lda+i]
}
xi := x[i]
atmp := a[i*lda+i+1 : i*lda+n]
for j, v := range atmp {
jv := j + i + 1
x[jv] -= v * xi
}
}
return
}
ix := kx
for i := 0; i < n; i++ {
if nonUnit {
x[ix] /= a[i*lda+i]
}
xi := x[ix]
jx := kx + (i+1)*incX
atmp := a[i*lda+i+1 : i*lda+n]
for _, v := range atmp {
x[jx] -= v * xi
jx += incX
}
ix += incX
}
return
}
if incX == 1 {
for i := n - 1; i >= 0; i-- {
if nonUnit {
x[i] /= a[i*lda+i]
}
xi := x[i]
atmp := a[i*lda : i*lda+i]
for j, v := range atmp {
x[j] -= v * xi
}
}
return
}
ix := kx + (n-1)*incX
for i := n - 1; i >= 0; i-- {
if nonUnit {
x[ix] /= a[i*lda+i]
}
xi := x[ix]
jx := kx
atmp := a[i*lda : i*lda+i]
for _, v := range atmp {
x[jx] -= v * xi
jx += incX
}
ix -= incX
}
}
// Ssymv performs the matrix-vector operation
// y = alpha * A * x + beta * y
// where A is an n×n symmetric matrix, x and y are vectors, and alpha and
// beta are scalars.
//
// Float32 implementations are autogenerated and not directly tested.
func (Implementation) Ssymv(ul blas.Uplo, n int, alpha float32, a []float32, lda int, x []float32, incX int, beta float32, y []float32, incY int) {
if ul != blas.Lower && ul != blas.Upper {
panic(badUplo)
}
if n < 0 {
panic(nLT0)
}
if lda < max(1, n) {
panic(badLdA)
}
if incX == 0 {
panic(zeroIncX)
}
if incY == 0 {
panic(zeroIncY)
}
// Quick return if possible.
if n == 0 {
return
}
// For zero matrix size the following slice length checks are trivially satisfied.
if len(a) < lda*(n-1)+n {
panic(shortA)
}
if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) {
panic(shortX)
}
if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) {
panic(shortY)
}
// Quick return if possible.
if alpha == 0 && beta == 1 {
return
}
// Set up start points
var kx, ky int
if incX < 0 {
kx = -(n - 1) * incX
}
if incY < 0 {
ky = -(n - 1) * incY
}
// Form y = beta * y
if beta != 1 {
if incY == 1 {
if beta == 0 {
for i := range y[:n] {
y[i] = 0
}
} else {
f32.ScalUnitary(beta, y[:n])
}
} else {
iy := ky
if beta == 0 {
for i := 0; i < n; i++ {
y[iy] = 0
iy += incY
}
} else {
if incY > 0 {
f32.ScalInc(beta, y, uintptr(n), uintptr(incY))
} else {
f32.ScalInc(beta, y, uintptr(n), uintptr(-incY))
}
}
}
}
if alpha == 0 {
return
}
if n == 1 {
y[0] += alpha * a[0] * x[0]
return
}
if ul == blas.Upper {
if incX == 1 {
iy := ky
for i := 0; i < n; i++ {
xv := x[i] * alpha
sum := x[i] * a[i*lda+i]
jy := ky + (i+1)*incY
atmp := a[i*lda+i+1 : i*lda+n]
for j, v := range atmp {
jp := j + i + 1
sum += x[jp] * v
y[jy] += xv * v
jy += incY
}
y[iy] += alpha * sum
iy += incY
}
return
}
ix := kx
iy := ky
for i := 0; i < n; i++ {
xv := x[ix] * alpha
sum := x[ix] * a[i*lda+i]
jx := kx + (i+1)*incX
jy := ky + (i+1)*incY
atmp := a[i*lda+i+1 : i*lda+n]
for _, v := range atmp {
sum += x[jx] * v
y[jy] += xv * v
jx += incX
jy += incY
}
y[iy] += alpha * sum
ix += incX
iy += incY
}
return
}
// Cases where a is lower triangular.
if incX == 1 {
iy := ky
for i := 0; i < n; i++ {
jy := ky
xv := alpha * x[i]
atmp := a[i*lda : i*lda+i]
var sum float32
for j, v := range atmp {
sum += x[j] * v
y[jy] += xv * v
jy += incY
}
sum += x[i] * a[i*lda+i]
sum *= alpha
y[iy] += sum
iy += incY
}
return
}
ix := kx
iy := ky
for i := 0; i < n; i++ {
jx := kx
jy := ky
xv := alpha * x[ix]
atmp := a[i*lda : i*lda+i]
var sum float32
for _, v := range atmp {
sum += x[jx] * v
y[jy] += xv * v
jx += incX
jy += incY
}
sum += x[ix] * a[i*lda+i]
sum *= alpha
y[iy] += sum
ix += incX
iy += incY
}
}
// Stbmv performs one of the matrix-vector operations
// x = A * x if tA == blas.NoTrans
// x = A^T * x if tA == blas.Trans or blas.ConjTrans
// where A is an n×n triangular band matrix with k+1 diagonals, and x is a vector.
//
// Float32 implementations are autogenerated and not directly tested.
func (Implementation) Stbmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n, k int, a []float32, lda int, x []float32, incX int) {
if ul != blas.Lower && ul != blas.Upper {
panic(badUplo)
}
if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans {
panic(badTranspose)
}
if d != blas.NonUnit && d != blas.Unit {
panic(badDiag)
}
if n < 0 {
panic(nLT0)
}
if k < 0 {
panic(kLT0)
}
if lda < k+1 {
panic(badLdA)
}
if incX == 0 {
panic(zeroIncX)
}
// Quick return if possible.
if n == 0 {
return
}
// For zero matrix size the following slice length checks are trivially satisfied.
if len(a) < lda*(n-1)+k+1 {
panic(shortA)
}
if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) {
panic(shortX)
}
var kx int
if incX < 0 {
kx = -(n - 1) * incX
}
nonunit := d != blas.Unit
if tA == blas.NoTrans {
if ul == blas.Upper {
if incX == 1 {
for i := 0; i < n; i++ {
u := min(1+k, n-i)
var sum float32
atmp := a[i*lda:]
xtmp := x[i:]
for j := 1; j < u; j++ {
sum += xtmp[j] * atmp[j]
}
if nonunit {
sum += xtmp[0] * atmp[0]
} else {
sum += xtmp[0]
}
x[i] = sum
}
return
}
ix := kx
for i := 0; i < n; i++ {
u := min(1+k, n-i)
var sum float32
atmp := a[i*lda:]
jx := incX
for j := 1; j < u; j++ {
sum += x[ix+jx] * atmp[j]
jx += incX
}
if nonunit {
sum += x[ix] * atmp[0]
} else {
sum += x[ix]
}
x[ix] = sum
ix += incX
}
return
}
if incX == 1 {
for i := n - 1; i >= 0; i-- {
l := max(0, k-i)
atmp := a[i*lda:]
var sum float32
for j := l; j < k; j++ {
sum += x[i-k+j] * atmp[j]
}
if nonunit {
sum += x[i] * atmp[k]
} else {
sum += x[i]
}
x[i] = sum
}
return
}
ix := kx + (n-1)*incX
for i := n - 1; i >= 0; i-- {
l := max(0, k-i)
atmp := a[i*lda:]
var sum float32
jx := l * incX
for j := l; j < k; j++ {
sum += x[ix-k*incX+jx] * atmp[j]
jx += incX
}
if nonunit {
sum += x[ix] * atmp[k]
} else {
sum += x[ix]
}
x[ix] = sum
ix -= incX
}
return
}
if ul == blas.Upper {
if incX == 1 {
for i := n - 1; i >= 0; i-- {
u := k + 1
if i < u {
u = i + 1
}
var sum float32
for j := 1; j < u; j++ {
sum += x[i-j] * a[(i-j)*lda+j]
}
if nonunit {
sum += x[i] * a[i*lda]
} else {
sum += x[i]
}
x[i] = sum
}
return
}
ix := kx + (n-1)*incX
for i := n - 1; i >= 0; i-- {
u := k + 1
if i < u {
u = i + 1
}
var sum float32
jx := incX
for j := 1; j < u; j++ {
sum += x[ix-jx] * a[(i-j)*lda+j]
jx += incX
}
if nonunit {
sum += x[ix] * a[i*lda]
} else {
sum += x[ix]
}
x[ix] = sum
ix -= incX
}
return
}
if incX == 1 {
for i := 0; i < n; i++ {
u := k
if i+k >= n {
u = n - i - 1
}
var sum float32
for j := 0; j < u; j++ {
sum += x[i+j+1] * a[(i+j+1)*lda+k-j-1]
}
if nonunit {
sum += x[i] * a[i*lda+k]
} else {
sum += x[i]
}
x[i] = sum
}
return
}
ix := kx
for i := 0; i < n; i++ {
u := k
if i+k >= n {
u = n - i - 1
}
var (
sum float32
jx int
)
for j := 0; j < u; j++ {
sum += x[ix+jx+incX] * a[(i+j+1)*lda+k-j-1]
jx += incX
}
if nonunit {
sum += x[ix] * a[i*lda+k]
} else {
sum += x[ix]
}
x[ix] = sum
ix += incX
}
}
// Stpmv performs one of the matrix-vector operations
// x = A * x if tA == blas.NoTrans
// x = A^T * x if tA == blas.Trans or blas.ConjTrans
// where A is an n×n triangular matrix in packed format, and x is a vector.
//
// Float32 implementations are autogenerated and not directly tested.
func (Implementation) Stpmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, ap []float32, x []float32, incX int) {
if ul != blas.Lower && ul != blas.Upper {
panic(badUplo)
}
if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans {
panic(badTranspose)
}
if d != blas.NonUnit && d != blas.Unit {
panic(badDiag)
}
if n < 0 {
panic(nLT0)
}
if incX == 0 {
panic(zeroIncX)
}
// Quick return if possible.
if n == 0 {
return
}
// For zero matrix size the following slice length checks are trivially satisfied.
if len(ap) < n*(n+1)/2 {
panic(shortAP)
}
if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) {
panic(shortX)
}
var kx int
if incX < 0 {
kx = -(n - 1) * incX
}
nonUnit := d == blas.NonUnit
var offset int // Offset is the index of (i,i)
if tA == blas.NoTrans {
if ul == blas.Upper {
if incX == 1 {
for i := 0; i < n; i++ {
xi := x[i]
if nonUnit {
xi *= ap[offset]
}
atmp := ap[offset+1 : offset+n-i]
xtmp := x[i+1:]
for j, v := range atmp {
xi += v * xtmp[j]
}
x[i] = xi
offset += n - i
}
return
}
ix := kx
for i := 0; i < n; i++ {
xix := x[ix]
if nonUnit {
xix *= ap[offset]
}
atmp := ap[offset+1 : offset+n-i]
jx := kx + (i+1)*incX
for _, v := range atmp {
xix += v * x[jx]
jx += incX
}
x[ix] = xix
offset += n - i
ix += incX
}
return
}
if incX == 1 {
offset = n*(n+1)/2 - 1
for i := n - 1; i >= 0; i-- {
xi := x[i]
if nonUnit {
xi *= ap[offset]
}
atmp := ap[offset-i : offset]
for j, v := range atmp {
xi += v * x[j]
}
x[i] = xi
offset -= i + 1
}
return
}
ix := kx + (n-1)*incX
offset = n*(n+1)/2 - 1
for i := n - 1; i >= 0; i-- {
xix := x[ix]
if nonUnit {
xix *= ap[offset]
}
atmp := ap[offset-i : offset]
jx := kx
for _, v := range atmp {
xix += v * x[jx]
jx += incX
}
x[ix] = xix
offset -= i + 1
ix -= incX
}
return
}
// Cases where ap is transposed.
if ul == blas.Upper {
if incX == 1 {
offset = n*(n+1)/2 - 1
for i := n - 1; i >= 0; i-- {
xi := x[i]
atmp := ap[offset+1 : offset+n-i]
xtmp := x[i+1:]
for j, v := range atmp {
xtmp[j] += v * xi
}
if nonUnit {
x[i] *= ap[offset]
}
offset -= n - i + 1
}
return
}
ix := kx + (n-1)*incX
offset = n*(n+1)/2 - 1
for i := n - 1; i >= 0; i-- {
xix := x[ix]
jx := kx + (i+1)*incX
atmp := ap[offset+1 : offset+n-i]
for _, v := range atmp {
x[jx] += v * xix
jx += incX
}
if nonUnit {
x[ix] *= ap[offset]
}
offset -= n - i + 1
ix -= incX
}
return
}
if incX == 1 {
for i := 0; i < n; i++ {
xi := x[i]
atmp := ap[offset-i : offset]
for j, v := range atmp {
x[j] += v * xi
}
if nonUnit {
x[i] *= ap[offset]
}
offset += i + 2
}
return
}
ix := kx
for i := 0; i < n; i++ {
xix := x[ix]
jx := kx
atmp := ap[offset-i : offset]
for _, v := range atmp {
x[jx] += v * xix
jx += incX
}
if nonUnit {
x[ix] *= ap[offset]
}
ix += incX
offset += i + 2
}
}
// Stbsv solves one of the systems of equations
// A * x = b if tA == blas.NoTrans
// A^T * x = b if tA == blas.Trans or tA == blas.ConjTrans
// where A is an n×n triangular band matrix with k+1 diagonals,
// and x and b are vectors.
//
// At entry to the function, x contains the values of b, and the result is
// stored in-place into x.
//
// No test for singularity or near-singularity is included in this
// routine. Such tests must be performed before calling this routine.
//
// Float32 implementations are autogenerated and not directly tested.
func (Implementation) Stbsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n, k int, a []float32, lda int, x []float32, incX int) {
if ul != blas.Lower && ul != blas.Upper {
panic(badUplo)
}
if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans {
panic(badTranspose)
}
if d != blas.NonUnit && d != blas.Unit {
panic(badDiag)
}
if n < 0 {
panic(nLT0)
}
if k < 0 {
panic(kLT0)
}
if lda < k+1 {
panic(badLdA)
}
if incX == 0 {
panic(zeroIncX)
}
// Quick return if possible.
if n == 0 {
return
}
// For zero matrix size the following slice length checks are trivially satisfied.
if len(a) < lda*(n-1)+k+1 {
panic(shortA)
}
if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) {
panic(shortX)
}
var kx int
if incX < 0 {
kx = -(n - 1) * incX
}
nonUnit := d == blas.NonUnit
// Form x = A^-1 x.
// Several cases below use subslices for speed improvement.
// The incX != 1 cases usually do not because incX may be negative.
if tA == blas.NoTrans {
if ul == blas.Upper {
if incX == 1 {
for i := n - 1; i >= 0; i-- {
bands := k
if i+bands >= n {
bands = n - i - 1
}
atmp := a[i*lda+1:]
xtmp := x[i+1 : i+bands+1]
var sum float32
for j, v := range xtmp {
sum += v * atmp[j]
}
x[i] -= sum
if nonUnit {
x[i] /= a[i*lda]
}
}
return
}
ix := kx + (n-1)*incX
for i := n - 1; i >= 0; i-- {
max := k + 1
if i+max > n {
max = n - i
}
atmp := a[i*lda:]
var (
jx int
sum float32
)
for j := 1; j < max; j++ {
jx += incX
sum += x[ix+jx] * atmp[j]
}
x[ix] -= sum
if nonUnit {
x[ix] /= atmp[0]
}
ix -= incX
}
return
}
if incX == 1 {
for i := 0; i < n; i++ {
bands := k
if i-k < 0 {
bands = i
}
atmp := a[i*lda+k-bands:]
xtmp := x[i-bands : i]
var sum float32
for j, v := range xtmp {
sum += v * atmp[j]
}
x[i] -= sum
if nonUnit {
x[i] /= atmp[bands]
}
}
return
}
ix := kx
for i := 0; i < n; i++ {
bands := k
if i-k < 0 {
bands = i
}
atmp := a[i*lda+k-bands:]
var (
sum float32
jx int
)
for j := 0; j < bands; j++ {
sum += x[ix-bands*incX+jx] * atmp[j]
jx += incX
}
x[ix] -= sum
if nonUnit {
x[ix] /= atmp[bands]
}
ix += incX
}
return
}
// Cases where a is transposed.
if ul == blas.Upper {
if incX == 1 {
for i := 0; i < n; i++ {
bands := k
if i-k < 0 {
bands = i
}
var sum float32
for j := 0; j < bands; j++ {
sum += x[i-bands+j] * a[(i-bands+j)*lda+bands-j]
}
x[i] -= sum
if nonUnit {
x[i] /= a[i*lda]
}
}
return
}
ix := kx
for i := 0; i < n; i++ {
bands := k
if i-k < 0 {
bands = i
}
var (
sum float32
jx int
)
for j := 0; j < bands; j++ {
sum += x[ix-bands*incX+jx] * a[(i-bands+j)*lda+bands-j]
jx += incX
}
x[ix] -= sum
if nonUnit {
x[ix] /= a[i*lda]
}
ix += incX
}
return
}
if incX == 1 {
for i := n - 1; i >= 0; i-- {
bands := k
if i+bands >= n {
bands = n - i - 1
}
var sum float32
xtmp := x[i+1 : i+1+bands]
for j, v := range xtmp {
sum += v * a[(i+j+1)*lda+k-j-1]
}
x[i] -= sum
if nonUnit {
x[i] /= a[i*lda+k]
}
}
return
}
ix := kx + (n-1)*incX
for i := n - 1; i >= 0; i-- {
bands := k
if i+bands >= n {
bands = n - i - 1
}
var (
sum float32
jx int
)
for j := 0; j < bands; j++ {
sum += x[ix+jx+incX] * a[(i+j+1)*lda+k-j-1]
jx += incX
}
x[ix] -= sum
if nonUnit {
x[ix] /= a[i*lda+k]
}
ix -= incX
}
}
// Ssbmv performs the matrix-vector operation
// y = alpha * A * x + beta * y
// where A is an n×n symmetric band matrix with k super-diagonals, x and y are
// vectors, and alpha and beta are scalars.
//
// Float32 implementations are autogenerated and not directly tested.
func (Implementation) Ssbmv(ul blas.Uplo, n, k int, alpha float32, a []float32, lda int, x []float32, incX int, beta float32, y []float32, incY int) {
if ul != blas.Lower && ul != blas.Upper {
panic(badUplo)
}
if n < 0 {
panic(nLT0)
}
if k < 0 {
panic(kLT0)
}
if lda < k+1 {
panic(badLdA)
}
if incX == 0 {
panic(zeroIncX)
}
if incY == 0 {
panic(zeroIncY)
}
// Quick return if possible.
if n == 0 {
return
}
// For zero matrix size the following slice length checks are trivially satisfied.
if len(a) < lda*(n-1)+k+1 {
panic(shortA)
}
if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) {
panic(shortX)
}
if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) {
panic(shortY)
}
// Quick return if possible.
if alpha == 0 && beta == 1 {
return
}
// Set up indexes
lenX := n
lenY := n
var kx, ky int
if incX < 0 {
kx = -(lenX - 1) * incX
}
if incY < 0 {
ky = -(lenY - 1) * incY
}
// Form y = beta * y.
if beta != 1 {
if incY == 1 {
if beta == 0 {
for i := range y[:n] {
y[i] = 0
}
} else {
f32.ScalUnitary(beta, y[:n])
}
} else {
iy := ky
if beta == 0 {
for i := 0; i < n; i++ {
y[iy] = 0
iy += incY
}
} else {
if incY > 0 {
f32.ScalInc(beta, y, uintptr(n), uintptr(incY))
} else {
f32.ScalInc(beta, y, uintptr(n), uintptr(-incY))
}
}
}
}
if alpha == 0 {
return
}
if ul == blas.Upper {
if incX == 1 {
iy := ky
for i := 0; i < n; i++ {
atmp := a[i*lda:]
tmp := alpha * x[i]
sum := tmp * atmp[0]
u := min(k, n-i-1)
jy := incY
for j := 1; j <= u; j++ {
v := atmp[j]
sum += alpha * x[i+j] * v
y[iy+jy] += tmp * v
jy += incY
}
y[iy] += sum
iy += incY
}
return
}
ix := kx
iy := ky
for i := 0; i < n; i++ {
atmp := a[i*lda:]
tmp := alpha * x[ix]
sum := tmp * atmp[0]
u := min(k, n-i-1)
jx := incX
jy := incY
for j := 1; j <= u; j++ {
v := atmp[j]
sum += alpha * x[ix+jx] * v
y[iy+jy] += tmp * v
jx += incX
jy += incY
}
y[iy] += sum
ix += incX
iy += incY
}
return
}
// Casses where a has bands below the diagonal.
if incX == 1 {
iy := ky
for i := 0; i < n; i++ {
l := max(0, k-i)
tmp := alpha * x[i]
jy := l * incY
atmp := a[i*lda:]
for j := l; j < k; j++ {
v := atmp[j]
y[iy] += alpha * v * x[i-k+j]
y[iy-k*incY+jy] += tmp * v
jy += incY
}
y[iy] += tmp * atmp[k]
iy += incY
}
return
}
ix := kx
iy := ky
for i := 0; i < n; i++ {
l := max(0, k-i)
tmp := alpha * x[ix]
jx := l * incX
jy := l * incY
atmp := a[i*lda:]
for j := l; j < k; j++ {
v := atmp[j]
y[iy] += alpha * v * x[ix-k*incX+jx]
y[iy-k*incY+jy] += tmp * v
jx += incX
jy += incY
}
y[iy] += tmp * atmp[k]
ix += incX
iy += incY
}
}
// Ssyr performs the symmetric rank-one update
// A += alpha * x * x^T
// where A is an n×n symmetric matrix, and x is a vector.
//
// Float32 implementations are autogenerated and not directly tested.
func (Implementation) Ssyr(ul blas.Uplo, n int, alpha float32, x []float32, incX int, a []float32, lda int) {
if ul != blas.Lower && ul != blas.Upper {
panic(badUplo)
}
if n < 0 {
panic(nLT0)
}
if lda < max(1, n) {
panic(badLdA)
}
if incX == 0 {
panic(zeroIncX)
}
// Quick return if possible.
if n == 0 {
return
}
// For zero matrix size the following slice length checks are trivially satisfied.
if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) {
panic(shortX)
}
if len(a) < lda*(n-1)+n {
panic(shortA)
}
// Quick return if possible.
if alpha == 0 {
return
}
lenX := n
var kx int
if incX < 0 {
kx = -(lenX - 1) * incX
}
if ul == blas.Upper {
if incX == 1 {
for i := 0; i < n; i++ {
tmp := x[i] * alpha
if tmp != 0 {
atmp := a[i*lda+i : i*lda+n]
xtmp := x[i:n]
for j, v := range xtmp {
atmp[j] += v * tmp
}
}
}
return
}
ix := kx
for i := 0; i < n; i++ {
tmp := x[ix] * alpha
if tmp != 0 {
jx := ix
atmp := a[i*lda:]
for j := i; j < n; j++ {
atmp[j] += x[jx] * tmp
jx += incX
}
}
ix += incX
}
return
}
// Cases where a is lower triangular.
if incX == 1 {
for i := 0; i < n; i++ {
tmp := x[i] * alpha
if tmp != 0 {
atmp := a[i*lda:]
xtmp := x[:i+1]
for j, v := range xtmp {
atmp[j] += tmp * v
}
}
}
return
}
ix := kx
for i := 0; i < n; i++ {
tmp := x[ix] * alpha
if tmp != 0 {
atmp := a[i*lda:]
jx := kx
for j := 0; j < i+1; j++ {
atmp[j] += tmp * x[jx]
jx += incX
}
}
ix += incX
}
}
// Ssyr2 performs the symmetric rank-two update
// A += alpha * x * y^T + alpha * y * x^T
// where A is an n×n symmetric matrix, x and y are vectors, and alpha is a scalar.
//
// Float32 implementations are autogenerated and not directly tested.
func (Implementation) Ssyr2(ul blas.Uplo, n int, alpha float32, x []float32, incX int, y []float32, incY int, a []float32, lda int) {
if ul != blas.Lower && ul != blas.Upper {
panic(badUplo)
}
if n < 0 {
panic(nLT0)
}
if lda < max(1, n) {
panic(badLdA)
}
if incX == 0 {
panic(zeroIncX)
}
if incY == 0 {
panic(zeroIncY)
}
// Quick return if possible.
if n == 0 {
return
}
// For zero matrix size the following slice length checks are trivially satisfied.
if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) {
panic(shortX)
}
if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) {
panic(shortY)
}
if len(a) < lda*(n-1)+n {
panic(shortA)
}
// Quick return if possible.
if alpha == 0 {
return
}
var ky, kx int
if incY < 0 {
ky = -(n - 1) * incY
}
if incX < 0 {
kx = -(n - 1) * incX
}
if ul == blas.Upper {
if incX == 1 && incY == 1 {
for i := 0; i < n; i++ {
xi := x[i]
yi := y[i]
atmp := a[i*lda:]
for j := i; j < n; j++ {
atmp[j] += alpha * (xi*y[j] + x[j]*yi)
}
}
return
}
ix := kx
iy := ky
for i := 0; i < n; i++ {
jx := kx + i*incX
jy := ky + i*incY
xi := x[ix]
yi := y[iy]
atmp := a[i*lda:]
for j := i; j < n; j++ {
atmp[j] += alpha * (xi*y[jy] + x[jx]*yi)
jx += incX
jy += incY
}
ix += incX
iy += incY
}
return
}
if incX == 1 && incY == 1 {
for i := 0; i < n; i++ {
xi := x[i]
yi := y[i]
atmp := a[i*lda:]
for j := 0; j <= i; j++ {
atmp[j] += alpha * (xi*y[j] + x[j]*yi)
}
}
return
}
ix := kx
iy := ky
for i := 0; i < n; i++ {
jx := kx
jy := ky
xi := x[ix]
yi := y[iy]
atmp := a[i*lda:]
for j := 0; j <= i; j++ {
atmp[j] += alpha * (xi*y[jy] + x[jx]*yi)
jx += incX
jy += incY
}
ix += incX
iy += incY
}
}
// Stpsv solves one of the systems of equations
// A * x = b if tA == blas.NoTrans
// A^T * x = b if tA == blas.Trans or blas.ConjTrans
// where A is an n×n triangular matrix in packed format, and x and b are vectors.
//
// At entry to the function, x contains the values of b, and the result is
// stored in-place into x.
//
// No test for singularity or near-singularity is included in this
// routine. Such tests must be performed before calling this routine.
//
// Float32 implementations are autogenerated and not directly tested.
func (Implementation) Stpsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, ap []float32, x []float32, incX int) {
if ul != blas.Lower && ul != blas.Upper {
panic(badUplo)
}
if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans {
panic(badTranspose)
}
if d != blas.NonUnit && d != blas.Unit {
panic(badDiag)
}
if n < 0 {
panic(nLT0)
}
if incX == 0 {
panic(zeroIncX)
}
// Quick return if possible.
if n == 0 {
return
}
// For zero matrix size the following slice length checks are trivially satisfied.
if len(ap) < n*(n+1)/2 {
panic(shortAP)
}
if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) {
panic(shortX)
}
var kx int
if incX < 0 {
kx = -(n - 1) * incX
}
nonUnit := d == blas.NonUnit
var offset int // Offset is the index of (i,i)
if tA == blas.NoTrans {
if ul == blas.Upper {
offset = n*(n+1)/2 - 1
if incX == 1 {
for i := n - 1; i >= 0; i-- {
atmp := ap[offset+1 : offset+n-i]
xtmp := x[i+1:]
var sum float32
for j, v := range atmp {
sum += v * xtmp[j]
}
x[i] -= sum
if nonUnit {
x[i] /= ap[offset]
}
offset -= n - i + 1
}
return
}
ix := kx + (n-1)*incX
for i := n - 1; i >= 0; i-- {
atmp := ap[offset+1 : offset+n-i]
jx := kx + (i+1)*incX
var sum float32
for _, v := range atmp {
sum += v * x[jx]
jx += incX
}
x[ix] -= sum
if nonUnit {
x[ix] /= ap[offset]
}
ix -= incX
offset -= n - i + 1
}
return
}
if incX == 1 {
for i := 0; i < n; i++ {
atmp := ap[offset-i : offset]
var sum float32
for j, v := range atmp {
sum += v * x[j]
}
x[i] -= sum
if nonUnit {
x[i] /= ap[offset]
}
offset += i + 2
}
return
}
ix := kx
for i := 0; i < n; i++ {
jx := kx
atmp := ap[offset-i : offset]
var sum float32
for _, v := range atmp {
sum += v * x[jx]
jx += incX
}
x[ix] -= sum
if nonUnit {
x[ix] /= ap[offset]
}
ix += incX
offset += i + 2
}
return
}
// Cases where ap is transposed.
if ul == blas.Upper {
if incX == 1 {
for i := 0; i < n; i++ {
if nonUnit {
x[i] /= ap[offset]
}
xi := x[i]
atmp := ap[offset+1 : offset+n-i]
xtmp := x[i+1:]
for j, v := range atmp {
xtmp[j] -= v * xi
}
offset += n - i
}
return
}
ix := kx
for i := 0; i < n; i++ {
if nonUnit {
x[ix] /= ap[offset]
}
xix := x[ix]
atmp := ap[offset+1 : offset+n-i]
jx := kx + (i+1)*incX
for _, v := range atmp {
x[jx] -= v * xix
jx += incX
}
ix += incX
offset += n - i
}
return
}
if incX == 1 {
offset = n*(n+1)/2 - 1
for i := n - 1; i >= 0; i-- {
if nonUnit {
x[i] /= ap[offset]
}
xi := x[i]
atmp := ap[offset-i : offset]
for j, v := range atmp {
x[j] -= v * xi
}
offset -= i + 1
}
return
}
ix := kx + (n-1)*incX
offset = n*(n+1)/2 - 1
for i := n - 1; i >= 0; i-- {
if nonUnit {
x[ix] /= ap[offset]
}
xix := x[ix]
atmp := ap[offset-i : offset]
jx := kx
for _, v := range atmp {
x[jx] -= v * xix
jx += incX
}
ix -= incX
offset -= i + 1
}
}
// Sspmv performs the matrix-vector operation
// y = alpha * A * x + beta * y
// where A is an n×n symmetric matrix in packed format, x and y are vectors,
// and alpha and beta are scalars.
//
// Float32 implementations are autogenerated and not directly tested.
func (Implementation) Sspmv(ul blas.Uplo, n int, alpha float32, ap []float32, x []float32, incX int, beta float32, y []float32, incY int) {
if ul != blas.Lower && ul != blas.Upper {
panic(badUplo)
}
if n < 0 {
panic(nLT0)
}
if incX == 0 {
panic(zeroIncX)
}
if incY == 0 {
panic(zeroIncY)
}
// Quick return if possible.
if n == 0 {
return
}
// For zero matrix size the following slice length checks are trivially satisfied.
if len(ap) < n*(n+1)/2 {
panic(shortAP)
}
if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) {
panic(shortX)
}
if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) {
panic(shortY)
}
// Quick return if possible.
if alpha == 0 && beta == 1 {
return
}
// Set up start points
var kx, ky int
if incX < 0 {
kx = -(n - 1) * incX
}
if incY < 0 {
ky = -(n - 1) * incY
}
// Form y = beta * y.
if beta != 1 {
if incY == 1 {
if beta == 0 {
for i := range y[:n] {
y[i] = 0
}
} else {
f32.ScalUnitary(beta, y[:n])
}
} else {
iy := ky
if beta == 0 {
for i := 0; i < n; i++ {
y[iy] = 0
iy += incY
}
} else {
if incY > 0 {
f32.ScalInc(beta, y, uintptr(n), uintptr(incY))
} else {
f32.ScalInc(beta, y, uintptr(n), uintptr(-incY))
}
}
}
}
if alpha == 0 {
return
}
if n == 1 {
y[0] += alpha * ap[0] * x[0]
return
}
var offset int // Offset is the index of (i,i).
if ul == blas.Upper {
if incX == 1 {
iy := ky
for i := 0; i < n; i++ {
xv := x[i] * alpha
sum := ap[offset] * x[i]
atmp := ap[offset+1 : offset+n-i]
xtmp := x[i+1:]
jy := ky + (i+1)*incY
for j, v := range atmp {
sum += v * xtmp[j]
y[jy] += v * xv
jy += incY
}
y[iy] += alpha * sum
iy += incY
offset += n - i
}
return
}
ix := kx
iy := ky
for i := 0; i < n; i++ {
xv := x[ix] * alpha
sum := ap[offset] * x[ix]
atmp := ap[offset+1 : offset+n-i]
jx := kx + (i+1)*incX
jy := ky + (i+1)*incY
for _, v := range atmp {
sum += v * x[jx]
y[jy] += v * xv
jx += incX
jy += incY
}
y[iy] += alpha * sum
ix += incX
iy += incY
offset += n - i
}
return
}
if incX == 1 {
iy := ky
for i := 0; i < n; i++ {
xv := x[i] * alpha
atmp := ap[offset-i : offset]
jy := ky
var sum float32
for j, v := range atmp {
sum += v * x[j]
y[jy] += v * xv
jy += incY
}
sum += ap[offset] * x[i]
y[iy] += alpha * sum
iy += incY
offset += i + 2
}
return
}
ix := kx
iy := ky
for i := 0; i < n; i++ {
xv := x[ix] * alpha
atmp := ap[offset-i : offset]
jx := kx
jy := ky
var sum float32
for _, v := range atmp {
sum += v * x[jx]
y[jy] += v * xv
jx += incX
jy += incY
}
sum += ap[offset] * x[ix]
y[iy] += alpha * sum
ix += incX
iy += incY
offset += i + 2
}
}
// Sspr performs the symmetric rank-one operation
// A += alpha * x * x^T
// where A is an n×n symmetric matrix in packed format, x is a vector, and
// alpha is a scalar.
//
// Float32 implementations are autogenerated and not directly tested.
func (Implementation) Sspr(ul blas.Uplo, n int, alpha float32, x []float32, incX int, ap []float32) {
if ul != blas.Lower && ul != blas.Upper {
panic(badUplo)
}
if n < 0 {
panic(nLT0)
}
if incX == 0 {
panic(zeroIncX)
}
// Quick return if possible.
if n == 0 {
return
}
// For zero matrix size the following slice length checks are trivially satisfied.
if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) {
panic(shortX)
}
if len(ap) < n*(n+1)/2 {
panic(shortAP)
}
// Quick return if possible.
if alpha == 0 {
return
}
lenX := n
var kx int
if incX < 0 {
kx = -(lenX - 1) * incX
}
var offset int // Offset is the index of (i,i).
if ul == blas.Upper {
if incX == 1 {
for i := 0; i < n; i++ {
atmp := ap[offset:]
xv := alpha * x[i]
xtmp := x[i:n]
for j, v := range xtmp {
atmp[j] += xv * v
}
offset += n - i
}
return
}
ix := kx
for i := 0; i < n; i++ {
jx := kx + i*incX
atmp := ap[offset:]
xv := alpha * x[ix]
for j := 0; j < n-i; j++ {
atmp[j] += xv * x[jx]
jx += incX
}
ix += incX
offset += n - i
}
return
}
if incX == 1 {
for i := 0; i < n; i++ {
atmp := ap[offset-i:]
xv := alpha * x[i]
xtmp := x[:i+1]
for j, v := range xtmp {
atmp[j] += xv * v
}
offset += i + 2
}
return
}
ix := kx
for i := 0; i < n; i++ {
jx := kx
atmp := ap[offset-i:]
xv := alpha * x[ix]
for j := 0; j <= i; j++ {
atmp[j] += xv * x[jx]
jx += incX
}
ix += incX
offset += i + 2
}
}
// Sspr2 performs the symmetric rank-2 update
// A += alpha * x * y^T + alpha * y * x^T
// where A is an n×n symmetric matrix in packed format, x and y are vectors,
// and alpha is a scalar.
//
// Float32 implementations are autogenerated and not directly tested.
func (Implementation) Sspr2(ul blas.Uplo, n int, alpha float32, x []float32, incX int, y []float32, incY int, ap []float32) {
if ul != blas.Lower && ul != blas.Upper {
panic(badUplo)
}
if n < 0 {
panic(nLT0)
}
if incX == 0 {
panic(zeroIncX)
}
if incY == 0 {
panic(zeroIncY)
}
// Quick return if possible.
if n == 0 {
return
}
// For zero matrix size the following slice length checks are trivially satisfied.
if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) {
panic(shortX)
}
if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) {
panic(shortY)
}
if len(ap) < n*(n+1)/2 {
panic(shortAP)
}
// Quick return if possible.
if alpha == 0 {
return
}
var ky, kx int
if incY < 0 {
ky = -(n - 1) * incY
}
if incX < 0 {
kx = -(n - 1) * incX
}
var offset int // Offset is the index of (i,i).
if ul == blas.Upper {
if incX == 1 && incY == 1 {
for i := 0; i < n; i++ {
atmp := ap[offset:]
xi := x[i]
yi := y[i]
xtmp := x[i:n]
ytmp := y[i:n]
for j, v := range xtmp {
atmp[j] += alpha * (xi*ytmp[j] + v*yi)
}
offset += n - i
}
return
}
ix := kx
iy := ky
for i := 0; i < n; i++ {
jx := kx + i*incX
jy := ky + i*incY
atmp := ap[offset:]
xi := x[ix]
yi := y[iy]
for j := 0; j < n-i; j++ {
atmp[j] += alpha * (xi*y[jy] + x[jx]*yi)
jx += incX
jy += incY
}
ix += incX
iy += incY
offset += n - i
}
return
}
if incX == 1 && incY == 1 {
for i := 0; i < n; i++ {
atmp := ap[offset-i:]
xi := x[i]
yi := y[i]
xtmp := x[:i+1]
for j, v := range xtmp {
atmp[j] += alpha * (xi*y[j] + v*yi)
}
offset += i + 2
}
return
}
ix := kx
iy := ky
for i := 0; i < n; i++ {
jx := kx
jy := ky
atmp := ap[offset-i:]
for j := 0; j <= i; j++ {
atmp[j] += alpha * (x[ix]*y[jy] + x[jx]*y[iy])
jx += incX
jy += incY
}
ix += incX
iy += incY
offset += i + 2
}
}