Browse Source

add packages

pull/496/head
fatedier 7 years ago
parent
commit
9e0fd0c4ef
  1. 30
      Godeps/Godeps.json
  2. 16
      client/proxy.go
  3. 50
      client/vistor.go
  4. 3
      vendor/github.com/fatedier/beego/logs/console.go
  5. 24
      vendor/github.com/fatedier/kcp-go/.gitignore
  6. 15
      vendor/github.com/fatedier/kcp-go/.travis.yml
  7. 22
      vendor/github.com/fatedier/kcp-go/LICENSE
  8. 172
      vendor/github.com/fatedier/kcp-go/README.md
  9. 288
      vendor/github.com/fatedier/kcp-go/crypt.go
  10. BIN
      vendor/github.com/fatedier/kcp-go/donate.png
  11. 303
      vendor/github.com/fatedier/kcp-go/fec.go
  12. BIN
      vendor/github.com/fatedier/kcp-go/frame.png
  13. BIN
      vendor/github.com/fatedier/kcp-go/kcp-go.png
  14. 998
      vendor/github.com/fatedier/kcp-go/kcp.go
  15. 975
      vendor/github.com/fatedier/kcp-go/sess.go
  16. 164
      vendor/github.com/fatedier/kcp-go/snmp.go
  17. 105
      vendor/github.com/fatedier/kcp-go/updater.go
  18. 110
      vendor/github.com/fatedier/kcp-go/xor.go
  19. 14
      vendor/github.com/templexxx/cpufeat/.gitignore
  20. 27
      vendor/github.com/templexxx/cpufeat/LICENSE
  21. 32
      vendor/github.com/templexxx/cpufeat/cpu.go
  22. 7
      vendor/github.com/templexxx/cpufeat/cpu_arm.go
  23. 7
      vendor/github.com/templexxx/cpufeat/cpu_arm64.go
  24. 7
      vendor/github.com/templexxx/cpufeat/cpu_mips.go
  25. 7
      vendor/github.com/templexxx/cpufeat/cpu_mips64.go
  26. 7
      vendor/github.com/templexxx/cpufeat/cpu_mips64le.go
  27. 7
      vendor/github.com/templexxx/cpufeat/cpu_mipsle.go
  28. 7
      vendor/github.com/templexxx/cpufeat/cpu_ppc64.go
  29. 7
      vendor/github.com/templexxx/cpufeat/cpu_ppc64le.go
  30. 7
      vendor/github.com/templexxx/cpufeat/cpu_s390x.go
  31. 59
      vendor/github.com/templexxx/cpufeat/cpu_x86.go
  32. 32
      vendor/github.com/templexxx/cpufeat/cpu_x86.s
  33. 40
      vendor/github.com/templexxx/reedsolomon/.gitignore
  34. 9
      vendor/github.com/templexxx/reedsolomon/.travis.yml
  35. 23
      vendor/github.com/templexxx/reedsolomon/LICENSE
  36. 109
      vendor/github.com/templexxx/reedsolomon/README.md
  37. 156
      vendor/github.com/templexxx/reedsolomon/matrix.go
  38. 280
      vendor/github.com/templexxx/reedsolomon/rs.go
  39. 868
      vendor/github.com/templexxx/reedsolomon/rs_amd64.go
  40. 401
      vendor/github.com/templexxx/reedsolomon/rs_amd64.s
  41. 8
      vendor/github.com/templexxx/reedsolomon/rs_other.go
  42. 44
      vendor/github.com/templexxx/reedsolomon/tbl.go
  43. 1
      vendor/github.com/templexxx/xor/.gitattributes
  44. 18
      vendor/github.com/templexxx/xor/.gitignore
  45. 21
      vendor/github.com/templexxx/xor/LICENSE
  46. 48
      vendor/github.com/templexxx/xor/README.md
  47. 438
      vendor/github.com/templexxx/xor/avx2_amd64.s
  48. 116
      vendor/github.com/templexxx/xor/nosimd.go
  49. 574
      vendor/github.com/templexxx/xor/sse2_amd64.s
  50. 49
      vendor/github.com/templexxx/xor/xor.go
  51. 120
      vendor/github.com/templexxx/xor/xor_amd64.go
  52. 19
      vendor/github.com/templexxx/xor/xor_other.go
  53. 201
      vendor/github.com/tjfoc/gmsm/LICENSE
  54. 291
      vendor/github.com/tjfoc/gmsm/sm4/sm4.go

30
Godeps/Godeps.json generated vendored

@ -1,6 +1,6 @@
{
"ImportPath": "github.com/fatedier/frp",
"GoVersion": "go1.8",
"GoVersion": "go1.9",
"GodepVersion": "v79",
"Packages": [
"./..."
@ -22,8 +22,13 @@
},
{
"ImportPath": "github.com/fatedier/beego/logs",
"Comment": "v1.7.2-72-gf73c369",
"Rev": "f73c3692bbd70a83728cb59b2c0423ff95e4ecea"
"Comment": "v1.7.2-73-g6c6a4f5",
"Rev": "6c6a4f5bd5eb5a39f7e289b8f345b55f75e7e3e8"
},
{
"ImportPath": "github.com/fatedier/kcp-go",
"Comment": "v3.15-35-gcd167d2",
"Rev": "cd167d2f15f451b0f33780ce862fca97adc0331e"
},
{
"ImportPath": "github.com/golang/snappy",
@ -64,6 +69,25 @@
"Comment": "v1.1.4-25-g2402e8e",
"Rev": "2402e8e7a02fc811447d11f881aa9746cdc57983"
},
{
"ImportPath": "github.com/templexxx/cpufeat",
"Rev": "3794dfbfb04749f896b521032f69383f24c3687e"
},
{
"ImportPath": "github.com/templexxx/reedsolomon",
"Comment": "0.1.1-4-g7092926",
"Rev": "7092926d7d05c415fabb892b1464a03f8228ab80"
},
{
"ImportPath": "github.com/templexxx/xor",
"Comment": "0.1.2",
"Rev": "0af8e873c554da75f37f2049cdffda804533d44c"
},
{
"ImportPath": "github.com/tjfoc/gmsm/sm4",
"Comment": "v1.0-42-g21d76de",
"Rev": "21d76dee237dbbc8dfe1510000b9bf2733635aa1"
},
{
"ImportPath": "github.com/vaughan0/go-ini",
"Rev": "a98ad7ee00ec53921f08832bc06ecf7fd600e6a1"

16
client/proxy.go

@ -15,6 +15,7 @@
package client
import (
"bytes"
"fmt"
"io"
"net"
@ -29,6 +30,7 @@ import (
frpIo "github.com/fatedier/frp/utils/io"
"github.com/fatedier/frp/utils/log"
frpNet "github.com/fatedier/frp/utils/net"
"github.com/fatedier/frp/utils/pool"
)
// Proxy defines how to deal with work connections for different proxy type.
@ -248,16 +250,24 @@ func (pxy *XtcpProxy) InWorkConn(conn frpNet.Conn) {
return
}
// Wait for client address at most 10 seconds.
// Wait for client address at most 5 seconds.
var natHoleRespMsg msg.NatHoleResp
clientConn.SetReadDeadline(time.Now().Add(10 * time.Second))
err = msg.ReadMsgInto(clientConn, &natHoleRespMsg)
clientConn.SetReadDeadline(time.Now().Add(5 * time.Second))
buf := pool.GetBuf(1024)
n, err := clientConn.Read(buf)
if err != nil {
pxy.Error("get natHoleRespMsg error: %v", err)
return
}
err = msg.ReadMsgInto(bytes.NewReader(buf[:n]), &natHoleRespMsg)
if err != nil {
pxy.Error("get natHoleRespMsg error: %v", err)
return
}
clientConn.SetReadDeadline(time.Time{})
clientConn.Close()
pxy.Trace("get natHoleRespMsg, sid [%s], client address [%s]", natHoleRespMsg.Sid, natHoleRespMsg.ClientAddr)
// Send sid to vistor udp address.
time.Sleep(time.Second)

50
client/vistor.go

@ -15,9 +15,11 @@
package client
import (
"bytes"
"fmt"
"io"
"net"
"strconv"
"strings"
"sync"
"time"
@ -214,47 +216,67 @@ func (sv *XtcpVistor) handleConn(userConn frpNet.Conn) {
}
// Wait for client address at most 10 seconds.
var natHoleResp msg.NatHoleResp
var natHoleRespMsg msg.NatHoleResp
vistorConn.SetReadDeadline(time.Now().Add(10 * time.Second))
err = msg.ReadMsgInto(vistorConn, &natHoleResp)
buf := pool.GetBuf(1024)
n, err := vistorConn.Read(buf)
if err != nil {
sv.Warn("get natHoleRespMsg error: %v", err)
return
}
err = msg.ReadMsgInto(bytes.NewReader(buf[:n]), &natHoleRespMsg)
if err != nil {
sv.Warn("get natHoleRespMsg error: %v", err)
return
}
vistorConn.SetReadDeadline(time.Time{})
pool.PutBuf(buf)
sv.Trace("get natHoleRespMsg, sid [%s], client address [%s]", natHoleRespMsg.Sid, natHoleRespMsg.ClientAddr)
// Close vistorConn, so we can use its local address.
vistorConn.Close()
// Send detect message for all ports of client in case different NAT type.
array := strings.Split(natHoleResp.ClientAddr, ":")
if len(array) <= 0 {
sv.Error("get natHoleResp client address error: %s", natHoleResp.ClientAddr)
// Send detect message.
array := strings.Split(natHoleRespMsg.ClientAddr, ":")
if len(array) <= 1 {
sv.Error("get natHoleResp client address error: %s", natHoleRespMsg.ClientAddr)
return
}
laddr, _ := net.ResolveUDPAddr("udp", vistorConn.LocalAddr().String())
for i := 1000; i < 65000; i++ {
sv.sendDetectMsg(array[0], int64(i), laddr)
/*
for i := 1000; i < 65000; i++ {
sv.sendDetectMsg(array[0], int64(i), laddr, "a")
}
*/
port, err := strconv.ParseInt(array[1], 10, 64)
if err != nil {
sv.Error("get natHoleResp client address error: %s", natHoleRespMsg.ClientAddr)
return
}
sv.sendDetectMsg(array[0], int64(port), laddr, []byte(natHoleRespMsg.Sid))
sv.Trace("send all detect msg done")
// Listen for vistorConn's address and wait for client connection.
lConn, _ := net.ListenUDP("udp", laddr)
lConn.SetReadDeadline(time.Now().Add(10 * time.Second))
lConn.SetReadDeadline(time.Now().Add(5 * time.Second))
sidBuf := pool.GetBuf(1024)
n, _, err := lConn.ReadFromUDP(sidBuf)
n, _, err = lConn.ReadFromUDP(sidBuf)
if err != nil {
sv.Warn("get sid from client error: %v", err)
return
}
lConn.SetReadDeadline(time.Time{})
if string(sidBuf[:n]) != natHoleResp.Sid {
if string(sidBuf[:n]) != natHoleRespMsg.Sid {
sv.Warn("incorrect sid from client")
return
}
sv.Info("nat hole connection make success, sid [%s]", string(sidBuf[:n]))
pool.PutBuf(sidBuf)
var remote io.ReadWriteCloser
remote, err = frpNet.NewKcpConnFromUdp(lConn, false, natHoleResp.ClientAddr)
remote, err = frpNet.NewKcpConnFromUdp(lConn, false, natHoleRespMsg.ClientAddr)
if err != nil {
sv.Error("create kcp connection from udp connection error: %v", err)
return
@ -275,7 +297,7 @@ func (sv *XtcpVistor) handleConn(userConn frpNet.Conn) {
frpIo.Join(userConn, remote)
}
func (sv *XtcpVistor) sendDetectMsg(addr string, port int64, laddr *net.UDPAddr) (err error) {
func (sv *XtcpVistor) sendDetectMsg(addr string, port int64, laddr *net.UDPAddr, content []byte) (err error) {
daddr, err := net.ResolveUDPAddr("udp", fmt.Sprintf("%s:%d", addr, port))
if err != nil {
return err
@ -289,7 +311,7 @@ func (sv *XtcpVistor) sendDetectMsg(addr string, port int64, laddr *net.UDPAddr)
uConn := ipv4.NewConn(tConn)
uConn.SetTTL(3)
tConn.Write([]byte(fmt.Sprintf("%d", port)))
tConn.Write(content)
tConn.Close()
return nil
}

3
vendor/github.com/fatedier/beego/logs/console.go generated vendored

@ -42,6 +42,7 @@ var colors = []brush{
newBrush("1;32"), // Notice green
newBrush("1;34"), // Informational blue
newBrush("1;34"), // Debug blue
newBrush("1;34"), // Trace blue
}
// consoleWriter implements LoggerInterface and writes messages to terminal.
@ -55,7 +56,7 @@ type consoleWriter struct {
func NewConsole() Logger {
cw := &consoleWriter{
lg: newLogWriter(os.Stdout),
Level: LevelDebug,
Level: LevelTrace,
Colorful: runtime.GOOS != "windows",
}
return cw

24
vendor/github.com/fatedier/kcp-go/.gitignore generated vendored

@ -0,0 +1,24 @@
# Compiled Object files, Static and Dynamic libs (Shared Objects)
*.o
*.a
*.so
# Folders
_obj
_test
# Architecture specific extensions/prefixes
*.[568vq]
[568vq].out
*.cgo1.go
*.cgo2.c
_cgo_defun.c
_cgo_gotypes.go
_cgo_export.*
_testmain.go
*.exe
*.test
*.prof

15
vendor/github.com/fatedier/kcp-go/.travis.yml generated vendored

@ -0,0 +1,15 @@
language: go
go:
- 1.9
before_install:
- go get -t -v ./...
install:
- go get github.com/xtaci/kcp-go
script:
- go test -coverprofile=coverage.txt -covermode=atomic -bench .
after_success:
- bash <(curl -s https://codecov.io/bash)

22
vendor/github.com/fatedier/kcp-go/LICENSE generated vendored

@ -0,0 +1,22 @@
The MIT License (MIT)
Copyright (c) 2015 Daniel Fu
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

172
vendor/github.com/fatedier/kcp-go/README.md generated vendored

@ -0,0 +1,172 @@
<img src="kcp-go.png" alt="kcp-go" height="50px" />
[![GoDoc][1]][2] [![Powered][9]][10] [![MIT licensed][11]][12] [![Build Status][3]][4] [![Go Report Card][5]][6] [![Coverage Status][7]][8]
[1]: https://godoc.org/github.com/xtaci/kcp-go?status.svg
[2]: https://godoc.org/github.com/xtaci/kcp-go
[3]: https://travis-ci.org/xtaci/kcp-go.svg?branch=master
[4]: https://travis-ci.org/xtaci/kcp-go
[5]: https://goreportcard.com/badge/github.com/xtaci/kcp-go
[6]: https://goreportcard.com/report/github.com/xtaci/kcp-go
[7]: https://codecov.io/gh/xtaci/kcp-go/branch/master/graph/badge.svg
[8]: https://codecov.io/gh/xtaci/kcp-go
[9]: https://img.shields.io/badge/KCP-Powered-blue.svg
[10]: https://github.com/skywind3000/kcp
[11]: https://img.shields.io/badge/license-MIT-blue.svg
[12]: LICENSE
## Introduction
**kcp-go** is a **Production-Grade Reliable-UDP** library for [golang](https://golang.org/).
It provides **fast, ordered and error-checked** delivery of streams over **UDP** packets, and has been well tested with the open-source project [kcptun](https://github.com/xtaci/kcptun). Millions of devices (from low-end MIPS routers to high-end servers) are running **kcp-go** at present, in applications such as **online games, live broadcasting, file synchronization and network acceleration**.
[Latest Release](https://github.com/xtaci/kcp-go/releases)
## Features
1. Optimized for **Realtime Online Games, Audio/Video Streaming and Latency-Sensitive Distributed Consensus**.
1. Compatible with [skywind3000's](https://github.com/skywind3000) C version with language specific optimizations.
1. **Cache friendly** and **Memory optimized** design, offers extremely **High Performance** core.
1. Handles **>5K concurrent connections** on a single commodity server.
1. Compatible with [net.Conn](https://golang.org/pkg/net/#Conn) and [net.Listener](https://golang.org/pkg/net/#Listener), a drop-in replacement for [net.TCPConn](https://golang.org/pkg/net/#TCPConn).
1. [FEC(Forward Error Correction)](https://en.wikipedia.org/wiki/Forward_error_correction) Support with [Reed-Solomon Codes](https://en.wikipedia.org/wiki/Reed%E2%80%93Solomon_error_correction)
1. Packet level encryption support with [AES](https://en.wikipedia.org/wiki/Advanced_Encryption_Standard), [TEA](https://en.wikipedia.org/wiki/Tiny_Encryption_Algorithm), [3DES](https://en.wikipedia.org/wiki/Triple_DES), [Blowfish](https://en.wikipedia.org/wiki/Blowfish_(cipher)), [Cast5](https://en.wikipedia.org/wiki/CAST-128), [Salsa20]( https://en.wikipedia.org/wiki/Salsa20), etc. in [CFB](https://en.wikipedia.org/wiki/Block_cipher_mode_of_operation#Cipher_Feedback_.28CFB.29) mode.
1. **Fixed number of goroutines** created for the entire server application, minimized goroutine context switch.
## Conventions
Control messages like **SYN/FIN/RST** in TCP **are not defined** in KCP, you need some **keepalive/heartbeat mechanism** in the application-level. A real world example is to use some **multiplexing** protocol over session, such as [smux](https://github.com/xtaci/smux)(with embedded keepalive mechanism), see [kcptun](https://github.com/xtaci/kcptun) for example.
## Documentation
For complete documentation, see the associated [Godoc](https://godoc.org/github.com/xtaci/kcp-go).
## Specification
<img src="frame.png" alt="Frame Format" height="109px" />
```
+-----------------+
| SESSION |
+-----------------+
| KCP(ARQ) |
+-----------------+
| FEC(OPTIONAL) |
+-----------------+
| CRYPTO(OPTIONAL)|
+-----------------+
| UDP(PACKET) |
+-----------------+
| IP |
+-----------------+
| LINK |
+-----------------+
| PHY |
+-----------------+
(LAYER MODEL OF KCP-GO)
```
## Usage
Client: [full demo](https://github.com/xtaci/kcptun/blob/master/client/main.go)
```go
kcpconn, err := kcp.DialWithOptions("192.168.0.1:10000", nil, 10, 3)
```
Server: [full demo](https://github.com/xtaci/kcptun/blob/master/server/main.go)
```go
lis, err := kcp.ListenWithOptions(":10000", nil, 10, 3)
```
## Performance
```
Model Name: MacBook Pro
Model Identifier: MacBookPro12,1
Processor Name: Intel Core i5
Processor Speed: 2.7 GHz
Number of Processors: 1
Total Number of Cores: 2
L2 Cache (per Core): 256 KB
L3 Cache: 3 MB
Memory: 8 GB
```
```
$ go test -v -run=^$ -bench .
beginning tests, encryption:salsa20, fec:10/3
BenchmarkAES128-4 200000 8256 ns/op 363.33 MB/s 0 B/op 0 allocs/op
BenchmarkAES192-4 200000 9153 ns/op 327.74 MB/s 0 B/op 0 allocs/op
BenchmarkAES256-4 200000 10079 ns/op 297.64 MB/s 0 B/op 0 allocs/op
BenchmarkTEA-4 100000 18643 ns/op 160.91 MB/s 0 B/op 0 allocs/op
BenchmarkXOR-4 5000000 316 ns/op 9486.46 MB/s 0 B/op 0 allocs/op
BenchmarkBlowfish-4 50000 35643 ns/op 84.17 MB/s 0 B/op 0 allocs/op
BenchmarkNone-4 30000000 56.2 ns/op 53371.83 MB/s 0 B/op 0 allocs/op
BenchmarkCast5-4 30000 44744 ns/op 67.05 MB/s 0 B/op 0 allocs/op
Benchmark3DES-4 2000 639839 ns/op 4.69 MB/s 2 B/op 0 allocs/op
BenchmarkTwofish-4 30000 43368 ns/op 69.17 MB/s 0 B/op 0 allocs/op
BenchmarkXTEA-4 30000 57673 ns/op 52.02 MB/s 0 B/op 0 allocs/op
BenchmarkSalsa20-4 300000 3917 ns/op 765.80 MB/s 0 B/op 0 allocs/op
BenchmarkFlush-4 10000000 226 ns/op 0 B/op 0 allocs/op
BenchmarkEchoSpeed4K-4 5000 300030 ns/op 13.65 MB/s 5672 B/op 177 allocs/op
BenchmarkEchoSpeed64K-4 500 3202335 ns/op 20.47 MB/s 73295 B/op 2198 allocs/op
BenchmarkEchoSpeed512K-4 50 24926924 ns/op 21.03 MB/s 659339 B/op 17602 allocs/op
BenchmarkEchoSpeed1M-4 20 64857821 ns/op 16.17 MB/s 1772437 B/op 42869 allocs/op
BenchmarkSinkSpeed4K-4 30000 50230 ns/op 81.54 MB/s 2058 B/op 48 allocs/op
BenchmarkSinkSpeed64K-4 2000 648718 ns/op 101.02 MB/s 31165 B/op 687 allocs/op
BenchmarkSinkSpeed256K-4 300 4635905 ns/op 113.09 MB/s 286229 B/op 5516 allocs/op
BenchmarkSinkSpeed1M-4 200 9566933 ns/op 109.60 MB/s 463771 B/op 10701 allocs/op
PASS
ok _/Users/xtaci/.godeps/src/github.com/xtaci/kcp-go 39.689s
```
## Design Considerations
1. slice vs. container/list
`kcp.flush()` loops through the send queue for retransmission checking for every 20ms(interval).
I've written a benchmark comparing a sequential loop through a *slice* and a *container/list* here:
https://github.com/xtaci/notes/blob/master/golang/benchmark2/cachemiss_test.go
```
BenchmarkLoopSlice-4 2000000000 0.39 ns/op
BenchmarkLoopList-4 100000000 54.6 ns/op
```
List structure introduces **heavy cache misses** compared to slice which owns better **locality**, 5000 connections with 32 window size and 20ms interval will cost 6us/0.03%(cpu) using slice, and 8.7ms/43.5%(cpu) for list for each `kcp.flush()`.
2. Timing accuracy vs. syscall clock_gettime
Timing is **critical** to **RTT estimator**, inaccurate timing introduces false retransmissions in KCP, but calling `time.Now()` costs 42 cycles(10.5ns on 4GHz CPU, 15.6ns on my MacBook Pro 2.7GHz), the benchmark for time.Now():
https://github.com/xtaci/notes/blob/master/golang/benchmark2/syscall_test.go
```
BenchmarkNow-4 100000000 15.6 ns/op
```
In kcp-go, after each `kcp.output()` function call, the current time is updated upon return, and each `kcp.flush()` gets the current time once. Most of the time, 5000 connections cost 5000 * 15.6ns = 78us (when no packet needs to be sent by `kcp.output()`); for a 10MB/s data transfer with a 1400 MTU, `kcp.output()` will be called around 7500 times and `time.Now()` will cost 117us in **every second**.
## Tuning
Q: I'm handling >5K connections on my server. the CPU utilization is high.
A: A standalone `agent` or `gate` server for kcp-go is suggested, not only to reduce CPU utilization, but also because it is important to the **precision** of RTT measurements, which indirectly affects retransmission. Increasing the update `interval` with `SetNoDelay`, e.g. `conn.SetNoDelay(1, 40, 1, 1)`, will dramatically reduce system load.
## Who is using this?
1. https://github.com/xtaci/kcptun -- A Secure Tunnel Based On KCP over UDP.
2. https://github.com/getlantern/lantern -- Lantern delivers fast access to the open Internet.
3. https://github.com/smallnest/rpcx -- A RPC service framework based on net/rpc like alibaba Dubbo and weibo Motan.
4. https://github.com/gonet2/agent -- A gateway for games with stream multiplexing.
5. https://github.com/syncthing/syncthing -- Open Source Continuous File Synchronization.
6. https://play.google.com/store/apps/details?id=com.k17game.k3 -- Battle Zone - Earth 2048, a world-wide strategy game.
## Links
1. https://github.com/xtaci/libkcp -- FEC enhanced KCP session library for iOS/Android in C++
2. https://github.com/skywind3000/kcp -- A Fast and Reliable ARQ Protocol
3. https://github.com/templexxx/reedsolomon -- Reed-Solomon Erasure Coding in Go

288
vendor/github.com/fatedier/kcp-go/crypt.go generated vendored

@ -0,0 +1,288 @@
package kcp
import (
"crypto/aes"
"crypto/cipher"
"crypto/des"
"crypto/sha1"
"github.com/templexxx/xor"
"github.com/tjfoc/gmsm/sm4"
"golang.org/x/crypto/blowfish"
"golang.org/x/crypto/cast5"
"golang.org/x/crypto/pbkdf2"
"golang.org/x/crypto/salsa20"
"golang.org/x/crypto/tea"
"golang.org/x/crypto/twofish"
"golang.org/x/crypto/xtea"
)
// Package-level constants used by the block-cipher helpers in this file.
var (
// initialVector is the fixed 16-byte block that encrypt and decrypt run
// through the cipher to derive the first keystream block.
initialVector = []byte{167, 115, 79, 156, 18, 172, 27, 1, 164, 21, 242, 193, 252, 120, 230, 107}
// saltxor is the salt handed to PBKDF2 by NewSimpleXORBlockCrypt.
saltxor = `sH3CIVoF#rWLtJo6`
)
// BlockCrypt defines encryption/decryption methods for a given byte slice.
// Notes on implementing: the data to be encrypted contains a builtin
// nonce at the first 16 bytes
//
// Every cipher constructor in this file (NewAESBlockCrypt, NewSalsa20BlockCrypt,
// NewNoneBlockCrypt, ...) returns a value of this interface type.
type BlockCrypt interface {
// Encrypt encrypts the whole block in src into dst.
// Dst and src may point at the same memory.
Encrypt(dst, src []byte)
// Decrypt decrypts the whole block in src into dst.
// Dst and src may point at the same memory.
Decrypt(dst, src []byte)
}
// salsa20BlockCrypt is a BlockCrypt backed by the Salsa20 stream cipher.
type salsa20BlockCrypt struct {
	key [32]byte // cipher key; filled from the caller's key by the constructor
}

// NewSalsa20BlockCrypt https://en.wikipedia.org/wiki/Salsa20
//
// The key is copied into a fixed 32-byte array: a shorter key leaves the
// remaining bytes zero and a longer key is truncated. The returned error is
// always nil.
func NewSalsa20BlockCrypt(key []byte) (BlockCrypt, error) {
	var crypt salsa20BlockCrypt
	copy(crypt.key[:], key)
	return &crypt, nil
}

// Encrypt XORs src[8:] with the Salsa20 key stream into dst[8:], using the
// first 8 bytes of src as the nonce, and then copies that nonce unchanged to
// dst[:8].
func (c *salsa20BlockCrypt) Encrypt(dst, src []byte) {
	out, in, nonce := dst[8:], src[8:], src[:8]
	salsa20.XORKeyStream(out, in, nonce, &c.key)
	copy(dst[:8], nonce)
}

// Decrypt performs the same key-stream XOR as Encrypt: the first 8 bytes of
// src are the nonce and are copied to dst[:8] as-is, the rest is XORed.
func (c *salsa20BlockCrypt) Decrypt(dst, src []byte) {
	out, in, nonce := dst[8:], src[8:], src[:8]
	salsa20.XORKeyStream(out, in, nonce, &c.key)
	copy(dst[:8], nonce)
}
// sm4BlockCrypt is a BlockCrypt built on the SM4 block cipher.
type sm4BlockCrypt struct {
	encbuf []byte       // scratch space for encrypt: one cipher block
	decbuf []byte       // scratch space for decrypt: two cipher blocks
	block  cipher.Block // underlying SM4 cipher
}

// NewSM4BlockCrypt https://github.com/tjfoc/gmsm/tree/master/sm4
//
// It returns the error reported by sm4.NewCipher when the key is rejected.
func NewSM4BlockCrypt(key []byte) (BlockCrypt, error) {
	blk, err := sm4.NewCipher(key)
	if err != nil {
		return nil, err
	}
	return &sm4BlockCrypt{
		encbuf: make([]byte, sm4.BlockSize),
		decbuf: make([]byte, 2*sm4.BlockSize),
		block:  blk,
	}, nil
}

// Encrypt encrypts src into dst via the shared encrypt helper.
func (c *sm4BlockCrypt) Encrypt(dst, src []byte) {
	encrypt(c.block, dst, src, c.encbuf)
}

// Decrypt decrypts src into dst via the shared decrypt helper.
func (c *sm4BlockCrypt) Decrypt(dst, src []byte) {
	decrypt(c.block, dst, src, c.decbuf)
}
// twofishBlockCrypt is a BlockCrypt built on the Twofish block cipher.
type twofishBlockCrypt struct {
	encbuf []byte       // scratch space for encrypt: one cipher block
	decbuf []byte       // scratch space for decrypt: two cipher blocks
	block  cipher.Block // underlying Twofish cipher
}

// NewTwofishBlockCrypt https://en.wikipedia.org/wiki/Twofish
//
// It returns the error reported by twofish.NewCipher when the key is rejected.
func NewTwofishBlockCrypt(key []byte) (BlockCrypt, error) {
	blk, err := twofish.NewCipher(key)
	if err != nil {
		return nil, err
	}
	return &twofishBlockCrypt{
		encbuf: make([]byte, twofish.BlockSize),
		decbuf: make([]byte, 2*twofish.BlockSize),
		block:  blk,
	}, nil
}

// Encrypt encrypts src into dst via the shared encrypt helper.
func (c *twofishBlockCrypt) Encrypt(dst, src []byte) {
	encrypt(c.block, dst, src, c.encbuf)
}

// Decrypt decrypts src into dst via the shared decrypt helper.
func (c *twofishBlockCrypt) Decrypt(dst, src []byte) {
	decrypt(c.block, dst, src, c.decbuf)
}
// tripleDESBlockCrypt is a BlockCrypt built on the Triple DES block cipher.
type tripleDESBlockCrypt struct {
	encbuf []byte       // scratch space for encrypt: one cipher block
	decbuf []byte       // scratch space for decrypt: two cipher blocks
	block  cipher.Block // underlying 3DES cipher
}

// NewTripleDESBlockCrypt https://en.wikipedia.org/wiki/Triple_DES
//
// It returns the error reported by des.NewTripleDESCipher when the key is
// rejected.
func NewTripleDESBlockCrypt(key []byte) (BlockCrypt, error) {
	blk, err := des.NewTripleDESCipher(key)
	if err != nil {
		return nil, err
	}
	return &tripleDESBlockCrypt{
		encbuf: make([]byte, des.BlockSize),
		decbuf: make([]byte, 2*des.BlockSize),
		block:  blk,
	}, nil
}

// Encrypt encrypts src into dst via the shared encrypt helper.
func (c *tripleDESBlockCrypt) Encrypt(dst, src []byte) {
	encrypt(c.block, dst, src, c.encbuf)
}

// Decrypt decrypts src into dst via the shared decrypt helper.
func (c *tripleDESBlockCrypt) Decrypt(dst, src []byte) {
	decrypt(c.block, dst, src, c.decbuf)
}
// cast5BlockCrypt is a BlockCrypt built on the CAST5 (CAST-128) block cipher.
type cast5BlockCrypt struct {
	encbuf []byte       // scratch space for encrypt: one cipher block
	decbuf []byte       // scratch space for decrypt: two cipher blocks
	block  cipher.Block // underlying CAST5 cipher
}

// NewCast5BlockCrypt https://en.wikipedia.org/wiki/CAST-128
//
// It returns the error reported by cast5.NewCipher when the key is rejected.
func NewCast5BlockCrypt(key []byte) (BlockCrypt, error) {
	blk, err := cast5.NewCipher(key)
	if err != nil {
		return nil, err
	}
	return &cast5BlockCrypt{
		encbuf: make([]byte, cast5.BlockSize),
		decbuf: make([]byte, 2*cast5.BlockSize),
		block:  blk,
	}, nil
}

// Encrypt encrypts src into dst via the shared encrypt helper.
func (c *cast5BlockCrypt) Encrypt(dst, src []byte) {
	encrypt(c.block, dst, src, c.encbuf)
}

// Decrypt decrypts src into dst via the shared decrypt helper.
func (c *cast5BlockCrypt) Decrypt(dst, src []byte) {
	decrypt(c.block, dst, src, c.decbuf)
}
// blowfishBlockCrypt is a BlockCrypt built on the Blowfish block cipher.
type blowfishBlockCrypt struct {
	encbuf []byte       // scratch space for encrypt: one cipher block
	decbuf []byte       // scratch space for decrypt: two cipher blocks
	block  cipher.Block // underlying Blowfish cipher
}

// NewBlowfishBlockCrypt https://en.wikipedia.org/wiki/Blowfish_(cipher)
//
// It returns the error reported by blowfish.NewCipher when the key is
// rejected.
func NewBlowfishBlockCrypt(key []byte) (BlockCrypt, error) {
	blk, err := blowfish.NewCipher(key)
	if err != nil {
		return nil, err
	}
	return &blowfishBlockCrypt{
		encbuf: make([]byte, blowfish.BlockSize),
		decbuf: make([]byte, 2*blowfish.BlockSize),
		block:  blk,
	}, nil
}

// Encrypt encrypts src into dst via the shared encrypt helper.
func (c *blowfishBlockCrypt) Encrypt(dst, src []byte) {
	encrypt(c.block, dst, src, c.encbuf)
}

// Decrypt decrypts src into dst via the shared decrypt helper.
func (c *blowfishBlockCrypt) Decrypt(dst, src []byte) {
	decrypt(c.block, dst, src, c.decbuf)
}
// aesBlockCrypt is a BlockCrypt built on the AES block cipher.
type aesBlockCrypt struct {
	encbuf []byte       // scratch space for encrypt: one cipher block
	decbuf []byte       // scratch space for decrypt: two cipher blocks
	block  cipher.Block // underlying AES cipher
}

// NewAESBlockCrypt https://en.wikipedia.org/wiki/Advanced_Encryption_Standard
//
// It returns the error reported by aes.NewCipher when the key is rejected.
func NewAESBlockCrypt(key []byte) (BlockCrypt, error) {
	blk, err := aes.NewCipher(key)
	if err != nil {
		return nil, err
	}
	return &aesBlockCrypt{
		encbuf: make([]byte, aes.BlockSize),
		decbuf: make([]byte, 2*aes.BlockSize),
		block:  blk,
	}, nil
}

// Encrypt encrypts src into dst via the shared encrypt helper.
func (c *aesBlockCrypt) Encrypt(dst, src []byte) {
	encrypt(c.block, dst, src, c.encbuf)
}

// Decrypt decrypts src into dst via the shared decrypt helper.
func (c *aesBlockCrypt) Decrypt(dst, src []byte) {
	decrypt(c.block, dst, src, c.decbuf)
}
// teaBlockCrypt is a BlockCrypt built on the TEA block cipher.
type teaBlockCrypt struct {
	encbuf []byte       // scratch space for encrypt: one cipher block
	decbuf []byte       // scratch space for decrypt: two cipher blocks
	block  cipher.Block // underlying TEA cipher
}

// NewTEABlockCrypt https://en.wikipedia.org/wiki/Tiny_Encryption_Algorithm
//
// The cipher is created with 16 rounds. It returns the error reported by
// tea.NewCipherWithRounds when the key is rejected.
func NewTEABlockCrypt(key []byte) (BlockCrypt, error) {
	blk, err := tea.NewCipherWithRounds(key, 16)
	if err != nil {
		return nil, err
	}
	return &teaBlockCrypt{
		encbuf: make([]byte, tea.BlockSize),
		decbuf: make([]byte, 2*tea.BlockSize),
		block:  blk,
	}, nil
}

// Encrypt encrypts src into dst via the shared encrypt helper.
func (c *teaBlockCrypt) Encrypt(dst, src []byte) {
	encrypt(c.block, dst, src, c.encbuf)
}

// Decrypt decrypts src into dst via the shared decrypt helper.
func (c *teaBlockCrypt) Decrypt(dst, src []byte) {
	decrypt(c.block, dst, src, c.decbuf)
}
// xteaBlockCrypt is a BlockCrypt built on the XTEA block cipher.
type xteaBlockCrypt struct {
	encbuf []byte       // scratch space for encrypt: one cipher block
	decbuf []byte       // scratch space for decrypt: two cipher blocks
	block  cipher.Block // underlying XTEA cipher
}

// NewXTEABlockCrypt https://en.wikipedia.org/wiki/XTEA
//
// It returns the error reported by xtea.NewCipher when the key is rejected.
func NewXTEABlockCrypt(key []byte) (BlockCrypt, error) {
	blk, err := xtea.NewCipher(key)
	if err != nil {
		return nil, err
	}
	return &xteaBlockCrypt{
		encbuf: make([]byte, xtea.BlockSize),
		decbuf: make([]byte, 2*xtea.BlockSize),
		block:  blk,
	}, nil
}

// Encrypt encrypts src into dst via the shared encrypt helper.
func (c *xteaBlockCrypt) Encrypt(dst, src []byte) {
	encrypt(c.block, dst, src, c.encbuf)
}

// Decrypt decrypts src into dst via the shared decrypt helper.
func (c *xteaBlockCrypt) Decrypt(dst, src []byte) {
	decrypt(c.block, dst, src, c.decbuf)
}
// simpleXORBlockCrypt is a BlockCrypt that XORs data against a table derived
// from the key.
type simpleXORBlockCrypt struct {
	xortbl []byte // XOR table produced by PBKDF2 in the constructor
}

// NewSimpleXORBlockCrypt simple xor with key expanding
//
// The key is expanded with PBKDF2 (SHA-1, 32 iterations, salt saltxor) into a
// table of mtuLimit bytes. The returned error is always nil.
func NewSimpleXORBlockCrypt(key []byte) (BlockCrypt, error) {
	table := pbkdf2.Key(key, []byte(saltxor), 32, mtuLimit, sha1.New)
	return &simpleXORBlockCrypt{xortbl: table}, nil
}

// Encrypt writes src XORed with the table into dst (via xor.Bytes).
func (c *simpleXORBlockCrypt) Encrypt(dst, src []byte) {
	xor.Bytes(dst, src, c.xortbl)
}

// Decrypt is the same XOR operation as Encrypt.
func (c *simpleXORBlockCrypt) Decrypt(dst, src []byte) {
	xor.Bytes(dst, src, c.xortbl)
}
// noneBlockCrypt is a BlockCrypt that applies no cipher at all.
type noneBlockCrypt struct{}

// NewNoneBlockCrypt does nothing but copying
//
// The key argument is ignored and the returned error is always nil.
func NewNoneBlockCrypt(key []byte) (BlockCrypt, error) {
	return &noneBlockCrypt{}, nil
}

// Encrypt copies src into dst unchanged.
func (c *noneBlockCrypt) Encrypt(dst, src []byte) {
	copy(dst, src)
}

// Decrypt copies src into dst unchanged.
func (c *noneBlockCrypt) Decrypt(dst, src []byte) {
	copy(dst, src)
}
// encrypt performs packet encryption with a local CFB-style feedback loop.
//
// The first keystream block is the cipher output for initialVector. Each full
// block of src is XORed with the current keystream block into dst, and the
// resulting dst block is then run through the cipher to produce the next
// keystream block. Whatever remains after the last full block is XORed with
// the final keystream block. buf is scratch space and must hold at least one
// cipher block.
// NOTE(review): xor.BytesSrc1 / xor.BytesSrc0 come from templexxx/xor; they
// presumably bound the XOR length by the keystream and by the source
// respectively — confirm against that package.
func encrypt(block cipher.Block, dst, src, buf []byte) {
	bs := block.BlockSize()
	keystream := buf[:bs]
	block.Encrypt(keystream, initialVector)

	offset := 0
	for remaining := len(src) / bs; remaining > 0; remaining-- {
		xor.BytesSrc1(dst[offset:], src[offset:], keystream)
		// feed the ciphertext just written back into the cipher
		block.Encrypt(keystream, dst[offset:])
		offset += bs
	}
	// trailing partial block (possibly empty)
	xor.BytesSrc0(dst[offset:], src[offset:], keystream)
}
// decrypt reverses encrypt.
//
// buf is scratch space split into two cipher blocks. For every full block the
// next keystream block is computed from the ciphertext in src BEFORE the XOR
// writes the plaintext into dst, so the result stays correct when dst and src
// share memory. The two scratch blocks are then swapped. The trailing partial
// block is XORed with the last keystream block.
func decrypt(block cipher.Block, dst, src, buf []byte) {
	bs := block.BlockSize()
	current, upcoming := buf[:bs], buf[bs:]
	block.Encrypt(current, initialVector)

	offset := 0
	for remaining := len(src) / bs; remaining > 0; remaining-- {
		// derive the following keystream block from the ciphertext first
		block.Encrypt(upcoming, src[offset:])
		xor.BytesSrc1(dst[offset:], src[offset:], current)
		current, upcoming = upcoming, current
		offset += bs
	}
	// trailing partial block (possibly empty)
	xor.BytesSrc0(dst[offset:], src[offset:], current)
}

BIN
vendor/github.com/fatedier/kcp-go/donate.png generated vendored

Binary file not shown.

After

Width:  |  Height:  |  Size: 4.3 KiB

303
vendor/github.com/fatedier/kcp-go/fec.go generated vendored

@ -0,0 +1,303 @@
package kcp
import (
"encoding/binary"
"sync/atomic"
"github.com/templexxx/reedsolomon"
)
// FEC framing constants.
const (
// fecHeaderSize is the size of the FEC header: a 4-byte little-endian
// seqid followed by a 2-byte little-endian flag (see decodeBytes).
fecHeaderSize = 6
fecHeaderSizePlus2 = fecHeaderSize + 2 // plus 2B data size
// typeData is the flag value that decode counts as a data shard.
typeData = 0xf1
// typeFEC is the other flag value; presumably it marks parity shards —
// confirm against the encoder.
typeFEC = 0xf2
)
// Types used on the receiving side of FEC.
type (
// fecPacket is a decoded FEC packet
fecPacket struct {
seqid uint32 // sequence number read from the first 4 header bytes
flag uint16 // packet type read from header bytes 4-5 (compared with typeData)
data []byte // payload following the header; buffer taken from xmitBuf
}
// fecDecoder for decoding incoming packets
fecDecoder struct {
rxlimit int // queue size limit
dataShards int
parityShards int
shardSize int // dataShards + parityShards
rx []fecPacket // ordered receive queue
// caches
// decodeCache and flagCache each have shardSize entries and are
// cleared and reused on every reconstruction attempt in decode.
decodeCache [][]byte
flagCache []bool
// RS decoder
codec reedsolomon.Encoder
}
)
// newFECDecoder builds a decoder for groups of dataShards data shards plus
// parityShards parity shards, keeping at most rxlimit packets queued.
//
// It returns nil when either shard count is not positive, when rxlimit is
// smaller than one full group, or when the Reed-Solomon codec cannot be
// created.
func newFECDecoder(rxlimit, dataShards, parityShards int) *fecDecoder {
	if dataShards <= 0 || parityShards <= 0 {
		return nil
	}
	total := dataShards + parityShards
	if rxlimit < total {
		return nil
	}

	codec, err := reedsolomon.New(dataShards, parityShards)
	if err != nil {
		return nil
	}

	return &fecDecoder{
		rxlimit:      rxlimit,
		dataShards:   dataShards,
		parityShards: parityShards,
		shardSize:    total,
		codec:        codec,
		decodeCache:  make([][]byte, total),
		flagCache:    make([]bool, total),
	}
}
// decodeBytes parses a raw FEC packet: a little-endian uint32 seqid, a
// little-endian uint16 flag, then the payload. The payload is copied into a
// buffer obtained from xmitBuf, so the returned packet does not alias data.
// data must be at least 6 bytes long (the header size), otherwise this panics.
func (dec *fecDecoder) decodeBytes(data []byte) fecPacket {
	seq := binary.LittleEndian.Uint32(data)
	kind := binary.LittleEndian.Uint16(data[4:])

	// allocate memory & copy everything after the 6-byte header
	payload := xmitBuf.Get().([]byte)[:len(data)-6]
	copy(payload, data[6:])

	return fecPacket{seqid: seq, flag: kind, data: payload}
}
// decode a fec packet
//
// decode inserts pkt into the ordered receive queue dec.rx and then tries to
// complete the shard group pkt belongs to, i.e. the shardSize consecutive
// seqids starting at pkt.seqid - pkt.seqid%shardSize.
//
//   - A packet whose seqid is already queued is dropped: its buffer goes back
//     to xmitBuf and nil is returned.
//   - If all data shards of the group are queued, the group's packets are
//     released and nothing is returned.
//   - If data shards are missing but at least dataShards shards of the group
//     are queued, the codec reconstructs the data shards; those that had not
//     been received are returned in recovered, and the group's packets are
//     released.
//
// Finally, if the queue is longer than rxlimit, the oldest packet is released.
func (dec *fecDecoder) decode(pkt fecPacket) (recovered [][]byte) {
// insertion
// Scan from the tail of the queue for the insertion point.
// NOTE(review): _itimediff is defined elsewhere; presumably a signed,
// wrap-aware difference of two sequence numbers — confirm.
n := len(dec.rx) - 1
insertIdx := 0
for i := n; i >= 0; i-- {
if pkt.seqid == dec.rx[i].seqid { // de-duplicate
xmitBuf.Put(pkt.data)
return nil
} else if _itimediff(pkt.seqid, dec.rx[i].seqid) > 0 { // insertion
insertIdx = i + 1
break
}
}
// insert into ordered rx queue
if insertIdx == n+1 {
dec.rx = append(dec.rx, pkt)
} else {
dec.rx = append(dec.rx, fecPacket{})
copy(dec.rx[insertIdx+1:], dec.rx[insertIdx:]) // shift right
dec.rx[insertIdx] = pkt
}
// shard range for current packet
shardBegin := pkt.seqid - pkt.seqid%uint32(dec.shardSize)
shardEnd := shardBegin + uint32(dec.shardSize) - 1
// max search range in ordered queue for current shard
// pkt sits at offset seqid%shardSize inside its group, so the group cannot
// start earlier than that many slots before insertIdx.
searchBegin := insertIdx - int(pkt.seqid%uint32(dec.shardSize))
if searchBegin < 0 {
searchBegin = 0
}
searchEnd := searchBegin + dec.shardSize - 1
if searchEnd >= len(dec.rx) {
searchEnd = len(dec.rx) - 1
}
// re-construct datashards
// Only worth trying when the search window could hold enough shards.
if searchEnd-searchBegin+1 >= dec.dataShards {
// numshard: shards of this group found; numDataShard: those flagged
// typeData; first: queue index of the first one; maxlen: longest payload.
var numshard, numDataShard, first, maxlen int
// zero cache
shards := dec.decodeCache
shardsflag := dec.flagCache
for k := range dec.decodeCache {
shards[k] = nil
shardsflag[k] = false
}
// shard assembly
for i := searchBegin; i <= searchEnd; i++ {
seqid := dec.rx[i].seqid
if _itimediff(seqid, shardEnd) > 0 {
break
} else if _itimediff(seqid, shardBegin) >= 0 {
// slot each packet by its position inside the group
shards[seqid%uint32(dec.shardSize)] = dec.rx[i].data
shardsflag[seqid%uint32(dec.shardSize)] = true
numshard++
if dec.rx[i].flag == typeData {
numDataShard++
}
if numshard == 1 {
first = i
}
if len(dec.rx[i].data) > maxlen {
maxlen = len(dec.rx[i].data)
}
}
}
if numDataShard == dec.dataShards {
// case 1: no lost data shards
dec.rx = dec.freeRange(first, numshard, dec.rx)
} else if numshard >= dec.dataShards {
// case 2: data shard lost, but recoverable from parity shard
// Extend every present shard to maxlen before reconstruction.
// NOTE(review): xorBytes is defined elsewhere; XOR-ing the extended
// tail with itself presumably zero-fills the padding — confirm.
for k := range shards {
if shards[k] != nil {
dlen := len(shards[k])
shards[k] = shards[k][:maxlen]
xorBytes(shards[k][dlen:], shards[k][dlen:], shards[k][dlen:])
}
}
if err := dec.codec.ReconstructData(shards); err == nil {
// hand back only the data shards that were not received
for k := range shards[:dec.dataShards] {
if !shardsflag[k] {
recovered = append(recovered, shards[k])
}
}
}
// the group's packets are released whether or not reconstruction worked
dec.rx = dec.freeRange(first, numshard, dec.rx)
}
}
// keep rxlimit
if len(dec.rx) > dec.rxlimit {
if dec.rx[0].flag == typeData { // record unrecoverable data
atomic.AddUint64(&DefaultSnmp.FECShortShards, 1)
}
dec.rx = dec.freeRange(0, 1, dec.rx)
}
return
}
// freeRange hands the data buffers of q[first:first+n] back to the shared
// packet pool, closes the gap by shifting the remaining packets left and
// clears the vacated tail slots so the recycled buffers are no longer
// referenced. It returns q shortened by n elements.
func (dec *fecDecoder) freeRange(first, n int, q []fecPacket) []fecPacket {
	end := first + n
	// recycle every buffer in the freed range
	for idx := first; idx < end; idx++ {
		xmitBuf.Put(q[idx].data)
	}
	// slide the survivors over the freed range
	copy(q[first:], q[end:])
	// drop the stale references left behind in the last n slots
	keep := len(q) - n
	for idx := len(q) - 1; idx >= keep; idx-- {
		q[idx].data = nil
	}
	return q[:keep]
}
type (
	// fecEncoder for encoding outgoing packets
	fecEncoder struct {
		dataShards   int    // data shards per group; parity is produced once this many are collected
		parityShards int    // parity shards produced per group
		shardSize    int    // dataShards + parityShards
		paws         uint32 // Protect Against Wrapped Sequence numbers
		next         uint32 // next seqid

		shardCount int // count the number of datashards collected
		maxSize    int // record maximum data length in datashard

		headerOffset  int // FEC header offset
		payloadOffset int // FEC payload offset

		// caches
		shardCache  [][]byte // one mtuLimit-sized buffer per shard (data first, then parity)
		encodeCache [][]byte // header-stripped views of shardCache handed to the codec

		// RS encoder
		codec reedsolomon.Encoder
	}
)
// newFECEncoder builds an encoder that collects dataShards outgoing packets
// and derives parityShards Reed-Solomon parity packets from them. offset is
// the position of the FEC header inside each packet buffer.
// It returns nil when either shard count is not positive or when the
// Reed-Solomon codec cannot be constructed.
func newFECEncoder(dataShards, parityShards, offset int) *fecEncoder {
	if dataShards <= 0 || parityShards <= 0 {
		return nil
	}
	codec, err := reedsolomon.New(dataShards, parityShards)
	if err != nil {
		return nil
	}

	total := dataShards + parityShards
	fec := &fecEncoder{
		dataShards:    dataShards,
		parityShards:  parityShards,
		shardSize:     total,
		paws:          (0xffffffff/uint32(total) - 1) * uint32(total),
		headerOffset:  offset,
		payloadOffset: offset + fecHeaderSize,
		codec:         codec,
		encodeCache:   make([][]byte, total),
		shardCache:    make([][]byte, total),
	}
	// every shard slot owns a full-sized packet buffer
	for k := 0; k < total; k++ {
		fec.shardCache[k] = make([]byte, mtuLimit)
	}
	return fec
}
// encode stamps b as the next data shard and keeps a copy of it in the shard
// cache. Once dataShards packets have been collected it runs the
// Reed-Solomon encoder and returns the parity shards; until then (or if the
// codec reports an error) ps is nil.
// The returned parity shards alias the encoder's internal cache, so their
// content will change in the next encode.
func (enc *fecEncoder) encode(b []byte) (ps [][]byte) {
	// write the FEC header (seqid + type) at headerOffset
	enc.markData(b[enc.headerOffset:])
	// store the length of b from payloadOffset onward as a little-endian uint16
	binary.LittleEndian.PutUint16(b[enc.payloadOffset:], uint16(len(b[enc.payloadOffset:])))

	// copy data to fec datashards
	sz := len(b)
	enc.shardCache[enc.shardCount] = enc.shardCache[enc.shardCount][:sz]
	copy(enc.shardCache[enc.shardCount], b)
	enc.shardCount++

	// record max datashard length
	if sz > enc.maxSize {
		enc.maxSize = sz
	}

	// calculate Reed-Solomon Erasure Code
	if enc.shardCount == enc.dataShards {
		// bzero each datashard's tail
		for i := 0; i < enc.dataShards; i++ {
			shard := enc.shardCache[i]
			slen := len(shard)
			// NOTE(review): xor-ing a range with itself presumably zeroes it;
			// xorBytes is defined elsewhere — confirm
			xorBytes(shard[slen:enc.maxSize], shard[slen:enc.maxSize], shard[slen:enc.maxSize])
		}

		// construct equal-sized slice with stripped header
		cache := enc.encodeCache
		for k := range cache {
			cache[k] = enc.shardCache[k][enc.payloadOffset:enc.maxSize]
		}

		// rs encode
		if err := enc.codec.Encode(cache); err == nil {
			// parity shards live after the data shards in shardCache
			ps = enc.shardCache[enc.dataShards:]
			for k := range ps {
				enc.markFEC(ps[k][enc.headerOffset:])
				ps[k] = ps[k][:enc.maxSize]
			}
		}

		// reset counters to zero
		enc.shardCount = 0
		enc.maxSize = 0
	}

	return
}
// markData writes the FEC header of a data shard at the start of data: the
// current sequence id as a little-endian uint32 followed by typeData as a
// little-endian uint16. It then advances the sequence id by one.
func (enc *fecEncoder) markData(data []byte) {
	le := binary.LittleEndian
	le.PutUint32(data, enc.next)
	le.PutUint16(data[4:], typeData)
	enc.next += 1
}
// markFEC writes the FEC header of a parity shard at the start of data: the
// current sequence id as a little-endian uint32 followed by typeFEC as a
// little-endian uint16. The sequence id is then advanced and wrapped at
// enc.paws.
func (enc *fecEncoder) markFEC(data []byte) {
	le := binary.LittleEndian
	le.PutUint32(data, enc.next)
	le.PutUint16(data[4:], typeFEC)
	succ := enc.next + 1
	enc.next = succ % enc.paws
}

BIN
vendor/github.com/fatedier/kcp-go/frame.png generated vendored

Binary file not shown.

After

Width:  |  Height:  |  Size: 35 KiB

BIN
vendor/github.com/fatedier/kcp-go/kcp-go.png generated vendored

Binary file not shown.

After

Width:  |  Height:  |  Size: 8.9 KiB

998
vendor/github.com/fatedier/kcp-go/kcp.go generated vendored

@ -0,0 +1,998 @@
// Package kcp - A Fast and Reliable ARQ Protocol
package kcp
import (
"encoding/binary"
"sync/atomic"
)
// Protocol constants. Times are in milliseconds, window sizes in segments.
const (
	IKCP_RTO_NDL     = 30  // no delay min rto
	IKCP_RTO_MIN     = 100 // normal min rto
	IKCP_RTO_DEF     = 200 // initial rx_rto of a new KCP object
	IKCP_RTO_MAX     = 60000 // upper bound applied to the computed rto
	IKCP_CMD_PUSH    = 81 // cmd: push data
	IKCP_CMD_ACK     = 82 // cmd: ack
	IKCP_CMD_WASK    = 83 // cmd: window probe (ask)
	IKCP_CMD_WINS    = 84 // cmd: window size (tell)
	IKCP_ASK_SEND    = 1  // need to send IKCP_CMD_WASK
	IKCP_ASK_TELL    = 2  // need to send IKCP_CMD_WINS
	IKCP_WND_SND     = 32 // default send window
	IKCP_WND_RCV     = 32 // default receive window (also the initial remote window)
	IKCP_MTU_DEF     = 1400 // default mtu
	IKCP_ACK_FAST    = 3
	IKCP_INTERVAL    = 100 // default flush interval
	IKCP_OVERHEAD    = 24  // size of the encoded segment header
	IKCP_DEADLINK    = 20  // default dead_link: transmissions of one segment before state is flagged
	IKCP_THRESH_INIT = 2   // initial ssthresh
	IKCP_THRESH_MIN  = 2   // lower bound for ssthresh
	IKCP_PROBE_INIT  = 7000   // 7 secs to probe window size
	IKCP_PROBE_LIMIT = 120000 // up to 120 secs to probe window
)
// output_callback is the prototype of the function KCP calls to emit a
// packet: buf is KCP's output buffer and size is the number of valid bytes
// at its start. An implementation ought to capture conn and call conn.Write.
type output_callback func(buf []byte, size int)
// ikcp_encode8u stores c in the first byte of p and returns the remainder
// of p after that byte.
func ikcp_encode8u(p []byte, c byte) []byte {
	const width = 1
	p[0] = c
	return p[width:]
}
// ikcp_decode8u reads the first byte of p into *c and returns the remainder
// of p after that byte.
func ikcp_decode8u(p []byte, c *byte) []byte {
	const width = 1
	*c = p[0]
	return p[width:]
}
// ikcp_encode16u stores w at the start of p in little-endian byte order and
// returns the remainder of p after those two bytes.
func ikcp_encode16u(p []byte, w uint16) []byte {
	const width = 2
	binary.LittleEndian.PutUint16(p, w)
	return p[width:]
}
// ikcp_decode16u reads a little-endian uint16 from the start of p into *w
// and returns the remainder of p after those two bytes.
func ikcp_decode16u(p []byte, w *uint16) []byte {
	const width = 2
	*w = binary.LittleEndian.Uint16(p)
	return p[width:]
}
// ikcp_encode32u stores l at the start of p in little-endian byte order and
// returns the remainder of p after those four bytes.
func ikcp_encode32u(p []byte, l uint32) []byte {
	const width = 4
	binary.LittleEndian.PutUint32(p, l)
	return p[width:]
}
// ikcp_decode32u reads a little-endian uint32 from the start of p into *l
// and returns the remainder of p after those four bytes.
func ikcp_decode32u(p []byte, l *uint32) []byte {
	const width = 4
	*l = binary.LittleEndian.Uint32(p)
	return p[width:]
}
// _imin_ returns the smaller of a and b.
func _imin_(a, b uint32) uint32 {
	if b < a {
		return b
	}
	return a
}
// _imax_ returns the larger of a and b.
func _imax_(a, b uint32) uint32 {
	if b > a {
		return b
	}
	return a
}
func _ibound_(lower, middle, upper uint32) uint32 {
return _imin_(_imax_(lower, middle), upper)
}
// _itimediff returns later-earlier interpreted as a signed 32-bit value, so
// the comparison stays meaningful when the unsigned counters wrap around.
func _itimediff(later, earlier uint32) int32 {
	delta := later - earlier
	return int32(delta)
}
// segment defines a KCP segment.
// conv, cmd, frg, wnd, ts, sn, una and the data length form the encoded
// header; the remaining fields are local retransmission bookkeeping.
type segment struct {
	conv     uint32 // conversation id
	cmd      uint8  // one of the IKCP_CMD_* values
	frg      uint8  // fragments still to follow in this message; 0 on the last one
	wnd      uint16 // sender's free receive window
	ts       uint32 // timestamp of the (re)transmission
	sn       uint32 // sequence number
	una      uint32 // sender's rcv_nxt at transmission time
	rto      uint32 // retransmission timeout of this segment
	xmit     uint32 // number of times the segment has been transmitted
	resendts uint32 // time at which the segment is due for retransmission
	fastack  uint32 // incremented by parse_fastack for segments older than an acked sn
	data     []byte // payload, taken from the shared packet pool
}
// encode writes the IKCP_OVERHEAD-byte segment header into ptr and returns
// the remainder of ptr after the header. Only the length of seg.data is
// written; the caller copies the payload itself.
// Every call also increments the OutSegs counter.
func (seg *segment) encode(ptr []byte) []byte {
	ptr = ikcp_encode32u(ptr, seg.conv)
	ptr = ikcp_encode8u(ptr, seg.cmd)
	ptr = ikcp_encode8u(ptr, seg.frg)
	ptr = ikcp_encode16u(ptr, seg.wnd)
	ptr = ikcp_encode32u(ptr, seg.ts)
	ptr = ikcp_encode32u(ptr, seg.sn)
	ptr = ikcp_encode32u(ptr, seg.una)
	ptr = ikcp_encode32u(ptr, uint32(len(seg.data)))
	atomic.AddUint64(&DefaultSnmp.OutSegs, 1)
	return ptr
}
// KCP defines a single KCP connection
type KCP struct {
	// conv: conversation id checked on every incoming segment;
	// mtu/mss: packet size limit and mtu minus the segment header;
	// state: set to 0xFFFFFFFF by flush once a segment reaches dead_link transmissions
	conv, mtu, mss, state uint32
	// snd_una: oldest unacknowledged sn; snd_nxt: next sn to assign;
	// rcv_nxt: next sn expected from the peer
	snd_una, snd_nxt, rcv_nxt uint32
	// congestion threshold maintained by flush and consulted by Input
	ssthresh uint32
	// round-trip estimators maintained by update_ack
	rx_rttvar, rx_srtt int32
	// current retransmission timeout and its lower bound
	rx_rto, rx_minrto uint32
	// local send/receive windows, last window advertised by the peer,
	// congestion window, and pending IKCP_ASK_* probe flags
	snd_wnd, rcv_wnd, rmt_wnd, cwnd, probe uint32
	// flush interval and the time of the next scheduled flush
	interval, ts_flush uint32
	// nodelay mode flag; updated becomes 1 on the first Update call
	nodelay, updated uint32
	// window probe schedule used while the remote window is zero
	ts_probe, probe_wait uint32
	// dead_link: transmission count that flags the link in state;
	// incr: byte-based companion of cwnd used for its growth
	dead_link, incr uint32

	// fast retransmit trigger: resend after this many skipping acks (<=0 disables)
	fastresend int32
	// nocwnd != 0 ignores cwnd in flush; stream != 0 enables stream mode in Send
	nocwnd, stream int32

	snd_queue []segment // queued by Send, not yet assigned a sequence number
	rcv_queue []segment // in-order segments ready for Recv
	snd_buf   []segment // sent (or about to be sent) and not yet acknowledged
	rcv_buf   []segment // received segments waiting for earlier ones

	acklist []ackItem // acknowledgements to send on the next flush

	buffer []byte          // scratch buffer in which outgoing packets are assembled
	output output_callback // called with buffer whenever a packet is complete
}
// ackItem records the sequence number and timestamp of a received push
// segment so that flush can echo them back in an IKCP_CMD_ACK.
type ackItem struct {
	sn uint32 // sequence number being acknowledged
	ts uint32 // timestamp carried by the acknowledged segment
}
// NewKCP creates a KCP control object initialised with the protocol
// defaults. conv must be equal on both endpoints of the same connection;
// output is invoked whenever a packet is ready to be sent.
func NewKCP(conv uint32, output output_callback) *KCP {
	const mtu uint32 = IKCP_MTU_DEF
	return &KCP{
		conv:      conv,
		snd_wnd:   IKCP_WND_SND,
		rcv_wnd:   IKCP_WND_RCV,
		rmt_wnd:   IKCP_WND_RCV,
		mtu:       mtu,
		mss:       mtu - IKCP_OVERHEAD,
		buffer:    make([]byte, (mtu+IKCP_OVERHEAD)*3),
		rx_rto:    IKCP_RTO_DEF,
		rx_minrto: IKCP_RTO_MIN,
		interval:  IKCP_INTERVAL,
		ts_flush:  IKCP_INTERVAL,
		ssthresh:  IKCP_THRESH_INIT,
		dead_link: IKCP_DEADLINK,
		output:    output,
	}
}
// newSegment returns a zero-valued segment whose data is a buffer taken from
// the shared packet pool and resliced to size bytes.
func (kcp *KCP) newSegment(size int) (seg segment) {
	pooled := xmitBuf.Get().([]byte)
	seg.data = pooled[:size]
	return seg
}
// delSegment gives the data buffer of seg back to the shared packet pool.
func (kcp *KCP) delSegment(seg segment) {
	payload := seg.data
	xmitBuf.Put(payload)
}
// PeekSize reports the size in bytes of the next complete message in the
// receive queue without consuming it.
// It returns -1 when the queue is empty or when not all fragments of the
// next message have arrived yet.
func (kcp *KCP) PeekSize() (length int) {
	if len(kcp.rcv_queue) == 0 {
		return -1
	}

	seg := &kcp.rcv_queue[0]
	if seg.frg == 0 {
		// unfragmented message
		return len(seg.data)
	}

	// frg counts the fragments still to follow, so the message needs frg+1
	// queued segments. Widen before adding: frg is a uint8 and frg+1 would
	// wrap to 0 for frg == 255, bypassing this completeness check.
	if len(kcp.rcv_queue) < int(seg.frg)+1 {
		return -1
	}

	// sum the fragments up to and including the one marked frg == 0
	for k := range kcp.rcv_queue {
		seg := &kcp.rcv_queue[k]
		length += len(seg.data)
		if seg.frg == 0 {
			break
		}
	}
	return
}
// Recv is user/upper level recv: it copies the next complete message into
// buffer and returns its size.
// Negative results mean nothing was delivered: -1 when the receive queue is
// empty, -2 when the next message is still incomplete, -3 when buffer is too
// small for it.
func (kcp *KCP) Recv(buffer []byte) (n int) {
	if len(kcp.rcv_queue) == 0 {
		return -1
	}

	peeksize := kcp.PeekSize()
	if peeksize < 0 {
		return -2
	}

	if peeksize > len(buffer) {
		return -3
	}

	// remember whether the receive window was full before draining it
	var fast_recover bool
	if len(kcp.rcv_queue) >= int(kcp.rcv_wnd) {
		fast_recover = true
	}

	// merge fragment
	count := 0
	for k := range kcp.rcv_queue {
		seg := &kcp.rcv_queue[k]
		copy(buffer, seg.data)
		buffer = buffer[len(seg.data):]
		n += len(seg.data)
		count++
		// the payload buffer goes back to the pool; frg is still readable below
		kcp.delSegment(*seg)
		if seg.frg == 0 {
			break
		}
	}
	if count > 0 {
		kcp.rcv_queue = kcp.remove_front(kcp.rcv_queue, count)
	}

	// move available data from rcv_buf -> rcv_queue
	count = 0
	for k := range kcp.rcv_buf {
		seg := &kcp.rcv_buf[k]
		if seg.sn == kcp.rcv_nxt && len(kcp.rcv_queue) < int(kcp.rcv_wnd) {
			kcp.rcv_nxt++
			count++
		} else {
			break
		}
	}

	if count > 0 {
		kcp.rcv_queue = append(kcp.rcv_queue, kcp.rcv_buf[:count]...)
		kcp.rcv_buf = kcp.remove_front(kcp.rcv_buf, count)
	}

	// fast recover
	if len(kcp.rcv_queue) < int(kcp.rcv_wnd) && fast_recover {
		// ready to send back IKCP_CMD_WINS in ikcp_flush
		// tell remote my window size
		kcp.probe |= IKCP_ASK_TELL
	}
	return
}
// Send is user/upper level send: it splits buffer into segments of at most
// mss bytes and appends them to the send queue.
// It returns 0 on success, -1 when buffer is empty and -2 when more than 255
// segments would be needed.
func (kcp *KCP) Send(buffer []byte) int {
	var count int
	if len(buffer) == 0 {
		return -1
	}

	// append to previous segment in streaming mode (if possible)
	if kcp.stream != 0 {
		n := len(kcp.snd_queue)
		if n > 0 {
			seg := &kcp.snd_queue[n-1]
			if len(seg.data) < int(kcp.mss) {
				capacity := int(kcp.mss) - len(seg.data)
				extend := capacity
				if len(buffer) < capacity {
					extend = len(buffer)
				}

				// grow slice, the underlying cap is guaranteed to
				// be larger than kcp.mss
				oldlen := len(seg.data)
				seg.data = seg.data[:oldlen+extend]
				copy(seg.data[oldlen:], buffer)
				buffer = buffer[extend:]
			}
		}

		// everything fitted into the previous segment
		if len(buffer) == 0 {
			return 0
		}
	}

	// number of segments needed for the remaining data
	if len(buffer) <= int(kcp.mss) {
		count = 1
	} else {
		count = (len(buffer) + int(kcp.mss) - 1) / int(kcp.mss)
	}

	// frg is a single byte, which limits the fragments per message
	if count > 255 {
		return -2
	}

	if count == 0 {
		count = 1
	}

	for i := 0; i < count; i++ {
		var size int
		if len(buffer) > int(kcp.mss) {
			size = int(kcp.mss)
		} else {
			size = len(buffer)
		}
		seg := kcp.newSegment(size)
		copy(seg.data, buffer[:size])
		if kcp.stream == 0 { // message mode
			// number of fragments still to follow
			seg.frg = uint8(count - i - 1)
		} else { // stream mode
			seg.frg = 0
		}
		kcp.snd_queue = append(kcp.snd_queue, seg)
		buffer = buffer[size:]
	}
	return 0
}
// update_ack folds a new round-trip sample rtt into the smoothed RTT and
// RTT variance and recomputes rx_rto, bounded to [rx_minrto, IKCP_RTO_MAX].
func (kcp *KCP) update_ack(rtt int32) {
	// https://tools.ietf.org/html/rfc6298
	var rto uint32
	if kcp.rx_srtt == 0 {
		// first sample seeds both estimators
		kcp.rx_srtt = rtt
		kcp.rx_rttvar = rtt >> 1
	} else {
		delta := rtt - kcp.rx_srtt
		kcp.rx_srtt += delta >> 3
		if delta < 0 {
			delta = -delta
		}
		if rtt < kcp.rx_srtt-kcp.rx_rttvar {
			// if the new RTT sample is below the bottom of the range of
			// what an RTT measurement is expected to be.
			// give an 8x reduced weight versus its normal weighting
			kcp.rx_rttvar += (delta - kcp.rx_rttvar) >> 5
		} else {
			kcp.rx_rttvar += (delta - kcp.rx_rttvar) >> 2
		}
	}
	// rto = srtt + max(interval, 4*rttvar)
	rto = uint32(kcp.rx_srtt) + _imax_(kcp.interval, uint32(kcp.rx_rttvar)<<2)
	kcp.rx_rto = _ibound_(kcp.rx_minrto, rto, IKCP_RTO_MAX)
}
// shrink_buf recomputes snd_una: the sequence number of the first segment
// still in snd_buf, or snd_nxt when nothing is outstanding.
func (kcp *KCP) shrink_buf() {
	if len(kcp.snd_buf) == 0 {
		kcp.snd_una = kcp.snd_nxt
		return
	}
	kcp.snd_una = kcp.snd_buf[0].sn
}
// parse_ack removes the segment with sequence number sn from snd_buf and
// recycles its buffer. Acks outside [snd_una, snd_nxt) are ignored.
func (kcp *KCP) parse_ack(sn uint32) {
	outside := _itimediff(sn, kcp.snd_una) < 0 || _itimediff(sn, kcp.snd_nxt) >= 0
	if outside {
		return
	}

	for k := range kcp.snd_buf {
		pending := &kcp.snd_buf[k]
		if pending.sn == sn {
			kcp.delSegment(*pending)
			// close the gap and clear the vacated last slot
			last := len(kcp.snd_buf) - 1
			copy(kcp.snd_buf[k:], kcp.snd_buf[k+1:])
			kcp.snd_buf[last] = segment{}
			kcp.snd_buf = kcp.snd_buf[:last]
			return
		}
		// snd_buf is ordered by sn: nothing further can match
		if _itimediff(sn, pending.sn) < 0 {
			return
		}
	}
}
// parse_fastack increments the fastack counter of every segment in snd_buf
// whose sequence number precedes sn. Values of sn outside
// [snd_una, snd_nxt) are ignored.
func (kcp *KCP) parse_fastack(sn uint32) {
	outside := _itimediff(sn, kcp.snd_una) < 0 || _itimediff(sn, kcp.snd_nxt) >= 0
	if outside {
		return
	}

	for i := 0; i < len(kcp.snd_buf); i++ {
		pending := &kcp.snd_buf[i]
		if _itimediff(sn, pending.sn) < 0 {
			return
		}
		if pending.sn != sn {
			pending.fastack++
		}
	}
}
// parse_una drops every leading segment of snd_buf whose sequence number is
// below una and recycles their buffers.
func (kcp *KCP) parse_una(una uint32) {
	acked := 0
	for acked < len(kcp.snd_buf) && _itimediff(una, kcp.snd_buf[acked].sn) > 0 {
		kcp.delSegment(kcp.snd_buf[acked])
		acked++
	}
	if acked > 0 {
		kcp.snd_buf = kcp.remove_front(kcp.snd_buf, acked)
	}
}
// ack_push queues an acknowledgement for sequence number sn, carrying the
// timestamp ts, to be sent by the next flush.
func (kcp *KCP) ack_push(sn, ts uint32) {
	item := ackItem{sn: sn, ts: ts}
	kcp.acklist = append(kcp.acklist, item)
}
// parse_data inserts a received push segment into rcv_buf, keeping the
// buffer ordered by sequence number, and then moves every segment that has
// become in-order into rcv_queue. Segments outside the receive window and
// duplicates are recycled instead of stored.
func (kcp *KCP) parse_data(newseg segment) {
	sn := newseg.sn
	// outside [rcv_nxt, rcv_nxt+rcv_wnd): discard
	if _itimediff(sn, kcp.rcv_nxt+kcp.rcv_wnd) >= 0 ||
		_itimediff(sn, kcp.rcv_nxt) < 0 {
		kcp.delSegment(newseg)
		return
	}

	// search from the back for the insertion point or a duplicate
	n := len(kcp.rcv_buf) - 1
	insert_idx := 0
	repeat := false
	for i := n; i >= 0; i-- {
		seg := &kcp.rcv_buf[i]
		if seg.sn == sn {
			repeat = true
			atomic.AddUint64(&DefaultSnmp.RepeatSegs, 1)
			break
		}
		if _itimediff(sn, seg.sn) > 0 {
			insert_idx = i + 1
			break
		}
	}

	if !repeat {
		if insert_idx == n+1 {
			kcp.rcv_buf = append(kcp.rcv_buf, newseg)
		} else {
			// grow by one, shift the tail right and drop the segment in place
			kcp.rcv_buf = append(kcp.rcv_buf, segment{})
			copy(kcp.rcv_buf[insert_idx+1:], kcp.rcv_buf[insert_idx:])
			kcp.rcv_buf[insert_idx] = newseg
		}
	} else {
		kcp.delSegment(newseg)
	}

	// move available data from rcv_buf -> rcv_queue
	count := 0
	for k := range kcp.rcv_buf {
		seg := &kcp.rcv_buf[k]
		if seg.sn == kcp.rcv_nxt && len(kcp.rcv_queue) < int(kcp.rcv_wnd) {
			kcp.rcv_nxt++
			count++
		} else {
			break
		}
	}
	if count > 0 {
		kcp.rcv_queue = append(kcp.rcv_queue, kcp.rcv_buf[:count]...)
		kcp.rcv_buf = kcp.remove_front(kcp.rcv_buf, count)
	}
}
// Input is called when a low level packet (eg. UDP packet) has been
// received; data may hold several encoded segments back to back.
// regular indicates a regular packet has been received (not one recovered by
// FEC); ackNoDelay requests that pending acks are flushed before returning.
// It returns 0 on success, -1 when data is shorter than a header or carries
// a foreign conv, -2 when a segment's payload is truncated and -3 on an
// unknown command. Segments processed before an error stay applied.
func (kcp *KCP) Input(data []byte, regular, ackNoDelay bool) int {
	// snd_una before processing, used below to detect forward progress
	una := kcp.snd_una
	if len(data) < IKCP_OVERHEAD {
		return -1
	}

	var maxack uint32
	var lastackts uint32
	var flag int
	var inSegs uint64

	for {
		// note: this una shadows the outer one for the duration of the loop body
		var ts, sn, length, una, conv uint32
		var wnd uint16
		var cmd, frg uint8

		if len(data) < int(IKCP_OVERHEAD) {
			break
		}

		data = ikcp_decode32u(data, &conv)
		if conv != kcp.conv {
			return -1
		}

		data = ikcp_decode8u(data, &cmd)
		data = ikcp_decode8u(data, &frg)
		data = ikcp_decode16u(data, &wnd)
		data = ikcp_decode32u(data, &ts)
		data = ikcp_decode32u(data, &sn)
		data = ikcp_decode32u(data, &una)
		data = ikcp_decode32u(data, &length)
		if len(data) < int(length) {
			return -2
		}

		if cmd != IKCP_CMD_PUSH && cmd != IKCP_CMD_ACK &&
			cmd != IKCP_CMD_WASK && cmd != IKCP_CMD_WINS {
			return -3
		}

		// only trust window updates from regular packets. i.e: latest update
		if regular {
			kcp.rmt_wnd = uint32(wnd)
		}
		// everything below the peer's una has been received
		kcp.parse_una(una)
		kcp.shrink_buf()

		if cmd == IKCP_CMD_ACK {
			kcp.parse_ack(sn)
			kcp.shrink_buf()
			// track the highest acked sn and the timestamp of the last ack
			if flag == 0 {
				flag = 1
				maxack = sn
			} else if _itimediff(sn, maxack) > 0 {
				maxack = sn
			}
			lastackts = ts
		} else if cmd == IKCP_CMD_PUSH {
			if _itimediff(sn, kcp.rcv_nxt+kcp.rcv_wnd) < 0 {
				// ack even already-delivered segments so the peer stops resending
				kcp.ack_push(sn, ts)
				if _itimediff(sn, kcp.rcv_nxt) >= 0 {
					seg := kcp.newSegment(int(length))
					seg.conv = conv
					seg.cmd = cmd
					seg.frg = frg
					seg.wnd = wnd
					seg.ts = ts
					seg.sn = sn
					seg.una = una
					copy(seg.data, data[:length])
					kcp.parse_data(seg)
				} else {
					atomic.AddUint64(&DefaultSnmp.RepeatSegs, 1)
				}
			} else {
				atomic.AddUint64(&DefaultSnmp.RepeatSegs, 1)
			}
		} else if cmd == IKCP_CMD_WASK {
			// ready to send back IKCP_CMD_WINS in Ikcp_flush
			// tell remote my window size
			kcp.probe |= IKCP_ASK_TELL
		} else if cmd == IKCP_CMD_WINS {
			// do nothing
		} else {
			return -3
		}

		inSegs++
		// skip the payload of this segment
		data = data[length:]
	}
	atomic.AddUint64(&DefaultSnmp.InSegs, inSegs)

	// acks seen in a regular packet feed fast retransmit and the RTT estimate
	if flag != 0 && regular {
		kcp.parse_fastack(maxack)
		current := currentMs()
		if _itimediff(current, lastackts) >= 0 {
			kcp.update_ack(_itimediff(current, lastackts))
		}
	}

	// snd_una advanced: grow the congestion window
	if _itimediff(kcp.snd_una, una) > 0 {
		if kcp.cwnd < kcp.rmt_wnd {
			mss := kcp.mss
			if kcp.cwnd < kcp.ssthresh {
				kcp.cwnd++
				kcp.incr += mss
			} else {
				if kcp.incr < mss {
					kcp.incr = mss
				}
				kcp.incr += (mss*mss)/kcp.incr + (mss / 16)
				if (kcp.cwnd+1)*mss <= kcp.incr {
					kcp.cwnd++
				}
			}
			if kcp.cwnd > kcp.rmt_wnd {
				kcp.cwnd = kcp.rmt_wnd
				kcp.incr = kcp.rmt_wnd * mss
			}
		}
	}

	if ackNoDelay && len(kcp.acklist) > 0 { // ack immediately
		kcp.flush(true)
	} else if kcp.rmt_wnd == 0 && len(kcp.acklist) > 0 { // window zero
		kcp.flush(true)
	}
	return 0
}
// wnd_unused returns how many more segments the receive queue can take
// before reaching rcv_wnd, or 0 when it is already full.
func (kcp *KCP) wnd_unused() uint16 {
	free := int(kcp.rcv_wnd) - len(kcp.rcv_queue)
	if free <= 0 {
		return 0
	}
	return uint16(free)
}
// flush assembles pending output into kcp.buffer and hands every packet to
// the output callback as soon as adding more would exceed the mtu.
// In order it emits: queued acknowledgements, window probe commands, newly
// admitted segments from snd_queue and due retransmissions from snd_buf;
// afterwards it updates the retransmission counters and the congestion
// state. With ackOnly set, only the acknowledgements are emitted.
func (kcp *KCP) flush(ackOnly bool) {
	// template for the control segments emitted below
	var seg segment
	seg.conv = kcp.conv
	seg.cmd = IKCP_CMD_ACK
	seg.wnd = kcp.wnd_unused()
	seg.una = kcp.rcv_nxt

	buffer := kcp.buffer
	// flush acknowledges
	ptr := buffer
	for i, ack := range kcp.acklist {
		size := len(buffer) - len(ptr)
		if size+IKCP_OVERHEAD > int(kcp.mtu) {
			kcp.output(buffer, size)
			ptr = buffer
		}
		// filter jitters caused by bufferbloat
		// (wrap-safe comparison, consistent with every other sn check here;
		// the last ack is always sent so the peer still gets una/wnd)
		if _itimediff(ack.sn, kcp.rcv_nxt) >= 0 || len(kcp.acklist)-1 == i {
			seg.sn, seg.ts = ack.sn, ack.ts
			ptr = seg.encode(ptr)
		}
	}
	kcp.acklist = kcp.acklist[0:0]

	if ackOnly { // flush remaining ack segments
		size := len(buffer) - len(ptr)
		if size > 0 {
			kcp.output(buffer, size)
		}
		return
	}

	// probe window size (if remote window size equals zero)
	if kcp.rmt_wnd == 0 {
		current := currentMs()
		if kcp.probe_wait == 0 {
			kcp.probe_wait = IKCP_PROBE_INIT
			kcp.ts_probe = current + kcp.probe_wait
		} else {
			if _itimediff(current, kcp.ts_probe) >= 0 {
				if kcp.probe_wait < IKCP_PROBE_INIT {
					kcp.probe_wait = IKCP_PROBE_INIT
				}
				// back off by 50% per probe, capped at IKCP_PROBE_LIMIT
				kcp.probe_wait += kcp.probe_wait / 2
				if kcp.probe_wait > IKCP_PROBE_LIMIT {
					kcp.probe_wait = IKCP_PROBE_LIMIT
				}
				kcp.ts_probe = current + kcp.probe_wait
				kcp.probe |= IKCP_ASK_SEND
			}
		}
	} else {
		kcp.ts_probe = 0
		kcp.probe_wait = 0
	}

	// flush window probing commands
	if (kcp.probe & IKCP_ASK_SEND) != 0 {
		seg.cmd = IKCP_CMD_WASK
		size := len(buffer) - len(ptr)
		if size+IKCP_OVERHEAD > int(kcp.mtu) {
			kcp.output(buffer, size)
			ptr = buffer
		}
		ptr = seg.encode(ptr)
	}

	// flush window probing commands
	if (kcp.probe & IKCP_ASK_TELL) != 0 {
		seg.cmd = IKCP_CMD_WINS
		size := len(buffer) - len(ptr)
		if size+IKCP_OVERHEAD > int(kcp.mtu) {
			kcp.output(buffer, size)
			ptr = buffer
		}
		ptr = seg.encode(ptr)
	}

	kcp.probe = 0

	// calculate window size
	cwnd := _imin_(kcp.snd_wnd, kcp.rmt_wnd)
	if kcp.nocwnd == 0 {
		cwnd = _imin_(kcp.cwnd, cwnd)
	}

	// sliding window, controlled by snd_nxt && sna_una+cwnd
	newSegsCount := 0
	for k := range kcp.snd_queue {
		if _itimediff(kcp.snd_nxt, kcp.snd_una+cwnd) >= 0 {
			break
		}
		newseg := kcp.snd_queue[k]
		newseg.conv = kcp.conv
		newseg.cmd = IKCP_CMD_PUSH
		newseg.sn = kcp.snd_nxt
		kcp.snd_buf = append(kcp.snd_buf, newseg)
		kcp.snd_nxt++
		newSegsCount++
		// ownership of the payload moved to snd_buf
		kcp.snd_queue[k].data = nil
	}
	if newSegsCount > 0 {
		kcp.snd_queue = kcp.remove_front(kcp.snd_queue, newSegsCount)
	}

	// calculate resent
	resent := uint32(kcp.fastresend)
	if kcp.fastresend <= 0 {
		// fast retransmit disabled: threshold that fastack never reaches in practice
		resent = 0xffffffff
	}

	// check for retransmissions
	current := currentMs()
	var change, lost, lostSegs, fastRetransSegs, earlyRetransSegs uint64
	for k := range kcp.snd_buf {
		segment := &kcp.snd_buf[k]
		needsend := false
		if segment.xmit == 0 { // initial transmit
			needsend = true
			segment.rto = kcp.rx_rto
			segment.resendts = current + segment.rto
		} else if _itimediff(current, segment.resendts) >= 0 { // RTO
			needsend = true
			if kcp.nodelay == 0 {
				segment.rto += kcp.rx_rto
			} else {
				segment.rto += kcp.rx_rto / 2
			}
			segment.resendts = current + segment.rto
			lost++
			lostSegs++
		} else if segment.fastack >= resent { // fast retransmit
			needsend = true
			segment.fastack = 0
			segment.rto = kcp.rx_rto
			segment.resendts = current + segment.rto
			change++
			fastRetransSegs++
		} else if segment.fastack > 0 && newSegsCount == 0 { // early retransmit
			needsend = true
			segment.fastack = 0
			segment.rto = kcp.rx_rto
			segment.resendts = current + segment.rto
			change++
			earlyRetransSegs++
		}

		if needsend {
			segment.xmit++
			segment.ts = current
			segment.wnd = seg.wnd
			segment.una = seg.una

			size := len(buffer) - len(ptr)
			need := IKCP_OVERHEAD + len(segment.data)

			if size+need > int(kcp.mtu) {
				kcp.output(buffer, size)
				current = currentMs() // time update for a blocking call
				ptr = buffer
			}

			ptr = segment.encode(ptr)
			copy(ptr, segment.data)
			ptr = ptr[len(segment.data):]

			// too many transmissions of one segment: flag the link
			if segment.xmit >= kcp.dead_link {
				kcp.state = 0xFFFFFFFF
			}
		}
	}

	// flush remaining segments
	size := len(buffer) - len(ptr)
	if size > 0 {
		kcp.output(buffer, size)
	}

	// counter updates
	sum := lostSegs
	if lostSegs > 0 {
		atomic.AddUint64(&DefaultSnmp.LostSegs, lostSegs)
	}
	if fastRetransSegs > 0 {
		atomic.AddUint64(&DefaultSnmp.FastRetransSegs, fastRetransSegs)
		sum += fastRetransSegs
	}
	if earlyRetransSegs > 0 {
		atomic.AddUint64(&DefaultSnmp.EarlyRetransSegs, earlyRetransSegs)
		sum += earlyRetransSegs
	}
	if sum > 0 {
		atomic.AddUint64(&DefaultSnmp.RetransSegs, sum)
	}

	// update ssthresh
	// rate halving, https://tools.ietf.org/html/rfc6937
	if change > 0 {
		inflight := kcp.snd_nxt - kcp.snd_una
		kcp.ssthresh = inflight / 2
		if kcp.ssthresh < IKCP_THRESH_MIN {
			kcp.ssthresh = IKCP_THRESH_MIN
		}
		kcp.cwnd = kcp.ssthresh + resent
		kcp.incr = kcp.cwnd * kcp.mss
	}

	// congestion control, https://tools.ietf.org/html/rfc5681
	if lost > 0 {
		kcp.ssthresh = cwnd / 2
		if kcp.ssthresh < IKCP_THRESH_MIN {
			kcp.ssthresh = IKCP_THRESH_MIN
		}
		kcp.cwnd = 1
		kcp.incr = kcp.mss
	}

	if kcp.cwnd < 1 {
		kcp.cwnd = 1
		kcp.incr = kcp.mss
	}
}
// Update updates state (call it repeatedly, every 10ms-100ms), or you can ask
// Check when to call it again (without Input/Send calling).
// The current time is obtained internally via currentMs; a flush is
// performed whenever the scheduled flush time has been reached.
func (kcp *KCP) Update() {
	var slap int32

	current := currentMs()
	if kcp.updated == 0 {
		// first call: start the flush schedule now
		kcp.updated = 1
		kcp.ts_flush = current
	}

	slap = _itimediff(current, kcp.ts_flush)

	// schedule is more than 10s off in either direction: resynchronise
	if slap >= 10000 || slap < -10000 {
		kcp.ts_flush = current
		slap = 0
	}

	if slap >= 0 {
		kcp.ts_flush += kcp.interval
		// still behind after advancing: schedule relative to now
		if _itimediff(current, kcp.ts_flush) >= 0 {
			kcp.ts_flush = current + kcp.interval
		}
		kcp.flush(false)
	}
}
// Check determines when you should invoke Update next:
// it returns the time in milliseconds at which Update should be called,
// assuming no Input/Send call happens in between. You can call Update at
// that time instead of calling it repeatedly.
// This is important for reducing unnecessary Update invocations. Use it to
// schedule Update (eg. implementing an epoll-like mechanism,
// or optimizing Update when handling massive numbers of kcp connections).
func (kcp *KCP) Check() uint32 {
	current := currentMs()
	ts_flush := kcp.ts_flush
	tm_flush := int32(0x7fffffff)
	tm_packet := int32(0x7fffffff)
	minimal := uint32(0)
	// Update has never run: call it right away
	if kcp.updated == 0 {
		return current
	}

	// schedule is more than 10s off: treat the flush as due now
	if _itimediff(current, ts_flush) >= 10000 ||
		_itimediff(current, ts_flush) < -10000 {
		ts_flush = current
	}

	if _itimediff(current, ts_flush) >= 0 {
		return current
	}

	// time until the next scheduled flush
	tm_flush = _itimediff(ts_flush, current)

	// time until the earliest pending retransmission
	for k := range kcp.snd_buf {
		seg := &kcp.snd_buf[k]
		diff := _itimediff(seg.resendts, current)
		if diff <= 0 {
			return current
		}
		if diff < tm_packet {
			tm_packet = diff
		}
	}

	// wait for whichever comes first, but never longer than one interval
	minimal = uint32(tm_packet)
	if tm_packet >= tm_flush {
		minimal = uint32(tm_flush)
	}
	if minimal >= kcp.interval {
		minimal = kcp.interval
	}

	return current + minimal
}
// SetMtu changes the MTU size (default is 1400), recomputes mss and
// reallocates the output buffer to match.
// It returns 0 on success and -1 when mtu is below 50 or below the segment
// header size, in which case nothing is changed.
func (kcp *KCP) SetMtu(mtu int) int {
	if mtu < 50 || mtu < IKCP_OVERHEAD {
		return -1
	}

	// allocate before touching any field so a failed allocation leaves the
	// object unchanged; make never returns nil, so no further check is needed
	buffer := make([]byte, (mtu+IKCP_OVERHEAD)*3)
	kcp.mtu = uint32(mtu)
	kcp.mss = kcp.mtu - IKCP_OVERHEAD
	kcp.buffer = buffer
	return 0
}
// NoDelay configures the latency/throughput trade-off. A negative value
// leaves the corresponding setting untouched. It always returns 0.
// fastest: ikcp_nodelay(kcp, 1, 20, 2, 1)
// nodelay: 0:disable(default), 1:enable; also selects the minimum rto
// interval: internal update timer interval in millisec, default is 100ms,
// clamped to [10, 5000]
// resend: 0:disable fast resend(default), 1:enable fast resend
// nc: 0:normal congestion control(default), 1:disable congestion control
func (kcp *KCP) NoDelay(nodelay, interval, resend, nc int) int {
	if nodelay >= 0 {
		kcp.nodelay = uint32(nodelay)
		kcp.rx_minrto = IKCP_RTO_MIN
		if nodelay != 0 {
			kcp.rx_minrto = IKCP_RTO_NDL
		}
	}

	switch {
	case interval < 0:
		// keep the current interval
	case interval > 5000:
		kcp.interval = 5000
	case interval < 10:
		kcp.interval = 10
	default:
		kcp.interval = uint32(interval)
	}

	if resend >= 0 {
		kcp.fastresend = int32(resend)
	}
	if nc >= 0 {
		kcp.nocwnd = int32(nc)
	}
	return 0
}
// WndSize sets the maximum send and receive window sizes (both 32 by
// default). A non-positive value leaves the corresponding window unchanged.
// It always returns 0.
func (kcp *KCP) WndSize(sndwnd, rcvwnd int) int {
	if rcvwnd > 0 {
		kcp.rcv_wnd = uint32(rcvwnd)
	}
	if sndwnd > 0 {
		kcp.snd_wnd = uint32(sndwnd)
	}
	return 0
}
// WaitSnd returns the number of segments waiting to be sent or
// acknowledged: those in snd_buf plus those still in snd_queue.
func (kcp *KCP) WaitSnd() int {
	inFlight := len(kcp.snd_buf)
	queued := len(kcp.snd_queue)
	return inFlight + queued
}
// remove_front drops the first n elements of q by shifting the rest to the
// front. The vacated tail slots are zeroed so the GC can reclaim what they
// referenced. The returned slice shares q's backing array.
func (kcp *KCP) remove_front(q []segment, n int) []segment {
	kept := copy(q, q[n:])
	stale := q[kept:]
	for i := range stale {
		stale[i] = segment{}
	}
	return q[:kept]
}

975
vendor/github.com/fatedier/kcp-go/sess.go generated vendored

@ -0,0 +1,975 @@
package kcp
import (
"crypto/rand"
"encoding/binary"
"hash/crc32"
"io"
"net"
"sync"
"sync/atomic"
"time"
"github.com/pkg/errors"
"golang.org/x/net/ipv4"
)
// errTimeout is the error value returned by Read and Write when the
// corresponding deadline has passed.
type errTimeout struct {
	error
}

// Timeout always reports true.
func (errTimeout) Timeout() bool { return true }

// Temporary always reports true.
func (errTimeout) Temporary() bool { return true }

// Error returns the fixed message "i/o timeout".
func (errTimeout) Error() string { return "i/o timeout" }
// Packet layout and queue sizing constants for UDP sessions.
const (
	// 16-bytes magic number for each packet
	nonceSize = 16

	// 4-bytes packet checksum
	crcSize = 4

	// overall crypto header size
	cryptHeaderSize = nonceSize + crcSize

	// maximum packet size; also the size of every pooled packet buffer
	mtuLimit = 1500

	// FEC keeps rxFECMulti* (dataShard+parityShard) ordered packets in memory
	rxFECMulti = 3

	// accept backlog
	acceptBacklog = 128

	// prerouting(to session) queue
	qlen = 128
)
// Messages of the errors returned by session methods.
const (
	// returned by Read, Write and Close once the session has been closed
	errBrokenPipe = "broken pipe"
	// returned when an option cannot be applied to this kind of session
	errInvalidOperation = "invalid operation"
)
var (
	// global packet buffer pool
	// shared among sending/receiving/FEC; every buffer it creates is
	// mtuLimit bytes long
	xmitBuf sync.Pool
)
func init() {
xmitBuf.New = func() interface{} {
return make([]byte, mtuLimit)
}
}
type (
	// UDPSession defines a KCP session implemented by UDP
	UDPSession struct {
		updaterIdx int            // record slice index in updater
		conn       net.PacketConn // the underlying packet connection
		kcp        *KCP           // KCP ARQ protocol
		l          *Listener      // point to the Listener if it's accepted by Listener
		block      BlockCrypt     // block encryption

		// kcp receiving is based on packets
		// recvbuf turns packets into stream
		recvbuf []byte
		bufptr  []byte // unread remainder of recvbuf, served first by Read
		// extended output buffer(with header)
		ext []byte

		// FEC
		fecDecoder *fecDecoder
		fecEncoder *fecEncoder

		// settings
		remote     net.Addr  // remote peer address
		rd         time.Time // read deadline
		wd         time.Time // write deadline
		headerSize int       // the overall header size added before KCP frame
		ackNoDelay bool      // send ack immediately for each incoming packet
		writeDelay bool      // delay kcp.flush() for Write() for bulk transfer
		dup        int       // duplicate udp packets

		// notifications
		die          chan struct{} // notify session has Closed
		chReadEvent  chan struct{} // notify Read() can be called without blocking
		chWriteEvent chan struct{} // notify Write() can be called without blocking
		chErrorEvent chan error    // notify Read() have an error

		isClosed bool // flag the session has Closed
		mu       sync.Mutex // guards the session state, including kcp
	}

	// setReadBuffer describes a connection whose read buffer size can be set
	setReadBuffer interface {
		SetReadBuffer(bytes int) error
	}

	// setWriteBuffer describes a connection whose write buffer size can be set
	setWriteBuffer interface {
		SetWriteBuffer(bytes int) error
	}
)
// newUDPSession creates a new udp session for client or server.
// conv is the KCP conversation id; dataShards/parityShards configure FEC;
// l is the accepting Listener (nil for a client connection); conn is the
// underlying packet connection, remote the peer address and block the
// optional packet cipher (nil disables encryption).
// The session is registered with the global updater, and for client
// connections a read loop goroutine is started.
func newUDPSession(conv uint32, dataShards, parityShards int, l *Listener, conn net.PacketConn, remote net.Addr, block BlockCrypt) *UDPSession {
	sess := new(UDPSession)
	sess.die = make(chan struct{})
	sess.chReadEvent = make(chan struct{}, 1)
	sess.chWriteEvent = make(chan struct{}, 1)
	sess.chErrorEvent = make(chan error, 1)
	sess.remote = remote
	sess.conn = conn
	sess.l = l
	sess.block = block
	sess.recvbuf = make([]byte, mtuLimit)

	// FEC initialization
	sess.fecDecoder = newFECDecoder(rxFECMulti*(dataShards+parityShards), dataShards, parityShards)
	// the FEC header sits behind the crypto header when encryption is on
	if sess.block != nil {
		sess.fecEncoder = newFECEncoder(dataShards, parityShards, cryptHeaderSize)
	} else {
		sess.fecEncoder = newFECEncoder(dataShards, parityShards, 0)
	}

	// calculate header size
	if sess.block != nil {
		sess.headerSize += cryptHeaderSize
	}
	if sess.fecEncoder != nil {
		sess.headerSize += fecHeaderSizePlus2
	}

	// only allocate extended packet buffer
	// when the extra header is required
	if sess.headerSize > 0 {
		sess.ext = make([]byte, mtuLimit)
	}

	sess.kcp = NewKCP(conv, func(buf []byte, size int) {
		// anything shorter than a segment header is not worth sending
		if size >= IKCP_OVERHEAD {
			sess.output(buf[:size])
		}
	})
	// leave room for the session's own headers inside the default mtu
	sess.kcp.SetMtu(IKCP_MTU_DEF - sess.headerSize)

	// add current session to the global updater,
	// which periodically calls sess.update()
	updater.addSession(sess)

	if sess.l == nil { // it's a client connection
		go sess.readLoop()
		atomic.AddUint64(&DefaultSnmp.ActiveOpens, 1)
	} else {
		atomic.AddUint64(&DefaultSnmp.PassiveOpens, 1)
	}
	// track the current and the highest number of established sessions;
	// the single CAS is not retried if another session updates MaxConn first
	currestab := atomic.AddUint64(&DefaultSnmp.CurrEstab, 1)
	maxconn := atomic.LoadUint64(&DefaultSnmp.MaxConn)
	if currestab > maxconn {
		atomic.CompareAndSwapUint64(&DefaultSnmp.MaxConn, maxconn, currestab)
	}

	return sess
}
// Read implements net.Conn.
// It first serves bytes left over from a previous message, then the next
// complete KCP message; when neither is available it blocks until data
// arrives, the read deadline passes (errTimeout), the session is closed
// (broken pipe) or an error is delivered on chErrorEvent.
// A message larger than b is buffered and returned across several calls.
func (s *UDPSession) Read(b []byte) (n int, err error) {
	for {
		s.mu.Lock()
		if len(s.bufptr) > 0 { // copy from buffer into b
			n = copy(b, s.bufptr)
			s.bufptr = s.bufptr[n:]
			s.mu.Unlock()
			return n, nil
		}

		if s.isClosed {
			s.mu.Unlock()
			return 0, errors.New(errBrokenPipe)
		}

		if size := s.kcp.PeekSize(); size > 0 { // peek data size from kcp
			atomic.AddUint64(&DefaultSnmp.BytesReceived, uint64(size))
			if len(b) >= size { // direct write to b
				s.kcp.Recv(b)
				s.mu.Unlock()
				return size, nil
			}

			// resize kcp receive buffer
			// to make sure recvbuf has enough capacity
			if cap(s.recvbuf) < size {
				s.recvbuf = make([]byte, size)
			}

			// resize recvbuf slice length
			s.recvbuf = s.recvbuf[:size]
			s.kcp.Recv(s.recvbuf)
			n = copy(b, s.recvbuf)   // copy to b
			s.bufptr = s.recvbuf[n:] // update pointer
			s.mu.Unlock()
			return n, nil
		}

		// read deadline
		var timeout *time.Timer
		var c <-chan time.Time
		if !s.rd.IsZero() {
			if time.Now().After(s.rd) {
				s.mu.Unlock()
				return 0, errTimeout{}
			}

			delay := s.rd.Sub(time.Now())
			timeout = time.NewTimer(delay)
			c = timeout.C
		}
		s.mu.Unlock()

		// wait for read event or timeout
		// (all cases but the error one loop back and re-check state under the lock)
		select {
		case <-s.chReadEvent:
		case <-c:
		case <-s.die:
		case err = <-s.chErrorEvent:
			if timeout != nil {
				timeout.Stop()
			}
			return n, err
		}

		if timeout != nil {
			timeout.Stop()
		}
	}
}
// Write implements net.Conn.
// Once fewer than snd_wnd segments are waiting to be sent, all of b is
// queued in chunks of at most mss bytes and len(b) is returned. Until then
// it blocks, returning errTimeout when the write deadline passes or a broken
// pipe error when the session is closed.
func (s *UDPSession) Write(b []byte) (n int, err error) {
	for {
		s.mu.Lock()
		if s.isClosed {
			s.mu.Unlock()
			return 0, errors.New(errBrokenPipe)
		}

		// api flow control
		if s.kcp.WaitSnd() < int(s.kcp.snd_wnd) {
			n = len(b)
			// hand the data to kcp one mss-sized chunk at a time
			for {
				if len(b) <= int(s.kcp.mss) {
					s.kcp.Send(b)
					break
				} else {
					s.kcp.Send(b[:s.kcp.mss])
					b = b[s.kcp.mss:]
				}
			}

			// flush right away unless writes are being batched
			if !s.writeDelay {
				s.kcp.flush(false)
			}
			s.mu.Unlock()
			atomic.AddUint64(&DefaultSnmp.BytesSent, uint64(n))
			return n, nil
		}

		// write deadline
		var timeout *time.Timer
		var c <-chan time.Time
		if !s.wd.IsZero() {
			if time.Now().After(s.wd) {
				s.mu.Unlock()
				return 0, errTimeout{}
			}
			delay := s.wd.Sub(time.Now())
			timeout = time.NewTimer(delay)
			c = timeout.C
		}
		s.mu.Unlock()

		// wait for write event or timeout
		// (every case loops back and re-checks state under the lock)
		select {
		case <-s.chWriteEvent:
		case <-c:
		case <-s.die:
		}

		if timeout != nil {
			timeout.Stop()
		}
	}
}
// Close closes the connection.
// It detaches the session from the updater and, for accepted sessions, from
// the listener, then signals die to wake blocked Read/Write calls. For a
// client session the underlying connection is closed too.
// Calling Close on an already closed session returns a broken pipe error.
func (s *UDPSession) Close() error {
	// remove this session from updater & listener(if necessary)
	updater.removeSession(s)
	if s.l != nil { // notify listener
		s.l.closeSession(sessionKey{
			addr:   s.remote.String(),
			convID: s.kcp.conv,
		})
	}

	s.mu.Lock()
	defer s.mu.Unlock()
	if s.isClosed {
		return errors.New(errBrokenPipe)
	}
	close(s.die)
	s.isClosed = true
	// adding ^uint64(0) decrements the counter by one
	atomic.AddUint64(&DefaultSnmp.CurrEstab, ^uint64(0))
	if s.l == nil { // client socket close
		return s.conn.Close()
	}
	return nil
}
// LocalAddr returns the local network address of the underlying packet
// connection. The Addr returned is shared by all invocations of LocalAddr,
// so do not modify it.
func (s *UDPSession) LocalAddr() net.Addr {
	local := s.conn.LocalAddr()
	return local
}
// RemoteAddr returns the address of the remote peer given when the session
// was created. The Addr returned is shared by all invocations of
// RemoteAddr, so do not modify it.
func (s *UDPSession) RemoteAddr() net.Addr {
	peer := s.remote
	return peer
}
// SetDeadline sets both the read and the write deadline of the session.
// A zero time value disables the deadlines. It always returns nil.
func (s *UDPSession) SetDeadline(t time.Time) error {
	s.mu.Lock()
	s.rd, s.wd = t, t
	s.mu.Unlock()
	return nil
}
// SetReadDeadline implements the Conn SetReadDeadline method: it stores t
// as the deadline consulted by Read. It always returns nil.
func (s *UDPSession) SetReadDeadline(t time.Time) error {
	s.mu.Lock()
	s.rd = t
	s.mu.Unlock()
	return nil
}
// SetWriteDeadline implements the Conn SetWriteDeadline method: it stores t
// as the deadline consulted by Write. It always returns nil.
func (s *UDPSession) SetWriteDeadline(t time.Time) error {
	s.mu.Lock()
	s.wd = t
	s.mu.Unlock()
	return nil
}
// SetWriteDelay delays write for bulk transfer until the next update interval.
// When delay is false, a write flushes KCP immediately instead.
func (s *UDPSession) SetWriteDelay(delay bool) {
	s.mu.Lock()
	s.writeDelay = delay
	s.mu.Unlock()
}
// SetWindowSize sets the maximum send and receive window sizes by forwarding
// sndwnd and rcvwnd to the session's KCP instance while holding the session lock.
func (s *UDPSession) SetWindowSize(sndwnd, rcvwnd int) {
s.mu.Lock()
defer s.mu.Unlock()
s.kcp.WndSize(sndwnd, rcvwnd)
}
// SetMtu sets the maximum transmission unit (not including the UDP header).
// It returns false and changes nothing when mtu exceeds mtuLimit. Otherwise
// the session's header size is subtracted from mtu before the value is
// handed to KCP, and true is returned.
func (s *UDPSession) SetMtu(mtu int) bool {
	if mtu > mtuLimit {
		return false
	}

	s.mu.Lock()
	defer s.mu.Unlock()
	kcpMtu := mtu - s.headerSize
	s.kcp.SetMtu(kcpMtu)
	return true
}
// SetStreamMode toggles the stream mode on/off by setting the KCP stream
// flag to 1 (enabled) or 0 (disabled).
func (s *UDPSession) SetStreamMode(enable bool) {
	s.mu.Lock()
	defer s.mu.Unlock()
	if !enable {
		s.kcp.stream = 0
		return
	}
	s.kcp.stream = 1
}
// SetACKNoDelay changes the ack flush option; set it to true to flush acks
// immediately. The value is passed to KCP with every input packet.
func (s *UDPSession) SetACKNoDelay(nodelay bool) {
	s.mu.Lock()
	s.ackNoDelay = nodelay
	s.mu.Unlock()
}
// SetDUP duplicates udp packets for kcp output, for testing purpose only.
// Each data packet is written dup+1 times by output.
func (s *UDPSession) SetDUP(dup int) {
	s.mu.Lock()
	s.dup = dup
	s.mu.Unlock()
}
// SetNoDelay calls nodelay() of kcp, forwarding nodelay, interval, resend and
// nc unchanged while holding the session lock. See
// https://github.com/skywind3000/kcp/blob/master/README.en.md#protocol-configuration
// for the meaning of the four parameters.
func (s *UDPSession) SetNoDelay(nodelay, interval, resend, nc int) {
s.mu.Lock()
defer s.mu.Unlock()
s.kcp.NoDelay(nodelay, interval, resend, nc)
}
// SetDSCP sets the 6bit DSCP field of the IP header. It only works on client
// sessions: for a session accepted from a Listener, or when the underlying
// connection is neither a *connectedUDPConn nor a net.Conn, it returns an
// errInvalidOperation error.
func (s *UDPSession) SetDSCP(dscp int) error {
	s.mu.Lock()
	defer s.mu.Unlock()

	if s.l != nil {
		return errors.New(errInvalidOperation)
	}
	// DSCP occupies the upper six bits of the TOS byte, hence the shift by two.
	switch nc := s.conn.(type) {
	case *connectedUDPConn:
		return ipv4.NewConn(nc.UDPConn).SetTOS(dscp << 2)
	case net.Conn:
		return ipv4.NewConn(nc).SetTOS(dscp << 2)
	}
	return errors.New(errInvalidOperation)
}
// SetReadBuffer sets the socket read buffer. It only works on client
// sessions whose connection supports SetReadBuffer; otherwise an
// errInvalidOperation error is returned.
func (s *UDPSession) SetReadBuffer(bytes int) error {
	s.mu.Lock()
	defer s.mu.Unlock()

	if s.l != nil {
		return errors.New(errInvalidOperation)
	}
	nc, ok := s.conn.(setReadBuffer)
	if !ok {
		return errors.New(errInvalidOperation)
	}
	return nc.SetReadBuffer(bytes)
}
// SetWriteBuffer sets the socket write buffer. It only works on client
// sessions whose connection supports SetWriteBuffer; otherwise an
// errInvalidOperation error is returned.
func (s *UDPSession) SetWriteBuffer(bytes int) error {
	s.mu.Lock()
	defer s.mu.Unlock()

	if s.l != nil {
		return errors.New(errInvalidOperation)
	}
	nc, ok := s.conn.(setWriteBuffer)
	if !ok {
		return errors.New(errInvalidOperation)
	}
	return nc.SetWriteBuffer(bytes)
}
// output pipeline entry
// steps for output data processing:
// 0. Header extends
// 1. FEC
// 2. CRC32
// 3. Encryption
// 4. WriteTo kernel
//
// buf is one outgoing packet. When the session has a header (s.headerSize > 0)
// the packet is copied into the session's s.ext scratch buffer behind the
// reserved header space, so the result is only valid until the next call.
// Write errors are not reported; only successful writes are counted in
// DefaultSnmp.OutPkts and DefaultSnmp.OutBytes.
func (s *UDPSession) output(buf []byte) {
var ecc [][]byte
// 0. extend buf's header space(if necessary)
ext := buf
if s.headerSize > 0 {
ext = s.ext[:s.headerSize+len(buf)]
copy(ext[s.headerSize:], buf)
}
// 1. FEC encoding
// ecc receives whatever extra packets the encoder returns for this packet
if s.fecEncoder != nil {
ecc = s.fecEncoder.encode(ext)
}
// 2&3. crc32 & encryption
// Each packet gets random bytes in its first nonceSize bytes, then a CRC32 of
// everything after the crypt header, and is then encrypted in place.
// NOTE(review): the error from reading rand.Reader is ignored.
if s.block != nil {
io.ReadFull(rand.Reader, ext[:nonceSize])
checksum := crc32.ChecksumIEEE(ext[cryptHeaderSize:])
binary.LittleEndian.PutUint32(ext[nonceSize:], checksum)
s.block.Encrypt(ext, ext)
for k := range ecc {
io.ReadFull(rand.Reader, ecc[k][:nonceSize])
checksum := crc32.ChecksumIEEE(ecc[k][cryptHeaderSize:])
binary.LittleEndian.PutUint32(ecc[k][nonceSize:], checksum)
s.block.Encrypt(ecc[k], ecc[k])
}
}
// 4. WriteTo kernel
nbytes := 0
npkts := 0
// the data packet itself is written s.dup+1 times
for i := 0; i < s.dup+1; i++ {
if n, err := s.conn.WriteTo(ext, s.remote); err == nil {
nbytes += n
npkts++
}
}
// the FEC packets are written once each
for k := range ecc {
if n, err := s.conn.WriteTo(ecc[k], s.remote); err == nil {
nbytes += n
npkts++
}
}
atomic.AddUint64(&DefaultSnmp.OutPkts, uint64(npkts))
atomic.AddUint64(&DefaultSnmp.OutBytes, uint64(nbytes))
}
// update flushes KCP, signals a pending writer when the number of segments
// waiting to be sent is below the send window, and returns the interval
// after which update should be called again.
func (s *UDPSession) update() (interval time.Duration) {
	s.mu.Lock()
	s.kcp.flush(false)
	pending := s.kcp.WaitSnd()
	if pending < int(s.kcp.snd_wnd) {
		s.notifyWriteEvent()
	}
	next := time.Duration(s.kcp.interval) * time.Millisecond
	s.mu.Unlock()
	return next
}
// GetConv gets the conversation id of a session. The value is read from the
// KCP instance without taking the session lock.
func (s *UDPSession) GetConv() uint32 {
	return s.kcp.conv
}
// notifyReadEvent signals chReadEvent without blocking; the signal is dropped
// when the channel cannot accept it right now.
func (s *UDPSession) notifyReadEvent() {
	var signal struct{}
	select {
	case s.chReadEvent <- signal:
	default:
		// the channel is not ready to take a signal; skip this one
	}
}
// notifyWriteEvent signals chWriteEvent without blocking; the signal is
// dropped when the channel cannot accept it right now.
func (s *UDPSession) notifyWriteEvent() {
	var signal struct{}
	select {
	case s.chWriteEvent <- signal:
	default:
		// the channel is not ready to take a signal; skip this one
	}
}
// kcpInput feeds one received packet (already decrypted and checksum-verified
// by the caller) into the session's KCP instance.
//
// With FEC enabled the packet is first decoded into an FEC frame: data frames
// are passed to KCP directly, and every frame (data or parity) is handed to
// the FEC decoder, whose recovered packets are passed to KCP as well. Without
// FEC the packet goes straight to KCP. The reader is notified when KCP has a
// message available, and the SNMP counters are updated after the lock is released.
func (s *UDPSession) kcpInput(data []byte) {
// counters are accumulated locally and published once at the end
var kcpInErrors, fecErrs, fecRecovered, fecParityShards uint64
if s.fecDecoder != nil {
f := s.fecDecoder.decodeBytes(data)
s.mu.Lock()
if f.flag == typeData {
// skip the FEC header (plus the 2 extra bytes) to reach the KCP payload
if ret := s.kcp.Input(data[fecHeaderSizePlus2:], true, s.ackNoDelay); ret != 0 {
kcpInErrors++
}
}
if f.flag == typeData || f.flag == typeFEC {
if f.flag == typeFEC {
fecParityShards++
}
recovers := s.fecDecoder.decode(f)
for _, r := range recovers {
if len(r) >= 2 { // must be larger than 2bytes
// the first two bytes hold a little-endian size that includes those two bytes
sz := binary.LittleEndian.Uint16(r)
if int(sz) <= len(r) && sz >= 2 {
// recovered packets are passed with the second argument false,
// unlike packets that arrived directly
if ret := s.kcp.Input(r[2:sz], false, s.ackNoDelay); ret == 0 {
fecRecovered++
} else {
kcpInErrors++
}
} else {
fecErrs++
}
} else {
fecErrs++
}
}
}
// notify reader
if n := s.kcp.PeekSize(); n > 0 {
s.notifyReadEvent()
}
s.mu.Unlock()
} else {
s.mu.Lock()
if ret := s.kcp.Input(data, true, s.ackNoDelay); ret != 0 {
kcpInErrors++
}
// notify reader
if n := s.kcp.PeekSize(); n > 0 {
s.notifyReadEvent()
}
s.mu.Unlock()
}
atomic.AddUint64(&DefaultSnmp.InPkts, 1)
atomic.AddUint64(&DefaultSnmp.InBytes, uint64(len(data)))
if fecParityShards > 0 {
atomic.AddUint64(&DefaultSnmp.FECParityShards, fecParityShards)
}
if kcpInErrors > 0 {
atomic.AddUint64(&DefaultSnmp.KCPInErrors, kcpInErrors)
}
if fecErrs > 0 {
atomic.AddUint64(&DefaultSnmp.FECErrs, fecErrs)
}
if fecRecovered > 0 {
atomic.AddUint64(&DefaultSnmp.FECRecovered, fecRecovered)
}
}
// receiver reads packets from the underlying connection into buffers taken
// from xmitBuf and forwards every packet that is at least
// s.headerSize+IKCP_OVERHEAD bytes long to ch. Shorter packets are counted in
// DefaultSnmp.InErrs and dropped. A read error is sent to s.chErrorEvent and
// ends the loop, as does the closing of s.die.
//
// A buffer sent on ch is owned by the consumer, which returns it to the pool.
// Every buffer that is not forwarded is returned to the pool here, so dropped
// packets and shutdown do not force a fresh allocation per iteration.
func (s *UDPSession) receiver(ch chan<- []byte) {
	for {
		data := xmitBuf.Get().([]byte)[:mtuLimit]
		n, _, err := s.conn.ReadFrom(data)
		if err != nil {
			xmitBuf.Put(data)
			s.chErrorEvent <- err
			return
		}
		if n < s.headerSize+IKCP_OVERHEAD {
			// too short to carry the session headers plus a KCP segment
			xmitBuf.Put(data)
			atomic.AddUint64(&DefaultSnmp.InErrs, 1)
			continue
		}
		select {
		case ch <- data[:n]:
		case <-s.die:
			xmitBuf.Put(data)
			return
		}
	}
}
// readLoop is the read loop of a client session. It starts receiver in its
// own goroutine and, for every packet received, decrypts it and verifies its
// CRC32 when encryption is enabled, feeds valid packets to kcpInput, and
// returns the buffer to xmitBuf. The loop ends when s.die is closed.
func (s *UDPSession) readLoop() {
	chPacket := make(chan []byte, qlen)
	go s.receiver(chPacket)

	for {
		select {
		case <-s.die:
			return
		case pkt := <-chPacket:
			payload, valid := pkt, true
			if s.block != nil {
				s.block.Decrypt(pkt, pkt)
				// layout after decryption: nonce | crc32 | payload
				body := pkt[nonceSize:]
				sum := crc32.ChecksumIEEE(body[crcSize:])
				if sum == binary.LittleEndian.Uint32(body) {
					payload = body[crcSize:]
				} else {
					valid = false
					atomic.AddUint64(&DefaultSnmp.InCsumErrors, 1)
				}
			}
			if valid {
				s.kcpInput(payload)
			}
			// always recycle the buffer as it was received
			xmitBuf.Put(pkt)
		}
	}
}
type (
// sessionKey identifies a session by the string form of the remote address
// and the KCP conversation id; it is the key type of Listener.sessions.
sessionKey struct {
addr string
convID uint32
}
// Listener defines a server listening for connections
Listener struct {
block BlockCrypt // block encryption
dataShards int // FEC data shard
parityShards int // FEC parity shard
fecDecoder *fecDecoder // FEC mock initialization
conn net.PacketConn // the underlying packet connection
sessions map[sessionKey]*UDPSession // all sessions accepted by this Listener
chAccepts chan *UDPSession // Listen() backlog
chSessionClosed chan sessionKey // session close queue
headerSize int // the overall header size added before KCP frame
die chan struct{} // notify the listener has closed
rd atomic.Value // read deadline for Accept()
wd atomic.Value // write deadline; only stored by SetWriteDeadline
}
// incoming packet
inPacket struct {
from net.Addr // sender address
data []byte // packet bytes as received
}
)
// monitor incoming data for all connections of server
//
// It starts the listener's receiver goroutine and then loops until l.die is
// closed. Each packet is decrypted and CRC-checked when encryption is enabled,
// its conversation id is extracted, and it is dispatched to the session for
// (remote address, conversation id), creating and queueing a new session for
// Accept when none exists. Session keys arriving on l.chSessionClosed are
// removed from l.sessions. l.sessions is only touched from this goroutine in
// the code shown here.
func (l *Listener) monitor() {
// cache last session
var lastKey sessionKey
var lastSession *UDPSession
chPacket := make(chan inPacket, qlen)
go l.receiver(chPacket)
for {
select {
case p := <-chPacket:
raw := p.data
data := p.data
from := p.from
dataValid := false
if l.block != nil {
l.block.Decrypt(data, data)
// layout after decryption: nonce | crc32 | payload
data = data[nonceSize:]
checksum := crc32.ChecksumIEEE(data[crcSize:])
if checksum == binary.LittleEndian.Uint32(data) {
data = data[crcSize:]
dataValid = true
} else {
atomic.AddUint64(&DefaultSnmp.InCsumErrors, 1)
}
} else if l.block == nil {
dataValid = true
}
if dataValid {
var conv uint32
convValid := false
if l.fecDecoder != nil {
// the 16-bit frame type sits at offset 4 of the FEC header; only data
// frames carry a KCP header from which the conversation id can be read
isfec := binary.LittleEndian.Uint16(data[4:])
if isfec == typeData {
conv = binary.LittleEndian.Uint32(data[fecHeaderSizePlus2:])
convValid = true
}
} else {
conv = binary.LittleEndian.Uint32(data)
convValid = true
}
if convValid {
key := sessionKey{
addr: from.String(),
convID: conv,
}
var s *UDPSession
var ok bool
// packets received from an address always come in batch.
// cache the session for next packet, without querying map.
if key == lastKey {
s, ok = lastSession, true
} else if s, ok = l.sessions[key]; ok {
lastSession = s
lastKey = key
}
if !ok { // new session
if len(l.chAccepts) < cap(l.chAccepts) && len(l.sessions) < 4096 { // do not let new session overwhelm accept queue and connection count
s := newUDPSession(conv, l.dataShards, l.parityShards, l, l.conn, from, l.block)
s.kcpInput(data)
l.sessions[key] = s
// the capacity check above runs before this send
l.chAccepts <- s
}
} else {
s.kcpInput(data)
}
}
}
// the buffer goes back to the pool whether or not the packet was accepted
xmitBuf.Put(raw)
case key := <-l.chSessionClosed:
// drop the cached session if it is the one being closed
if key == lastKey {
lastKey = sessionKey{}
}
delete(l.sessions, key)
case <-l.die:
return
}
}
}
// receiver reads packets from the listener's connection into buffers taken
// from xmitBuf and forwards every packet that is at least
// l.headerSize+IKCP_OVERHEAD bytes long, together with its sender address, to
// ch. Shorter packets are counted in DefaultSnmp.InErrs and dropped. The loop
// ends on the first read error or when l.die is closed.
//
// A buffer sent on ch is owned by the consumer, which returns it to the pool.
// Every buffer that is not forwarded is returned to the pool here, so dropped
// packets and shutdown do not force a fresh allocation per iteration.
func (l *Listener) receiver(ch chan<- inPacket) {
	for {
		data := xmitBuf.Get().([]byte)[:mtuLimit]
		n, from, err := l.conn.ReadFrom(data)
		if err != nil {
			xmitBuf.Put(data)
			return
		}
		if n < l.headerSize+IKCP_OVERHEAD {
			// too short to carry the listener headers plus a KCP segment
			xmitBuf.Put(data)
			atomic.AddUint64(&DefaultSnmp.InErrs, 1)
			continue
		}
		select {
		case ch <- inPacket{from, data[:n]}:
		case <-l.die:
			xmitBuf.Put(data)
			return
		}
	}
}
// SetReadBuffer sets the socket read buffer for the Listener. It returns an
// errInvalidOperation error when the underlying connection does not support
// SetReadBuffer.
func (l *Listener) SetReadBuffer(bytes int) error {
	nc, ok := l.conn.(setReadBuffer)
	if !ok {
		return errors.New(errInvalidOperation)
	}
	return nc.SetReadBuffer(bytes)
}
// SetWriteBuffer sets the socket write buffer for the Listener. It returns an
// errInvalidOperation error when the underlying connection does not support
// SetWriteBuffer.
func (l *Listener) SetWriteBuffer(bytes int) error {
	nc, ok := l.conn.(setWriteBuffer)
	if !ok {
		return errors.New(errInvalidOperation)
	}
	return nc.SetWriteBuffer(bytes)
}
// SetDSCP sets the 6bit DSCP field of the IP header. It returns an
// errInvalidOperation error when the underlying connection is not a net.Conn.
func (l *Listener) SetDSCP(dscp int) error {
	nc, ok := l.conn.(net.Conn)
	if !ok {
		return errors.New(errInvalidOperation)
	}
	// DSCP occupies the upper six bits of the TOS byte, hence the shift by two.
	return ipv4.NewConn(nc).SetTOS(dscp << 2)
}
// Accept implements the Accept method in the Listener interface; it waits for
// the next connection and returns it as a generic net.Conn.
//
// On failure it returns an untyped nil net.Conn. Returning the nil
// *UDPSession from AcceptKCP directly would wrap it in a non-nil interface
// value, so a caller's `conn != nil` check would wrongly succeed.
func (l *Listener) Accept() (net.Conn, error) {
	sess, err := l.AcceptKCP()
	if err != nil {
		return nil, err
	}
	return sess, nil
}
// AcceptKCP accepts a KCP connection.
//
// It blocks until a session is available, the read deadline set through
// SetReadDeadline or SetDeadline passes (returning an errTimeout), or the
// listener is closed (returning an errBrokenPipe error). A zero or unset
// deadline means no timeout.
func (l *Listener) AcceptKCP() (*UDPSession, error) {
	var timeout <-chan time.Time
	if tdeadline, ok := l.rd.Load().(time.Time); ok && !tdeadline.IsZero() {
		// Use an explicit timer so it can be released as soon as this call
		// returns, instead of lingering until a possibly distant deadline.
		timer := time.NewTimer(tdeadline.Sub(time.Now()))
		defer timer.Stop()
		timeout = timer.C
	}

	select {
	case <-timeout:
		return nil, &errTimeout{}
	case c := <-l.chAccepts:
		return c, nil
	case <-l.die:
		return nil, errors.New(errBrokenPipe)
	}
}
// SetDeadline sets the deadline associated with the listener. A zero time value disables the deadline.
// It stores t as both the read and the write deadline and always returns nil.
func (l *Listener) SetDeadline(t time.Time) error {
l.SetReadDeadline(t)
l.SetWriteDeadline(t)
return nil
}
// SetReadDeadline stores t as the listener's read deadline, which AcceptKCP
// uses as its timeout. It always returns nil.
func (l *Listener) SetReadDeadline(t time.Time) error {
l.rd.Store(t)
return nil
}
// SetWriteDeadline stores t as the listener's write deadline in l.wd.
// It always returns nil.
func (l *Listener) SetWriteDeadline(t time.Time) error {
l.wd.Store(t)
return nil
}
// Close stops listening on the UDP address. Already Accepted connections are not closed.
// It closes l.die, which ends the monitor loop and unblocks AcceptKCP and
// closeSession, and then closes the underlying packet connection.
// NOTE(review): close(l.die) is not guarded, so a second call to Close panics
// on closing an already closed channel.
func (l *Listener) Close() error {
close(l.die)
return l.conn.Close()
}
// closeSession notifies the listener that a session has closed.
// It blocks until the key is received from l.chSessionClosed (returning true)
// or the listener itself has been closed (returning false).
func (l *Listener) closeSession(key sessionKey) bool {
select {
case l.chSessionClosed <- key:
return true
case <-l.die:
return false
}
}
// Addr returns the listener's network address, taken from the underlying
// packet connection. The Addr returned is shared by all invocations of Addr,
// so do not modify it.
func (l *Listener) Addr() net.Addr {
	return l.conn.LocalAddr()
}
// Listen listens for incoming KCP packets addressed to the local address
// laddr on the network "udp", without encryption and with both FEC shard
// counts set to zero.
//
// On failure it returns an untyped nil net.Listener. Returning the nil
// *Listener from ListenWithOptions directly would wrap it in a non-nil
// interface value, so a caller's `l != nil` check would wrongly succeed.
func Listen(laddr string) (net.Listener, error) {
	l, err := ListenWithOptions(laddr, nil, 0, 0)
	if err != nil {
		return nil, err
	}
	return l, nil
}
// ListenWithOptions listens for incoming KCP packets addressed to the local
// address laddr on the network "udp" with packet encryption.
// dataShards and parityShards define the Reed-Solomon Erasure Coding
// parameters. Address resolution and socket errors are returned wrapped with
// the name of the failing call.
func ListenWithOptions(laddr string, block BlockCrypt, dataShards, parityShards int) (*Listener, error) {
	local, resolveErr := net.ResolveUDPAddr("udp", laddr)
	if resolveErr != nil {
		return nil, errors.Wrap(resolveErr, "net.ResolveUDPAddr")
	}

	sock, listenErr := net.ListenUDP("udp", local)
	if listenErr != nil {
		return nil, errors.Wrap(listenErr, "net.ListenUDP")
	}

	return ServeConn(block, dataShards, parityShards, sock)
}
// ServeConn serves KCP protocol for a single packet connection.
// It builds a Listener around conn, computes the per-packet header size from
// the enabled encryption and FEC layers, and starts the monitor goroutine.
// The returned error is always nil.
func ServeConn(block BlockCrypt, dataShards, parityShards int, conn net.PacketConn) (*Listener, error) {
	l := &Listener{
		conn:            conn,
		sessions:        make(map[sessionKey]*UDPSession),
		chAccepts:       make(chan *UDPSession, acceptBacklog),
		chSessionClosed: make(chan sessionKey),
		die:             make(chan struct{}),
		dataShards:      dataShards,
		parityShards:    parityShards,
		block:           block,
		fecDecoder:      newFECDecoder(rxFECMulti*(dataShards+parityShards), dataShards, parityShards),
	}

	// every enabled layer adds its own header in front of the KCP frame
	if l.block != nil {
		l.headerSize += cryptHeaderSize
	}
	if l.fecDecoder != nil {
		l.headerSize += fecHeaderSizePlus2
	}

	go l.monitor()
	return l, nil
}
// Dial connects to the remote address "raddr" on the network "udp", without
// encryption and with both FEC shard counts set to zero.
//
// On failure it returns an untyped nil net.Conn. Returning the nil
// *UDPSession from DialWithOptions directly would wrap it in a non-nil
// interface value, so a caller's `conn != nil` check would wrongly succeed.
func Dial(raddr string) (net.Conn, error) {
	sess, err := DialWithOptions(raddr, nil, 0, 0)
	if err != nil {
		return nil, err
	}
	return sess, nil
}
// DialWithOptions connects to the remote address "raddr" on the network "udp"
// with packet encryption. dataShards and parityShards are passed on to the
// new session. Address resolution and dial errors are returned wrapped with
// the name of the failing call.
func DialWithOptions(raddr string, block BlockCrypt, dataShards, parityShards int) (*UDPSession, error) {
	udpaddr, err := net.ResolveUDPAddr("udp", raddr)
	if err != nil {
		return nil, errors.Wrap(err, "net.ResolveUDPAddr")
	}
	udpconn, err := net.DialUDP("udp", nil, udpaddr)
	if err != nil {
		return nil, errors.Wrap(err, "net.DialUDP")
	}
	sess, err := NewConn(raddr, block, dataShards, parityShards, &connectedUDPConn{udpconn})
	if err != nil {
		// The socket was opened here and nobody else holds it, so release it
		// instead of leaking the file descriptor.
		udpconn.Close()
		return nil, err
	}
	return sess, nil
}
// NewConn establishes a session and talks KCP protocol over a packet connection.
//
// raddr is resolved as a UDP address and becomes the session's remote
// address. The conversation id is drawn from crypto/rand. An error is
// returned when the address cannot be resolved or when no random
// conversation id can be read, rather than silently falling back to id 0.
// conn is not closed on failure; it stays owned by the caller.
func NewConn(raddr string, block BlockCrypt, dataShards, parityShards int, conn net.PacketConn) (*UDPSession, error) {
	udpaddr, err := net.ResolveUDPAddr("udp", raddr)
	if err != nil {
		return nil, errors.Wrap(err, "net.ResolveUDPAddr")
	}
	var convid uint32
	if err := binary.Read(rand.Reader, binary.LittleEndian, &convid); err != nil {
		return nil, errors.Wrap(err, "binary.Read")
	}
	return newUDPSession(convid, dataShards, parityShards, nil, conn, udpaddr, block), nil
}
// NewConnEx establishes a session with a caller-chosen conversation id over an
// existing UDP connection. raddr is resolved as a UDP address and becomes the
// session's remote address. When connected is true, conn is wrapped in a
// connectedUDPConn so that WriteTo calls become plain Write calls.
func NewConnEx(convid uint32, connected bool, raddr string, block BlockCrypt, dataShards, parityShards int, conn *net.UDPConn) (*UDPSession, error) {
	remote, resolveErr := net.ResolveUDPAddr("udp", raddr)
	if resolveErr != nil {
		return nil, errors.Wrap(resolveErr, "net.ResolveUDPAddr")
	}

	pConn := net.PacketConn(conn)
	if connected {
		pConn = &connectedUDPConn{UDPConn: conn}
	}
	return newUDPSession(convid, dataShards, parityShards, nil, pConn, remote, block), nil
}
// currentMs returns the current Unix time in milliseconds, truncated to the
// low 32 bits.
func currentMs() uint32 {
	nanos := time.Now().UnixNano()
	return uint32(nanos / int64(time.Millisecond))
}
// connectedUDPConn is a wrapper for net.UDPConn which converts WriteTo syscalls
// to Write syscalls that are 4 times faster on some OS'es. This should only be
// used for connections that were produced by a net.Dial* call.
// All other methods come from the embedded *net.UDPConn.
type connectedUDPConn struct{ *net.UDPConn }
// WriteTo redirects all writes to the Write syscall, which is 4 times faster.
// The addr argument is ignored.
func (c *connectedUDPConn) WriteTo(b []byte, addr net.Addr) (int, error) {
	return c.Write(b)
}

164
vendor/github.com/fatedier/kcp-go/snmp.go generated vendored

@ -0,0 +1,164 @@
package kcp
import (
"fmt"
"sync/atomic"
)
// Snmp defines network statistics indicator.
// All fields are counters that are read and written with sync/atomic.
type Snmp struct {
BytesSent uint64 // bytes sent from upper level
BytesReceived uint64 // bytes received to upper level
MaxConn uint64 // max number of connections ever reached
ActiveOpens uint64 // accumulated active open connections
PassiveOpens uint64 // accumulated passive open connections
CurrEstab uint64 // current number of established connections
InErrs uint64 // UDP read errors reported from net.PacketConn
InCsumErrors uint64 // checksum errors from CRC32
KCPInErrors uint64 // packet input errors reported from KCP
InPkts uint64 // incoming packets count
OutPkts uint64 // outgoing packets count
InSegs uint64 // incoming KCP segments
OutSegs uint64 // outgoing KCP segments
InBytes uint64 // UDP bytes received
OutBytes uint64 // UDP bytes sent
RetransSegs uint64 // accumulated retransmitted segments
FastRetransSegs uint64 // accumulated fast retransmitted segments
EarlyRetransSegs uint64 // accumulated early retransmitted segments
LostSegs uint64 // number of segs inferred as lost
RepeatSegs uint64 // number of segs duplicated
FECRecovered uint64 // correct packets recovered from FEC
FECErrs uint64 // incorrect packets recovered from FEC
FECParityShards uint64 // FEC segments received
FECShortShards uint64 // number of data shards that's not enough for recovery
}
// newSnmp allocates a statistics collector with every counter at zero.
func newSnmp() *Snmp {
	return &Snmp{}
}
// Header returns all field names, in the same order in which ToSlice returns
// the corresponding values.
func (s *Snmp) Header() []string {
	columns := []string{
		// connection and byte totals
		"BytesSent", "BytesReceived",
		"MaxConn", "ActiveOpens", "PassiveOpens", "CurrEstab",
		// input errors
		"InErrs", "InCsumErrors", "KCPInErrors",
		// packet, segment and byte counts
		"InPkts", "OutPkts",
		"InSegs", "OutSegs",
		"InBytes", "OutBytes",
		// retransmission
		"RetransSegs", "FastRetransSegs", "EarlyRetransSegs",
		"LostSegs", "RepeatSegs",
		// forward error correction
		"FECParityShards", "FECErrs", "FECRecovered", "FECShortShards",
	}
	return columns
}
// ToSlice returns the current snmp info as a slice of decimal strings, taken
// from a snapshot made with Copy and ordered like Header.
func (s *Snmp) ToSlice() []string {
	snap := s.Copy()
	counters := [...]uint64{
		snap.BytesSent, snap.BytesReceived,
		snap.MaxConn, snap.ActiveOpens, snap.PassiveOpens, snap.CurrEstab,
		snap.InErrs, snap.InCsumErrors, snap.KCPInErrors,
		snap.InPkts, snap.OutPkts,
		snap.InSegs, snap.OutSegs,
		snap.InBytes, snap.OutBytes,
		snap.RetransSegs, snap.FastRetransSegs, snap.EarlyRetransSegs,
		snap.LostSegs, snap.RepeatSegs,
		snap.FECParityShards, snap.FECErrs, snap.FECRecovered, snap.FECShortShards,
	}

	fields := make([]string, len(counters))
	for i, v := range counters {
		fields[i] = fmt.Sprint(v)
	}
	return fields
}
// Copy makes a copy of the current snmp snapshot. Every counter is loaded
// atomically on its own, so the snapshot is not atomic across counters.
func (s *Snmp) Copy() *Snmp {
	return &Snmp{
		BytesSent:        atomic.LoadUint64(&s.BytesSent),
		BytesReceived:    atomic.LoadUint64(&s.BytesReceived),
		MaxConn:          atomic.LoadUint64(&s.MaxConn),
		ActiveOpens:      atomic.LoadUint64(&s.ActiveOpens),
		PassiveOpens:     atomic.LoadUint64(&s.PassiveOpens),
		CurrEstab:        atomic.LoadUint64(&s.CurrEstab),
		InErrs:           atomic.LoadUint64(&s.InErrs),
		InCsumErrors:     atomic.LoadUint64(&s.InCsumErrors),
		KCPInErrors:      atomic.LoadUint64(&s.KCPInErrors),
		InPkts:           atomic.LoadUint64(&s.InPkts),
		OutPkts:          atomic.LoadUint64(&s.OutPkts),
		InSegs:           atomic.LoadUint64(&s.InSegs),
		OutSegs:          atomic.LoadUint64(&s.OutSegs),
		InBytes:          atomic.LoadUint64(&s.InBytes),
		OutBytes:         atomic.LoadUint64(&s.OutBytes),
		RetransSegs:      atomic.LoadUint64(&s.RetransSegs),
		FastRetransSegs:  atomic.LoadUint64(&s.FastRetransSegs),
		EarlyRetransSegs: atomic.LoadUint64(&s.EarlyRetransSegs),
		LostSegs:         atomic.LoadUint64(&s.LostSegs),
		RepeatSegs:       atomic.LoadUint64(&s.RepeatSegs),
		FECParityShards:  atomic.LoadUint64(&s.FECParityShards),
		FECErrs:          atomic.LoadUint64(&s.FECErrs),
		FECRecovered:     atomic.LoadUint64(&s.FECRecovered),
		FECShortShards:   atomic.LoadUint64(&s.FECShortShards),
	}
}
// Reset sets every counter to zero. Each counter is stored atomically on its
// own, so the reset is not atomic across counters.
func (s *Snmp) Reset() {
	counters := []*uint64{
		&s.BytesSent, &s.BytesReceived,
		&s.MaxConn, &s.ActiveOpens, &s.PassiveOpens, &s.CurrEstab,
		&s.InErrs, &s.InCsumErrors, &s.KCPInErrors,
		&s.InPkts, &s.OutPkts,
		&s.InSegs, &s.OutSegs,
		&s.InBytes, &s.OutBytes,
		&s.RetransSegs, &s.FastRetransSegs, &s.EarlyRetransSegs,
		&s.LostSegs, &s.RepeatSegs,
		&s.FECParityShards, &s.FECErrs, &s.FECRecovered, &s.FECShortShards,
	}
	for _, c := range counters {
		atomic.StoreUint64(c, 0)
	}
}
// DefaultSnmp is the global KCP connection statistics collector
var DefaultSnmp *Snmp
// init allocates DefaultSnmp so the counters can be used as soon as the
// package has been initialized.
func init() {
DefaultSnmp = newSnmp()
}

105
vendor/github.com/fatedier/kcp-go/updater.go generated vendored

@ -0,0 +1,105 @@
package kcp
import (
"container/heap"
"sync"
"time"
)
// updater is the package-wide heap of sessions waiting for their next update.
var updater updateHeap
// init prepares the updater and starts its update goroutine, which runs for
// the lifetime of the process.
func init() {
updater.init()
go updater.updateTask()
}
// entry contains a session update info
type entry struct {
ts time.Time // time at which the session is due for its next update
s *UDPSession // the session to update
}
// updateHeap is a global heap managed kcp.flush() caller.
// It implements heap.Interface over entries, ordered by due time.
type updateHeap struct {
entries []entry // heap storage; each session records its index in updaterIdx
mu sync.Mutex // guards entries
chWakeUp chan struct{} // wakes updateTask when a session is added
}
// Len reports the number of entries in the heap (heap.Interface).
func (h *updateHeap) Len() int {
	return len(h.entries)
}
// Less orders entries by due time, earliest first (heap.Interface).
func (h *updateHeap) Less(i, j int) bool {
	first, second := h.entries[i].ts, h.entries[j].ts
	return first.Before(second)
}
// Swap exchanges two entries and records the new positions in the affected
// sessions (heap.Interface).
func (h *updateHeap) Swap(i, j int) {
	es := h.entries
	es[i], es[j] = es[j], es[i]
	es[i].s.updaterIdx = i
	es[j].s.updaterIdx = j
}
// Push appends x, which must be an entry, and records its position in the
// session (heap.Interface; call heap.Push rather than this method directly).
func (h *updateHeap) Push(x interface{}) {
	h.entries = append(h.entries, x.(entry))
	last := len(h.entries) - 1
	h.entries[last].s.updaterIdx = last
}
// Pop removes and returns the last entry and marks its session as no longer
// in the heap by setting updaterIdx to -1 (heap.Interface; call heap.Pop
// rather than this method directly).
func (h *updateHeap) Pop() interface{} {
	last := len(h.entries) - 1
	removed := h.entries[last]
	removed.s.updaterIdx = -1
	h.entries[last] = entry{} // clear the slot so the GC can reclaim the session
	h.entries = h.entries[:last]
	return removed
}
// init creates the wake-up channel with a buffer of one, so a wake-up can be
// recorded even when updateTask is not waiting on the channel at that moment.
func (h *updateHeap) init() {
h.chWakeUp = make(chan struct{}, 1)
}
// addSession schedules s for an immediate update and wakes the update task.
func (h *updateHeap) addSession(s *UDPSession) {
	due := entry{ts: time.Now(), s: s}

	h.mu.Lock()
	heap.Push(h, due)
	h.mu.Unlock()

	h.wakeup()
}
// removeSession takes s out of the heap. It does nothing when the session's
// updaterIdx is -1, the value Pop assigns to a removed session.
func (h *updateHeap) removeSession(s *UDPSession) {
	h.mu.Lock()
	if idx := s.updaterIdx; idx != -1 {
		heap.Remove(h, idx)
	}
	h.mu.Unlock()
}
// wakeup signals the update task without blocking; the signal is dropped when
// the wake-up channel cannot accept it right now.
func (h *updateHeap) wakeup() {
	var signal struct{}
	select {
	case h.chWakeUp <- signal:
	default:
		// the channel cannot take another signal; nothing to do
	}
}
// updateTask is the scheduler loop started by the package init function.
// It sleeps until the earliest entry is due or a wake-up arrives, then
// updates every session whose due time has passed and reschedules it using
// the interval returned by the session's update method. It never returns.
func (h *updateHeap) updateTask() {
// timer is nil until the first pass, so only a wake-up can start the loop
var timer <-chan time.Time
for {
select {
case <-timer:
case <-h.chWakeUp:
}
h.mu.Lock()
hlen := h.Len()
now := time.Now()
// process at most hlen entries; each one is popped and pushed back, so the
// heap size does not change inside the loop
for i := 0; i < hlen; i++ {
entry := heap.Pop(h).(entry)
if now.After(entry.ts) {
// due: update the session (called while h.mu is held) and reschedule it
entry.ts = now.Add(entry.s.update())
heap.Push(h, entry)
} else {
// the earliest entry is not due yet, so no later one is either
heap.Push(h, entry)
break
}
}
if hlen > 0 {
// sleep until the earliest entry becomes due
timer = time.After(h.entries[0].ts.Sub(now))
}
h.mu.Unlock()
}
}

110
vendor/github.com/fatedier/kcp-go/xor.go generated vendored

@ -0,0 +1,110 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package kcp
import (
"runtime"
"unsafe"
)
// wordSize is the size of a uintptr in bytes.
const wordSize = int(unsafe.Sizeof(uintptr(0)))
// supportsUnaligned is true on the architectures for which the word-wise XOR
// routines are used.
const supportsUnaligned = runtime.GOARCH == "386" || runtime.GOARCH == "amd64" || runtime.GOARCH == "ppc64" || runtime.GOARCH == "ppc64le" || runtime.GOARCH == "s390x"
// fastXORBytes xors in bulk. It only works on architectures that
// support unaligned read/writes.
// It XORs the first min(len(a), len(b)) bytes of a and b into dst, word by
// word for the largest word-aligned prefix and byte by byte for the rest,
// and returns the number of bytes processed.
func fastXORBytes(dst, a, b []byte) int {
	n := len(a)
	if len(b) < n {
		n = len(b)
	}

	tail := n % wordSize
	bulk := n - tail // length of the prefix made of whole words
	if bulk > 0 {
		fastXORWords(dst[:bulk], a[:bulk], b[:bulk])
	}
	for i := bulk; i < n; i++ {
		dst[i] = a[i] ^ b[i]
	}
	return n
}
// safeXORBytes XORs the first min(len(a), len(b)) bytes of a and b into dst
// one byte at a time and returns the number of bytes processed. The leading
// n%8 bytes are handled first so that the rest can be walked in chunks of 8.
func safeXORBytes(dst, a, b []byte) int {
	n := len(a)
	if len(b) < n {
		n = len(b)
	}

	head := n % 8
	for i := 0; i < head; i++ {
		dst[i] = a[i] ^ b[i]
	}
	for off := head; off < n; off += 8 {
		d, x, y := dst[off:off+8], a[off:off+8], b[off:off+8]
		for k := 0; k < 8; k++ {
			d[k] = x[k] ^ y[k]
		}
	}
	return n
}
// xorBytes xors the bytes in a and b. The destination is assumed to have enough
// space. Returns the number of bytes xor'd.
// The word-wise routine is used where unaligned access is supported, and the
// byte-wise routine everywhere else.
func xorBytes(dst, a, b []byte) int {
	if !supportsUnaligned {
		// TODO(hanwen): if (dst, a, b) have common alignment
		// we could still try fastXORBytes. It is not clear
		// how often this happens, and it's only worth it if
		// the block encryption itself is hardware
		// accelerated.
		return safeXORBytes(dst, a, b)
	}
	return fastXORBytes(dst, a, b)
}
// fastXORWords XORs multiples of 4 or 8 bytes (depending on architecture.)
// The arguments are assumed to be of equal length.
// The number of words processed is len(b)/wordSize; any trailing bytes that
// do not fill a whole word are left untouched.
func fastXORWords(dst, a, b []byte) {
// Reinterpret the byte slices as slices of machine words. Only the element
// type changes: the reinterpreted slices keep the byte counts as their
// lengths, so indexing must stay below n, which is computed from len(b).
dw := *(*[]uintptr)(unsafe.Pointer(&dst))
aw := *(*[]uintptr)(unsafe.Pointer(&a))
bw := *(*[]uintptr)(unsafe.Pointer(&b))
n := len(b) / wordSize
// handle the leading n%8 words one by one, then the rest in groups of eight
ex := n % 8
for i := 0; i < ex; i++ {
dw[i] = aw[i] ^ bw[i]
}
for i := ex; i < n; i += 8 {
_dw := dw[i : i+8]
_aw := aw[i : i+8]
_bw := bw[i : i+8]
_dw[0] = _aw[0] ^ _bw[0]
_dw[1] = _aw[1] ^ _bw[1]
_dw[2] = _aw[2] ^ _bw[2]
_dw[3] = _aw[3] ^ _bw[3]
_dw[4] = _aw[4] ^ _bw[4]
_dw[5] = _aw[5] ^ _bw[5]
_dw[6] = _aw[6] ^ _bw[6]
_dw[7] = _aw[7] ^ _bw[7]
}
}
// xorWords XORs a and b into dst, word by word where unaligned access is
// supported and byte by byte everywhere else.
func xorWords(dst, a, b []byte) {
	if !supportsUnaligned {
		safeXORBytes(dst, a, b)
		return
	}
	fastXORWords(dst, a, b)
}

14
vendor/github.com/templexxx/cpufeat/.gitignore generated vendored

@ -0,0 +1,14 @@
# Binaries for programs and plugins
*.exe
*.dll
*.so
*.dylib
# Test binary, build with `go test -c`
*.test
# Output of the go coverage tool, specifically when used with LiteIDE
*.out
# Project-local glide cache, RE: https://github.com/Masterminds/glide/issues/736
.glide/

27
vendor/github.com/templexxx/cpufeat/LICENSE generated vendored

@ -0,0 +1,27 @@
Copyright (c) 2009 The Go Authors. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

32
vendor/github.com/templexxx/cpufeat/cpu.go generated vendored

@ -0,0 +1,32 @@
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package cpufeat implements processor feature detection
// used by the Go standard library.
package cpufeat
// X86 holds the detected x86 CPU features. Its fields are set by the init
// function in cpu_x86.go, which is only built for 386, amd64 and amd64p32.
var X86 x86
// The booleans in x86 contain the correspondingly named cpuid feature bit.
// HasAVX and HasAVX2 are only set if the OS does support XMM and YMM registers
// in addition to the cpuid feature bit being set.
// The struct is padded to avoid false sharing.
type x86 struct {
_ [CacheLineSize]byte // leading padding of one cache line
HasAES bool
HasAVX bool
HasAVX2 bool
HasBMI1 bool
HasBMI2 bool
HasERMS bool
HasOSXSAVE bool
HasPCLMULQDQ bool
HasPOPCNT bool
HasSSE2 bool
HasSSE3 bool
HasSSSE3 bool
HasSSE41 bool
HasSSE42 bool
_ [CacheLineSize]byte // trailing padding of one cache line
}

7
vendor/github.com/templexxx/cpufeat/cpu_arm.go generated vendored

@ -0,0 +1,7 @@
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package cpufeat
// CacheLineSize is the cache line size in bytes used on arm; it sizes the
// padding of the x86 struct.
const CacheLineSize = 32

7
vendor/github.com/templexxx/cpufeat/cpu_arm64.go generated vendored

@ -0,0 +1,7 @@
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package cpufeat
// CacheLineSize is the cache line size in bytes used on arm64; it sizes the
// padding of the x86 struct.
const CacheLineSize = 32

7
vendor/github.com/templexxx/cpufeat/cpu_mips.go generated vendored

@ -0,0 +1,7 @@
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package cpufeat
// CacheLineSize is the cache line size in bytes used on mips; it sizes the
// padding of the x86 struct.
const CacheLineSize = 32

7
vendor/github.com/templexxx/cpufeat/cpu_mips64.go generated vendored

@ -0,0 +1,7 @@
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package cpufeat
// CacheLineSize is the cache line size in bytes used on mips64; it sizes the
// padding of the x86 struct.
const CacheLineSize = 32

7
vendor/github.com/templexxx/cpufeat/cpu_mips64le.go generated vendored

@ -0,0 +1,7 @@
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package cpufeat
// CacheLineSize is the cache line size in bytes used on mips64le; it sizes
// the padding of the x86 struct.
const CacheLineSize = 32

7
vendor/github.com/templexxx/cpufeat/cpu_mipsle.go generated vendored

@ -0,0 +1,7 @@
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package cpufeat
// CacheLineSize is the cache line size in bytes used on mipsle; it sizes the
// padding of the x86 struct.
const CacheLineSize = 32

7
vendor/github.com/templexxx/cpufeat/cpu_ppc64.go generated vendored

@ -0,0 +1,7 @@
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package cpufeat
// CacheLineSize is the cache line size in bytes used on ppc64; it sizes the
// padding of the x86 struct.
const CacheLineSize = 128

7
vendor/github.com/templexxx/cpufeat/cpu_ppc64le.go generated vendored

@ -0,0 +1,7 @@
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package cpufeat
// CacheLineSize is the cache line size in bytes used on ppc64le; it sizes the
// padding of the x86 struct.
const CacheLineSize = 128

7
vendor/github.com/templexxx/cpufeat/cpu_s390x.go generated vendored

@ -0,0 +1,7 @@
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package cpufeat
// CacheLineSize is the cache line size in bytes used on s390x; it sizes the
// padding of the x86 struct.
const CacheLineSize = 256

59
vendor/github.com/templexxx/cpufeat/cpu_x86.go generated vendored

@ -0,0 +1,59 @@
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build 386 amd64 amd64p32
package cpufeat
// CacheLineSize is the cache line size in bytes used on 386, amd64 and
// amd64p32; it sizes the padding of the x86 struct.
const CacheLineSize = 64
// cpuid is implemented in cpu_x86.s.
// It executes the CPUID instruction with eaxArg in EAX and ecxArg in ECX and
// returns the resulting EAX, EBX, ECX and EDX register values.
func cpuid(eaxArg, ecxArg uint32) (eax, ebx, ecx, edx uint32)
// xgetbv with ecx = 0 is implemented in cpu_x86.s.
// It returns the EAX and EDX register values produced by XGETBV; on nacl the
// assembly returns zero for both.
func xgetbv() (eax, edx uint32)
// init fills in X86 from CPUID. Features reported by CPUID leaf 1 are read
// first; AVX additionally requires that XGETBV reports OS support for the XMM
// and YMM registers. Features from leaf 7 are only read when the CPU reports
// that leaf as available.
func init() {
// CPUID leaf 0 returns the highest supported leaf number in EAX
maxId, _, _, _ := cpuid(0, 0)
if maxId < 1 {
return
}
// leaf 1: feature bits in ECX and EDX
_, _, ecx1, edx1 := cpuid(1, 0)
X86.HasSSE2 = isSet(26, edx1)
X86.HasSSE3 = isSet(0, ecx1)
X86.HasPCLMULQDQ = isSet(1, ecx1)
X86.HasSSSE3 = isSet(9, ecx1)
X86.HasSSE41 = isSet(19, ecx1)
X86.HasSSE42 = isSet(20, ecx1)
X86.HasPOPCNT = isSet(23, ecx1)
X86.HasAES = isSet(25, ecx1)
X86.HasOSXSAVE = isSet(27, ecx1)
osSupportsAVX := false
// For XGETBV, OSXSAVE bit is required and sufficient.
if X86.HasOSXSAVE {
eax, _ := xgetbv()
// Check if XMM and YMM registers have OS support.
osSupportsAVX = isSet(1, eax) && isSet(2, eax)
}
X86.HasAVX = isSet(28, ecx1) && osSupportsAVX
// leaf 7 features stay false when the CPU does not support that leaf
if maxId < 7 {
return
}
// leaf 7, sub-leaf 0: feature bits in EBX
_, ebx7, _, _ := cpuid(7, 0)
X86.HasBMI1 = isSet(3, ebx7)
X86.HasAVX2 = isSet(5, ebx7) && osSupportsAVX
X86.HasBMI2 = isSet(8, ebx7)
X86.HasERMS = isSet(9, ebx7)
}
// isSet reports whether bit number bitpos (counting from the least
// significant bit) is set in value. Positions of 32 and above are never set.
func isSet(bitpos uint, value uint32) bool {
	bit := (value >> bitpos) & 1
	return bit == 1
}

32
vendor/github.com/templexxx/cpufeat/cpu_x86.s generated vendored

@ -0,0 +1,32 @@
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build 386 amd64 amd64p32
#include "textflag.h"
// func cpuid(eaxArg, ecxArg uint32) (eax, ebx, ecx, edx uint32)
// Loads the two arguments into AX and CX, executes CPUID and stores the
// AX, BX, CX and DX registers into the four results.
TEXT ·cpuid(SB), NOSPLIT, $0-24
// Arguments: eaxArg -> AX, ecxArg -> CX.
MOVL eaxArg+0(FP), AX
MOVL ecxArg+4(FP), CX
CPUID
// Results are written in the order eax, ebx, ecx, edx.
MOVL AX, eax+8(FP)
MOVL BX, ebx+12(FP)
MOVL CX, ecx+16(FP)
MOVL DX, edx+20(FP)
RET
// func xgetbv() (eax, edx uint32)
// Executes XGETBV with CX = 0 and returns AX and DX.
// When built with GOOS_nacl defined, both results are set to 0 instead.
TEXT ·xgetbv(SB),NOSPLIT,$0-8
#ifdef GOOS_nacl
// nacl does not support XGETBV.
MOVL $0, eax+0(FP)
MOVL $0, edx+4(FP)
#else
// CX is cleared before the instruction is executed.
MOVL $0, CX
// The instruction is emitted as raw bytes rather than by mnemonic.
WORD $0x010f; BYTE $0xd0 //XGETBV
MOVL AX, eax+0(FP)
MOVL DX, edx+4(FP)
#endif
RET

40
vendor/github.com/templexxx/reedsolomon/.gitignore generated vendored

@ -0,0 +1,40 @@
# Compiled Object files, Static and Dynamic libs (Shared Objects)
*.o
*.a
*.so
# Folders
_obj
_test
# Architecture specific extensions/prefixes
*.[568vq]
[568vq].out
*.cgo1.go
*.cgo2.c
_cgo_defun.c
_cgo_gotypes.go
_cgo_export.*
_testmain.go
*.exe
*.test
*.prof
/.idea
/backup
/loopunroll/
cpu.out
mathtool/galois/
mathtool/matrix/
mem.out
/examples/
/.DS_Store
/mathtool/cntinverse
/invert
/bakcup
/buf.svg
*.svg
*.out
/escape

9
vendor/github.com/templexxx/reedsolomon/.travis.yml generated vendored

@ -0,0 +1,9 @@
language: go
go:
- 1.9
install:
- go get github.com/templexxx/reedsolomon
script:
- go test -v

23
vendor/github.com/templexxx/reedsolomon/LICENSE generated vendored

@ -0,0 +1,23 @@
MIT License
Copyright (c) 2017 Templexxx
Copyright (c) 2015 Klaus Post
Copyright (c) 2015 Backblaze
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

109
vendor/github.com/templexxx/reedsolomon/README.md generated vendored

@ -0,0 +1,109 @@
# Reed-Solomon
[![GoDoc][1]][2] [![MIT licensed][3]][4] [![Build Status][5]][6] [![Go Report Card][7]][8]
[1]: https://godoc.org/github.com/templexxx/reedsolomon?status.svg
[2]: https://godoc.org/github.com/templexxx/reedsolomon
[3]: https://img.shields.io/badge/license-MIT-blue.svg
[4]: LICENSE
[5]: https://travis-ci.org/templexxx/reedsolomon.svg?branch=master
[6]: https://travis-ci.org/templexxx/reedsolomon
[7]: https://goreportcard.com/badge/github.com/templexxx/reedsolomon
[8]: https://goreportcard.com/report/github.com/templexxx/reedsolomon
## Introduction:
1. Reed-Solomon Erasure Code engine in pure Go.
2. Super fast: more than 10GB/s per physical core (10+4, 4KB per vector, MacBook Pro 2.8 GHz Intel Core i7)
## Installation
To get the package use the standard:
```bash
go get github.com/templexxx/reedsolomon
```
## Documentation
See the associated [GoDoc](http://godoc.org/github.com/templexxx/reedsolomon)
## Specification
### GOARCH
1. All architectures are supported
2. Version 0.1.0 needs Go 1.9 for sync.Map on AMD64
### Math
1. Coding over GF(2^8)
2. Primitive Polynomial: x^8 + x^4 + x^3 + x^2 + 1 (0x1d)
3. mathtool/gentbls.go: generates the primitive polynomial and its log table, exp table, multiply table, inverse table, etc. It is a good place to learn more about how Galois fields work
4. mathtool/cntinverse.go: calculates how many inverse matrices exist for different RS code configurations
5. Both Cauchy and Vandermonde matrices are supported. The Vandermonde matrix needs more operations to preserve the property that any square subset of rows is invertible
### Why so fast?
These three parts cost the most time:
1. looking up Galois-field tables
2. reading/writing memory
3. calculating the inverse matrix in the reconstruct process
SIMD solves no.1.
Cache-friendly code helps with no.2 & no.3. In addition, a sync.Map is used to cache inverse matrices, which saves about 1000ns when the same matrix is needed again.
## Performance
Performance depends mainly on:
1. CPU instruction extension (AVX2, SSSE3 or none)
2. number of data/parity vects
3. unit size of calculation (see it in rs_amd64.go)
4. size of shards
5. speed of memory (a lot of time is spent on reading/writing memory, :D)
6. performance of CPU
7. the way of using (reusing memory)
Keep in mind that the benchmark tests are quite different from encoding/decoding in practice.
In the benchmark loops the CPU cache helps a lot. In practice, we must reuse memory to get performance as good as in the benchmark tests.
Example of performance on my MacBook 2017 i7 2.8GHz. 10+4 (with 0.1.0).
### Encoding:
| Vector size | Speed (MB/S) |
|----------------|--------------|
| 1400B | 7655.02 |
| 4KB | 10551.37 |
| 64KB | 9297.25 |
| 1MB | 6829.89 |
| 16MB | 6312.83 |
### Reconstruct (use nil to mark the vects that need repair):
| Vector size | Speed (MB/S) |
|----------------|--------------|
| 1400B | 4124.85 |
| 4KB | 5715.45 |
| 64KB | 6050.06 |
| 1MB | 5001.21 |
| 16MB | 5043.04 |
### ReconstructWithPos (use a position list to point which one need repair, reuse the memory):
| Vector size | Speed (MB/S) |
|----------------|--------------|
| 1400B | 6170.24 |
| 4KB | 9444.86 |
| 64KB | 9311.30 |
| 1MB | 6781.06 |
| 16MB | 6285.34 |
**The reconstruct benchmarks above were run with the inverse matrix cache enabled; without the cache each call costs more time (about 1000ns).**
## Who is using this?
1. https://github.com/xtaci/kcp-go -- A Production-Grade Reliable-UDP Library for golang
## Links & Thanks
* [Klauspost ReedSolomon](https://github.com/klauspost/reedsolomon)
* [intel ISA-L](https://github.com/01org/isa-l)
* [GF SIMD](http://www.ssrc.ucsc.edu/papers/plank-fast13.pdf)
* [asm2plan9s](https://github.com/fwessels/asm2plan9s)

156
vendor/github.com/templexxx/reedsolomon/matrix.go generated vendored

@ -0,0 +1,156 @@
package reedsolomon
import "errors"
// matrix is a matrix stored as a flat byte slice.
// The functions in this file address element (row, col) as m[row*cols+col].
type matrix []byte
// genEncMatrixCauchy builds the (d+p) x d encoding matrix for d data and p
// parity vects. The top d rows form the identity matrix; every entry of the
// remaining p rows is inverseTbl[row^col].
func genEncMatrixCauchy(d, p int) matrix {
	total := d + p
	enc := make([]byte, total*d)

	// Identity block: ones on the diagonal of the first d rows.
	for k := 0; k < d; k++ {
		enc[k*d+k] = 1
	}

	// Parity rows are written sequentially right after the identity block.
	pos := d * d
	for row := d; row < total; row++ {
		for col := 0; col < d; col++ {
			enc[pos] = byte(inverseTbl[row^col])
			pos++
		}
	}
	return enc
}
// gfExp raises b to the n-th power using the log/exp tables.
// Any value to the power 0 is 1 (including b == 0); 0 to any other power is 0.
func gfExp(b byte, n int) byte {
	switch {
	case n == 0:
		return 1
	case b == 0:
		return 0
	}
	// log(b^n) = n*log(b); bring the exponent back below 255 before the lookup.
	e := int(logTbl[b]) * n
	if e >= 255 {
		e %= 255
	}
	return byte(expTbl[e])
}
// genVandMatrix fills vm, a t x d matrix, so that entry (row, col) equals
// gfExp(byte(row), col).
func genVandMatrix(vm []byte, t, d int) {
	for row := 0; row < t; row++ {
		base := row * d
		x := byte(row)
		for col := 0; col < d; col++ {
			vm[base+col] = gfExp(x, col)
		}
	}
}
// mul multiplies m (rows x cols) by right (cols x cols) and stores the
// rows x cols product in r. Products are formed with gfMul and summed with XOR.
func (m matrix) mul(right matrix, rows, cols int, r []byte) {
	for i := 0; i < rows; i++ {
		rowOff := i * cols
		for j := 0; j < cols; j++ {
			var acc byte
			for k := 0; k < cols; k++ {
				acc ^= gfMul(m[rowOff+k], right[k*cols+j])
			}
			r[rowOff+j] = acc
		}
	}
}
// genEncMatrixVand builds the (d+p) x d encoding matrix from a Vandermonde
// matrix: the Vandermonde matrix is multiplied by the inverse of its own top
// d x d square. An error is returned when that square cannot be inverted.
func genEncMatrixVand(d, p int) (matrix, error) {
	total := d + p
	vandLen := total * d
	sq := d * d

	// A single allocation holds every intermediate matrix:
	// [vand | top | raw (2 squares) | inv | out].
	buf := make([]byte, 2*vandLen+4*sq)
	vand := buf[:vandLen]
	top := buf[vandLen : vandLen+sq]
	raw := buf[vandLen+sq : vandLen+3*sq]
	inv := buf[vandLen+3*sq : vandLen+4*sq]
	out := buf[vandLen+4*sq:]

	genVandMatrix(vand, total, d)
	copy(top, vand[:sq])
	if err := matrix(top).invert(raw, d, inv); err != nil {
		return nil, err
	}
	matrix(vand).mul(inv, total, d, out)
	return matrix(out), nil
}
// subMatrix copies the right half of the n x 2n matrix m into the n x n
// matrix r: [I|m'] -> [m'].
func (m matrix) subMatrix(n int, r []byte) {
	width := 2 * n
	for row := 0; row < n; row++ {
		src := row*width + n
		copy(r[row*n:row*n+n], m[src:src+n])
	}
}
// invert computes the inverse of the n x n matrix m into im.
// raw is scratch space for the n x 2n augmented matrix [m|I]; only the left
// half and the diagonal ones of the right half are written here, so the rest
// of raw must already be zero. The error from gauss is returned unchanged.
func (m matrix) invert(raw matrix, n int, im []byte) error {
	width := 2 * n
	// [m] -> [m|I]
	for row := 0; row < n; row++ {
		dst := row * width
		copy(raw[dst:dst+n], m[row*n:row*n+n])
		raw[dst+n+row] = 1
	}
	if err := gauss(raw, n); err != nil {
		return err
	}
	raw.subMatrix(n, im)
	return nil
}
// swap exchanges rows i and j of m, where every row is n bytes wide.
func (m matrix) swap(i, j, n int) {
	rowI, rowJ := i*n, j*n
	for k := 0; k < n; k++ {
		m[rowI+k], m[rowJ+k] = m[rowJ+k], m[rowI+k]
	}
}
// gfMul returns the product of a and b, looked up in mulTbl.
func gfMul(a, b byte) byte {
return mulTbl[a][b]
}
// errSingular is returned by gauss when no non-zero pivot can be found.
var errSingular = errors.New("rs.invert: matrix is singular")

// gauss performs Gauss-Jordan elimination in place on the n x 2n matrix m:
// [m|I] -> [I|m']
func gauss(m matrix, n int) error {
	width := 2 * n

	// Forward pass: make every pivot 1 and clear the entries below it.
	for col := 0; col < n; col++ {
		pivot := col*width + col
		if m[pivot] == 0 {
			// Look for a lower row with a non-zero entry in this column.
			for r := col + 1; r < n; r++ {
				if m[r*width+col] != 0 {
					m.swap(col, r, width)
					break
				}
			}
		}
		if m[pivot] == 0 {
			return errSingular
		}
		if m[pivot] != 1 {
			// Scale the pivot row by the inverse of the pivot.
			scale := inverseTbl[m[pivot]]
			for c := 0; c < width; c++ {
				m[col*width+c] = gfMul(m[col*width+c], scale)
			}
		}
		for r := col + 1; r < n; r++ {
			factor := m[r*width+col]
			if factor == 0 {
				continue
			}
			for c := 0; c < width; c++ {
				m[r*width+c] ^= gfMul(factor, m[col*width+c])
			}
		}
	}

	// Backward pass: clear the entries above every pivot.
	for k := 0; k < n; k++ {
		for r := 0; r < k; r++ {
			factor := m[r*width+k]
			if factor == 0 {
				continue
			}
			for c := 0; c < width; c++ {
				m[r*width+c] ^= gfMul(factor, m[k*width+c])
			}
		}
	}
	return nil
}

280
vendor/github.com/templexxx/reedsolomon/rs.go generated vendored

@ -0,0 +1,280 @@
/*
Reed-Solomon Codes over GF(2^8)
Primitive Polynomial: x^8+x^4+x^3+x^2+1
Galois Field arithmetic using Intel SIMD instructions (AVX2 or SSSE3)
*/
package reedsolomon
import "errors"
// Encoder implements Reed-Solomon encoding and reconstruction.
type Encoder interface {
// Encode multiplies the generator matrix with the data vects.
// len(vects) must equal the number of data+parity vects.
Encode(vects [][]byte) error
// Reconstruct repairs lost data & parity vects.
// Mark a lost vect by setting it to nil.
// The result is written back to the original position in vects:
// if vects[0] was lost, its data is back in vects[0] after the call.
Reconstruct(vects [][]byte) error
// ReconstructData repairs lost data vects only.
// Mark a lost vect by setting it to nil.
ReconstructData(vects [][]byte) error
// ReconstWithPos repairs lost data & parity vects using explicit positions
// of the surviving ("has") and lost vects.
// It saves bandwidth & disk I/O compared with Reconstruct when fewer vects
// than the number of parity vects are lost.
// With erasure codes we must know which vect is broken,
// so it is necessary to provide such APIs.
// len(has) must equal the number of data vects.
// Example:
// in 3+2, the whole position list is [0,1,2,3,4]
// if vects[0] is lost
// "has" could be [1,2,3] or [1,2,4] or ...
// and you must be sure that vects[1] vects[2] vects[3] hold correct data (if "has" is [1,2,3])
// "dLost" will be [0]
// ps:
// 1. the above lists are in increasing order. TODO support out-of-order
// 2. every vect has the same len, don't set it to nil,
// so no new slice has to be made
ReconstWithPos(vects [][]byte, has, dLost, pLost []int) error
// ReconstDataWithPos repairs lost data vects using the positions of the
// surviving & lost vects.
// Positions of lost parity vects do not need to be supplied.
ReconstDataWithPos(vects [][]byte, has, dLost []int) error
}
// checkCfg validates the number of data (d) and parity (p) vects.
// Both must be positive and their sum must stay below 256.
func checkCfg(d, p int) error {
	switch {
	case d <= 0 || p <= 0:
		return errors.New("rs.New: data or parity <= 0")
	case d+p >= 256:
		return errors.New("rs.New: data+parity >= 256")
	}
	return nil
}
// New creates an Encoder that uses a Vandermonde matrix as encoding matrix.
// It returns an error when the data/parity configuration is rejected by
// checkCfg or when the encoding matrix cannot be generated.
func New(data, parity int) (enc Encoder, err error) {
	if err = checkCfg(data, parity); err != nil {
		return nil, err
	}
	em, err := genEncMatrixVand(data, parity)
	if err != nil {
		return nil, err
	}
	return newRS(data, parity, em), nil
}
// NewCauchy creates an Encoder that uses a Cauchy matrix as generator matrix.
// It returns an error when the data/parity configuration is rejected by checkCfg.
func NewCauchy(data, parity int) (enc Encoder, err error) {
	if err = checkCfg(data, parity); err != nil {
		return nil, err
	}
	return newRS(data, parity, genEncMatrixCauchy(data, parity)), nil
}
// encBase is the Encoder implementation built on the table-driven
// mulVect/mulVectAdd routines.
type encBase struct {
// data is the number of data vects.
data int
// parity is the number of parity vects.
parity int
// encode is the encoding matrix; reconst copies rows of data bytes out of it.
encode []byte
// gen holds the coefficients used by Encode, indexed as gen[j*data+i]
// for parity vect j and data vect i.
gen []byte
}
// checkEnc verifies that vs holds exactly d+p vects and that all of them have
// the same non-zero length, which it returns as size.
func checkEnc(d, p int, vs [][]byte) (size int, err error) {
	if len(vs) != d+p {
		return 0, errors.New("rs.checkER: vects not match rs args")
	}
	size = len(vs[0])
	if size == 0 {
		return 0, errors.New("rs.checkER: vects size = 0")
	}
	for _, v := range vs[1:] {
		if len(v) != size {
			return size, errors.New("rs.checkER: vects size mismatch")
		}
	}
	return size, nil
}
// Encode computes the parity vects vects[data:] from the data vects
// vects[:data]. The first data vect overwrites each parity vect through
// mulVect; every further data vect is accumulated through mulVectAdd.
// The error from checkEnc is returned when vects has the wrong shape.
func (e *encBase) Encode(vects [][]byte) (err error) {
	d, p := e.data, e.parity
	if _, err = checkEnc(d, p, vects); err != nil {
		return err
	}
	data, parity, gen := vects[:d], vects[d:], e.gen
	for i := 0; i < d; i++ {
		for j := 0; j < p; j++ {
			if i == 0 {
				mulVect(gen[j*d], data[0], parity[j])
				continue
			}
			mulVectAdd(gen[j*d+i], data[i], parity[j])
		}
	}
	return nil
}
// mulVect stores mulTbl[c][a[i]] in b[i] for every index i of a.
func mulVect(c byte, a, b []byte) {
	row := mulTbl[c]
	for i, v := range a {
		b[i] = row[v]
	}
}
// mulVectAdd XORs mulTbl[c][a[i]] into b[i] for every index i of a.
func mulVectAdd(c byte, a, b []byte) {
	row := mulTbl[c]
	for i, v := range a {
		b[i] ^= row[v]
	}
}
// Reconstruct repairs lost data and parity vects; it calls reconstruct with dataOnly=false.
func (e *encBase) Reconstruct(vects [][]byte) (err error) {
return e.reconstruct(vects, false)
}
// ReconstructData repairs lost data vects only; it calls reconstruct with dataOnly=true.
func (e *encBase) ReconstructData(vects [][]byte) (err error) {
return e.reconstruct(vects, true)
}
// ReconstWithPos repairs lost data and parity vects at the given positions;
// it calls reconstWithPos with dataOnly=false.
func (e *encBase) ReconstWithPos(vects [][]byte, has, dLost, pLost []int) error {
return e.reconstWithPos(vects, has, dLost, pLost, false)
}
// ReconstDataWithPos repairs lost data vects at the given positions;
// it calls reconstWithPos with a nil pLost and dataOnly=true.
func (e *encBase) ReconstDataWithPos(vects [][]byte, has, dLost []int) error {
return e.reconstWithPos(vects, has, dLost, nil, true)
}
// reconst rebuilds the vects listed in dLost (data) and, unless dataOnly is
// set, in pLost (parity). has lists the positions of the vects used as input.
// Lost vects of length 0 are allocated with the size of vects[has[0]].
//
// Data is rebuilt by inverting the rows of the encoding matrix selected by has
// and encoding with the rows of that inverse that belong to dLost. Parity is
// then re-encoded from vects[:data] with the matching rows of the encoding
// matrix.
func (e *encBase) reconst(vects [][]byte, has, dLost, pLost []int, dataOnly bool) (err error) {
	d, enc := e.data, e.encode
	size := len(vects[has[0]])

	if lost := len(dLost); lost != 0 {
		// Inputs first, then the vects to rebuild, in the layout Encode expects.
		work := make([][]byte, d+lost)
		for i, pos := range has {
			work[i] = vects[pos]
		}
		for i, pos := range dLost {
			if len(vects[pos]) == 0 {
				vects[pos] = make([]byte, size)
			}
			work[d+i] = vects[pos]
		}

		// One buffer: [sub | raw (2 squares) | inv | gen].
		sq := d * d
		buf := make([]byte, 4*sq+lost*d)
		sub := buf[:sq]
		for i, pos := range has {
			copy(sub[i*d:i*d+d], enc[pos*d:pos*d+d])
		}
		raw, inv := buf[sq:3*sq], buf[3*sq:4*sq]
		if ierr := matrix(sub).invert(raw, d, inv); ierr != nil {
			return ierr
		}
		gen := buf[4*sq:]
		for i, pos := range dLost {
			copy(gen[i*d:i*d+d], inv[pos*d:pos*d+d])
		}
		rebuild := &encBase{data: d, parity: lost, gen: gen}
		if eerr := rebuild.Encode(work); eerr != nil {
			return eerr
		}
	}
	if dataOnly {
		return nil
	}

	if lost := len(pLost); lost != 0 {
		gen := make([]byte, lost*d)
		for i, pos := range pLost {
			copy(gen[i*d:i*d+d], enc[pos*d:pos*d+d])
		}
		work := make([][]byte, d+lost)
		for i := 0; i < d; i++ {
			work[i] = vects[i]
		}
		for i, pos := range pLost {
			if len(vects[pos]) == 0 {
				vects[pos] = make([]byte, size)
			}
			work[d+i] = vects[pos]
		}
		rebuild := &encBase{data: d, parity: lost, gen: gen}
		if eerr := rebuild.Encode(work); eerr != nil {
			return eerr
		}
	}
	return nil
}
// reconstWithPos checks the position lists and forwards them to reconst.
// has must contain exactly data positions, and neither dLost nor pLost may
// contain more than parity positions.
func (e *encBase) reconstWithPos(vects [][]byte, has, dLost, pLost []int, dataOnly bool) (err error) {
	// TODO check more, maybe element in has show in lost & deal with len(has) > d
	if len(has) != e.data || len(dLost) > e.parity || len(pLost) > e.parity {
		return errors.New("rs.Reconst: not enough vects")
	}
	return e.reconst(vects, has, dLost, pLost, dataOnly)
}
// reconstruct repairs, in place, the vects that are nil.
// It scans the first data+parity entries of vects, collects the positions of
// the first `data` surviving vects and of the lost data/parity vects, and
// hands them to reconst. When dataOnly is true only lost data vects are rebuilt.
// An error is returned when vects holds fewer than data+parity entries, when
// more than `parity` data vects are lost, or when fewer than `data` vects survive.
func (e *encBase) reconstruct(vects [][]byte, dataOnly bool) (err error) {
	d, p := e.data, e.parity
	t := d + p
	// A short slice would otherwise panic on vects[i] below.
	if len(vects) < t {
		return errors.New("rs.Reconst: not enough vects")
	}

	// One backing array split into three capacity-limited lists, so that
	// append can never spill from one list into the next.
	listBuf := make([]int, t+p)
	has := listBuf[:0:d]
	dLost := listBuf[d:d:t]
	pLost := listBuf[t:t : t+p]

	for i := 0; i < t; i++ {
		switch {
		case vects[i] != nil:
			// Only the first d survivors are needed.
			if len(has) < d {
				has = append(has, i)
			}
		case i < d:
			if len(dLost) >= p {
				return errors.New("rs.Reconst: not enough vects")
			}
			dLost = append(dLost, i)
		default:
			if len(pLost) >= p {
				return errors.New("rs.Reconst: not enough vects")
			}
			pLost = append(pLost, i)
		}
	}
	if len(has) != d {
		return errors.New("rs.Reconst: not enough vects")
	}
	return e.reconst(vects, has, dLost, pLost, dataOnly)
}

868
vendor/github.com/templexxx/reedsolomon/rs_amd64.go generated vendored

@ -0,0 +1,868 @@
package reedsolomon
import (
"errors"
"sync"
"github.com/templexxx/cpufeat"
)
// SIMD Instruction Extensions
// These values are stored in extension and checked by newRS.
const (
none = iota
avx2
ssse3
)
// extension is the SIMD extension selected by getEXT; it starts as none.
var extension = none
// init runs getEXT once when the package is initialised.
func init() {
getEXT()
}
// getEXT sets extension from the CPU features reported by cpufeat:
// AVX2 is preferred, then SSSE3, otherwise none.
func getEXT() {
	switch {
	case cpufeat.X86.HasAVX2:
		extension = avx2
	case cpufeat.X86.HasSSSE3:
		extension = ssse3
	default:
		extension = none
	}
}
//go:noescape
func copy32B(dst, src []byte) // Need SSE2(introduced in 2001)
// initTbl fills tbl with one 32-byte lowhighTbl entry per coefficient of the
// rows x cols matrix g. Entries are laid out column by column: all rows of
// column 0 first, then all rows of column 1, and so on.
func initTbl(g matrix, rows, cols int, tbl []byte) {
	pos := 0
	for col := 0; col < cols; col++ {
		for row := 0; row < rows; row++ {
			coef := g[row*cols+col]
			copy32B(tbl[pos:pos+32], lowhighTbl[coef][:])
			pos += 32
		}
	}
}
// okCache reports whether the inverse matrix cache should be enabled for the
// given configuration.
// At most 3060 inverse matrices exist when data=14 and parity=4 (calculated by
// mathtool/cntinverse). In practice data is usually below 12 and parity below 5.
// The limits can be changed, but data+parity must not exceed 32 (tip: see the
// code that builds the cache key for the inverse matrix).
func okCache(data, parity int) bool {
	return data < 15 && parity < 5
}
type (
// encSSSE3 is the Encoder implementation that calls the SSSE3 routines.
encSSSE3 encSIMD
// encAVX2 is the Encoder implementation that calls the AVX2 routines.
encAVX2 encSIMD
// encSIMD is the state shared by the SIMD encoders.
encSIMD struct {
// data is the number of data vects.
data int
// parity is the number of parity vects.
parity int
// encode is the encoding matrix passed to newRS.
encode matrix
// gen holds the coefficients used for encoding, indexed as gen[j*data+i].
gen matrix
// tbl holds one 32-byte table per coefficient of gen, filled by initTbl in newRS.
tbl []byte
// The inverse matrix cache is designed for small vect sizes ( < 4KB ):
// it saves the time spent on calculating the inverse matrix,
// which matters less for big vect sizes.
enableCache bool
inverseCache iCache
}
// iCache is a map of inverse matrices guarded by the embedded RWMutex.
// makeGen builds the uint32 key from the positions of the vects used for reconstruction.
iCache struct {
sync.RWMutex
data map[uint32][]byte
}
)
// newRS creates the Encoder that matches the detected SIMD extension.
// em is the (d+p) x d encoding matrix; the rows after the first d are used as
// generator coefficients. Without a SIMD extension an encBase is returned;
// otherwise the lookup tables are prepared and an encAVX2 or encSSSE3 is returned.
func newRS(d, p int, em matrix) (enc Encoder) {
	gen := em[d*d:]
	if extension == none {
		return &encBase{data: d, parity: p, encode: em, gen: gen}
	}

	tbl := make([]byte, d*p*32)
	initTbl(gen, p, d, tbl)
	cacheable := okCache(d, p)

	if extension != avx2 {
		return &encSSSE3{
			data: d, parity: p, encode: em, gen: gen, tbl: tbl,
			enableCache:  cacheable,
			inverseCache: iCache{data: make(map[uint32][]byte)},
		}
	}
	return &encAVX2{
		data: d, parity: p, encode: em, gen: gen, tbl: tbl,
		enableCache:  cacheable,
		inverseCache: iCache{data: make(map[uint32][]byte)},
	}
}
// Size of sub-vector
const unit int = 16 * 1024

// getDo returns the number of bytes to process per step for vects of n bytes:
// unit when n is at least unit, otherwise n rounded down to a multiple of 16,
// or unit when that rounding yields 0.
func getDo(n int) int {
	if n >= unit {
		return unit
	}
	if aligned := n &^ 15; aligned != 0 {
		return aligned
	}
	return unit
}
// Encode computes the parity vects vects[data:] from the data vects
// vects[:data]. The vects are processed in steps of getDo(size) bytes through
// matrixMul; a final shorter piece is handled by matrixMulRemain.
// The error from checkEnc is returned when vects has the wrong shape.
func (e *encAVX2) Encode(vects [][]byte) (err error) {
	d := e.data
	size, err := checkEnc(d, e.parity, vects)
	if err != nil {
		return err
	}
	dv, pv := vects[:d], vects[d:]
	step := getDo(size)
	start := 0
	for ; start+step <= size; start += step {
		e.matrixMul(start, start+step, dv, pv)
	}
	if start < size {
		e.matrixMulRemain(start, size, dv, pv)
	}
	return nil
}
//go:noescape
func mulVectAVX2(tbl, d, p []byte)
//go:noescape
func mulVectAddAVX2(tbl, d, p []byte)
// matrixMul encodes the byte range [start, end) of every vect with the
// prepared tables in e.tbl. The table for data vect i and parity vect j
// starts at offset (i*parity+j)*32. The first data vect overwrites the parity
// range, every further data vect is accumulated into it.
func (e *encAVX2) matrixMul(start, end int, dv, pv [][]byte) {
	d, p, tbl := e.data, e.parity, e.tbl
	for i := 0; i < d; i++ {
		in := dv[i][start:end]
		for j := 0; j < p; j++ {
			off := (i*p + j) * 32
			out := pv[j][start:end]
			if i == 0 {
				mulVectAVX2(tbl[off:off+32], in, out)
			} else {
				mulVectAddAVX2(tbl[off:off+32], in, out)
			}
		}
	}
}
// matrixMulRemain encodes the trailing byte range [start, end) of every vect.
// The largest multiple of 16 bytes is encoded with the SIMD kernels. Leftover
// bytes are covered by running the kernels over the last 16 bytes of the
// vects, or by the byte-wise mulVect/mulVectAdd when end is below 16.
func (e *encAVX2) matrixMulRemain(start, end int, dv, pv [][]byte) {
	d, p := e.data, e.parity
	undone := end - start
	aligned := undone &^ 15 // undone rounded down to a multiple of 16

	// simd encodes [lo, hi) of every vect with the tables in e.tbl.
	simd := func(lo, hi int) {
		for i := 0; i < d; i++ {
			for j := 0; j < p; j++ {
				off := (i*p + j) * 32
				t := e.tbl[off : off+32]
				if i == 0 {
					mulVectAVX2(t, dv[0][lo:hi], pv[j][lo:hi])
				} else {
					mulVectAddAVX2(t, dv[i][lo:hi], pv[j][lo:hi])
				}
			}
		}
	}

	if aligned >= 16 {
		simd(start, start+aligned)
	}
	if undone > aligned {
		// may recalculate some data, but still improve a lot
		if tail := end - 16; tail >= 0 {
			simd(tail, end)
			return
		}
		// Fewer than 16 bytes exist before end, so fall back to byte-wise code.
		g := e.gen
		for i := 0; i < d; i++ {
			for j := 0; j < p; j++ {
				if i == 0 {
					mulVect(g[j*d], dv[0][start:], pv[j][start:])
				} else {
					mulVectAdd(g[j*d+i], dv[i][start:], pv[j][start:])
				}
			}
		}
	}
}
// encodeGen encodes with the generator matrix e.gen instead of the prepared
// tables in e.tbl. It is designed for reconstruction:
// for small vects initTbl would cost too much time, so it is skipped,
// and for big vects the tables would not help much because the cache is
// filled with the vects' data anyway.
func (e *encAVX2) encodeGen(vects [][]byte) (err error) {
	d := e.data
	size, err := checkEnc(d, e.parity, vects)
	if err != nil {
		return err
	}
	dv, pv := vects[:d], vects[d:]
	step := getDo(size)
	start := 0
	for ; start+step <= size; start += step {
		e.matrixMulGen(start, start+step, dv, pv)
	}
	if start < size {
		e.matrixMulRemainGen(start, size, dv, pv)
	}
	return nil
}
// matrixMulGen encodes the byte range [start, end) of every vect, looking up
// the table for each coefficient e.gen[j*data+i] in lowhighTbl. The first
// data vect overwrites the parity range, every further one is accumulated.
func (e *encAVX2) matrixMulGen(start, end int, dv, pv [][]byte) {
	d, p, gen := e.data, e.parity, e.gen
	for i := 0; i < d; i++ {
		in := dv[i][start:end]
		for j := 0; j < p; j++ {
			t := lowhighTbl[gen[j*d+i]][:]
			out := pv[j][start:end]
			if i == 0 {
				mulVectAVX2(t, in, out)
			} else {
				mulVectAddAVX2(t, in, out)
			}
		}
	}
}
// matrixMulRemainGen encodes the trailing byte range [start, end) of every
// vect using e.gen and lowhighTbl. The largest multiple of 16 bytes is encoded
// with the SIMD kernels. Leftover bytes are covered by running the kernels
// over the last 16 bytes of the vects, or by the byte-wise
// mulVect/mulVectAdd when end is below 16.
func (e *encAVX2) matrixMulRemainGen(start, end int, dv, pv [][]byte) {
	d, p, g := e.data, e.parity, e.gen
	undone := end - start
	aligned := undone &^ 15 // undone rounded down to a multiple of 16

	// simd encodes [lo, hi) of every vect with per-coefficient tables.
	simd := func(lo, hi int) {
		for i := 0; i < d; i++ {
			for j := 0; j < p; j++ {
				t := lowhighTbl[g[j*d+i]][:]
				if i == 0 {
					mulVectAVX2(t, dv[0][lo:hi], pv[j][lo:hi])
				} else {
					mulVectAddAVX2(t, dv[i][lo:hi], pv[j][lo:hi])
				}
			}
		}
	}

	if aligned >= 16 {
		simd(start, start+aligned)
	}
	if undone > aligned {
		// Recomputes a few bytes, but keeps the leftover on the SIMD path.
		if tail := end - 16; tail >= 0 {
			simd(tail, end)
			return
		}
		// Fewer than 16 bytes exist before end, so fall back to byte-wise code.
		for i := 0; i < d; i++ {
			for j := 0; j < p; j++ {
				if i == 0 {
					mulVect(g[j*d], dv[0][start:], pv[j][start:])
				} else {
					mulVectAdd(g[j*d+i], dv[i][start:], pv[j][start:])
				}
			}
		}
	}
}
// Reconstruct repairs lost data and parity vects; it calls reconstruct with dataOnly=false.
func (e *encAVX2) Reconstruct(vects [][]byte) (err error) {
return e.reconstruct(vects, false)
}
// ReconstructData repairs lost data vects only; it calls reconstruct with dataOnly=true.
func (e *encAVX2) ReconstructData(vects [][]byte) (err error) {
return e.reconstruct(vects, true)
}
// ReconstWithPos repairs lost data and parity vects at the given positions;
// it calls reconstWithPos with dataOnly=false.
func (e *encAVX2) ReconstWithPos(vects [][]byte, has, dLost, pLost []int) error {
return e.reconstWithPos(vects, has, dLost, pLost, false)
}
// ReconstDataWithPos repairs lost data vects at the given positions;
// it calls reconstWithPos with a nil pLost and dataOnly=true.
func (e *encAVX2) ReconstDataWithPos(vects [][]byte, has, dLost []int) error {
return e.reconstWithPos(vects, has, dLost, nil, true)
}
// makeGen returns the generator rows needed to rebuild the data vects listed
// in dLost from the vects listed in has: the rows of the encoding matrix
// selected by has are inverted and the rows of that inverse that belong to
// dLost are returned. When the cache is enabled, the inverse is looked up and
// stored under a key that has one bit set per position in has.
func (e *encAVX2) makeGen(has, dLost []int) (gen []byte, err error) {
	d := e.data
	sq := d * d
	lost := len(dLost)

	// pick copies the rows of inv listed in dLost into dst.
	pick := func(dst, inv []byte) {
		for i, l := range dLost {
			copy(dst[i*d:i*d+d], inv[l*d:l*d+d])
		}
	}

	var key uint32
	if e.enableCache {
		for _, pos := range has {
			key += 1 << uint8(pos)
		}
		e.inverseCache.RLock()
		cached, hit := e.inverseCache.data[key]
		if hit {
			out := make([]byte, lost*d)
			pick(out, cached)
			e.inverseCache.RUnlock()
			return out, nil
		}
		e.inverseCache.RUnlock()
	}

	// One buffer: [sub | raw (2 squares) | inv | out].
	buf := make([]byte, 4*sq+lost*d)
	sub := buf[:sq]
	for i, l := range has {
		copy(sub[i*d:i*d+d], e.encode[l*d:l*d+d])
	}
	raw, inv := buf[sq:3*sq], buf[3*sq:4*sq]
	if ierr := matrix(sub).invert(raw, d, inv); ierr != nil {
		return nil, ierr
	}
	if e.enableCache {
		e.inverseCache.Lock()
		e.inverseCache.data[key] = inv
		e.inverseCache.Unlock()
	}
	out := buf[4*sq:]
	pick(out, inv)
	return out, nil
}
// reconst rebuilds the vects listed in dLost (data) and, unless dataOnly is
// set, in pLost (parity). has lists the positions of the vects used as input.
// Lost vects of length 0 are allocated with the size of vects[has[0]].
//
// Data is rebuilt with the generator rows returned by makeGen. Parity is then
// re-encoded from vects[:data] with the matching rows of the encoding matrix.
// Errors from makeGen and encodeGen are returned to the caller.
func (e *encAVX2) reconst(vects [][]byte, has, dLost, pLost []int, dataOnly bool) (err error) {
	d := e.data
	em := e.encode
	dCnt := len(dLost)
	size := len(vects[has[0]])
	if dCnt != 0 {
		// Inputs first, then the vects to rebuild, in the layout encodeGen expects.
		vtmp := make([][]byte, d+dCnt)
		for i, p := range has {
			vtmp[i] = vects[p]
		}
		for i, p := range dLost {
			if len(vects[p]) == 0 {
				vects[p] = make([]byte, size)
			}
			vtmp[i+d] = vects[p]
		}
		g, err2 := e.makeGen(has, dLost)
		if err2 != nil {
			// Return the failure itself: a bare return would hand back the
			// still-nil named result and report success without rebuilding.
			return err2
		}
		etmp := &encAVX2{data: d, parity: dCnt, gen: g}
		if err2 = etmp.encodeGen(vtmp); err2 != nil {
			return err2
		}
	}
	if dataOnly {
		return nil
	}
	pCnt := len(pLost)
	if pCnt != 0 {
		// Generator rows of the lost parity vects come straight from the encoding matrix.
		g := make([]byte, pCnt*d)
		for i, l := range pLost {
			copy(g[i*d:i*d+d], em[l*d:l*d+d])
		}
		vtmp := make([][]byte, d+pCnt)
		for i := 0; i < d; i++ {
			vtmp[i] = vects[i]
		}
		for i, p := range pLost {
			if len(vects[p]) == 0 {
				vects[p] = make([]byte, size)
			}
			vtmp[i+d] = vects[p]
		}
		etmp := &encAVX2{data: d, parity: pCnt, gen: g}
		if err2 := etmp.encodeGen(vtmp); err2 != nil {
			return err2
		}
	}
	return nil
}
// reconstWithPos checks the position lists and forwards them to reconst.
// has must contain exactly data positions, and neither dLost nor pLost may
// contain more than parity positions.
func (e *encAVX2) reconstWithPos(vects [][]byte, has, dLost, pLost []int, dataOnly bool) (err error) {
	if len(has) != e.data || len(dLost) > e.parity || len(pLost) > e.parity {
		return errors.New("rs.Reconst: not enough vects")
	}
	return e.reconst(vects, has, dLost, pLost, dataOnly)
}
// reconstruct repairs, in place, the vects that are nil.
// It scans the first data+parity entries of vects, collects the positions of
// the first `data` surviving vects and of the lost data/parity vects, and
// hands them to reconst. When dataOnly is true only lost data vects are rebuilt.
// An error is returned when vects holds fewer than data+parity entries, when
// more than `parity` data vects are lost, or when fewer than `data` vects survive.
func (e *encAVX2) reconstruct(vects [][]byte, dataOnly bool) (err error) {
	d, p := e.data, e.parity
	t := d + p
	// A short slice would otherwise panic on vects[i] below.
	if len(vects) < t {
		return errors.New("rs.Reconst: not enough vects")
	}

	// One backing array split into three capacity-limited lists, so that
	// append can never spill from one list into the next.
	listBuf := make([]int, t+p)
	has := listBuf[:0:d]
	dLost := listBuf[d:d:t]
	pLost := listBuf[t:t : t+p]

	for i := 0; i < t; i++ {
		switch {
		case vects[i] != nil:
			// Only the first d survivors are needed.
			if len(has) < d {
				has = append(has, i)
			}
		case i < d:
			if len(dLost) >= p {
				return errors.New("rs.Reconst: not enough vects")
			}
			dLost = append(dLost, i)
		default:
			if len(pLost) >= p {
				return errors.New("rs.Reconst: not enough vects")
			}
			pLost = append(pLost, i)
		}
	}
	if len(has) != d {
		return errors.New("rs.Reconst: not enough vects")
	}
	return e.reconst(vects, has, dLost, pLost, dataOnly)
}
// Encode computes the parity vects vects[data:] from the data vects
// vects[:data]. The vects are processed in steps of getDo(size) bytes through
// matrixMul; a final shorter piece is handled by matrixMulRemain.
// The error from checkEnc is returned when vects has the wrong shape.
func (e *encSSSE3) Encode(vects [][]byte) (err error) {
	d := e.data
	size, err := checkEnc(d, e.parity, vects)
	if err != nil {
		return err
	}
	dv, pv := vects[:d], vects[d:]
	step := getDo(size)
	start := 0
	for ; start+step <= size; start += step {
		e.matrixMul(start, start+step, dv, pv)
	}
	if start < size {
		e.matrixMulRemain(start, size, dv, pv)
	}
	return nil
}
//go:noescape
func mulVectSSSE3(tbl, d, p []byte)
//go:noescape
func mulVectAddSSSE3(tbl, d, p []byte)
// matrixMul encodes the byte range [start, end) of every vect with the
// prepared tables in e.tbl. The table for data vect i and parity vect j
// starts at offset (i*parity+j)*32. The first data vect overwrites the parity
// range, every further data vect is accumulated into it.
func (e *encSSSE3) matrixMul(start, end int, dv, pv [][]byte) {
	d, p, tbl := e.data, e.parity, e.tbl
	for i := 0; i < d; i++ {
		in := dv[i][start:end]
		for j := 0; j < p; j++ {
			off := (i*p + j) * 32
			out := pv[j][start:end]
			if i == 0 {
				mulVectSSSE3(tbl[off:off+32], in, out)
			} else {
				mulVectAddSSSE3(tbl[off:off+32], in, out)
			}
		}
	}
}
// matrixMulRemain encodes the trailing byte range [start, end) of every vect.
// The largest multiple of 16 bytes is encoded with the SIMD kernels. Leftover
// bytes are covered by running the kernels over the last 16 bytes of the
// vects, or by the byte-wise mulVect/mulVectAdd when end is below 16.
func (e *encSSSE3) matrixMulRemain(start, end int, dv, pv [][]byte) {
	d, p := e.data, e.parity
	undone := end - start
	aligned := undone &^ 15 // undone rounded down to a multiple of 16

	// simd encodes [lo, hi) of every vect with the tables in e.tbl.
	simd := func(lo, hi int) {
		for i := 0; i < d; i++ {
			for j := 0; j < p; j++ {
				off := (i*p + j) * 32
				t := e.tbl[off : off+32]
				if i == 0 {
					mulVectSSSE3(t, dv[0][lo:hi], pv[j][lo:hi])
				} else {
					mulVectAddSSSE3(t, dv[i][lo:hi], pv[j][lo:hi])
				}
			}
		}
	}

	if aligned >= 16 {
		simd(start, start+aligned)
	}
	if undone > aligned {
		// Recomputes a few bytes, but keeps the leftover on the SIMD path.
		if tail := end - 16; tail >= 0 {
			simd(tail, end)
			return
		}
		// Fewer than 16 bytes exist before end, so fall back to byte-wise code.
		g := e.gen
		for i := 0; i < d; i++ {
			for j := 0; j < p; j++ {
				if i == 0 {
					mulVect(g[j*d], dv[0][start:], pv[j][start:])
				} else {
					mulVectAdd(g[j*d+i], dv[i][start:], pv[j][start:])
				}
			}
		}
	}
}
// encodeGen encodes with the generator matrix e.gen instead of the prepared
// tables in e.tbl. It is designed for reconstruction:
// for small vects initTbl would cost too much time, so it is skipped,
// and for big vects the tables would not help much because the cache is
// filled with the vects' data anyway.
func (e *encSSSE3) encodeGen(vects [][]byte) (err error) {
	d := e.data
	size, err := checkEnc(d, e.parity, vects)
	if err != nil {
		return err
	}
	dv, pv := vects[:d], vects[d:]
	step := getDo(size)
	start := 0
	for ; start+step <= size; start += step {
		e.matrixMulGen(start, start+step, dv, pv)
	}
	if start < size {
		e.matrixMulRemainGen(start, size, dv, pv)
	}
	return nil
}
// matrixMulGen encodes the byte range [start, end) of every vect, looking up
// the table for each coefficient e.gen[j*data+i] in lowhighTbl. The first
// data vect overwrites the parity range, every further one is accumulated.
func (e *encSSSE3) matrixMulGen(start, end int, dv, pv [][]byte) {
	d, p, gen := e.data, e.parity, e.gen
	for i := 0; i < d; i++ {
		in := dv[i][start:end]
		for j := 0; j < p; j++ {
			t := lowhighTbl[gen[j*d+i]][:]
			out := pv[j][start:end]
			if i == 0 {
				mulVectSSSE3(t, in, out)
			} else {
				mulVectAddSSSE3(t, in, out)
			}
		}
	}
}
// matrixMulRemainGen encodes the trailing byte range [start, end) of every
// vect using e.gen and lowhighTbl. The largest multiple of 16 bytes is encoded
// with the SIMD kernels. Leftover bytes are covered by running the kernels
// over the last 16 bytes of the vects, or by the byte-wise
// mulVect/mulVectAdd when end is below 16.
func (e *encSSSE3) matrixMulRemainGen(start, end int, dv, pv [][]byte) {
	d, p, g := e.data, e.parity, e.gen
	undone := end - start
	aligned := undone &^ 15 // undone rounded down to a multiple of 16

	// simd encodes [lo, hi) of every vect with per-coefficient tables.
	simd := func(lo, hi int) {
		for i := 0; i < d; i++ {
			for j := 0; j < p; j++ {
				t := lowhighTbl[g[j*d+i]][:]
				if i == 0 {
					mulVectSSSE3(t, dv[0][lo:hi], pv[j][lo:hi])
				} else {
					mulVectAddSSSE3(t, dv[i][lo:hi], pv[j][lo:hi])
				}
			}
		}
	}

	if aligned >= 16 {
		simd(start, start+aligned)
	}
	if undone > aligned {
		// Recomputes a few bytes, but keeps the leftover on the SIMD path.
		if tail := end - 16; tail >= 0 {
			simd(tail, end)
			return
		}
		// Fewer than 16 bytes exist before end, so fall back to byte-wise code.
		for i := 0; i < d; i++ {
			for j := 0; j < p; j++ {
				if i == 0 {
					mulVect(g[j*d], dv[0][start:], pv[j][start:])
				} else {
					mulVectAdd(g[j*d+i], dv[i][start:], pv[j][start:])
				}
			}
		}
	}
}
// Reconstruct repairs lost data and parity vects; it calls reconstruct with dataOnly=false.
func (e *encSSSE3) Reconstruct(vects [][]byte) (err error) {
return e.reconstruct(vects, false)
}
// ReconstructData repairs lost data vects only; it calls reconstruct with dataOnly=true.
func (e *encSSSE3) ReconstructData(vects [][]byte) (err error) {
return e.reconstruct(vects, true)
}
// ReconstWithPos repairs lost data and parity vects at the given positions;
// it calls reconstWithPos with dataOnly=false.
func (e *encSSSE3) ReconstWithPos(vects [][]byte, has, dLost, pLost []int) error {
return e.reconstWithPos(vects, has, dLost, pLost, false)
}
// ReconstDataWithPos repairs lost data vects at the given positions;
// it calls reconstWithPos with a nil pLost and dataOnly=true.
func (e *encSSSE3) ReconstDataWithPos(vects [][]byte, has, dLost []int) error {
return e.reconstWithPos(vects, has, dLost, nil, true)
}
// makeGen returns the generator rows needed to rebuild the data vects listed
// in dLost from the vects listed in has: the rows of the encoding matrix
// selected by has are inverted and the rows of that inverse that belong to
// dLost are returned. When the cache is enabled, the inverse is looked up and
// stored under a key that has one bit set per position in has.
func (e *encSSSE3) makeGen(has, dLost []int) (gen []byte, err error) {
	d := e.data
	sq := d * d
	lost := len(dLost)

	// pick copies the rows of inv listed in dLost into dst.
	pick := func(dst, inv []byte) {
		for i, l := range dLost {
			copy(dst[i*d:i*d+d], inv[l*d:l*d+d])
		}
	}

	var key uint32
	if e.enableCache {
		for _, pos := range has {
			key += 1 << uint8(pos)
		}
		e.inverseCache.RLock()
		cached, hit := e.inverseCache.data[key]
		if hit {
			out := make([]byte, lost*d)
			pick(out, cached)
			e.inverseCache.RUnlock()
			return out, nil
		}
		e.inverseCache.RUnlock()
	}

	// One buffer: [sub | raw (2 squares) | inv | out].
	buf := make([]byte, 4*sq+lost*d)
	sub := buf[:sq]
	for i, l := range has {
		copy(sub[i*d:i*d+d], e.encode[l*d:l*d+d])
	}
	raw, inv := buf[sq:3*sq], buf[3*sq:4*sq]
	if ierr := matrix(sub).invert(raw, d, inv); ierr != nil {
		return nil, ierr
	}
	if e.enableCache {
		e.inverseCache.Lock()
		e.inverseCache.data[key] = inv
		e.inverseCache.Unlock()
	}
	out := buf[4*sq:]
	pick(out, inv)
	return out, nil
}
// reconst rebuilds the vects listed in dLost (data) and, unless dataOnly is
// set, in pLost (parity). has lists the positions of the vects used as input.
// Lost vects of length 0 are allocated with the size of vects[has[0]].
//
// Data is rebuilt with the generator rows returned by makeGen. Parity is then
// re-encoded from vects[:data] with the matching rows of the encoding matrix.
// Errors from makeGen and encodeGen are returned to the caller.
func (e *encSSSE3) reconst(vects [][]byte, has, dLost, pLost []int, dataOnly bool) (err error) {
	d := e.data
	em := e.encode
	dCnt := len(dLost)
	size := len(vects[has[0]])
	if dCnt != 0 {
		// Inputs first, then the vects to rebuild, in the layout encodeGen expects.
		vtmp := make([][]byte, d+dCnt)
		for i, p := range has {
			vtmp[i] = vects[p]
		}
		for i, p := range dLost {
			if len(vects[p]) == 0 {
				vects[p] = make([]byte, size)
			}
			vtmp[i+d] = vects[p]
		}
		g, err2 := e.makeGen(has, dLost)
		if err2 != nil {
			// Return the failure itself: a bare return would hand back the
			// still-nil named result and report success without rebuilding.
			return err2
		}
		etmp := &encSSSE3{data: d, parity: dCnt, gen: g}
		if err2 = etmp.encodeGen(vtmp); err2 != nil {
			return err2
		}
	}
	if dataOnly {
		return nil
	}
	pCnt := len(pLost)
	if pCnt != 0 {
		// Generator rows of the lost parity vects come straight from the encoding matrix.
		g := make([]byte, pCnt*d)
		for i, l := range pLost {
			copy(g[i*d:i*d+d], em[l*d:l*d+d])
		}
		vtmp := make([][]byte, d+pCnt)
		for i := 0; i < d; i++ {
			vtmp[i] = vects[i]
		}
		for i, p := range pLost {
			if len(vects[p]) == 0 {
				vects[p] = make([]byte, size)
			}
			vtmp[i+d] = vects[p]
		}
		etmp := &encSSSE3{data: d, parity: pCnt, gen: g}
		if err2 := etmp.encodeGen(vtmp); err2 != nil {
			return err2
		}
	}
	return nil
}
// reconstWithPos checks the position lists and forwards them to reconst.
// has must contain exactly data positions, and neither dLost nor pLost may
// contain more than parity positions.
func (e *encSSSE3) reconstWithPos(vects [][]byte, has, dLost, pLost []int, dataOnly bool) (err error) {
	if len(has) != e.data || len(dLost) > e.parity || len(pLost) > e.parity {
		return errors.New("rs.Reconst: not enough vects")
	}
	return e.reconst(vects, has, dLost, pLost, dataOnly)
}
// reconstruct repairs, in place, the vects that are nil.
// It scans the first data+parity entries of vects, collects the positions of
// the first `data` surviving vects and of the lost data/parity vects, and
// hands them to reconst. When dataOnly is true only lost data vects are rebuilt.
// An error is returned when vects holds fewer than data+parity entries, when
// more than `parity` data vects are lost, or when fewer than `data` vects survive.
func (e *encSSSE3) reconstruct(vects [][]byte, dataOnly bool) (err error) {
	d, p := e.data, e.parity
	t := d + p
	// A short slice would otherwise panic on vects[i] below.
	if len(vects) < t {
		return errors.New("rs.Reconst: not enough vects")
	}

	// One backing array split into three capacity-limited lists, so that
	// append can never spill from one list into the next.
	listBuf := make([]int, t+p)
	has := listBuf[:0:d]
	dLost := listBuf[d:d:t]
	pLost := listBuf[t:t : t+p]

	for i := 0; i < t; i++ {
		switch {
		case vects[i] != nil:
			// Only the first d survivors are needed.
			if len(has) < d {
				has = append(has, i)
			}
		case i < d:
			if len(dLost) >= p {
				return errors.New("rs.Reconst: not enough vects")
			}
			dLost = append(dLost, i)
		default:
			if len(pLost) >= p {
				return errors.New("rs.Reconst: not enough vects")
			}
			pLost = append(pLost, i)
		}
	}
	if len(has) != d {
		return errors.New("rs.Reconst: not enough vects")
	}
	return e.reconst(vects, has, dLost, pLost, dataOnly)
}

401
vendor/github.com/templexxx/reedsolomon/rs_amd64.s generated vendored

@ -0,0 +1,401 @@
// Reference: www.ssrc.ucsc.edu/Papers/plank-fast13.pdf
#include "textflag.h"
#define low_tbl Y0
#define high_tbl Y1
#define mask Y2
#define in0 Y3
#define in1 Y4
#define in2 Y5
#define in3 Y6
#define in4 Y7
#define in5 Y8
#define in0_h Y10
#define in1_h Y11
#define in2_h Y12
#define in3_h Y13
#define in4_h Y14
#define in5_h Y15
#define in BX
#define out DI
#define len R8
#define pos R9
#define tmp0 R10
#define low_tblx X0
#define high_tblx X1
#define maskx X2
#define in0x X3
#define in0_hx X10
#define tmp0x X9
#define tmp1x X11
#define tmp2x X12
#define tmp3x X13
// func mulVectAVX2(tbl, d, p []byte)
//
// mulVectAVX2 maps every input byte through two 16-entry lookup tables and
// stores the result in the output buffer:
//   out[k] = low_tbl[in[k] & 0x0f] ^ high_tbl[in[k] >> 4]
// The tables are the first 32 bytes at the tbl pointer (low-nibble table
// first). Going by the paper referenced at the top of this file this is
// presumably multiplication by a GF(2^8) constant; the table contents are not
// visible here.
// Frame slots used: tbl pointer 0(FP), input pointer 24(FP), input length
// 32(FP), output pointer 48(FP).
// NOTE(review): the tail is consumed in 16- and 32-byte steps counted back
// from the end and the main loop exits only when pos == len, so the length
// apparently has to be a non-zero multiple of 16 - confirm callers ensure it.
TEXT ·mulVectAVX2(SB), NOSPLIT, $0
MOVQ i+24(FP), in
MOVQ o+48(FP), out
MOVQ tbl+0(FP), tmp0
VMOVDQU (tmp0), low_tblx
VMOVDQU 16(tmp0), high_tblx
// Build the 0x0f nibble mask in every byte of maskx.
MOVB $0x0f, DX
LONG $0x2069e3c4; WORD $0x00d2 // VPINSRB $0x00, EDX, XMM2, XMM2
VPBROADCASTB maskx, maskx
MOVQ in_len+32(FP), len
// Length not a multiple of 32: peel one 16-byte block off the end first.
TESTQ $31, len
JNZ one16b
ymm:
// Copy tables and mask into the upper 128-bit lanes of the YMM registers.
VINSERTI128 $1, low_tblx, low_tbl, low_tbl
VINSERTI128 $1, high_tblx, high_tbl, high_tbl
VINSERTI128 $1, maskx, mask, mask
TESTQ $255, len
JNZ not_aligned
// 256bytes/loop
aligned:
MOVQ $0, pos
// Eight unrolled 32-byte steps per iteration, walking forward from offset 0.
loop256b:
VMOVDQU (in)(pos*1), in0
VPSRLQ $4, in0, in0_h
VPAND mask, in0_h, in0_h
VPAND mask, in0, in0
VPSHUFB in0_h, high_tbl, in0_h
VPSHUFB in0, low_tbl, in0
VPXOR in0, in0_h, in0
VMOVDQU in0, (out)(pos*1)
VMOVDQU 32(in)(pos*1), in1
VPSRLQ $4, in1, in1_h
VPAND mask, in1_h, in1_h
VPAND mask, in1, in1
VPSHUFB in1_h, high_tbl, in1_h
VPSHUFB in1, low_tbl, in1
VPXOR in1, in1_h, in1
VMOVDQU in1, 32(out)(pos*1)
VMOVDQU 64(in)(pos*1), in2
VPSRLQ $4, in2, in2_h
VPAND mask, in2_h, in2_h
VPAND mask, in2, in2
VPSHUFB in2_h, high_tbl, in2_h
VPSHUFB in2, low_tbl, in2
VPXOR in2, in2_h, in2
VMOVDQU in2, 64(out)(pos*1)
VMOVDQU 96(in)(pos*1), in3
VPSRLQ $4, in3, in3_h
VPAND mask, in3_h, in3_h
VPAND mask, in3, in3
VPSHUFB in3_h, high_tbl, in3_h
VPSHUFB in3, low_tbl, in3
VPXOR in3, in3_h, in3
VMOVDQU in3, 96(out)(pos*1)
VMOVDQU 128(in)(pos*1), in4
VPSRLQ $4, in4, in4_h
VPAND mask, in4_h, in4_h
VPAND mask, in4, in4
VPSHUFB in4_h, high_tbl, in4_h
VPSHUFB in4, low_tbl, in4
VPXOR in4, in4_h, in4
VMOVDQU in4, 128(out)(pos*1)
VMOVDQU 160(in)(pos*1), in5
VPSRLQ $4, in5, in5_h
VPAND mask, in5_h, in5_h
VPAND mask, in5, in5
VPSHUFB in5_h, high_tbl, in5_h
VPSHUFB in5, low_tbl, in5
VPXOR in5, in5_h, in5
VMOVDQU in5, 160(out)(pos*1)
VMOVDQU 192(in)(pos*1), in0
VPSRLQ $4, in0, in0_h
VPAND mask, in0_h, in0_h
VPAND mask, in0, in0
VPSHUFB in0_h, high_tbl, in0_h
VPSHUFB in0, low_tbl, in0
VPXOR in0, in0_h, in0
VMOVDQU in0, 192(out)(pos*1)
VMOVDQU 224(in)(pos*1), in1
VPSRLQ $4, in1, in1_h
VPAND mask, in1_h, in1_h
VPAND mask, in1, in1
VPSHUFB in1_h, high_tbl, in1_h
VPSHUFB in1, low_tbl, in1
VPXOR in1, in1_h, in1
VMOVDQU in1, 224(out)(pos*1)
ADDQ $256, pos
CMPQ len, pos
JNE loop256b
VZEROUPPER
RET
not_aligned:
// tmp0 = number of trailing bytes beyond the last multiple of 256.
MOVQ len, tmp0
ANDQ $255, tmp0
// Process 32 bytes at a time from the end until len is a multiple of 256.
loop32b:
VMOVDQU -32(in)(len*1), in0
VPSRLQ $4, in0, in0_h
VPAND mask, in0_h, in0_h
VPAND mask, in0, in0
VPSHUFB in0_h, high_tbl, in0_h
VPSHUFB in0, low_tbl, in0
VPXOR in0, in0_h, in0
VMOVDQU in0, -32(out)(len*1)
SUBQ $32, len
SUBQ $32, tmp0
JG loop32b
// Whatever is left (if anything) is handled by the 256-byte loop.
CMPQ len, $256
JGE aligned
VZEROUPPER
RET
one16b:
// Handle the last 16 bytes with XMM registers, then continue with the rest.
VMOVDQU -16(in)(len*1), in0x
VPSRLQ $4, in0x, in0_hx
VPAND maskx, in0x, in0x
VPAND maskx, in0_hx, in0_hx
VPSHUFB in0_hx, high_tblx, in0_hx
VPSHUFB in0x, low_tblx, in0x
VPXOR in0x, in0_hx, in0x
VMOVDQU in0x, -16(out)(len*1)
SUBQ $16, len
CMPQ len, $0
JNE ymm
RET
// func mulVectAddAVX2(tbl, d, p []byte)
//
// mulVectAddAVX2 does the same two-table nibble lookup as mulVectAVX2 but
// XORs the result into the existing output bytes instead of overwriting them:
//   out[k] ^= low_tbl[in[k] & 0x0f] ^ high_tbl[in[k] >> 4]
// Frame slots used: tbl pointer 0(FP), input pointer 24(FP), input length
// 32(FP), output pointer 48(FP).
// NOTE(review): as with mulVectAVX2, the control flow apparently requires the
// length to be a non-zero multiple of 16 - confirm callers ensure it.
TEXT ·mulVectAddAVX2(SB), NOSPLIT, $0
MOVQ i+24(FP), in
MOVQ o+48(FP), out
MOVQ tbl+0(FP), tmp0
VMOVDQU (tmp0), low_tblx
VMOVDQU 16(tmp0), high_tblx
// Build the 0x0f nibble mask in every byte of maskx (the raw bytes below are
// the same encoding that mulVectAVX2 annotates as VPINSRB).
MOVB $0x0f, DX
LONG $0x2069e3c4; WORD $0x00d2
VPBROADCASTB maskx, maskx
MOVQ in_len+32(FP), len
// Length not a multiple of 32: peel one 16-byte block off the end first.
TESTQ $31, len
JNZ one16b
ymm:
// Copy tables and mask into the upper 128-bit lanes of the YMM registers.
VINSERTI128 $1, low_tblx, low_tbl, low_tbl
VINSERTI128 $1, high_tblx, high_tbl, high_tbl
VINSERTI128 $1, maskx, mask, mask
TESTQ $255, len
JNZ not_aligned
aligned:
MOVQ $0, pos
// Eight unrolled 32-byte steps per iteration, walking forward from offset 0.
loop256b:
VMOVDQU (in)(pos*1), in0
VPSRLQ $4, in0, in0_h
VPAND mask, in0_h, in0_h
VPAND mask, in0, in0
VPSHUFB in0_h, high_tbl, in0_h
VPSHUFB in0, low_tbl, in0
VPXOR in0, in0_h, in0
VPXOR (out)(pos*1), in0, in0
VMOVDQU in0, (out)(pos*1)
VMOVDQU 32(in)(pos*1), in1
VPSRLQ $4, in1, in1_h
VPAND mask, in1_h, in1_h
VPAND mask, in1, in1
VPSHUFB in1_h, high_tbl, in1_h
VPSHUFB in1, low_tbl, in1
VPXOR in1, in1_h, in1
VPXOR 32(out)(pos*1), in1, in1
VMOVDQU in1, 32(out)(pos*1)
VMOVDQU 64(in)(pos*1), in2
VPSRLQ $4, in2, in2_h
VPAND mask, in2_h, in2_h
VPAND mask, in2, in2
VPSHUFB in2_h, high_tbl, in2_h
VPSHUFB in2, low_tbl, in2
VPXOR in2, in2_h, in2
VPXOR 64(out)(pos*1), in2, in2
VMOVDQU in2, 64(out)(pos*1)
VMOVDQU 96(in)(pos*1), in3
VPSRLQ $4, in3, in3_h
VPAND mask, in3_h, in3_h
VPAND mask, in3, in3
VPSHUFB in3_h, high_tbl, in3_h
VPSHUFB in3, low_tbl, in3
VPXOR in3, in3_h, in3
VPXOR 96(out)(pos*1), in3, in3
VMOVDQU in3, 96(out)(pos*1)
VMOVDQU 128(in)(pos*1), in4
VPSRLQ $4, in4, in4_h
VPAND mask, in4_h, in4_h
VPAND mask, in4, in4
VPSHUFB in4_h, high_tbl, in4_h
VPSHUFB in4, low_tbl, in4
VPXOR in4, in4_h, in4
VPXOR 128(out)(pos*1), in4, in4
VMOVDQU in4, 128(out)(pos*1)
VMOVDQU 160(in)(pos*1), in5
VPSRLQ $4, in5, in5_h
VPAND mask, in5_h, in5_h
VPAND mask, in5, in5
VPSHUFB in5_h, high_tbl, in5_h
VPSHUFB in5, low_tbl, in5
VPXOR in5, in5_h, in5
VPXOR 160(out)(pos*1), in5, in5
VMOVDQU in5, 160(out)(pos*1)
VMOVDQU 192(in)(pos*1), in0
VPSRLQ $4, in0, in0_h
VPAND mask, in0_h, in0_h
VPAND mask, in0, in0
VPSHUFB in0_h, high_tbl, in0_h
VPSHUFB in0, low_tbl, in0
VPXOR in0, in0_h, in0
VPXOR 192(out)(pos*1), in0, in0
VMOVDQU in0, 192(out)(pos*1)
VMOVDQU 224(in)(pos*1), in1
VPSRLQ $4, in1, in1_h
VPAND mask, in1_h, in1_h
VPAND mask, in1, in1
VPSHUFB in1_h, high_tbl, in1_h
VPSHUFB in1, low_tbl, in1
VPXOR in1, in1_h, in1
VPXOR 224(out)(pos*1), in1, in1
VMOVDQU in1, 224(out)(pos*1)
ADDQ $256, pos
CMPQ len, pos
JNE loop256b
VZEROUPPER
RET
not_aligned:
// tmp0 = number of trailing bytes beyond the last multiple of 256.
MOVQ len, tmp0
ANDQ $255, tmp0
// Process 32 bytes at a time from the end until len is a multiple of 256.
loop32b:
VMOVDQU -32(in)(len*1), in0
VPSRLQ $4, in0, in0_h
VPAND mask, in0_h, in0_h
VPAND mask, in0, in0
VPSHUFB in0_h, high_tbl, in0_h
VPSHUFB in0, low_tbl, in0
VPXOR in0, in0_h, in0
VPXOR -32(out)(len*1), in0, in0
VMOVDQU in0, -32(out)(len*1)
SUBQ $32, len
SUBQ $32, tmp0
JG loop32b
// Whatever is left (if anything) is handled by the 256-byte loop.
CMPQ len, $256
JGE aligned
VZEROUPPER
RET
one16b:
// Handle the last 16 bytes with XMM registers, then continue with the rest.
VMOVDQU -16(in)(len*1), in0x
VPSRLQ $4, in0x, in0_hx
VPAND maskx, in0x, in0x
VPAND maskx, in0_hx, in0_hx
VPSHUFB in0_hx, high_tblx, in0_hx
VPSHUFB in0x, low_tblx, in0x
VPXOR in0x, in0_hx, in0x
VPXOR -16(out)(len*1), in0x, in0x
VMOVDQU in0x, -16(out)(len*1)
SUBQ $16, len
CMPQ len, $0
JNE ymm
RET
// func mulVectSSSE3(tbl, d, p []byte)
//
// mulVectSSSE3 is the 128-bit counterpart of mulVectAVX2: for each input byte
// it stores low_tbl[b & 0x0f] ^ high_tbl[b >> 4] into the output buffer,
// 16 bytes per iteration.
// Frame slots used: tbl pointer 0(FP), input pointer 24(FP), input length
// 32(FP), output pointer 48(FP).
// NOTE(review): the loop body runs before the counter is tested and the
// counter is len/16, so the length apparently must be at least 16; bytes
// beyond the last full 16-byte block are not processed - confirm with callers.
TEXT ·mulVectSSSE3(SB), NOSPLIT, $0
MOVQ i+24(FP), in
MOVQ o+48(FP), out
MOVQ tbl+0(FP), tmp0
MOVOU (tmp0), low_tblx
MOVOU 16(tmp0), high_tblx
// Put 0x0f in the low byte, then broadcast byte 0 to all 16 lanes by
// shuffling with an all-zero index vector.
MOVB $15, tmp0
MOVQ tmp0, maskx
PXOR tmp0x, tmp0x
PSHUFB tmp0x, maskx
MOVQ in_len+32(FP), len
// len becomes the number of 16-byte blocks.
SHRQ $4, len
loop:
MOVOU (in), in0x
MOVOU in0x, in0_hx
PSRLQ $4, in0_hx
PAND maskx, in0x
PAND maskx, in0_hx
// PSHUFB overwrites its destination, so work on copies of the tables.
MOVOU low_tblx, tmp1x
MOVOU high_tblx, tmp2x
PSHUFB in0x, tmp1x
PSHUFB in0_hx, tmp2x
PXOR tmp1x, tmp2x
MOVOU tmp2x, (out)
ADDQ $16, in
ADDQ $16, out
SUBQ $1, len
JNZ loop
RET
// func mulVectAddSSSE3(tbl, d, p []byte)
//
// mulVectAddSSSE3 performs the same nibble-table lookup as mulVectSSSE3 but
// XORs the result into the bytes already in the output buffer, 16 bytes per
// iteration.
// Frame slots used: tbl pointer 0(FP), input pointer 24(FP), input length
// 32(FP), output pointer 48(FP).
// NOTE(review): the loop body runs before the counter is tested and the
// counter is len/16, so the length apparently must be at least 16; bytes
// beyond the last full 16-byte block are not processed - confirm with callers.
TEXT ·mulVectAddSSSE3(SB), NOSPLIT, $0
MOVQ i+24(FP), in
MOVQ o+48(FP), out
MOVQ tbl+0(FP), tmp0
MOVOU (tmp0), low_tblx
MOVOU 16(tmp0), high_tblx
// Put 0x0f in the low byte, then broadcast byte 0 to all 16 lanes by
// shuffling with an all-zero index vector.
MOVB $15, tmp0
MOVQ tmp0, maskx
PXOR tmp0x, tmp0x
PSHUFB tmp0x, maskx
MOVQ in_len+32(FP), len
// len becomes the number of 16-byte blocks.
SHRQ $4, len
loop:
MOVOU (in), in0x
MOVOU in0x, in0_hx
PSRLQ $4, in0_hx
PAND maskx, in0x
PAND maskx, in0_hx
// PSHUFB overwrites its destination, so work on copies of the tables.
MOVOU low_tblx, tmp1x
MOVOU high_tblx, tmp2x
PSHUFB in0x, tmp1x
PSHUFB in0_hx, tmp2x
PXOR tmp1x, tmp2x
// Accumulate into the existing output.
MOVOU (out), tmp3x
PXOR tmp3x, tmp2x
MOVOU tmp2x, (out)
ADDQ $16, in
ADDQ $16, out
SUBQ $1, len
JNZ loop
RET
// func copy32B(dst, src []byte)
//
// copy32B copies exactly 32 bytes from the start of src to the start of dst
// using two unaligned 16-byte loads followed by two unaligned 16-byte stores.
// The slice lengths are never read, so both buffers must hold at least
// 32 bytes.
TEXT ·copy32B(SB), NOSPLIT, $0
MOVQ dst+0(FP), SI
MOVQ src+24(FP), DX
MOVOU (DX), X0
MOVOU 16(DX), X1
MOVOU X0, (SI)
MOVOU X1, 16(SI)
RET

8
vendor/github.com/templexxx/reedsolomon/rs_other.go generated vendored

@ -0,0 +1,8 @@
// +build !amd64
package reedsolomon
// newRS returns the encoder used on non-amd64 builds (see the build tag on
// this file). The generator matrix is the part of em that starts at offset
// d*d; em itself is kept as the full encoding matrix.
func newRS(d, p int, em matrix) (enc Encoder) {
	enc = &encBase{
		data:   d,
		parity: p,
		encode: em,
		gen:    em[d*d:],
	}
	return enc
}

44
vendor/github.com/templexxx/reedsolomon/tbl.go generated vendored

File diff suppressed because one or more lines are too long

1
vendor/github.com/templexxx/xor/.gitattributes generated vendored

@ -0,0 +1 @@
*.s linguist-language=go

18
vendor/github.com/templexxx/xor/.gitignore generated vendored

@ -0,0 +1,18 @@
# Binaries for programs and plugins
*.exe
*.dll
*.so
*.dylib
# Test binary, build with `go test -c`
*.test
# Output of the go coverage tool, specifically when used with LiteIDE
*.out
# Project-local glide cache, RE: https://github.com/Masterminds/glide/issues/736
.glide/
/backup/
/backup2/
/.idea
/backup3/

21
vendor/github.com/templexxx/xor/LICENSE generated vendored

@ -0,0 +1,21 @@
MIT License
Copyright (c) 2017 Temple3x
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

48
vendor/github.com/templexxx/xor/README.md generated vendored

@ -0,0 +1,48 @@
# XOR
XOR code engine in pure Go
more than 10GB/S per core
## Introduction:
1. Uses SIMD (SSE2 or AVX2) to speed up XOR
2. ...
## Installation
To get the package use the standard:
```bash
go get github.com/templexxx/xor
```
## Documentation
See the associated [GoDoc](http://godoc.org/github.com/templexxx/xor)
## Performance
Performance depends mainly on:
1. SIMD extension
2. unit size of worker
3. hardware (CPU, RAM, etc.)
Example of performance on my MacBook 2014-mid(i5-4278U 2.6GHz 2 physical cores). The 16MB per shards.
```
speed = ( shards * size ) / cost
```
| data_shards | shard_size |speed (MB/S) |
|----------|----|-----|
| 2 |1KB|64127.95 |
|2|1400B|59657.55|
|2|16KB|35370.84|
| 2 | 16MB|12128.95 |
| 5 |1KB| 78837.33 |
|5|1400B|58054.89|
|5|16KB|50161.19|
|5| 16MB|12750.41|
## Who is using this?
1. https://github.com/xtaci/kcp-go -- A Production-Grade Reliable-UDP Library for golang

438
vendor/github.com/templexxx/xor/avx2_amd64.s generated vendored

@ -0,0 +1,438 @@
#include "textflag.h"
// addr of mem
#define DST BX
#define SRC SI
#define SRC0 TMP4
#define SRC1 TMP5
// loop args
// num of vect
#define VECT CX
#define LEN DX
// pos of matrix
#define POS R8
// tmp store
// num of vect or ...
#define TMP1 R9
// pos of matrix or ...
#define TMP2 R10
// store addr of data/parity or ...
#define TMP3 R11
#define TMP4 R12
#define TMP5 R13
#define TMP6 R14
// func bytesAVX2mini(dst, src0, src1 []byte, size int)
//
// bytesAVX2mini writes src0[i] ^ src1[i] to dst[i] for the first size bytes
// (size is the int at 72(FP)); a size of 0 returns immediately.
// Strategy: the tail is trimmed from the end - single bytes until the
// remaining length is a multiple of 8, then 8-byte words until it is a
// multiple of 32 - and the remaining prefix is processed forward in 32-byte
// YMM blocks.
TEXT ·bytesAVX2mini(SB), NOSPLIT, $0
MOVQ len+72(FP), LEN
CMPQ LEN, $0
JE ret
MOVQ dst+0(FP), DST
MOVQ src0+24(FP), SRC0
MOVQ src1+48(FP), SRC1
TESTQ $31, LEN
JNZ not_aligned
aligned:
MOVQ $0, POS
// Forward loop: LEN is a non-zero multiple of 32 here.
loop32b:
VMOVDQU (SRC0)(POS*1), Y0
VPXOR (SRC1)(POS*1), Y0, Y0
VMOVDQU Y0, (DST)(POS*1)
ADDQ $32, POS
CMPQ LEN, POS
JNE loop32b
VZEROUPPER
RET
// XOR the last byte and shrink LEN until it is a multiple of 8.
loop_1b:
MOVB -1(SRC0)(LEN*1), TMP1
MOVB -1(SRC1)(LEN*1), TMP2
XORB TMP1, TMP2
MOVB TMP2, -1(DST)(LEN*1)
SUBQ $1, LEN
TESTQ $7, LEN
JNZ loop_1b
CMPQ LEN, $0
JE ret
TESTQ $31, LEN
JZ aligned
not_aligned:
TESTQ $7, LEN
JNE loop_1b
// TMP1 = bytes beyond the last multiple of 32 (a multiple of 8 here).
MOVQ LEN, TMP1
ANDQ $31, TMP1
// XOR the last 8 bytes and shrink LEN until it is a multiple of 32.
loop_8b:
MOVQ -8(SRC0)(LEN*1), TMP2
MOVQ -8(SRC1)(LEN*1), TMP3
XORQ TMP2, TMP3
MOVQ TMP3, -8(DST)(LEN*1)
SUBQ $8, LEN
SUBQ $8, TMP1
JG loop_8b
CMPQ LEN, $32
JGE aligned
RET
ret:
RET
// func bytesAVX2small(dst, src0, src1 []byte, size int)
//
// bytesAVX2small writes src0[i] ^ src1[i] to dst[i] for the first size bytes
// (size is the int at 72(FP)); a size of 0 returns immediately.
// Strategy: the tail is trimmed from the end - single bytes until the
// remaining length is a multiple of 8, then 8-byte words until it is a
// multiple of 128 - and the remaining prefix is processed forward in
// 128-byte blocks (four YMM registers) with ordinary stores.
TEXT ·bytesAVX2small(SB), NOSPLIT, $0
MOVQ len+72(FP), LEN
CMPQ LEN, $0
JE ret
MOVQ dst+0(FP), DST
MOVQ src0+24(FP), SRC0
MOVQ src1+48(FP), SRC1
TESTQ $127, LEN
JNZ not_aligned
aligned:
MOVQ $0, POS
// Forward loop: LEN is a non-zero multiple of 128 here.
loop128b:
VMOVDQU (SRC0)(POS*1), Y0
VMOVDQU 32(SRC0)(POS*1), Y1
VMOVDQU 64(SRC0)(POS*1), Y2
VMOVDQU 96(SRC0)(POS*1), Y3
VPXOR (SRC1)(POS*1), Y0, Y0
VPXOR 32(SRC1)(POS*1), Y1, Y1
VPXOR 64(SRC1)(POS*1), Y2, Y2
VPXOR 96(SRC1)(POS*1), Y3, Y3
VMOVDQU Y0, (DST)(POS*1)
VMOVDQU Y1, 32(DST)(POS*1)
VMOVDQU Y2, 64(DST)(POS*1)
VMOVDQU Y3, 96(DST)(POS*1)
ADDQ $128, POS
CMPQ LEN, POS
JNE loop128b
VZEROUPPER
RET
// XOR the last byte and shrink LEN until it is a multiple of 8.
loop_1b:
MOVB -1(SRC0)(LEN*1), TMP1
MOVB -1(SRC1)(LEN*1), TMP2
XORB TMP1, TMP2
MOVB TMP2, -1(DST)(LEN*1)
SUBQ $1, LEN
TESTQ $7, LEN
JNZ loop_1b
CMPQ LEN, $0
JE ret
TESTQ $127, LEN
JZ aligned
not_aligned:
TESTQ $7, LEN
JNE loop_1b
// TMP1 = bytes beyond the last multiple of 128 (a multiple of 8 here).
MOVQ LEN, TMP1
ANDQ $127, TMP1
// XOR the last 8 bytes and shrink LEN until it is a multiple of 128.
loop_8b:
MOVQ -8(SRC0)(LEN*1), TMP2
MOVQ -8(SRC1)(LEN*1), TMP3
XORQ TMP2, TMP3
MOVQ TMP3, -8(DST)(LEN*1)
SUBQ $8, LEN
SUBQ $8, TMP1
JG loop_8b
CMPQ LEN, $128
JGE aligned
RET
ret:
RET
// func bytesAVX2big(dst, src0, src1 []byte, size int)
//
// bytesAVX2big writes src0[i] ^ src1[i] to dst[i] for the first size bytes
// (size is the int at 72(FP)); a size of 0 returns immediately.
// It has the same structure as bytesAVX2small, except that the 128-byte main
// loop stores its results with hand-encoded instructions (the same byte
// sequence matrixAVX2big annotates as VMOVNTDQ, i.e. non-temporal stores)
// and issues SFENCE once the loop is done.
// NOTE(review): VMOVNTDQ with a YMM operand requires a 32-byte-aligned memory
// address; nothing visible here checks the alignment of DST - confirm.
TEXT ·bytesAVX2big(SB), NOSPLIT, $0
MOVQ len+72(FP), LEN
CMPQ LEN, $0
JE ret
MOVQ dst+0(FP), DST
MOVQ src0+24(FP), SRC0
MOVQ src1+48(FP), SRC1
TESTQ $127, LEN
JNZ not_aligned
aligned:
MOVQ $0, POS
// Forward loop: LEN is a non-zero multiple of 128 here.
loop128b:
VMOVDQU (SRC0)(POS*1), Y0
VMOVDQU 32(SRC0)(POS*1), Y1
VMOVDQU 64(SRC0)(POS*1), Y2
VMOVDQU 96(SRC0)(POS*1), Y3
VPXOR (SRC1)(POS*1), Y0, Y0
VPXOR 32(SRC1)(POS*1), Y1, Y1
VPXOR 64(SRC1)(POS*1), Y2, Y2
VPXOR 96(SRC1)(POS*1), Y3, Y3
// Stores of Y0..Y3 to DST+POS at displacements 0, 0x20, 0x40 and 0x60.
LONG $0xe77da1c4; WORD $0x0304
LONG $0xe77da1c4; WORD $0x034c; BYTE $0x20
LONG $0xe77da1c4; WORD $0x0354; BYTE $0x40
LONG $0xe77da1c4; WORD $0x035c; BYTE $0x60
ADDQ $128, POS
CMPQ LEN, POS
JNE loop128b
SFENCE
VZEROUPPER
RET
// XOR the last byte and shrink LEN until it is a multiple of 8.
loop_1b:
MOVB -1(SRC0)(LEN*1), TMP1
MOVB -1(SRC1)(LEN*1), TMP2
XORB TMP1, TMP2
MOVB TMP2, -1(DST)(LEN*1)
SUBQ $1, LEN
TESTQ $7, LEN
JNZ loop_1b
CMPQ LEN, $0
JE ret
TESTQ $127, LEN
JZ aligned
not_aligned:
TESTQ $7, LEN
JNE loop_1b
// TMP1 = bytes beyond the last multiple of 128 (a multiple of 8 here).
MOVQ LEN, TMP1
ANDQ $127, TMP1
// XOR the last 8 bytes and shrink LEN until it is a multiple of 128.
loop_8b:
MOVQ -8(SRC0)(LEN*1), TMP2
MOVQ -8(SRC1)(LEN*1), TMP3
XORQ TMP2, TMP3
MOVQ TMP3, -8(DST)(LEN*1)
SUBQ $8, LEN
SUBQ $8, TMP1
JG loop_8b
CMPQ LEN, $128
JGE aligned
RET
ret:
RET
// func matrixAVX2small(dst []byte, src [][]byte)
//
// matrixAVX2small XORs all vectors in src together, column by column, and
// writes the result to dst: dst[i] = src[0][i] ^ src[1][i] ^ ... for
// i < len(dst). LEN is len(dst) (8(FP)), VECT is len(src) (32(FP)), and SRC
// points at the array of 24-byte slice headers, so TMP2 advances by 24 to
// reach the next vector's data pointer.
// The inner loops always read src[1], and there is no zero-length check, so
// the code relies on len(src) >= 2 and len(dst) != 0.
// The tail is trimmed from the end (1 byte, then 8 bytes at a time) until the
// remaining length is a multiple of 128; the rest is processed forward in
// 128-byte blocks with ordinary stores.
TEXT ·matrixAVX2small(SB), NOSPLIT, $0
MOVQ dst+0(FP), DST
MOVQ src+24(FP), SRC
MOVQ vec+32(FP), VECT
MOVQ len+8(FP), LEN
TESTQ $127, LEN
JNZ not_aligned
aligned:
MOVQ $0, POS
loop128b:
// Load 128 bytes of src[0]; TMP1 = VECT-2 makes next_vect run VECT-1 times.
MOVQ VECT, TMP1
SUBQ $2, TMP1
MOVQ $0, TMP2
MOVQ (SRC)(TMP2*1), TMP3
MOVQ TMP3, TMP4
VMOVDQU (TMP3)(POS*1), Y0
VMOVDQU 32(TMP4)(POS*1), Y1
VMOVDQU 64(TMP3)(POS*1), Y2
VMOVDQU 96(TMP4)(POS*1), Y3
// Fold the same 128-byte column of each following vector into Y0..Y3.
next_vect:
ADDQ $24, TMP2
MOVQ (SRC)(TMP2*1), TMP3
MOVQ TMP3, TMP4
VMOVDQU (TMP3)(POS*1), Y4
VMOVDQU 32(TMP4)(POS*1), Y5
VMOVDQU 64(TMP3)(POS*1), Y6
VMOVDQU 96(TMP4)(POS*1), Y7
VPXOR Y4, Y0, Y0
VPXOR Y5, Y1, Y1
VPXOR Y6, Y2, Y2
VPXOR Y7, Y3, Y3
SUBQ $1, TMP1
JGE next_vect
VMOVDQU Y0, (DST)(POS*1)
VMOVDQU Y1, 32(DST)(POS*1)
VMOVDQU Y2, 64(DST)(POS*1)
VMOVDQU Y3, 96(DST)(POS*1)
ADDQ $128, POS
CMPQ LEN, POS
JNE loop128b
VZEROUPPER
RET
// XOR the last byte across all vectors; repeat until LEN is a multiple of 8.
loop_1b:
MOVQ VECT, TMP1
MOVQ $0, TMP2
MOVQ (SRC)(TMP2*1), TMP3
SUBQ $2, TMP1
MOVB -1(TMP3)(LEN*1), TMP5
next_vect_1b:
ADDQ $24, TMP2
MOVQ (SRC)(TMP2*1), TMP3
MOVB -1(TMP3)(LEN*1), TMP6
XORB TMP6, TMP5
SUBQ $1, TMP1
JGE next_vect_1b
MOVB TMP5, -1(DST)(LEN*1)
SUBQ $1, LEN
TESTQ $7, LEN
JNZ loop_1b
CMPQ LEN, $0
JE ret
TESTQ $127, LEN
JZ aligned
not_aligned:
TESTQ $7, LEN
JNE loop_1b
// TMP4 = bytes beyond the last multiple of 128 (a multiple of 8 here).
MOVQ LEN, TMP4
ANDQ $127, TMP4
// XOR the last 8 bytes across all vectors; repeat until LEN is a multiple of 128.
loop_8b:
MOVQ VECT, TMP1
MOVQ $0, TMP2
MOVQ (SRC)(TMP2*1), TMP3
SUBQ $2, TMP1
MOVQ -8(TMP3)(LEN*1), TMP5
next_vect_8b:
ADDQ $24, TMP2
MOVQ (SRC)(TMP2*1), TMP3
MOVQ -8(TMP3)(LEN*1), TMP6
XORQ TMP6, TMP5
SUBQ $1, TMP1
JGE next_vect_8b
MOVQ TMP5, -8(DST)(LEN*1)
SUBQ $8, LEN
SUBQ $8, TMP4
JG loop_8b
CMPQ LEN, $128
JGE aligned
RET
ret:
RET
// func matrixAVX2big(dst []byte, src [][]byte)
//
// matrixAVX2big computes dst[i] = src[0][i] ^ src[1][i] ^ ... for
// i < len(dst), exactly like matrixAVX2small, except that the 128-byte main
// loop writes its results with hand-encoded VMOVNTDQ (non-temporal) stores.
// LEN is len(dst) (8(FP)), VECT is len(src) (32(FP)); SRC points at the array
// of 24-byte slice headers. The code relies on len(src) >= 2 and
// len(dst) != 0.
// NOTE(review): VMOVNTDQ with a YMM operand requires a 32-byte-aligned memory
// address; nothing visible here checks the alignment of DST - confirm.
// NOTE(review): unlike bytesAVX2big, no SFENCE follows the non-temporal
// stores - confirm whether that is intentional.
TEXT ·matrixAVX2big(SB), NOSPLIT, $0
MOVQ dst+0(FP), DST
MOVQ src+24(FP), SRC
MOVQ vec+32(FP), VECT
MOVQ len+8(FP), LEN
TESTQ $127, LEN
JNZ not_aligned
aligned:
MOVQ $0, POS
loop128b:
// Load 128 bytes of src[0]; TMP1 = VECT-2 makes next_vect run VECT-1 times.
MOVQ VECT, TMP1
SUBQ $2, TMP1
MOVQ $0, TMP2
MOVQ (SRC)(TMP2*1), TMP3
MOVQ TMP3, TMP4
VMOVDQU (TMP3)(POS*1), Y0
VMOVDQU 32(TMP4)(POS*1), Y1
VMOVDQU 64(TMP3)(POS*1), Y2
VMOVDQU 96(TMP4)(POS*1), Y3
// Fold the same 128-byte column of each following vector into Y0..Y3.
next_vect:
ADDQ $24, TMP2
MOVQ (SRC)(TMP2*1), TMP3
MOVQ TMP3, TMP4
VMOVDQU (TMP3)(POS*1), Y4
VMOVDQU 32(TMP4)(POS*1), Y5
VMOVDQU 64(TMP3)(POS*1), Y6
VMOVDQU 96(TMP4)(POS*1), Y7
VPXOR Y4, Y0, Y0
VPXOR Y5, Y1, Y1
VPXOR Y6, Y2, Y2
VPXOR Y7, Y3, Y3
SUBQ $1, TMP1
JGE next_vect
// Stores of Y0..Y3 to DST+POS at displacements 0, 0x20, 0x40 and 0x60.
LONG $0xe77da1c4; WORD $0x0304 // VMOVNTDQ go1.8 has
LONG $0xe77da1c4; WORD $0x034c; BYTE $0x20
LONG $0xe77da1c4; WORD $0x0354; BYTE $0x40
LONG $0xe77da1c4; WORD $0x035c; BYTE $0x60
ADDQ $128, POS
CMPQ LEN, POS
JNE loop128b
VZEROUPPER
RET
// XOR the last byte across all vectors; repeat until LEN is a multiple of 8.
loop_1b:
MOVQ VECT, TMP1
MOVQ $0, TMP2
MOVQ (SRC)(TMP2*1), TMP3
SUBQ $2, TMP1
MOVB -1(TMP3)(LEN*1), TMP5
next_vect_1b:
ADDQ $24, TMP2
MOVQ (SRC)(TMP2*1), TMP3
MOVB -1(TMP3)(LEN*1), TMP6
XORB TMP6, TMP5
SUBQ $1, TMP1
JGE next_vect_1b
MOVB TMP5, -1(DST)(LEN*1)
SUBQ $1, LEN
TESTQ $7, LEN
JNZ loop_1b
CMPQ LEN, $0
JE ret
TESTQ $127, LEN
JZ aligned
not_aligned:
TESTQ $7, LEN
JNE loop_1b
// TMP4 = bytes beyond the last multiple of 128 (a multiple of 8 here).
MOVQ LEN, TMP4
ANDQ $127, TMP4
// XOR the last 8 bytes across all vectors; repeat until LEN is a multiple of 128.
loop_8b:
MOVQ VECT, TMP1
MOVQ $0, TMP2
MOVQ (SRC)(TMP2*1), TMP3
SUBQ $2, TMP1
MOVQ -8(TMP3)(LEN*1), TMP5
next_vect_8b:
ADDQ $24, TMP2
MOVQ (SRC)(TMP2*1), TMP3
MOVQ -8(TMP3)(LEN*1), TMP6
XORQ TMP6, TMP5
SUBQ $1, TMP1
JGE next_vect_8b
MOVQ TMP5, -8(DST)(LEN*1)
SUBQ $8, LEN
SUBQ $8, TMP4
JG loop_8b
CMPQ LEN, $128
JGE aligned
RET
ret:
RET

116
vendor/github.com/templexxx/xor/nosimd.go generated vendored

@ -0,0 +1,116 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package xor
import (
"runtime"
"unsafe"
)
const wordSize = int(unsafe.Sizeof(uintptr(0)))
const supportsUnaligned = runtime.GOARCH == "386" || runtime.GOARCH == "amd64" || runtime.GOARCH == "ppc64" || runtime.GOARCH == "ppc64le" || runtime.GOARCH == "s390x"
// bytesNoSIMD XORs the first size bytes of a and b into dst without any
// assembly. The destination is assumed to have enough space. The word-wise
// fast path is taken only on architectures listed in supportsUnaligned;
// everywhere else the byte-wise routine is used.
func bytesNoSIMD(dst, a, b []byte, size int) {
	if !supportsUnaligned {
		// TODO(hanwen): if (dst, a, b) have common alignment
		// we could still try fastXORBytes. It is not clear
		// how often this happens, and it's only worth it if
		// the block encryption itself is hardware
		// accelerated.
		safeXORBytes(dst, a, b, size)
		return
	}
	fastXORBytes(dst, a, b, size)
}
// unitSize is the chunk size (in bytes) that matrixNoSIMD splits the slices into to stay cache-friendly
const unitSize = 16 * 1024
// matrixNoSIMD XORs all vectors of src into dst, walking the data in chunks
// of at most unitSize bytes. The total length processed is len(src[0]), so
// src must not be empty.
func matrixNoSIMD(dst []byte, src [][]byte) {
	total := len(src[0])
	for off := 0; off < total; {
		next := off + unitSize
		if next > total {
			// Final, possibly shorter, chunk.
			next = total
		}
		partNoSIMD(off, next, dst, src)
		off = next
	}
}
// partNoSIMD XORs the [start:end) window of every vector in src into the same
// window of dst. Working on one window at a time improves performance on big
// data by reducing cache pollution. src must hold at least two vectors.
func partNoSIMD(start, end int, dst []byte, src [][]byte) {
	n := end - start
	out := dst[start:end]
	// Seed the window with src[0] ^ src[1], then fold in the remaining vectors.
	bytesNoSIMD(out, src[0][start:end], src[1][start:end], n)
	for _, v := range src[2:] {
		bytesNoSIMD(out, out, v[start:end], n)
	}
}
// fastXORBytes XORs the first n bytes of a and b into dst in bulk. The part
// that is a whole number of machine words goes through fastXORWords; the
// leftover bytes are handled one at a time. It only works on architectures
// that support unaligned reads and writes.
func fastXORBytes(dst, a, b []byte, n int) {
	tail := n % wordSize
	bulk := n - tail // largest multiple of wordSize not exceeding n
	if bulk > 0 {
		fastXORWords(dst[:bulk], a[:bulk], b[:bulk])
	}
	for i := bulk; i < n; i++ {
		dst[i] = a[i] ^ b[i]
	}
}
// safeXORBytes XORs the first n bytes of a and b into dst one byte at a time,
// making no assumptions about alignment. The first n%8 bytes are handled
// individually so that the remainder can be walked in groups of eight.
func safeXORBytes(dst, a, b []byte, n int) {
	head := n % 8
	for i := 0; i < head; i++ {
		dst[i] = a[i] ^ b[i]
	}
	for off := head; off < n; off += 8 {
		d, x, y := dst[off:off+8], a[off:off+8], b[off:off+8]
		for k := range d {
			d[k] = x[k] ^ y[k]
		}
	}
}
// fastXORWords XORs a and b into dst one machine word (4 or 8 bytes,
// depending on architecture) at a time. The arguments are assumed to be of
// equal length, and that length a multiple of the word size.
func fastXORWords(dst, a, b []byte) {
	// Reinterpret the byte slices as word slices. The copied headers still
	// carry the byte counts in len/cap, so every index below is bounded by
	// the explicitly computed word count instead.
	dw := *(*[]uintptr)(unsafe.Pointer(&dst))
	aw := *(*[]uintptr)(unsafe.Pointer(&a))
	bw := *(*[]uintptr)(unsafe.Pointer(&b))
	words := len(b) / wordSize

	// Handle the first words%8 words singly, then the rest in groups of eight.
	head := words % 8
	for i := 0; i < head; i++ {
		dw[i] = aw[i] ^ bw[i]
	}
	for off := head; off < words; off += 8 {
		d, x, y := dw[off:off+8], aw[off:off+8], bw[off:off+8]
		for k := range d {
			d[k] = x[k] ^ y[k]
		}
	}
}

574
vendor/github.com/templexxx/xor/sse2_amd64.s generated vendored

@ -0,0 +1,574 @@
#include "textflag.h"
// addr of mem
#define DST BX
#define SRC SI
#define SRC0 TMP4
#define SRC1 TMP5
// loop args
// num of vect
#define VECT CX
#define LEN DX
// pos of matrix
#define POS R8
// tmp store
// num of vect or ...
#define TMP1 R9
// pos of matrix or ...
#define TMP2 R10
// store addr of data/parity or ...
#define TMP3 R11
#define TMP4 R12
#define TMP5 R13
#define TMP6 R14
// func xorSrc0(dst, src0, src1 []byte)
//
// xorSrc0 writes src0[i] ^ src1[i] to dst[i] for i < len(src0); the length is
// read from 32(FP), the len field of src0. A length of 0 returns immediately.
// dst and src1 are not length-checked here and must be at least as long.
// Strategy: the tail is trimmed from the end (single bytes until the
// remaining length is a multiple of 8, then 8-byte words until it is a
// multiple of 16); the rest is processed forward in 16-byte XMM blocks.
TEXT ·xorSrc0(SB), NOSPLIT, $0
MOVQ len+32(FP), LEN
CMPQ LEN, $0
JE ret
MOVQ dst+0(FP), DST
MOVQ src0+24(FP), SRC0
MOVQ src1+48(FP), SRC1
TESTQ $15, LEN
JNZ not_aligned
aligned:
MOVQ $0, POS
// Forward loop: LEN is a non-zero multiple of 16 here.
loop16b:
MOVOU (SRC0)(POS*1), X0
XORPD (SRC1)(POS*1), X0
MOVOU X0, (DST)(POS*1)
ADDQ $16, POS
CMPQ LEN, POS
JNE loop16b
RET
// XOR the last byte and shrink LEN until it is a multiple of 8.
loop_1b:
MOVB -1(SRC0)(LEN*1), TMP1
MOVB -1(SRC1)(LEN*1), TMP2
XORB TMP1, TMP2
MOVB TMP2, -1(DST)(LEN*1)
SUBQ $1, LEN
TESTQ $7, LEN
JNZ loop_1b
CMPQ LEN, $0
JE ret
TESTQ $15, LEN
JZ aligned
not_aligned:
TESTQ $7, LEN
JNE loop_1b
// TMP1 = bytes beyond the last multiple of 16 (a multiple of 8 here).
MOVQ LEN, TMP1
ANDQ $15, TMP1
// XOR the last 8 bytes and shrink LEN until it is a multiple of 16.
loop_8b:
MOVQ -8(SRC0)(LEN*1), TMP2
MOVQ -8(SRC1)(LEN*1), TMP3
XORQ TMP2, TMP3
MOVQ TMP3, -8(DST)(LEN*1)
SUBQ $8, LEN
SUBQ $8, TMP1
JG loop_8b
CMPQ LEN, $16
JGE aligned
RET
ret:
RET
// func xorSrc1(dst, src0, src1 []byte)
//
// xorSrc1 writes src0[i] ^ src1[i] to dst[i] for i < len(src1); the length is
// read from 56(FP), the len field of src1. A length of 0 returns immediately.
// dst and src0 are not length-checked here and must be at least as long.
// Strategy: the tail is trimmed from the end (single bytes until the
// remaining length is a multiple of 8, then 8-byte words until it is a
// multiple of 16); the rest is processed forward in 16-byte XMM blocks.
TEXT ·xorSrc1(SB), NOSPLIT, $0
MOVQ len+56(FP), LEN
CMPQ LEN, $0
JE ret
MOVQ dst+0(FP), DST
MOVQ src0+24(FP), SRC0
MOVQ src1+48(FP), SRC1
TESTQ $15, LEN
JNZ not_aligned
aligned:
MOVQ $0, POS
// Forward loop: LEN is a non-zero multiple of 16 here.
loop16b:
MOVOU (SRC0)(POS*1), X0
XORPD (SRC1)(POS*1), X0
MOVOU X0, (DST)(POS*1)
ADDQ $16, POS
CMPQ LEN, POS
JNE loop16b
RET
// XOR the last byte and shrink LEN until it is a multiple of 8.
loop_1b:
MOVB -1(SRC0)(LEN*1), TMP1
MOVB -1(SRC1)(LEN*1), TMP2
XORB TMP1, TMP2
MOVB TMP2, -1(DST)(LEN*1)
SUBQ $1, LEN
TESTQ $7, LEN
JNZ loop_1b
CMPQ LEN, $0
JE ret
TESTQ $15, LEN
JZ aligned
not_aligned:
TESTQ $7, LEN
JNE loop_1b
// TMP1 = bytes beyond the last multiple of 16 (a multiple of 8 here).
MOVQ LEN, TMP1
ANDQ $15, TMP1
// XOR the last 8 bytes and shrink LEN until it is a multiple of 16.
loop_8b:
MOVQ -8(SRC0)(LEN*1), TMP2
MOVQ -8(SRC1)(LEN*1), TMP3
XORQ TMP2, TMP3
MOVQ TMP3, -8(DST)(LEN*1)
SUBQ $8, LEN
SUBQ $8, TMP1
JG loop_8b
CMPQ LEN, $16
JGE aligned
RET
ret:
RET
// func bytesSSE2mini(dst, src0, src1 []byte, size int)
//
// bytesSSE2mini writes src0[i] ^ src1[i] to dst[i] for the first size bytes
// (size is the int at 72(FP)); a size of 0 returns immediately.
// Strategy: the tail is trimmed from the end - single bytes until the
// remaining length is a multiple of 8, then 8-byte words until it is a
// multiple of 16 - and the remaining prefix is processed forward in 16-byte
// XMM blocks.
TEXT ·bytesSSE2mini(SB), NOSPLIT, $0
MOVQ len+72(FP), LEN
CMPQ LEN, $0
JE ret
MOVQ dst+0(FP), DST
MOVQ src0+24(FP), SRC0
MOVQ src1+48(FP), SRC1
TESTQ $15, LEN
JNZ not_aligned
aligned:
MOVQ $0, POS
// Forward loop: LEN is a non-zero multiple of 16 here.
loop16b:
MOVOU (SRC0)(POS*1), X0
XORPD (SRC1)(POS*1), X0
// Alternative kept by the author: load src1 into a register and use PXOR.
// MOVOU (SRC1)(POS*1), X4
// PXOR X4, X0
MOVOU X0, (DST)(POS*1)
ADDQ $16, POS
CMPQ LEN, POS
JNE loop16b
RET
// XOR the last byte and shrink LEN until it is a multiple of 8.
loop_1b:
MOVB -1(SRC0)(LEN*1), TMP1
MOVB -1(SRC1)(LEN*1), TMP2
XORB TMP1, TMP2
MOVB TMP2, -1(DST)(LEN*1)
SUBQ $1, LEN
TESTQ $7, LEN
JNZ loop_1b
CMPQ LEN, $0
JE ret
TESTQ $15, LEN
JZ aligned
not_aligned:
TESTQ $7, LEN
JNE loop_1b
// TMP1 = bytes beyond the last multiple of 16 (a multiple of 8 here).
MOVQ LEN, TMP1
ANDQ $15, TMP1
// XOR the last 8 bytes and shrink LEN until it is a multiple of 16.
loop_8b:
MOVQ -8(SRC0)(LEN*1), TMP2
MOVQ -8(SRC1)(LEN*1), TMP3
XORQ TMP2, TMP3
MOVQ TMP3, -8(DST)(LEN*1)
SUBQ $8, LEN
SUBQ $8, TMP1
JG loop_8b
CMPQ LEN, $16
JGE aligned
RET
ret:
RET
// func bytesSSE2small(dst, src0, src1 []byte, size int)
//
// bytesSSE2small writes src0[i] ^ src1[i] to dst[i] for the first size bytes
// (size is the int at 72(FP)); a size of 0 returns immediately.
// Strategy: the tail is trimmed from the end - single bytes until the
// remaining length is a multiple of 8, then 8-byte words until it is a
// multiple of 64 - and the remaining prefix is processed forward in 64-byte
// blocks (four XMM registers) with ordinary stores.
TEXT ·bytesSSE2small(SB), NOSPLIT, $0
MOVQ len+72(FP), LEN
CMPQ LEN, $0
JE ret
MOVQ dst+0(FP), DST
MOVQ src0+24(FP), SRC0
MOVQ src1+48(FP), SRC1
TESTQ $63, LEN
JNZ not_aligned
aligned:
MOVQ $0, POS
// Forward loop: LEN is a non-zero multiple of 64 here.
loop64b:
MOVOU (SRC0)(POS*1), X0
MOVOU 16(SRC0)(POS*1), X1
MOVOU 32(SRC0)(POS*1), X2
MOVOU 48(SRC0)(POS*1), X3
MOVOU (SRC1)(POS*1), X4
MOVOU 16(SRC1)(POS*1), X5
MOVOU 32(SRC1)(POS*1), X6
MOVOU 48(SRC1)(POS*1), X7
PXOR X4, X0
PXOR X5, X1
PXOR X6, X2
PXOR X7, X3
MOVOU X0, (DST)(POS*1)
MOVOU X1, 16(DST)(POS*1)
MOVOU X2, 32(DST)(POS*1)
MOVOU X3, 48(DST)(POS*1)
ADDQ $64, POS
CMPQ LEN, POS
JNE loop64b
RET
// XOR the last byte and shrink LEN until it is a multiple of 8.
loop_1b:
MOVB -1(SRC0)(LEN*1), TMP1
MOVB -1(SRC1)(LEN*1), TMP2
XORB TMP1, TMP2
MOVB TMP2, -1(DST)(LEN*1)
SUBQ $1, LEN
TESTQ $7, LEN
JNZ loop_1b
CMPQ LEN, $0
JE ret
TESTQ $63, LEN
JZ aligned
not_aligned:
TESTQ $7, LEN
JNE loop_1b
// TMP1 = bytes beyond the last multiple of 64 (a multiple of 8 here).
MOVQ LEN, TMP1
ANDQ $63, TMP1
// XOR the last 8 bytes and shrink LEN until it is a multiple of 64.
loop_8b:
MOVQ -8(SRC0)(LEN*1), TMP2
MOVQ -8(SRC1)(LEN*1), TMP3
XORQ TMP2, TMP3
MOVQ TMP3, -8(DST)(LEN*1)
SUBQ $8, LEN
SUBQ $8, TMP1
JG loop_8b
CMPQ LEN, $64
JGE aligned
RET
ret:
RET
// func bytesSSE2big(dst, src0, src1 []byte, size int)
//
// bytesSSE2big writes src0[i] ^ src1[i] to dst[i] for the first size bytes
// (size is the int at 72(FP)); a size of 0 returns immediately.
// It has the same structure as bytesSSE2small, except that the 64-byte main
// loop writes its results with hand-encoded MOVNTDQ (non-temporal) stores.
// NOTE(review): MOVNTDQ requires a 16-byte-aligned memory address; nothing
// visible here checks the alignment of DST - confirm.
// NOTE(review): no SFENCE follows the non-temporal stores, unlike
// bytesAVX2big - confirm whether that is intentional.
TEXT ·bytesSSE2big(SB), NOSPLIT, $0
MOVQ len+72(FP), LEN
CMPQ LEN, $0
JE ret
MOVQ dst+0(FP), DST
MOVQ src0+24(FP), SRC0
MOVQ src1+48(FP), SRC1
TESTQ $63, LEN
JNZ not_aligned
aligned:
MOVQ $0, POS
// Forward loop: LEN is a non-zero multiple of 64 here.
loop64b:
MOVOU (SRC0)(POS*1), X0
MOVOU 16(SRC0)(POS*1), X1
MOVOU 32(SRC0)(POS*1), X2
MOVOU 48(SRC0)(POS*1), X3
MOVOU (SRC1)(POS*1), X4
MOVOU 16(SRC1)(POS*1), X5
MOVOU 32(SRC1)(POS*1), X6
MOVOU 48(SRC1)(POS*1), X7
PXOR X4, X0
PXOR X5, X1
PXOR X6, X2
PXOR X7, X3
// Stores of X0..X3 to DST+POS at displacements 0, 0x10, 0x20 and 0x30.
LONG $0xe70f4266; WORD $0x0304 // MOVNTDQ
LONG $0xe70f4266; WORD $0x034c; BYTE $0x10
LONG $0xe70f4266; WORD $0x0354; BYTE $0x20
LONG $0xe70f4266; WORD $0x035c; BYTE $0x30
ADDQ $64, POS
CMPQ LEN, POS
JNE loop64b
RET
// XOR the last byte and shrink LEN until it is a multiple of 8.
loop_1b:
MOVB -1(SRC0)(LEN*1), TMP1
MOVB -1(SRC1)(LEN*1), TMP2
XORB TMP1, TMP2
MOVB TMP2, -1(DST)(LEN*1)
SUBQ $1, LEN
TESTQ $7, LEN
JNZ loop_1b
CMPQ LEN, $0
JE ret
TESTQ $63, LEN
JZ aligned
not_aligned:
TESTQ $7, LEN
JNE loop_1b
// TMP1 = bytes beyond the last multiple of 64 (a multiple of 8 here).
MOVQ LEN, TMP1
ANDQ $63, TMP1
// XOR the last 8 bytes and shrink LEN until it is a multiple of 64.
loop_8b:
MOVQ -8(SRC0)(LEN*1), TMP2
MOVQ -8(SRC1)(LEN*1), TMP3
XORQ TMP2, TMP3
MOVQ TMP3, -8(DST)(LEN*1)
SUBQ $8, LEN
SUBQ $8, TMP1
JG loop_8b
CMPQ LEN, $64
JGE aligned
RET
ret:
RET
// func matrixSSE2small(dst []byte, src [][]byte)
//
// matrixSSE2small XORs all vectors in src together, column by column, and
// writes the result to dst: dst[i] = src[0][i] ^ src[1][i] ^ ... for
// i < len(dst). LEN is len(dst) (8(FP)), VECT is len(src) (32(FP)), and SRC
// points at the array of 24-byte slice headers, so TMP2 advances by 24 to
// reach the next vector's data pointer.
// The inner loops always read src[1], and there is no zero-length check, so
// the code relies on len(src) >= 2 and len(dst) != 0.
// The tail is trimmed from the end (1 byte, then 8 bytes at a time) until the
// remaining length is a multiple of 64; the rest is processed forward in
// 64-byte blocks with ordinary stores.
TEXT ·matrixSSE2small(SB), NOSPLIT, $0
MOVQ dst+0(FP), DST
MOVQ src+24(FP), SRC
MOVQ vec+32(FP), VECT
MOVQ len+8(FP), LEN
TESTQ $63, LEN
JNZ not_aligned
aligned:
MOVQ $0, POS
loop64b:
// Load 64 bytes of src[0]; TMP1 = VECT-2 makes next_vect run VECT-1 times.
MOVQ VECT, TMP1
SUBQ $2, TMP1
MOVQ $0, TMP2
MOVQ (SRC)(TMP2*1), TMP3
MOVQ TMP3, TMP4
MOVOU (TMP3)(POS*1), X0
MOVOU 16(TMP4)(POS*1), X1
MOVOU 32(TMP3)(POS*1), X2
MOVOU 48(TMP4)(POS*1), X3
// Fold the same 64-byte column of each following vector into X0..X3.
next_vect:
ADDQ $24, TMP2
MOVQ (SRC)(TMP2*1), TMP3
MOVQ TMP3, TMP4
MOVOU (TMP3)(POS*1), X4
MOVOU 16(TMP4)(POS*1), X5
MOVOU 32(TMP3)(POS*1), X6
MOVOU 48(TMP4)(POS*1), X7
PXOR X4, X0
PXOR X5, X1
PXOR X6, X2
PXOR X7, X3
SUBQ $1, TMP1
JGE next_vect
MOVOU X0, (DST)(POS*1)
MOVOU X1, 16(DST)(POS*1)
MOVOU X2, 32(DST)(POS*1)
MOVOU X3, 48(DST)(POS*1)
ADDQ $64, POS
CMPQ LEN, POS
JNE loop64b
RET
// XOR the last byte across all vectors; repeat until LEN is a multiple of 8.
loop_1b:
MOVQ VECT, TMP1
MOVQ $0, TMP2
MOVQ (SRC)(TMP2*1), TMP3
SUBQ $2, TMP1
MOVB -1(TMP3)(LEN*1), TMP5
next_vect_1b:
ADDQ $24, TMP2
MOVQ (SRC)(TMP2*1), TMP3
MOVB -1(TMP3)(LEN*1), TMP6
XORB TMP6, TMP5
SUBQ $1, TMP1
JGE next_vect_1b
MOVB TMP5, -1(DST)(LEN*1)
SUBQ $1, LEN
TESTQ $7, LEN
JNZ loop_1b
CMPQ LEN, $0
JE ret
TESTQ $63, LEN
JZ aligned
not_aligned:
TESTQ $7, LEN
JNE loop_1b
// TMP4 = bytes beyond the last multiple of 64 (a multiple of 8 here).
MOVQ LEN, TMP4
ANDQ $63, TMP4
// XOR the last 8 bytes across all vectors; repeat until LEN is a multiple of 64.
loop_8b:
MOVQ VECT, TMP1
MOVQ $0, TMP2
MOVQ (SRC)(TMP2*1), TMP3
SUBQ $2, TMP1
MOVQ -8(TMP3)(LEN*1), TMP5
next_vect_8b:
ADDQ $24, TMP2
MOVQ (SRC)(TMP2*1), TMP3
MOVQ -8(TMP3)(LEN*1), TMP6
XORQ TMP6, TMP5
SUBQ $1, TMP1
JGE next_vect_8b
MOVQ TMP5, -8(DST)(LEN*1)
SUBQ $8, LEN
SUBQ $8, TMP4
JG loop_8b
CMPQ LEN, $64
JGE aligned
RET
ret:
RET
// func matrixSSE2big(dst []byte, src [][]byte)
//
// matrixSSE2big computes dst[i] = src[0][i] ^ src[1][i] ^ ... for
// i < len(dst), exactly like matrixSSE2small, except that the 64-byte main
// loop writes its results with hand-encoded stores (the same byte sequence
// bytesSSE2big annotates as MOVNTDQ, i.e. non-temporal stores).
// LEN is len(dst) (8(FP)), VECT is len(src) (32(FP)); SRC points at the array
// of 24-byte slice headers. The code relies on len(src) >= 2 and
// len(dst) != 0.
// NOTE(review): MOVNTDQ requires a 16-byte-aligned memory address; nothing
// visible here checks the alignment of DST - confirm.
// NOTE(review): no SFENCE follows the non-temporal stores, unlike
// bytesAVX2big - confirm whether that is intentional.
TEXT ·matrixSSE2big(SB), NOSPLIT, $0
MOVQ dst+0(FP), DST
MOVQ src+24(FP), SRC
MOVQ vec+32(FP), VECT
MOVQ len+8(FP), LEN
TESTQ $63, LEN
JNZ not_aligned
aligned:
MOVQ $0, POS
loop64b:
// Load 64 bytes of src[0]; TMP1 = VECT-2 makes next_vect run VECT-1 times.
MOVQ VECT, TMP1
SUBQ $2, TMP1
MOVQ $0, TMP2
MOVQ (SRC)(TMP2*1), TMP3
MOVQ TMP3, TMP4
MOVOU (TMP3)(POS*1), X0
MOVOU 16(TMP4)(POS*1), X1
MOVOU 32(TMP3)(POS*1), X2
MOVOU 48(TMP4)(POS*1), X3
// Fold the same 64-byte column of each following vector into X0..X3.
next_vect:
ADDQ $24, TMP2
MOVQ (SRC)(TMP2*1), TMP3
MOVQ TMP3, TMP4
MOVOU (TMP3)(POS*1), X4
MOVOU 16(TMP4)(POS*1), X5
MOVOU 32(TMP3)(POS*1), X6
MOVOU 48(TMP4)(POS*1), X7
PXOR X4, X0
PXOR X5, X1
PXOR X6, X2
PXOR X7, X3
SUBQ $1, TMP1
JGE next_vect
// Stores of X0..X3 to DST+POS at displacements 0, 0x10, 0x20 and 0x30.
LONG $0xe70f4266; WORD $0x0304
LONG $0xe70f4266; WORD $0x034c; BYTE $0x10
LONG $0xe70f4266; WORD $0x0354; BYTE $0x20
LONG $0xe70f4266; WORD $0x035c; BYTE $0x30
ADDQ $64, POS
CMPQ LEN, POS
JNE loop64b
RET
// XOR the last byte across all vectors; repeat until LEN is a multiple of 8.
loop_1b:
MOVQ VECT, TMP1
MOVQ $0, TMP2
MOVQ (SRC)(TMP2*1), TMP3
SUBQ $2, TMP1
MOVB -1(TMP3)(LEN*1), TMP5
next_vect_1b:
ADDQ $24, TMP2
MOVQ (SRC)(TMP2*1), TMP3
MOVB -1(TMP3)(LEN*1), TMP6
XORB TMP6, TMP5
SUBQ $1, TMP1
JGE next_vect_1b
MOVB TMP5, -1(DST)(LEN*1)
SUBQ $1, LEN
TESTQ $7, LEN
JNZ loop_1b
CMPQ LEN, $0
JE ret
TESTQ $63, LEN
JZ aligned
not_aligned:
TESTQ $7, LEN
JNE loop_1b
// TMP4 = bytes beyond the last multiple of 64 (a multiple of 8 here).
MOVQ LEN, TMP4
ANDQ $63, TMP4
// XOR the last 8 bytes across all vectors; repeat until LEN is a multiple of 64.
loop_8b:
MOVQ VECT, TMP1
MOVQ $0, TMP2
MOVQ (SRC)(TMP2*1), TMP3
SUBQ $2, TMP1
MOVQ -8(TMP3)(LEN*1), TMP5
next_vect_8b:
ADDQ $24, TMP2
MOVQ (SRC)(TMP2*1), TMP3
MOVQ -8(TMP3)(LEN*1), TMP6
XORQ TMP6, TMP5
SUBQ $1, TMP1
JGE next_vect_8b
MOVQ TMP5, -8(DST)(LEN*1)
SUBQ $8, LEN
SUBQ $8, TMP4
JG loop_8b
CMPQ LEN, $64
JGE aligned
RET
ret:
RET
// func hasSSE2() bool
//
// hasSSE2 executes CPUID with EAX = 1 and returns bit 26 of EDX, which is the
// SSE2 feature flag.
TEXT ·hasSSE2(SB), NOSPLIT, $0
// EAX = 1 selects the processor info and feature bits leaf.
XORQ AX, AX
INCL AX
CPUID
// Isolate EDX bit 26 and return it as the bool result.
SHRQ $26, DX
ANDQ $1, DX
MOVB DX, ret+0(FP)
RET

49
vendor/github.com/templexxx/xor/xor.go generated vendored

@ -0,0 +1,49 @@
package xor
// SIMD extensions the package can dispatch to.
const (
none = iota
avx2
// SSE2 was first introduced by Intel with the initial version of the
// Pentium 4 in 2001, so the author assumes every amd64 CPU supports it.
sse2
)
// extension records which SIMD extension the package uses; it starts as none.
var extension = none
// Bytes XORs src0 and src1 into dst. The number of bytes processed is the
// length of the shortest of the three slices.
// It is best suited to larger inputs (> 64 bytes).
func Bytes(dst, src0, src1 []byte) {
	n := len(dst)
	if len(src0) < n {
		n = len(src0)
	}
	if len(src1) < n {
		n = len(src1)
	}
	xorBytes(dst, src0, src1, n)
}
// BytesSameLen XORs src0 and src1 into dst. All three slices must have the
// same length. It skips the length comparison done by Bytes, which saves
// time on small inputs. It is implemented via xorSrc1.
func BytesSameLen(dst, src0, src1 []byte) {
xorSrc1(dst, src0, src1)
}
// BytesSrc0 XORs len(src0) bytes of src0 and src1 into dst.
// Requires len(src1) >= len(src0) and len(dst) >= len(src0).
func BytesSrc0(dst, src0, src1 []byte) {
xorSrc0(dst, src0, src1)
}
// BytesSrc1 XORs len(src1) bytes of src0 and src1 into dst.
// Requires len(src0) >= len(src1) and len(dst) >= len(src1).
func BytesSrc1(dst, src0, src1 []byte) {
xorSrc1(dst, src0, src1)
}
// Matrix XORs all slices in src together and stores the result in dst.
// All slices must have the same, non-zero length, and len(src) must be >= 2.
func Matrix(dst []byte, src [][]byte) {
xorMatrix(dst, src)
}

120
vendor/github.com/templexxx/xor/xor_amd64.go generated vendored

@ -0,0 +1,120 @@
package xor
import "github.com/templexxx/cpufeat"
// init selects the SIMD extension once, at package load time.
func init() {
getEXT()
}
// getEXT sets the package-level extension to avx2 when cpufeat reports AVX2
// support, and to sse2 otherwise.
func getEXT() {
	if !cpufeat.X86.HasAVX2 {
		extension = sse2
		return
	}
	extension = avx2
}
// xorBytes XORs size bytes of src0 and src1 into dst using the AVX2 routines
// when that extension was selected, and the SSE2 routines otherwise.
func xorBytes(dst, src0, src1 []byte, size int) {
	if extension == avx2 {
		bytesAVX2(dst, src0, src1, size)
		return
	}
	bytesSSE2(dst, src0, src1, size)
}
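// nontmp is the size threshold in bytes: inputs larger than this use the "big" routines, which store with a non-temporal hint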
const nontmp = 8 * 1024
const avx2loopsize = 128
// bytesAVX2 picks the AVX2 routine by input size: the mini routine below
// avx2loopsize, the small routine up to and including nontmp, and the big
// routine above that.
func bytesAVX2(dst, src0, src1 []byte, size int) {
	switch {
	case size < avx2loopsize:
		bytesAVX2mini(dst, src0, src1, size)
	case size <= nontmp:
		bytesAVX2small(dst, src0, src1, size)
	default:
		bytesAVX2big(dst, src0, src1, size)
	}
}
const sse2loopsize = 64
func bytesSSE2(dst, src0, src1 []byte, size int) {
if size < sse2loopsize {
bytesSSE2mini(dst, src0, src1, size)
} else if size >= sse2loopsize && size <= nontmp {
bytesSSE2small(dst, src0, src1, size)
} else {
bytesSSE2big(dst, src0, src1, size)
}
}
// xorMatrix XORs all vectors of src into dst using the AVX2 routines when
// that extension was selected, and the SSE2 routines otherwise.
func xorMatrix(dst []byte, src [][]byte) {
	if extension == avx2 {
		matrixAVX2(dst, src)
		return
	}
	matrixSSE2(dst, src)
}
// matrixAVX2 uses the big AVX2 matrix routine when len(dst) exceeds nontmp
// and the small routine otherwise.
func matrixAVX2(dst []byte, src [][]byte) {
	if len(dst) <= nontmp {
		matrixAVX2small(dst, src)
		return
	}
	matrixAVX2big(dst, src)
}
// matrixSSE2 uses the big SSE2 matrix routine when len(dst) exceeds nontmp
// and the small routine otherwise.
func matrixSSE2(dst []byte, src [][]byte) {
	if len(dst) <= nontmp {
		matrixSSE2small(dst, src)
		return
	}
	matrixSSE2big(dst, src)
}
// Declarations of routines with no Go body; sse2_amd64.s and avx2_amd64.s
// provide the implementations for all of them except hasAVX2 (see below).

// xorSrc0 XORs len(src0) bytes of src0 and src1 into dst.
//go:noescape
func xorSrc0(dst, src0, src1 []byte)
// xorSrc1 XORs len(src1) bytes of src0 and src1 into dst.
//go:noescape
func xorSrc1(dst, src0, src1 []byte)
// bytesAVX2mini XORs size bytes using a 32-byte AVX2 main loop.
//go:noescape
func bytesAVX2mini(dst, src0, src1 []byte, size int)
// bytesAVX2big XORs size bytes using a 128-byte AVX2 main loop with
// non-temporal stores.
//go:noescape
func bytesAVX2big(dst, src0, src1 []byte, size int)
// bytesAVX2small XORs size bytes using a 128-byte AVX2 main loop.
//go:noescape
func bytesAVX2small(dst, src0, src1 []byte, size int)
// bytesSSE2mini XORs size bytes using a 16-byte SSE2 main loop.
//go:noescape
func bytesSSE2mini(dst, src0, src1 []byte, size int)
// bytesSSE2small XORs size bytes using a 64-byte SSE2 main loop.
//go:noescape
func bytesSSE2small(dst, src0, src1 []byte, size int)
// bytesSSE2big XORs size bytes using a 64-byte SSE2 main loop with
// non-temporal stores.
//go:noescape
func bytesSSE2big(dst, src0, src1 []byte, size int)
// matrixAVX2small XORs all vectors of src into dst (AVX2, ordinary stores).
//go:noescape
func matrixAVX2small(dst []byte, src [][]byte)
// matrixAVX2big XORs all vectors of src into dst (AVX2, non-temporal stores).
//go:noescape
func matrixAVX2big(dst []byte, src [][]byte)
// matrixSSE2small XORs all vectors of src into dst (SSE2, ordinary stores).
//go:noescape
func matrixSSE2small(dst []byte, src [][]byte)
// matrixSSE2big XORs all vectors of src into dst (SSE2, non-temporal stores).
//go:noescape
func matrixSSE2big(dst []byte, src [][]byte)
// NOTE(review): no hasAVX2 implementation appears in avx2_amd64.s or
// sse2_amd64.s as vendored, and getEXT consults cpufeat instead - confirm
// whether this declaration is still needed.
//go:noescape
func hasAVX2() bool
// hasSSE2 reports the SSE2 feature bit returned by CPUID.
//go:noescape
func hasSSE2() bool

19
vendor/github.com/templexxx/xor/xor_other.go generated vendored

@ -0,0 +1,19 @@
// +build !amd64 noasm
package xor
// xorBytes is the variant built when the target is not amd64 or the noasm
// tag is set (see this file's build constraint); it forwards all arguments
// to bytesNoSIMD.
func xorBytes(dst, src0, src1 []byte, size int) {
bytesNoSIMD(dst, src0, src1, size)
}
// xorMatrix is the variant built when the target is not amd64 or the noasm
// tag is set (see this file's build constraint); it forwards dst and src to
// matrixNoSIMD.
func xorMatrix(dst []byte, src [][]byte) {
matrixNoSIMD(dst, src)
}
// xorSrc0 is the non-assembly variant: it calls bytesNoSIMD with
// len(src0) as the byte count.
func xorSrc0(dst, src0, src1 []byte) {
bytesNoSIMD(dst, src0, src1, len(src0))
}
// xorSrc1 is the non-assembly variant: it calls bytesNoSIMD with
// len(src1) as the byte count.
func xorSrc1(dst, src0, src1 []byte) {
bytesNoSIMD(dst, src0, src1, len(src1))
}

201
vendor/github.com/tjfoc/gmsm/LICENSE generated vendored

@ -0,0 +1,201 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "{}"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright {yyyy} {name of copyright owner}
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

291
vendor/github.com/tjfoc/gmsm/sm4/sm4.go generated vendored

@ -0,0 +1,291 @@
/*
Copyright Suzhou Tongji Fintech Research Institute 2017 All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package sm4
import (
"crypto/cipher"
"crypto/rand"
"crypto/x509"
"encoding/pem"
"errors"
"io/ioutil"
"os"
"strconv"
)
// BlockSize is the SM4 block size in bytes. NewCipher also requires keys of
// exactly this length.
const BlockSize = 16
// SM4Key is a raw SM4 key held as a byte slice.
type SM4Key []byte
// KeySizeError is the error NewCipher returns for a key of the wrong length;
// its value is the offending length.
type KeySizeError int
// Sm4Cipher is an instance of SM4 encryption.
// NewCipher fills all three fields; Encrypt and Decrypt hand them to
// cryptBlock on every call.
type Sm4Cipher struct {
subkeys []uint32 // round keys produced by generateSubKeys (32 entries)
block1 []uint32 // 4-word scratch buffer passed to cryptBlock as b
block2 []byte // 16-byte scratch buffer passed to cryptBlock as r
}
// fk is an SM4 key parameter: the four constants XORed into the four key
// words at the start of generateSubKeys.
var fk = [4]uint32{
0xa3b1bac6, 0x56aa3350, 0x677d9197, 0xb27022dc,
}
// ck is an SM4 key parameter: the 32 per-round constants that
// generateSubKeys passes to feistel0, one per derived round key.
var ck = [32]uint32{
0x00070e15, 0x1c232a31, 0x383f464d, 0x545b6269,
0x70777e85, 0x8c939aa1, 0xa8afb6bd, 0xc4cbd2d9,
0xe0e7eef5, 0xfc030a11, 0x181f262d, 0x343b4249,
0x50575e65, 0x6c737a81, 0x888f969d, 0xa4abb2b9,
0xc0c7ced5, 0xdce3eaf1, 0xf8ff060d, 0x141b2229,
0x30373e45, 0x4c535a61, 0x686f767d, 0x848b9299,
0xa0a7aeb5, 0xbcc3cad1, 0xd8dfe6ed, 0xf4fb0209,
0x10171e25, 0x2c333a41, 0x484f565d, 0x646b7279,
}
// sbox is the 256-entry byte substitution table; p looks up each of the
// four bytes of its input word in it.
var sbox = [256]uint8{
0xd6, 0x90, 0xe9, 0xfe, 0xcc, 0xe1, 0x3d, 0xb7, 0x16, 0xb6, 0x14, 0xc2, 0x28, 0xfb, 0x2c, 0x05,
0x2b, 0x67, 0x9a, 0x76, 0x2a, 0xbe, 0x04, 0xc3, 0xaa, 0x44, 0x13, 0x26, 0x49, 0x86, 0x06, 0x99,
0x9c, 0x42, 0x50, 0xf4, 0x91, 0xef, 0x98, 0x7a, 0x33, 0x54, 0x0b, 0x43, 0xed, 0xcf, 0xac, 0x62,
0xe4, 0xb3, 0x1c, 0xa9, 0xc9, 0x08, 0xe8, 0x95, 0x80, 0xdf, 0x94, 0xfa, 0x75, 0x8f, 0x3f, 0xa6,
0x47, 0x07, 0xa7, 0xfc, 0xf3, 0x73, 0x17, 0xba, 0x83, 0x59, 0x3c, 0x19, 0xe6, 0x85, 0x4f, 0xa8,
0x68, 0x6b, 0x81, 0xb2, 0x71, 0x64, 0xda, 0x8b, 0xf8, 0xeb, 0x0f, 0x4b, 0x70, 0x56, 0x9d, 0x35,
0x1e, 0x24, 0x0e, 0x5e, 0x63, 0x58, 0xd1, 0xa2, 0x25, 0x22, 0x7c, 0x3b, 0x01, 0x21, 0x78, 0x87,
0xd4, 0x00, 0x46, 0x57, 0x9f, 0xd3, 0x27, 0x52, 0x4c, 0x36, 0x02, 0xe7, 0xa0, 0xc4, 0xc8, 0x9e,
0xea, 0xbf, 0x8a, 0xd2, 0x40, 0xc7, 0x38, 0xb5, 0xa3, 0xf7, 0xf2, 0xce, 0xf9, 0x61, 0x15, 0xa1,
0xe0, 0xae, 0x5d, 0xa4, 0x9b, 0x34, 0x1a, 0x55, 0xad, 0x93, 0x32, 0x30, 0xf5, 0x8c, 0xb1, 0xe3,
0x1d, 0xf6, 0xe2, 0x2e, 0x82, 0x66, 0xca, 0x60, 0xc0, 0x29, 0x23, 0xab, 0x0d, 0x53, 0x4e, 0x6f,
0xd5, 0xdb, 0x37, 0x45, 0xde, 0xfd, 0x8e, 0x2f, 0x03, 0xff, 0x6a, 0x72, 0x6d, 0x6c, 0x5b, 0x51,
0x8d, 0x1b, 0xaf, 0x92, 0xbb, 0xdd, 0xbc, 0x7f, 0x11, 0xd9, 0x5c, 0x41, 0x1f, 0x10, 0x5a, 0xd8,
0x0a, 0xc1, 0x31, 0x88, 0xa5, 0xcd, 0x7b, 0xbd, 0x2d, 0x74, 0xd0, 0x12, 0xb8, 0xe5, 0xb4, 0xb0,
0x89, 0x69, 0x97, 0x4a, 0x0c, 0x96, 0x77, 0x7e, 0x65, 0xb9, 0xf1, 0x09, 0xc5, 0x6e, 0xc6, 0x84,
0x18, 0xf0, 0x7d, 0xec, 0x3a, 0xdc, 0x4d, 0x20, 0x79, 0xee, 0x5f, 0x3e, 0xd7, 0xcb, 0x39, 0x48,
}
// rl rotates x left by i bits, with i taken modulo 32.
// A rotation of 0 (or any multiple of 32) returns x unchanged, because the
// complementary right shift by 32 yields 0 for a uint32.
func rl(x uint32, i uint8) uint32 {
	n := i % 32
	return (x << n) | (x >> (32 - n))
}
// l0 is the linear transform used by feistel0 (key schedule): b XORed with
// its left rotations by 13 and 23 bits.
func l0(b uint32) uint32 {
	r13 := rl(b, 13)
	r23 := rl(b, 23)
	return b ^ r13 ^ r23
}
// l1 is the linear transform used by feistel1 and cryptBlock: b XORed with
// its left rotations by 2, 10, 18 and 24 bits.
func l1(b uint32) uint32 {
	r2 := rl(b, 2)
	r10 := rl(b, 10)
	r18 := rl(b, 18)
	r24 := rl(b, 24)
	return b ^ r2 ^ r10 ^ r18 ^ r24
}
// feistel0 is the round function of the key schedule: x0 XORed with
// l0(p(x1 ^ x2 ^ x3 ^ rk)).
func feistel0(x0, x1, x2, x3, rk uint32) uint32 {
	mixed := x1 ^ x2 ^ x3 ^ rk
	return x0 ^ l0(p(mixed))
}
// feistel1 is the data round function: x0 XORed with
// l1(p(x1 ^ x2 ^ x3 ^ rk)).
func feistel1(x0, x1, x2, x3, rk uint32) uint32 {
	mixed := x1 ^ x2 ^ x3 ^ rk
	return x0 ^ l1(p(mixed))
}
// p is the non-linear transformation τ(.): each of the four bytes of a is
// replaced by its sbox entry, keeping its position in the word.
func p(a uint32) uint32 {
	b0 := uint32(sbox[a>>24])
	b1 := uint32(sbox[(a>>16)&0xff])
	b2 := uint32(sbox[(a>>8)&0xff])
	b3 := uint32(sbox[a&0xff])
	// The four substituted bytes occupy disjoint bit ranges, so OR and XOR
	// combine them identically.
	return b0<<24 | b1<<16 | b2<<8 | b3
}
/*
func permuteInitialBlock(block []byte) []uint32 {
b := make([]uint32, 4, 4)
for i := 0; i < 4; i++ {
b[i] = (uint32(block[i*4]) << 24) | (uint32(block[i*4+1]) << 16) |
(uint32(block[i*4+2]) << 8) | (uint32(block[i*4+3]))
}
return b
}
func permuteFinalBlock(block []uint32) []byte {
b := make([]byte, 16, 16)
for i := 0; i < 4; i++ {
b[i*4] = uint8(block[i] >> 24)
b[i*4+1] = uint8(block[i] >> 16)
b[i*4+2] = uint8(block[i] >> 8)
b[i*4+3] = uint8(block[i])
}
return b
}
func cryptBlock(subkeys []uint32, dst, src []byte, decrypt bool) {
var tm uint32
b := permuteInitialBlock(src)
for i := 0; i < 32; i++ {
if decrypt {
tm = feistel1(b[0], b[1], b[2], b[3], subkeys[31-i])
} else {
tm = feistel1(b[0], b[1], b[2], b[3], subkeys[i])
}
b[0], b[1], b[2], b[3] = b[1], b[2], b[3], tm
}
b[0], b[1], b[2], b[3] = b[3], b[2], b[1], b[0]
copy(dst, permuteFinalBlock(b))
}
*/
// permuteInitialBlock loads the first 16 bytes of block into b[0..3] as four
// big-endian 32-bit words. It panics if block has fewer than 16 bytes or b
// fewer than 4 elements.
func permuteInitialBlock(b []uint32, block []byte) {
	for i, off := 0, 0; i < 4; i, off = i+1, off+4 {
		word := uint32(block[off]) << 24
		word |= uint32(block[off+1]) << 16
		word |= uint32(block[off+2]) << 8
		word |= uint32(block[off+3])
		b[i] = word
	}
}
// permuteFinalBlock stores block[0..3] into b[0..15] as four big-endian
// 32-bit words. It panics if block has fewer than 4 elements or b fewer
// than 16 bytes.
func permuteFinalBlock(b []byte, block []uint32) {
	for i := 0; i < 4; i++ {
		word := block[i]
		// Emit the most significant byte first.
		for j, shift := 0, uint(24); j < 4; j, shift = j+1, shift-8 {
			b[4*i+j] = byte(word >> shift)
		}
	}
}
// cryptBlock runs the 32 SM4 rounds over one 16-byte block.
//
// subkeys are the 32 round keys; they are consumed in order for encryption
// and in reverse order when decrypt is true. b (4 words) and r (16 bytes)
// are caller-supplied scratch buffers that are overwritten. The input is
// read from src and the result is copied into dst.
func cryptBlock(subkeys []uint32, b []uint32, r []byte, dst, src []byte, decrypt bool) {
	permuteInitialBlock(b, src)
	for round := 0; round < 32; round++ {
		k := round
		if decrypt {
			k = 31 - round
		}
		// Same computation as feistel1(b[0], b[1], b[2], b[3], subkeys[k]),
		// written out inline.
		next := b[0] ^ l1(p(b[1]^b[2]^b[3]^subkeys[k]))
		b[0], b[1], b[2], b[3] = b[1], b[2], b[3], next
	}
	// Final reversal of the four state words.
	b[0], b[1], b[2], b[3] = b[3], b[2], b[1], b[0]
	permuteFinalBlock(r, b)
	copy(dst, r)
}
// generateSubKeys expands a 16-byte key into the 32 round keys.
// The key is loaded as four big-endian words, each XORed with the matching
// fk constant; every round key is then feistel0 of the current four words
// and ck[i], and is shifted into the state for the next round.
// It panics (inside permuteInitialBlock) if key has fewer than 16 bytes.
func generateSubKeys(key []byte) []uint32 {
	state := make([]uint32, 4)
	permuteInitialBlock(state, key)
	for i := range state {
		state[i] ^= fk[i]
	}
	roundKeys := make([]uint32, 32)
	for i := range roundKeys {
		rk := feistel0(state[0], state[1], state[2], state[3], ck[i])
		roundKeys[i] = rk
		state[0], state[1], state[2], state[3] = state[1], state[2], state[3], rk
	}
	return roundKeys
}
// EncryptBlock encrypts the 16-byte block in src with key and writes the
// result to dst. The round keys and scratch buffers are rebuilt on every
// call; no key-length check is performed here.
func EncryptBlock(key SM4Key, dst, src []byte) {
	roundKeys := generateSubKeys(key)
	words := make([]uint32, 4)
	scratch := make([]byte, 16)
	cryptBlock(roundKeys, words, scratch, dst, src, false)
}
// DecryptBlock decrypts the 16-byte block in src with key and writes the
// result to dst. The round keys and scratch buffers are rebuilt on every
// call; no key-length check is performed here.
func DecryptBlock(key SM4Key, dst, src []byte) {
	roundKeys := generateSubKeys(key)
	words := make([]uint32, 4)
	scratch := make([]byte, 16)
	cryptBlock(roundKeys, words, scratch, dst, src, true)
}
// ReadKeyFromMem extracts an SM4 key from PEM-encoded data.
//
// Two block types are accepted: an unencrypted "SM4 KEY" block, whose bytes
// are returned as-is, and an encrypted "SM4 ENCRYPTED KEY" block, which is
// decrypted with pwd. An error is returned when data contains no PEM block,
// when the block type does not match, when an encrypted block is given a
// nil pwd, or when decryption fails.
func ReadKeyFromMem(data []byte, pwd []byte) (SM4Key, error) {
	block, _ := pem.Decode(data)
	// pem.Decode returns a nil block when no PEM data is found; without
	// this check the dereferences below would panic on malformed input.
	if block == nil {
		return nil, errors.New("SM4: no PEM data found")
	}
	if !x509.IsEncryptedPEMBlock(block) {
		if block.Type != "SM4 KEY" {
			return nil, errors.New("SM4: unknown type")
		}
		return block.Bytes, nil
	}
	if block.Type != "SM4 ENCRYPTED KEY" {
		return nil, errors.New("SM4: unknown type")
	}
	if pwd == nil {
		return nil, errors.New("SM4: need passwd")
	}
	key, err := x509.DecryptPEMBlock(block, pwd)
	if err != nil {
		return nil, err
	}
	return key, nil
}
// ReadKeyFromPem reads the whole file named FileName and parses its contents
// with ReadKeyFromMem, passing pwd through unchanged.
// A read failure is returned as-is; otherwise the result is whatever
// ReadKeyFromMem returns.
func ReadKeyFromPem(FileName string, pwd []byte) (SM4Key, error) {
data, err := ioutil.ReadFile(FileName)
if err != nil {
return nil, err
}
return ReadKeyFromMem(data, pwd)
}
// WriteKeytoMem PEM-encodes key and returns the encoded bytes.
// With a nil pwd the key is written as a plain "SM4 KEY" block; otherwise it
// is encrypted with pwd using AES-256 and written as an
// "SM4 ENCRYPTED KEY" block. The only error source is the encryption step.
func WriteKeytoMem(key SM4Key, pwd []byte) ([]byte, error) {
	if pwd == nil {
		plain := pem.Block{Type: "SM4 KEY", Bytes: key}
		return pem.EncodeToMemory(&plain), nil
	}
	encrypted, err := x509.EncryptPEMBlock(rand.Reader,
		"SM4 ENCRYPTED KEY", key, pwd, x509.PEMCipherAES256)
	if err != nil {
		return nil, err
	}
	return pem.EncodeToMemory(encrypted), nil
}
// WriteKeyToPem PEM-encodes key and writes it to the file named FileName,
// creating or truncating the file.
//
// With a nil pwd the key is written as a plain "SM4 KEY" block; otherwise it
// is encrypted with pwd using AES-256 and written as an
// "SM4 ENCRYPTED KEY" block. It returns (true, nil) on success and
// (false, err) when encryption, file creation, encoding or closing fails.
func WriteKeyToPem(FileName string, key SM4Key, pwd []byte) (bool, error) {
	var block *pem.Block
	if pwd != nil {
		var err error
		block, err = x509.EncryptPEMBlock(rand.Reader,
			"SM4 ENCRYPTED KEY", key, pwd, x509.PEMCipherAES256)
		if err != nil {
			return false, err
		}
	} else {
		block = &pem.Block{
			Type:  "SM4 KEY",
			Bytes: key,
		}
	}
	file, err := os.Create(FileName)
	if err != nil {
		return false, err
	}
	if err := pem.Encode(file, block); err != nil {
		// Report the encode/write failure; the earlier code returned a nil
		// error here, hiding it from the caller. The close error is
		// secondary at this point and deliberately ignored.
		file.Close()
		return false, err
	}
	// A failed Close on a file we just wrote can mean the data never
	// reached the disk, so surface it instead of dropping it.
	if err := file.Close(); err != nil {
		return false, err
	}
	return true, nil
}
// Error implements the error interface, reporting the rejected key length
// in decimal.
func (k KeySizeError) Error() string {
	return "SM4: invalid key size " + strconv.FormatInt(int64(k), 10)
}
// NewCipher returns a cipher.Block implementing SM4 with the given key.
// The key must be exactly BlockSize (16) bytes long; any other length yields
// a KeySizeError carrying that length. The round keys are derived once here,
// and the scratch buffers used by Encrypt/Decrypt are allocated alongside.
func NewCipher(key []byte) (cipher.Block, error) {
	if n := len(key); n != BlockSize {
		return nil, KeySizeError(n)
	}
	return &Sm4Cipher{
		subkeys: generateSubKeys(key),
		block1:  make([]uint32, 4),
		block2:  make([]byte, 16),
	}, nil
}
// BlockSize returns the package constant BlockSize (16), the cipher's block
// size in bytes.
func (c *Sm4Cipher) BlockSize() int {
return BlockSize
}
// Encrypt encrypts one block from src into dst by calling cryptBlock with
// decrypt=false.
// The call reuses the cipher's block1/block2 scratch buffers, which
// cryptBlock overwrites, so concurrent calls on the same Sm4Cipher would
// race on them.
func (c *Sm4Cipher) Encrypt(dst, src []byte) {
cryptBlock(c.subkeys, c.block1, c.block2, dst, src, false)
}
// Decrypt decrypts one block from src into dst by calling cryptBlock with
// decrypt=true.
// The call reuses the cipher's block1/block2 scratch buffers, which
// cryptBlock overwrites, so concurrent calls on the same Sm4Cipher would
// race on them.
func (c *Sm4Cipher) Decrypt(dst, src []byte) {
cryptBlock(c.subkeys, c.block1, c.block2, dst, src, true)
}
Loading…
Cancel
Save