diff --git a/common/crypto/benchmark_test.go b/common/crypto/benchmark_test.go new file mode 100644 index 00000000..14396b5f --- /dev/null +++ b/common/crypto/benchmark_test.go @@ -0,0 +1,49 @@ +package crypto_test + +import ( + "crypto/cipher" + "testing" + + . "github.com/v2ray/v2ray-core/common/crypto" +) + +const benchSize = 1024 * 1024 + +func benchmarkStream(b *testing.B, c cipher.Stream) { + b.SetBytes(benchSize) + input := make([]byte, benchSize) + output := make([]byte, benchSize) + for i := 0; i < b.N; i++ { + c.XORKeyStream(output, input) + } +} + +func BenchmarkChaCha20(b *testing.B) { + key := make([]byte, 32) + nonce := make([]byte, 8) + c := NewChaCha20Stream(key, nonce) + benchmarkStream(b, c) +} + +func BenchmarkChaCha20IETF(b *testing.B) { + key := make([]byte, 32) + nonce := make([]byte, 12) + c := NewChaCha20Stream(key, nonce) + benchmarkStream(b, c) +} + +func BenchmarkAESEncryption(b *testing.B) { + key := make([]byte, 32) + iv := make([]byte, 16) + c, _ := NewAesEncryptionStream(key, iv) + + benchmarkStream(b, c) +} + +func BenchmarkAESDecryption(b *testing.B) { + key := make([]byte, 32) + iv := make([]byte, 16) + c, _ := NewAesDecryptionStream(key, iv) + + benchmarkStream(b, c) +} diff --git a/common/crypto/chacha20.go b/common/crypto/chacha20.go new file mode 100644 index 00000000..699a00fa --- /dev/null +++ b/common/crypto/chacha20.go @@ -0,0 +1,11 @@ +package crypto + +import ( + "crypto/cipher" + + "github.com/v2ray/v2ray-core/common/crypto/internal" +) + +func NewChaCha20Stream(key []byte, iv []byte) cipher.Stream { + return internal.NewChaCha20Stream(key, iv, 20) +} diff --git a/common/crypto/chacha20_test.go b/common/crypto/chacha20_test.go new file mode 100644 index 00000000..1f16ec95 --- /dev/null +++ b/common/crypto/chacha20_test.go @@ -0,0 +1,57 @@ +package crypto_test + +import ( + "encoding/hex" + "testing" + + . 
"github.com/v2ray/v2ray-core/common/crypto" + v2testing "github.com/v2ray/v2ray-core/testing" + "github.com/v2ray/v2ray-core/testing/assert" +) + +func mustDecodeHex(s string) []byte { + b, err := hex.DecodeString(s) + if err != nil { + panic(err) + } + return b +} + +func TestChaCha20Stream(t *testing.T) { + v2testing.Current(t) + + var cases = []struct { + key []byte + iv []byte + output []byte + }{ + { + key: mustDecodeHex("0000000000000000000000000000000000000000000000000000000000000000"), + iv: mustDecodeHex("0000000000000000"), + output: mustDecodeHex("76b8e0ada0f13d90405d6ae55386bd28bdd219b8a08ded1aa836efcc8b770dc7" + + "da41597c5157488d7724e03fb8d84a376a43b8f41518a11cc387b669b2ee6586" + + "9f07e7be5551387a98ba977c732d080dcb0f29a048e3656912c6533e32ee7aed" + + "29b721769ce64e43d57133b074d839d531ed1f28510afb45ace10a1f4b794d6f"), + }, + { + key: mustDecodeHex("5555555555555555555555555555555555555555555555555555555555555555"), + iv: mustDecodeHex("5555555555555555"), + output: mustDecodeHex("bea9411aa453c5434a5ae8c92862f564396855a9ea6e22d6d3b50ae1b3663311" + + "a4a3606c671d605ce16c3aece8e61ea145c59775017bee2fa6f88afc758069f7" + + "e0b8f676e644216f4d2a3422d7fa36c6c4931aca950e9da42788e6d0b6d1cd83" + + "8ef652e97b145b14871eae6c6804c7004db5ac2fce4c68c726d004b10fcaba86"), + }, + { + key: mustDecodeHex("0000000000000000000000000000000000000000000000000000000000000000"), + iv: mustDecodeHex("000000000000000000000000"), + output: mustDecodeHex("76b8e0ada0f13d90405d6ae55386bd28bdd219b8a08ded1aa836efcc8b770dc7da41597c5157488d7724e03fb8d84a376a43b8f41518a11cc387b669b2ee6586"), + }, + } + for _, c := range cases { + s := NewChaCha20Stream(c.key, c.iv) + input := make([]byte, len(c.output)) + actualOutout := make([]byte, len(c.output)) + s.XORKeyStream(actualOutout, input) + assert.Bytes(c.output).Equals(actualOutout) + } +} diff --git a/common/crypto/internal/chacha.go b/common/crypto/internal/chacha.go new file mode 100644 index 00000000..7624248f --- /dev/null +++ 
// ---------------------------------------------------------------------------
// File: common/crypto/internal/chacha.go (package internal)
// Uses "encoding/binary".
// ---------------------------------------------------------------------------

//go:generate go run chacha_core_gen.go

const (
	wordSize  = 4                    // the size of ChaCha20's words, in bytes
	stateSize = 16                   // the size of ChaCha20's state, in words
	blockSize = stateSize * wordSize // the size of ChaCha20's block, in bytes
	keySize   = 32                   // the number of key bytes loaded into the state
)

// ChaCha20Stream is a ChaCha keystream generator whose XORKeyStream method has
// the shape required by crypto/cipher's Stream interface. It is not safe for
// concurrent use: every call mutates the counter and the buffered block.
type ChaCha20Stream struct {
	state       [stateSize]uint32 // the state as an array of 16 32-bit words
	block       [blockSize]byte   // the current keystream block (64 bytes)
	offset      int               // the number of bytes of block already used
	rounds      int               // the number of rounds per block
	wideCounter bool              // true when state[12..13] form a 64-bit counter
}

// NewChaCha20Stream builds a stream from key, nonce and a round count.
//
// The first 32 bytes of key are loaded as eight little-endian words; extra
// bytes are ignored and a shorter key panics with "bad key length".
//
// The nonce length selects the state layout:
//   - 8 bytes: the nonce fills words 14-15, leaving words 12-13 as a 64-bit
//     block counter (the layout of the original ChaCha20 design);
//   - 12 bytes: the nonce fills words 13-15, leaving word 12 as a 32-bit
//     block counter (the IETF layout).
//
// Any other nonce length panics with "bad nonce length".
//
// rounds is the total number of rounds per block; the core processes them in
// pairs, so callers pass an even number such as 20.
func NewChaCha20Stream(key []byte, nonce []byte, rounds int) *ChaCha20Stream {
	if len(key) < keySize {
		// Fail with a descriptive message instead of a slice-bounds panic.
		panic("bad key length")
	}

	s := new(ChaCha20Stream)
	// the magic constants for 256-bit keys ("expand 32-byte k")
	s.state[0] = 0x61707865
	s.state[1] = 0x3320646e
	s.state[2] = 0x79622d32
	s.state[3] = 0x6b206574

	for i := 0; i < keySize/wordSize; i++ {
		s.state[i+4] = binary.LittleEndian.Uint32(key[i*wordSize : i*wordSize+wordSize])
	}

	switch len(nonce) {
	case 8:
		s.state[14] = binary.LittleEndian.Uint32(nonce[0:4])
		s.state[15] = binary.LittleEndian.Uint32(nonce[4:8])
		s.wideCounter = true
	case 12:
		s.state[13] = binary.LittleEndian.Uint32(nonce[0:4])
		s.state[14] = binary.LittleEndian.Uint32(nonce[4:8])
		s.state[15] = binary.LittleEndian.Uint32(nonce[8:12])
	default:
		panic("bad nonce length")
	}

	s.rounds = rounds
	// Pre-compute the first keystream block so XORKeyStream can start at once.
	s.advance()
	return s
}

// XORKeyStream XORs each byte of src with the next keystream byte and stores
// the result in dst. dst and src may be the same slice. It panics, before
// touching dst or consuming any keystream, when dst is shorter than src.
//
// Input is consumed in runs of at most 64 bytes minus whatever part of the
// current block was used by earlier calls, so sizes that are multiples of 64
// give the best throughput.
func (s *ChaCha20Stream) XORKeyStream(dst, src []byte) {
	if len(dst) < len(src) {
		panic("chacha20: output smaller than input")
	}
	dst = dst[:len(src)]

	for len(src) > 0 {
		// Use what is left of the current block, capped at the input size.
		n := blockSize - s.offset
		if n > len(src) {
			n = len(src)
		}

		keystream := s.block[s.offset : s.offset+n]
		in, out := src[:n], dst[:n]
		for i, k := range keystream {
			out[i] = in[i] ^ k
		}

		s.offset += n
		if s.offset == blockSize {
			s.advance()
		}
		src, dst = src[n:], dst[n:]
	}
}

// advance fills s.block with the keystream block for the current counter,
// rewinds the offset and steps the counter.
//
// With an 8-byte nonce the counter is 64 bits wide, so a wrap of word 12 is
// carried into word 13. With a 12-byte nonce word 13 belongs to the nonce and
// must not be touched; the 32-bit counter then wraps after 2^32 blocks
// (256 GiB), and callers must re-key before that point.
func (s *ChaCha20Stream) advance() {
	ChaCha20Block(&s.state, s.block[:], s.rounds)

	s.offset = 0
	s.state[12]++
	if s.state[12] == 0 && s.wideCounter {
		s.state[13]++
	}
}

// ---------------------------------------------------------------------------
// File: common/crypto/internal/chacha_core.go (package internal)
// Generated by chacha_core_gen.go: regenerate with `go generate` rather than
// editing by hand. Uses "encoding/binary".
// ---------------------------------------------------------------------------

// ChaCha20Block runs the ChaCha core over the state s for the given number of
// rounds (processed two at a time) and writes the 64-byte keystream block,
// i.e. the initial state plus the permuted state as little-endian words, to
// out. The state itself is not modified.
func ChaCha20Block(s *[16]uint32, out []byte, rounds int) {
	var x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 = s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[8], s[9], s[10], s[11], s[12], s[13], s[14], s[15]
	for i := 0; i < rounds; i += 2 {
		var x uint32

		// Column round.
		x0 += x4
		x = x12 ^ x0
		x12 = (x << 16) | (x >> (32 - 16))
		x8 += x12
		x = x4 ^ x8
		x4 = (x << 12) | (x >> (32 - 12))
		x0 += x4
		x = x12 ^ x0
		x12 = (x << 8) | (x >> (32 - 8))
		x8 += x12
		x = x4 ^ x8
		x4 = (x << 7) | (x >> (32 - 7))

		x1 += x5
		x = x13 ^ x1
		x13 = (x << 16) | (x >> (32 - 16))
		x9 += x13
		x = x5 ^ x9
		x5 = (x << 12) | (x >> (32 - 12))
		x1 += x5
		x = x13 ^ x1
		x13 = (x << 8) | (x >> (32 - 8))
		x9 += x13
		x = x5 ^ x9
		x5 = (x << 7) | (x >> (32 - 7))

		x2 += x6
		x = x14 ^ x2
		x14 = (x << 16) | (x >> (32 - 16))
		x10 += x14
		x = x6 ^ x10
		x6 = (x << 12) | (x >> (32 - 12))
		x2 += x6
		x = x14 ^ x2
		x14 = (x << 8) | (x >> (32 - 8))
		x10 += x14
		x = x6 ^ x10
		x6 = (x << 7) | (x >> (32 - 7))

		x3 += x7
		x = x15 ^ x3
		x15 = (x << 16) | (x >> (32 - 16))
		x11 += x15
		x = x7 ^ x11
		x7 = (x << 12) | (x >> (32 - 12))
		x3 += x7
		x = x15 ^ x3
		x15 = (x << 8) | (x >> (32 - 8))
		x11 += x15
		x = x7 ^ x11
		x7 = (x << 7) | (x >> (32 - 7))

		// Diagonal round.
		x0 += x5
		x = x15 ^ x0
		x15 = (x << 16) | (x >> (32 - 16))
		x10 += x15
		x = x5 ^ x10
		x5 = (x << 12) | (x >> (32 - 12))
		x0 += x5
		x = x15 ^ x0
		x15 = (x << 8) | (x >> (32 - 8))
		x10 += x15
		x = x5 ^ x10
		x5 = (x << 7) | (x >> (32 - 7))

		x1 += x6
		x = x12 ^ x1
		x12 = (x << 16) | (x >> (32 - 16))
		x11 += x12
		x = x6 ^ x11
		x6 = (x << 12) | (x >> (32 - 12))
		x1 += x6
		x = x12 ^ x1
		x12 = (x << 8) | (x >> (32 - 8))
		x11 += x12
		x = x6 ^ x11
		x6 = (x << 7) | (x >> (32 - 7))

		x2 += x7
		x = x13 ^ x2
		x13 = (x << 16) | (x >> (32 - 16))
		x8 += x13
		x = x7 ^ x8
		x7 = (x << 12) | (x >> (32 - 12))
		x2 += x7
		x = x13 ^ x2
		x13 = (x << 8) | (x >> (32 - 8))
		x8 += x13
		x = x7 ^ x8
		x7 = (x << 7) | (x >> (32 - 7))

		x3 += x4
		x = x14 ^ x3
		x14 = (x << 16) | (x >> (32 - 16))
		x9 += x14
		x = x4 ^ x9
		x4 = (x << 12) | (x >> (32 - 12))
		x3 += x4
		x = x14 ^ x3
		x14 = (x << 8) | (x >> (32 - 8))
		x9 += x14
		x = x4 ^ x9
		x4 = (x << 7) | (x >> (32 - 7))
	}
	binary.LittleEndian.PutUint32(out[0:4], s[0]+x0)
	binary.LittleEndian.PutUint32(out[4:8], s[1]+x1)
	binary.LittleEndian.PutUint32(out[8:12], s[2]+x2)
	binary.LittleEndian.PutUint32(out[12:16], s[3]+x3)
	binary.LittleEndian.PutUint32(out[16:20], s[4]+x4)
	binary.LittleEndian.PutUint32(out[20:24], s[5]+x5)
	binary.LittleEndian.PutUint32(out[24:28], s[6]+x6)
	binary.LittleEndian.PutUint32(out[28:32], s[7]+x7)
	binary.LittleEndian.PutUint32(out[32:36], s[8]+x8)
	binary.LittleEndian.PutUint32(out[36:40], s[9]+x9)
	binary.LittleEndian.PutUint32(out[40:44], s[10]+x10)
	binary.LittleEndian.PutUint32(out[44:48], s[11]+x11)
	binary.LittleEndian.PutUint32(out[48:52], s[12]+x12)
	binary.LittleEndian.PutUint32(out[52:56], s[13]+x13)
	binary.LittleEndian.PutUint32(out[56:60], s[14]+x14)
	binary.LittleEndian.PutUint32(out[60:64], s[15]+x15)
}

// ---------------------------------------------------------------------------
// File: common/crypto/internal/chacha_core_gen.go (package main; the original
// file is guarded by the "generate" build constraint and is run through the
// go:generate directive in chacha.go). Uses "fmt", "log" and "os".
// ---------------------------------------------------------------------------

// fileHeader opens the generated file. The banner follows the
// "Code generated ... DO NOT EDIT." convention recognised by Go tooling, and
// the blank line after it keeps the banner from becoming the package doc
// comment.
const fileHeader = "// Code generated by chacha_core_gen.go; DO NOT EDIT.\n" +
	"\n" +
	"package internal\n" +
	"\n" +
	"import \"encoding/binary\"\n" +
	"\n"

// blockPrologue is the text emitted before the unrolled quarter rounds.
const blockPrologue = `
// ChaCha20Block runs the ChaCha core over s and writes one 64-byte keystream block to out.
func ChaCha20Block(s *[16]uint32, out []byte, rounds int) {
	var x0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12,x13,x14,x15 = s[0],s[1],s[2],s[3],s[4],s[5],s[6],s[7],s[8],s[9],s[10],s[11],s[12],s[13],s[14],s[15]
	for i := 0; i < rounds; i+=2 {
		var x uint32`

// quarterRounds lists the state-word indices of the eight quarter rounds that
// make up one double round: four columns followed by four diagonals.
var quarterRounds = [...][4]int{
	{0, 4, 8, 12},
	{1, 5, 9, 13},
	{2, 6, 10, 14},
	{3, 7, 11, 15},
	{0, 5, 10, 15},
	{1, 6, 11, 12},
	{2, 7, 8, 13},
	{3, 4, 9, 14},
}

// writeQuarterRound emits the twelve statements of one quarter round on the
// words xa, xb, xc and xd, using x as the scratch variable. It returns the
// first write error, if any.
func writeQuarterRound(file *os.File, a, b, c, d int) error {
	const (
		add    = "x%d+=x%d\n"
		xor    = "x=x%d^x%d\n"
		rotate = "x%d=(x << %d) | (x >> (32 - %d))\n"
	)

	// Each step is: sum += addend; x = target ^ sum; target = rotl(x, rot).
	steps := [...]struct{ sum, addend, target, rot int }{
		{a, b, d, 16},
		{c, d, b, 12},
		{a, b, d, 8},
		{c, d, b, 7},
	}
	for _, st := range steps {
		if _, err := fmt.Fprintf(file, add+xor+rotate,
			st.sum, st.addend,
			st.target, st.sum,
			st.target, st.rot, st.rot); err != nil {
			return err
		}
	}
	return nil
}

// writeChacha20Block emits the complete ChaCha20Block function: the prologue,
// the unrolled double round, and the sixteen little-endian output stores. It
// returns the first write error, if any.
func writeChacha20Block(file *os.File) error {
	if _, err := fmt.Fprintln(file, blockPrologue); err != nil {
		return err
	}
	for _, qr := range quarterRounds {
		if err := writeQuarterRound(file, qr[0], qr[1], qr[2], qr[3]); err != nil {
			return err
		}
	}
	// Close the round loop.
	if _, err := fmt.Fprintln(file, "}"); err != nil {
		return err
	}
	for i := 0; i < 16; i++ {
		if _, err := fmt.Fprintf(file, "binary.LittleEndian.PutUint32(out[%d:%d], s[%d]+x%d)\n", i*4, i*4+4, i, i); err != nil {
			return err
		}
	}
	// Close the function and leave a trailing blank line.
	_, err := fmt.Fprint(file, "}\n\n")
	return err
}

// main regenerates chacha_core.go in the current directory and exits with a
// non-zero status if the file cannot be created, written or closed.
func main() {
	file, err := os.OpenFile("chacha_core.go", os.O_WRONLY|os.O_TRUNC|os.O_CREATE, 0644)
	if err != nil {
		log.Fatalf("Failed to generate chacha_core.go: %v", err)
	}

	_, err = fmt.Fprint(file, fileHeader)
	if err == nil {
		err = writeChacha20Block(file)
	}
	// Always close, but report the earliest failure.
	if closeErr := file.Close(); err == nil {
		err = closeErr
	}
	if err != nil {
		log.Fatalf("Failed to generate chacha_core.go: %v", err)
	}
}