mirror of https://github.com/hashicorp/consul
Adds HTTP/2 support to Consul's HTTPS server. (#3657)
* Refactors the HTTP listen path to create servers in the same spot. * Adds HTTP/2 support to Consul's HTTPS server. * Vendors Go HTTP/2 library and associated deps.pull/3661/head
parent
aa199ab6ba
commit
4a2cafe525
|
@ -39,6 +39,7 @@ import (
|
|||
"github.com/hashicorp/raft"
|
||||
"github.com/hashicorp/serf/serf"
|
||||
"github.com/shirou/gopsutil/host"
|
||||
"golang.org/x/net/http2"
|
||||
)
|
||||
|
||||
const (
|
||||
|
@ -340,17 +341,16 @@ func (a *Agent) Start() error {
|
|||
return err
|
||||
}
|
||||
|
||||
// create listeners and unstarted servers
|
||||
// see comment on listenHTTP why we are doing this
|
||||
httpln, err := a.listenHTTP()
|
||||
// Create listeners and unstarted servers; see comment on listenHTTP why
|
||||
// we are doing this.
|
||||
servers, err := a.listenHTTP()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// start HTTP and HTTPS servers
|
||||
for _, l := range httpln {
|
||||
srv := NewHTTPServer(l.Addr(), a)
|
||||
if err := a.serveHTTP(l, srv); err != nil {
|
||||
// Start HTTP and HTTPS servers.
|
||||
for _, srv := range servers {
|
||||
if err := a.serveHTTP(srv); err != nil {
|
||||
return err
|
||||
}
|
||||
a.httpServers = append(a.httpServers, srv)
|
||||
|
@ -418,12 +418,13 @@ func (a *Agent) listenAndServeDNS() error {
|
|||
//
|
||||
// This approach should ultimately be refactored to the point where we just
|
||||
// start the server and any error should trigger a proper shutdown of the agent.
|
||||
func (a *Agent) listenHTTP() ([]net.Listener, error) {
|
||||
func (a *Agent) listenHTTP() ([]*HTTPServer, error) {
|
||||
var ln []net.Listener
|
||||
|
||||
var servers []*HTTPServer
|
||||
start := func(proto string, addrs []net.Addr) error {
|
||||
for _, addr := range addrs {
|
||||
var l net.Listener
|
||||
var tlscfg *tls.Config
|
||||
var err error
|
||||
|
||||
switch x := addr.(type) {
|
||||
|
@ -438,11 +439,10 @@ func (a *Agent) listenHTTP() ([]net.Listener, error) {
|
|||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
l = &tcpKeepAliveListener{l.(*net.TCPListener)}
|
||||
|
||||
if proto == "https" {
|
||||
tlscfg, err := a.config.IncomingHTTPSConfig()
|
||||
tlscfg, err = a.config.IncomingHTTPSConfig()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
@ -453,6 +453,29 @@ func (a *Agent) listenHTTP() ([]net.Listener, error) {
|
|||
return fmt.Errorf("unsupported address type %T", addr)
|
||||
}
|
||||
ln = append(ln, l)
|
||||
|
||||
srv := &HTTPServer{
|
||||
Server: &http.Server{
|
||||
Addr: l.Addr().String(),
|
||||
TLSConfig: tlscfg,
|
||||
},
|
||||
ln: l,
|
||||
agent: a,
|
||||
blacklist: NewBlacklist(a.config.HTTPBlockEndpoints),
|
||||
proto: proto,
|
||||
}
|
||||
srv.Server.Handler = srv.handler(a.config.EnableDebug)
|
||||
|
||||
// This will enable upgrading connections to HTTP/2 as
|
||||
// part of TLS negotiation.
|
||||
if proto == "https" {
|
||||
err = http2.ConfigureServer(srv.Server, nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
servers = append(servers, srv)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
@ -469,12 +492,11 @@ func (a *Agent) listenHTTP() ([]net.Listener, error) {
|
|||
}
|
||||
return nil, err
|
||||
}
|
||||
return ln, nil
|
||||
return servers, nil
|
||||
}
|
||||
|
||||
// tcpKeepAliveListener sets TCP keep-alive timeouts on accepted
|
||||
// connections. It's used by NewHttpServer so dead TCP connections
|
||||
// eventually go away.
|
||||
// connections. It's used so dead TCP connections eventually go away.
|
||||
type tcpKeepAliveListener struct {
|
||||
*net.TCPListener
|
||||
}
|
||||
|
@ -507,28 +529,19 @@ func (a *Agent) listenSocket(path string) (net.Listener, error) {
|
|||
return l, nil
|
||||
}
|
||||
|
||||
func (a *Agent) serveHTTP(l net.Listener, srv *HTTPServer) error {
|
||||
func (a *Agent) serveHTTP(srv *HTTPServer) error {
|
||||
// https://github.com/golang/go/issues/20239
|
||||
//
|
||||
// In go.8.1 there is a race between Serve and Shutdown. If
|
||||
// Shutdown is called before the Serve go routine was scheduled then
|
||||
// the Serve go routine never returns. This deadlocks the agent
|
||||
// shutdown for some tests since it will wait forever.
|
||||
//
|
||||
// Since we need to check for an unexported type (*tls.listener)
|
||||
// we cannot just perform a type check since the compiler won't let
|
||||
// us. We might be able to use reflection but the fmt.Sprintf() hack
|
||||
// works just as well.
|
||||
srv.proto = "http"
|
||||
if strings.Contains("*tls.listener", fmt.Sprintf("%T", l)) {
|
||||
srv.proto = "https"
|
||||
}
|
||||
notif := make(chan net.Addr)
|
||||
a.wgServers.Add(1)
|
||||
go func() {
|
||||
defer a.wgServers.Done()
|
||||
notif <- l.Addr()
|
||||
err := srv.Serve(l)
|
||||
notif <- srv.ln.Addr()
|
||||
err := srv.Serve(srv.ln)
|
||||
if err != nil && err != http.ErrServerClosed {
|
||||
a.logger.Print(err)
|
||||
}
|
||||
|
@ -1210,12 +1223,12 @@ func (a *Agent) ShutdownEndpoints() {
|
|||
a.dnsServers = nil
|
||||
|
||||
for _, srv := range a.httpServers {
|
||||
a.logger.Printf("[INFO] agent: Stopping %s server %s (%s)", strings.ToUpper(srv.proto), srv.addr.String(), srv.addr.Network())
|
||||
a.logger.Printf("[INFO] agent: Stopping %s server %s (%s)", strings.ToUpper(srv.proto), srv.ln.Addr().String(), srv.ln.Addr().Network())
|
||||
ctx, cancel := context.WithTimeout(context.Background(), time.Second)
|
||||
defer cancel()
|
||||
srv.Shutdown(ctx)
|
||||
if ctx.Err() == context.DeadlineExceeded {
|
||||
a.logger.Printf("[WARN] agent: Timeout stopping %s server %s (%s)", strings.ToUpper(srv.proto), srv.addr.String(), srv.addr.Network())
|
||||
a.logger.Printf("[WARN] agent: Timeout stopping %s server %s (%s)", strings.ToUpper(srv.proto), srv.ln.Addr().String(), srv.ln.Addr().Network())
|
||||
}
|
||||
}
|
||||
a.httpServers = nil
|
||||
|
|
|
@ -31,24 +31,12 @@ func (e MethodNotAllowedError) Error() string {
|
|||
// HTTPServer provides an HTTP api for an agent.
|
||||
type HTTPServer struct {
|
||||
*http.Server
|
||||
ln net.Listener
|
||||
agent *Agent
|
||||
blacklist *Blacklist
|
||||
|
||||
// proto is filled by the agent to "http" or "https".
|
||||
proto string
|
||||
addr net.Addr
|
||||
}
|
||||
|
||||
func NewHTTPServer(addr net.Addr, a *Agent) *HTTPServer {
|
||||
s := &HTTPServer{
|
||||
Server: &http.Server{Addr: addr.String()},
|
||||
agent: a,
|
||||
blacklist: NewBlacklist(a.config.HTTPBlockEndpoints),
|
||||
addr: addr,
|
||||
}
|
||||
|
||||
s.Server.Handler = s.handler(a.config.EnableDebug)
|
||||
return s
|
||||
}
|
||||
|
||||
// handler is used to attach our handlers to the mux
|
||||
|
|
|
@ -19,9 +19,11 @@ import (
|
|||
"time"
|
||||
|
||||
"github.com/hashicorp/consul/agent/structs"
|
||||
"github.com/hashicorp/consul/api"
|
||||
"github.com/hashicorp/consul/logger"
|
||||
"github.com/hashicorp/consul/testutil"
|
||||
"github.com/hashicorp/go-cleanhttp"
|
||||
"golang.org/x/net/http2"
|
||||
)
|
||||
|
||||
func TestHTTPServer_UnixSocket(t *testing.T) {
|
||||
|
@ -122,6 +124,86 @@ func TestHTTPServer_UnixSocket_FileExists(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func TestHTTPServer_H2(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
// Fire up an agent with TLS enabled.
|
||||
a := &TestAgent{
|
||||
Name: t.Name(),
|
||||
UseTLS: true,
|
||||
HCL: `
|
||||
key_file = "../test/client_certs/server.key"
|
||||
cert_file = "../test/client_certs/server.crt"
|
||||
ca_file = "../test/client_certs/rootca.crt"
|
||||
`,
|
||||
}
|
||||
a.Start()
|
||||
defer a.Shutdown()
|
||||
|
||||
// Make an HTTP/2-enabled client, using the API helpers to set
|
||||
// up TLS to be as normal as possible for Consul.
|
||||
tlscfg := &api.TLSConfig{
|
||||
Address: "consul.test",
|
||||
KeyFile: "../test/client_certs/client.key",
|
||||
CertFile: "../test/client_certs/client.crt",
|
||||
CAFile: "../test/client_certs/rootca.crt",
|
||||
}
|
||||
tlsccfg, err := api.SetupTLSConfig(tlscfg)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
transport := api.DefaultConfig().Transport
|
||||
transport.TLSClientConfig = tlsccfg
|
||||
if err := http2.ConfigureTransport(transport); err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
hc := &http.Client{
|
||||
Transport: transport,
|
||||
}
|
||||
|
||||
// Hook a handler that echoes back the protocol.
|
||||
handler := func(resp http.ResponseWriter, req *http.Request) {
|
||||
resp.WriteHeader(http.StatusOK)
|
||||
fmt.Fprint(resp, req.Proto)
|
||||
}
|
||||
mux, ok := a.srv.Handler.(*http.ServeMux)
|
||||
if !ok {
|
||||
t.Fatalf("handler is not expected type")
|
||||
}
|
||||
mux.HandleFunc("/echo", handler)
|
||||
|
||||
// Call it and make sure we see HTTP/2.
|
||||
url := fmt.Sprintf("https://%s/echo", a.srv.ln.Addr().String())
|
||||
resp, err := hc.Get(url)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
body, err := ioutil.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
if !bytes.Equal(body, []byte("HTTP/2.0")) {
|
||||
t.Fatalf("bad: %v", body)
|
||||
}
|
||||
|
||||
// This doesn't have a closed-loop way to verify HTTP/2 support for
|
||||
// some other endpoint, but configure an API client and make a call
|
||||
// just as a sanity check.
|
||||
cfg := &api.Config{
|
||||
Address: a.srv.ln.Addr().String(),
|
||||
Scheme: "https",
|
||||
HttpClient: hc,
|
||||
}
|
||||
client, err := api.NewClient(cfg)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
if _, err := client.Agent().Self(); err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSetIndex(t *testing.T) {
|
||||
t.Parallel()
|
||||
resp := httptest.NewRecorder()
|
||||
|
|
|
@ -65,6 +65,10 @@ type TestAgent struct {
|
|||
// Key is the optional encryption key for the LAN and WAN keyring.
|
||||
Key string
|
||||
|
||||
// UseTLS, if true, will disable the HTTP port and enable the HTTPS
|
||||
// one.
|
||||
UseTLS bool
|
||||
|
||||
// dns is a reference to the first started DNS endpoint.
|
||||
// It is valid after Start().
|
||||
dns *DNSServer
|
||||
|
@ -117,7 +121,7 @@ func (a *TestAgent) Start() *TestAgent {
|
|||
a.Config = TestConfig(
|
||||
config.Source{Name: a.Name, Format: "hcl", Data: a.HCL},
|
||||
config.Source{Name: a.Name + ".data_dir", Format: "hcl", Data: hclDataDir},
|
||||
randomPortsSource(),
|
||||
randomPortsSource(a.UseTLS),
|
||||
)
|
||||
|
||||
// write the keyring
|
||||
|
@ -284,8 +288,13 @@ func (a *TestAgent) consulConfig() *consul.Config {
|
|||
// chance of port conflicts for concurrently executed test binaries.
|
||||
// Instead of relying on one set of ports to be sufficient we retry
|
||||
// starting the agent with different ports on port conflict.
|
||||
func randomPortsSource() config.Source {
|
||||
ports := freeport.Get(5)
|
||||
func randomPortsSource(tls bool) config.Source {
|
||||
ports := freeport.Get(6)
|
||||
if tls {
|
||||
ports[1] = -1
|
||||
} else {
|
||||
ports[2] = -1
|
||||
}
|
||||
return config.Source{
|
||||
Name: "ports",
|
||||
Format: "hcl",
|
||||
|
@ -293,10 +302,10 @@ func randomPortsSource() config.Source {
|
|||
ports = {
|
||||
dns = ` + strconv.Itoa(ports[0]) + `
|
||||
http = ` + strconv.Itoa(ports[1]) + `
|
||||
https = -1
|
||||
serf_lan = ` + strconv.Itoa(ports[2]) + `
|
||||
serf_wan = ` + strconv.Itoa(ports[3]) + `
|
||||
server = ` + strconv.Itoa(ports[4]) + `
|
||||
https = ` + strconv.Itoa(ports[2]) + `
|
||||
serf_lan = ` + strconv.Itoa(ports[3]) + `
|
||||
serf_wan = ` + strconv.Itoa(ports[4]) + `
|
||||
server = ` + strconv.Itoa(ports[5]) + `
|
||||
}
|
||||
`,
|
||||
}
|
||||
|
|
|
@ -477,6 +477,14 @@ func NewHttpClient(transport *http.Transport, tlsConf TLSConfig) (*http.Client,
|
|||
Transport: transport,
|
||||
}
|
||||
|
||||
// TODO (slackpad) - Once we get some run time on the HTTP/2 support we
|
||||
// should turn it on by default if TLS is enabled. We would basically
|
||||
// just need to call http2.ConfigureTransport(transport) here. We also
|
||||
// don't want to introduce another external dependency on
|
||||
// golang.org/x/net/http2 at this time. For a complete recipe for how
|
||||
// to enable HTTP/2 support on a transport suitable for the API client
|
||||
// library see agent/http_test.go:TestHTTPServer_H2.
|
||||
|
||||
if transport.TLSClientConfig == nil {
|
||||
tlsClientConfig, err := SetupTLSConfig(&tlsConf)
|
||||
|
||||
|
|
|
@ -0,0 +1,51 @@
|
|||
#
|
||||
# This Dockerfile builds a recent curl with HTTP/2 client support, using
|
||||
# a recent nghttp2 build.
|
||||
#
|
||||
# See the Makefile for how to tag it. If Docker and that image is found, the
|
||||
# Go tests use this curl binary for integration tests.
|
||||
#
|
||||
|
||||
FROM ubuntu:trusty
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get upgrade -y && \
|
||||
apt-get install -y git-core build-essential wget
|
||||
|
||||
RUN apt-get install -y --no-install-recommends \
|
||||
autotools-dev libtool pkg-config zlib1g-dev \
|
||||
libcunit1-dev libssl-dev libxml2-dev libevent-dev \
|
||||
automake autoconf
|
||||
|
||||
# The list of packages nghttp2 recommends for h2load:
|
||||
RUN apt-get install -y --no-install-recommends make binutils \
|
||||
autoconf automake autotools-dev \
|
||||
libtool pkg-config zlib1g-dev libcunit1-dev libssl-dev libxml2-dev \
|
||||
libev-dev libevent-dev libjansson-dev libjemalloc-dev \
|
||||
cython python3.4-dev python-setuptools
|
||||
|
||||
# Note: setting NGHTTP2_VER before the git clone, so an old git clone isn't cached:
|
||||
ENV NGHTTP2_VER 895da9a
|
||||
RUN cd /root && git clone https://github.com/tatsuhiro-t/nghttp2.git
|
||||
|
||||
WORKDIR /root/nghttp2
|
||||
RUN git reset --hard $NGHTTP2_VER
|
||||
RUN autoreconf -i
|
||||
RUN automake
|
||||
RUN autoconf
|
||||
RUN ./configure
|
||||
RUN make
|
||||
RUN make install
|
||||
|
||||
WORKDIR /root
|
||||
RUN wget http://curl.haxx.se/download/curl-7.45.0.tar.gz
|
||||
RUN tar -zxvf curl-7.45.0.tar.gz
|
||||
WORKDIR /root/curl-7.45.0
|
||||
RUN ./configure --with-ssl --with-nghttp2=/usr/local
|
||||
RUN make
|
||||
RUN make install
|
||||
RUN ldconfig
|
||||
|
||||
CMD ["-h"]
|
||||
ENTRYPOINT ["/usr/local/bin/curl"]
|
||||
|
|
@ -0,0 +1,3 @@
|
|||
curlimage:
|
||||
docker build -t gohttp2/curl .
|
||||
|
|
@ -0,0 +1,20 @@
|
|||
This is a work-in-progress HTTP/2 implementation for Go.
|
||||
|
||||
It will eventually live in the Go standard library and won't require
|
||||
any changes to your code to use. It will just be automatic.
|
||||
|
||||
Status:
|
||||
|
||||
* The server support is pretty good. A few things are missing
|
||||
but are being worked on.
|
||||
* The client work has just started but shares a lot of code
|
||||
is coming along much quicker.
|
||||
|
||||
Docs are at https://godoc.org/golang.org/x/net/http2
|
||||
|
||||
Demo test server at https://http2.golang.org/
|
||||
|
||||
Help & bug reports welcome!
|
||||
|
||||
Contributing: https://golang.org/doc/contribute.html
|
||||
Bugs: https://golang.org/issue/new?title=x/net/http2:+
|
|
@ -0,0 +1,641 @@
|
|||
// Copyright 2017 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package http2
|
||||
|
||||
// A list of the possible cipher suite ids. Taken from
|
||||
// http://www.iana.org/assignments/tls-parameters/tls-parameters.txt
|
||||
|
||||
const (
|
||||
cipher_TLS_NULL_WITH_NULL_NULL uint16 = 0x0000
|
||||
cipher_TLS_RSA_WITH_NULL_MD5 uint16 = 0x0001
|
||||
cipher_TLS_RSA_WITH_NULL_SHA uint16 = 0x0002
|
||||
cipher_TLS_RSA_EXPORT_WITH_RC4_40_MD5 uint16 = 0x0003
|
||||
cipher_TLS_RSA_WITH_RC4_128_MD5 uint16 = 0x0004
|
||||
cipher_TLS_RSA_WITH_RC4_128_SHA uint16 = 0x0005
|
||||
cipher_TLS_RSA_EXPORT_WITH_RC2_CBC_40_MD5 uint16 = 0x0006
|
||||
cipher_TLS_RSA_WITH_IDEA_CBC_SHA uint16 = 0x0007
|
||||
cipher_TLS_RSA_EXPORT_WITH_DES40_CBC_SHA uint16 = 0x0008
|
||||
cipher_TLS_RSA_WITH_DES_CBC_SHA uint16 = 0x0009
|
||||
cipher_TLS_RSA_WITH_3DES_EDE_CBC_SHA uint16 = 0x000A
|
||||
cipher_TLS_DH_DSS_EXPORT_WITH_DES40_CBC_SHA uint16 = 0x000B
|
||||
cipher_TLS_DH_DSS_WITH_DES_CBC_SHA uint16 = 0x000C
|
||||
cipher_TLS_DH_DSS_WITH_3DES_EDE_CBC_SHA uint16 = 0x000D
|
||||
cipher_TLS_DH_RSA_EXPORT_WITH_DES40_CBC_SHA uint16 = 0x000E
|
||||
cipher_TLS_DH_RSA_WITH_DES_CBC_SHA uint16 = 0x000F
|
||||
cipher_TLS_DH_RSA_WITH_3DES_EDE_CBC_SHA uint16 = 0x0010
|
||||
cipher_TLS_DHE_DSS_EXPORT_WITH_DES40_CBC_SHA uint16 = 0x0011
|
||||
cipher_TLS_DHE_DSS_WITH_DES_CBC_SHA uint16 = 0x0012
|
||||
cipher_TLS_DHE_DSS_WITH_3DES_EDE_CBC_SHA uint16 = 0x0013
|
||||
cipher_TLS_DHE_RSA_EXPORT_WITH_DES40_CBC_SHA uint16 = 0x0014
|
||||
cipher_TLS_DHE_RSA_WITH_DES_CBC_SHA uint16 = 0x0015
|
||||
cipher_TLS_DHE_RSA_WITH_3DES_EDE_CBC_SHA uint16 = 0x0016
|
||||
cipher_TLS_DH_anon_EXPORT_WITH_RC4_40_MD5 uint16 = 0x0017
|
||||
cipher_TLS_DH_anon_WITH_RC4_128_MD5 uint16 = 0x0018
|
||||
cipher_TLS_DH_anon_EXPORT_WITH_DES40_CBC_SHA uint16 = 0x0019
|
||||
cipher_TLS_DH_anon_WITH_DES_CBC_SHA uint16 = 0x001A
|
||||
cipher_TLS_DH_anon_WITH_3DES_EDE_CBC_SHA uint16 = 0x001B
|
||||
// Reserved uint16 = 0x001C-1D
|
||||
cipher_TLS_KRB5_WITH_DES_CBC_SHA uint16 = 0x001E
|
||||
cipher_TLS_KRB5_WITH_3DES_EDE_CBC_SHA uint16 = 0x001F
|
||||
cipher_TLS_KRB5_WITH_RC4_128_SHA uint16 = 0x0020
|
||||
cipher_TLS_KRB5_WITH_IDEA_CBC_SHA uint16 = 0x0021
|
||||
cipher_TLS_KRB5_WITH_DES_CBC_MD5 uint16 = 0x0022
|
||||
cipher_TLS_KRB5_WITH_3DES_EDE_CBC_MD5 uint16 = 0x0023
|
||||
cipher_TLS_KRB5_WITH_RC4_128_MD5 uint16 = 0x0024
|
||||
cipher_TLS_KRB5_WITH_IDEA_CBC_MD5 uint16 = 0x0025
|
||||
cipher_TLS_KRB5_EXPORT_WITH_DES_CBC_40_SHA uint16 = 0x0026
|
||||
cipher_TLS_KRB5_EXPORT_WITH_RC2_CBC_40_SHA uint16 = 0x0027
|
||||
cipher_TLS_KRB5_EXPORT_WITH_RC4_40_SHA uint16 = 0x0028
|
||||
cipher_TLS_KRB5_EXPORT_WITH_DES_CBC_40_MD5 uint16 = 0x0029
|
||||
cipher_TLS_KRB5_EXPORT_WITH_RC2_CBC_40_MD5 uint16 = 0x002A
|
||||
cipher_TLS_KRB5_EXPORT_WITH_RC4_40_MD5 uint16 = 0x002B
|
||||
cipher_TLS_PSK_WITH_NULL_SHA uint16 = 0x002C
|
||||
cipher_TLS_DHE_PSK_WITH_NULL_SHA uint16 = 0x002D
|
||||
cipher_TLS_RSA_PSK_WITH_NULL_SHA uint16 = 0x002E
|
||||
cipher_TLS_RSA_WITH_AES_128_CBC_SHA uint16 = 0x002F
|
||||
cipher_TLS_DH_DSS_WITH_AES_128_CBC_SHA uint16 = 0x0030
|
||||
cipher_TLS_DH_RSA_WITH_AES_128_CBC_SHA uint16 = 0x0031
|
||||
cipher_TLS_DHE_DSS_WITH_AES_128_CBC_SHA uint16 = 0x0032
|
||||
cipher_TLS_DHE_RSA_WITH_AES_128_CBC_SHA uint16 = 0x0033
|
||||
cipher_TLS_DH_anon_WITH_AES_128_CBC_SHA uint16 = 0x0034
|
||||
cipher_TLS_RSA_WITH_AES_256_CBC_SHA uint16 = 0x0035
|
||||
cipher_TLS_DH_DSS_WITH_AES_256_CBC_SHA uint16 = 0x0036
|
||||
cipher_TLS_DH_RSA_WITH_AES_256_CBC_SHA uint16 = 0x0037
|
||||
cipher_TLS_DHE_DSS_WITH_AES_256_CBC_SHA uint16 = 0x0038
|
||||
cipher_TLS_DHE_RSA_WITH_AES_256_CBC_SHA uint16 = 0x0039
|
||||
cipher_TLS_DH_anon_WITH_AES_256_CBC_SHA uint16 = 0x003A
|
||||
cipher_TLS_RSA_WITH_NULL_SHA256 uint16 = 0x003B
|
||||
cipher_TLS_RSA_WITH_AES_128_CBC_SHA256 uint16 = 0x003C
|
||||
cipher_TLS_RSA_WITH_AES_256_CBC_SHA256 uint16 = 0x003D
|
||||
cipher_TLS_DH_DSS_WITH_AES_128_CBC_SHA256 uint16 = 0x003E
|
||||
cipher_TLS_DH_RSA_WITH_AES_128_CBC_SHA256 uint16 = 0x003F
|
||||
cipher_TLS_DHE_DSS_WITH_AES_128_CBC_SHA256 uint16 = 0x0040
|
||||
cipher_TLS_RSA_WITH_CAMELLIA_128_CBC_SHA uint16 = 0x0041
|
||||
cipher_TLS_DH_DSS_WITH_CAMELLIA_128_CBC_SHA uint16 = 0x0042
|
||||
cipher_TLS_DH_RSA_WITH_CAMELLIA_128_CBC_SHA uint16 = 0x0043
|
||||
cipher_TLS_DHE_DSS_WITH_CAMELLIA_128_CBC_SHA uint16 = 0x0044
|
||||
cipher_TLS_DHE_RSA_WITH_CAMELLIA_128_CBC_SHA uint16 = 0x0045
|
||||
cipher_TLS_DH_anon_WITH_CAMELLIA_128_CBC_SHA uint16 = 0x0046
|
||||
// Reserved uint16 = 0x0047-4F
|
||||
// Reserved uint16 = 0x0050-58
|
||||
// Reserved uint16 = 0x0059-5C
|
||||
// Unassigned uint16 = 0x005D-5F
|
||||
// Reserved uint16 = 0x0060-66
|
||||
cipher_TLS_DHE_RSA_WITH_AES_128_CBC_SHA256 uint16 = 0x0067
|
||||
cipher_TLS_DH_DSS_WITH_AES_256_CBC_SHA256 uint16 = 0x0068
|
||||
cipher_TLS_DH_RSA_WITH_AES_256_CBC_SHA256 uint16 = 0x0069
|
||||
cipher_TLS_DHE_DSS_WITH_AES_256_CBC_SHA256 uint16 = 0x006A
|
||||
cipher_TLS_DHE_RSA_WITH_AES_256_CBC_SHA256 uint16 = 0x006B
|
||||
cipher_TLS_DH_anon_WITH_AES_128_CBC_SHA256 uint16 = 0x006C
|
||||
cipher_TLS_DH_anon_WITH_AES_256_CBC_SHA256 uint16 = 0x006D
|
||||
// Unassigned uint16 = 0x006E-83
|
||||
cipher_TLS_RSA_WITH_CAMELLIA_256_CBC_SHA uint16 = 0x0084
|
||||
cipher_TLS_DH_DSS_WITH_CAMELLIA_256_CBC_SHA uint16 = 0x0085
|
||||
cipher_TLS_DH_RSA_WITH_CAMELLIA_256_CBC_SHA uint16 = 0x0086
|
||||
cipher_TLS_DHE_DSS_WITH_CAMELLIA_256_CBC_SHA uint16 = 0x0087
|
||||
cipher_TLS_DHE_RSA_WITH_CAMELLIA_256_CBC_SHA uint16 = 0x0088
|
||||
cipher_TLS_DH_anon_WITH_CAMELLIA_256_CBC_SHA uint16 = 0x0089
|
||||
cipher_TLS_PSK_WITH_RC4_128_SHA uint16 = 0x008A
|
||||
cipher_TLS_PSK_WITH_3DES_EDE_CBC_SHA uint16 = 0x008B
|
||||
cipher_TLS_PSK_WITH_AES_128_CBC_SHA uint16 = 0x008C
|
||||
cipher_TLS_PSK_WITH_AES_256_CBC_SHA uint16 = 0x008D
|
||||
cipher_TLS_DHE_PSK_WITH_RC4_128_SHA uint16 = 0x008E
|
||||
cipher_TLS_DHE_PSK_WITH_3DES_EDE_CBC_SHA uint16 = 0x008F
|
||||
cipher_TLS_DHE_PSK_WITH_AES_128_CBC_SHA uint16 = 0x0090
|
||||
cipher_TLS_DHE_PSK_WITH_AES_256_CBC_SHA uint16 = 0x0091
|
||||
cipher_TLS_RSA_PSK_WITH_RC4_128_SHA uint16 = 0x0092
|
||||
cipher_TLS_RSA_PSK_WITH_3DES_EDE_CBC_SHA uint16 = 0x0093
|
||||
cipher_TLS_RSA_PSK_WITH_AES_128_CBC_SHA uint16 = 0x0094
|
||||
cipher_TLS_RSA_PSK_WITH_AES_256_CBC_SHA uint16 = 0x0095
|
||||
cipher_TLS_RSA_WITH_SEED_CBC_SHA uint16 = 0x0096
|
||||
cipher_TLS_DH_DSS_WITH_SEED_CBC_SHA uint16 = 0x0097
|
||||
cipher_TLS_DH_RSA_WITH_SEED_CBC_SHA uint16 = 0x0098
|
||||
cipher_TLS_DHE_DSS_WITH_SEED_CBC_SHA uint16 = 0x0099
|
||||
cipher_TLS_DHE_RSA_WITH_SEED_CBC_SHA uint16 = 0x009A
|
||||
cipher_TLS_DH_anon_WITH_SEED_CBC_SHA uint16 = 0x009B
|
||||
cipher_TLS_RSA_WITH_AES_128_GCM_SHA256 uint16 = 0x009C
|
||||
cipher_TLS_RSA_WITH_AES_256_GCM_SHA384 uint16 = 0x009D
|
||||
cipher_TLS_DHE_RSA_WITH_AES_128_GCM_SHA256 uint16 = 0x009E
|
||||
cipher_TLS_DHE_RSA_WITH_AES_256_GCM_SHA384 uint16 = 0x009F
|
||||
cipher_TLS_DH_RSA_WITH_AES_128_GCM_SHA256 uint16 = 0x00A0
|
||||
cipher_TLS_DH_RSA_WITH_AES_256_GCM_SHA384 uint16 = 0x00A1
|
||||
cipher_TLS_DHE_DSS_WITH_AES_128_GCM_SHA256 uint16 = 0x00A2
|
||||
cipher_TLS_DHE_DSS_WITH_AES_256_GCM_SHA384 uint16 = 0x00A3
|
||||
cipher_TLS_DH_DSS_WITH_AES_128_GCM_SHA256 uint16 = 0x00A4
|
||||
cipher_TLS_DH_DSS_WITH_AES_256_GCM_SHA384 uint16 = 0x00A5
|
||||
cipher_TLS_DH_anon_WITH_AES_128_GCM_SHA256 uint16 = 0x00A6
|
||||
cipher_TLS_DH_anon_WITH_AES_256_GCM_SHA384 uint16 = 0x00A7
|
||||
cipher_TLS_PSK_WITH_AES_128_GCM_SHA256 uint16 = 0x00A8
|
||||
cipher_TLS_PSK_WITH_AES_256_GCM_SHA384 uint16 = 0x00A9
|
||||
cipher_TLS_DHE_PSK_WITH_AES_128_GCM_SHA256 uint16 = 0x00AA
|
||||
cipher_TLS_DHE_PSK_WITH_AES_256_GCM_SHA384 uint16 = 0x00AB
|
||||
cipher_TLS_RSA_PSK_WITH_AES_128_GCM_SHA256 uint16 = 0x00AC
|
||||
cipher_TLS_RSA_PSK_WITH_AES_256_GCM_SHA384 uint16 = 0x00AD
|
||||
cipher_TLS_PSK_WITH_AES_128_CBC_SHA256 uint16 = 0x00AE
|
||||
cipher_TLS_PSK_WITH_AES_256_CBC_SHA384 uint16 = 0x00AF
|
||||
cipher_TLS_PSK_WITH_NULL_SHA256 uint16 = 0x00B0
|
||||
cipher_TLS_PSK_WITH_NULL_SHA384 uint16 = 0x00B1
|
||||
cipher_TLS_DHE_PSK_WITH_AES_128_CBC_SHA256 uint16 = 0x00B2
|
||||
cipher_TLS_DHE_PSK_WITH_AES_256_CBC_SHA384 uint16 = 0x00B3
|
||||
cipher_TLS_DHE_PSK_WITH_NULL_SHA256 uint16 = 0x00B4
|
||||
cipher_TLS_DHE_PSK_WITH_NULL_SHA384 uint16 = 0x00B5
|
||||
cipher_TLS_RSA_PSK_WITH_AES_128_CBC_SHA256 uint16 = 0x00B6
|
||||
cipher_TLS_RSA_PSK_WITH_AES_256_CBC_SHA384 uint16 = 0x00B7
|
||||
cipher_TLS_RSA_PSK_WITH_NULL_SHA256 uint16 = 0x00B8
|
||||
cipher_TLS_RSA_PSK_WITH_NULL_SHA384 uint16 = 0x00B9
|
||||
cipher_TLS_RSA_WITH_CAMELLIA_128_CBC_SHA256 uint16 = 0x00BA
|
||||
cipher_TLS_DH_DSS_WITH_CAMELLIA_128_CBC_SHA256 uint16 = 0x00BB
|
||||
cipher_TLS_DH_RSA_WITH_CAMELLIA_128_CBC_SHA256 uint16 = 0x00BC
|
||||
cipher_TLS_DHE_DSS_WITH_CAMELLIA_128_CBC_SHA256 uint16 = 0x00BD
|
||||
cipher_TLS_DHE_RSA_WITH_CAMELLIA_128_CBC_SHA256 uint16 = 0x00BE
|
||||
cipher_TLS_DH_anon_WITH_CAMELLIA_128_CBC_SHA256 uint16 = 0x00BF
|
||||
cipher_TLS_RSA_WITH_CAMELLIA_256_CBC_SHA256 uint16 = 0x00C0
|
||||
cipher_TLS_DH_DSS_WITH_CAMELLIA_256_CBC_SHA256 uint16 = 0x00C1
|
||||
cipher_TLS_DH_RSA_WITH_CAMELLIA_256_CBC_SHA256 uint16 = 0x00C2
|
||||
cipher_TLS_DHE_DSS_WITH_CAMELLIA_256_CBC_SHA256 uint16 = 0x00C3
|
||||
cipher_TLS_DHE_RSA_WITH_CAMELLIA_256_CBC_SHA256 uint16 = 0x00C4
|
||||
cipher_TLS_DH_anon_WITH_CAMELLIA_256_CBC_SHA256 uint16 = 0x00C5
|
||||
// Unassigned uint16 = 0x00C6-FE
|
||||
cipher_TLS_EMPTY_RENEGOTIATION_INFO_SCSV uint16 = 0x00FF
|
||||
// Unassigned uint16 = 0x01-55,*
|
||||
cipher_TLS_FALLBACK_SCSV uint16 = 0x5600
|
||||
// Unassigned uint16 = 0x5601 - 0xC000
|
||||
cipher_TLS_ECDH_ECDSA_WITH_NULL_SHA uint16 = 0xC001
|
||||
cipher_TLS_ECDH_ECDSA_WITH_RC4_128_SHA uint16 = 0xC002
|
||||
cipher_TLS_ECDH_ECDSA_WITH_3DES_EDE_CBC_SHA uint16 = 0xC003
|
||||
cipher_TLS_ECDH_ECDSA_WITH_AES_128_CBC_SHA uint16 = 0xC004
|
||||
cipher_TLS_ECDH_ECDSA_WITH_AES_256_CBC_SHA uint16 = 0xC005
|
||||
cipher_TLS_ECDHE_ECDSA_WITH_NULL_SHA uint16 = 0xC006
|
||||
cipher_TLS_ECDHE_ECDSA_WITH_RC4_128_SHA uint16 = 0xC007
|
||||
cipher_TLS_ECDHE_ECDSA_WITH_3DES_EDE_CBC_SHA uint16 = 0xC008
|
||||
cipher_TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA uint16 = 0xC009
|
||||
cipher_TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA uint16 = 0xC00A
|
||||
cipher_TLS_ECDH_RSA_WITH_NULL_SHA uint16 = 0xC00B
|
||||
cipher_TLS_ECDH_RSA_WITH_RC4_128_SHA uint16 = 0xC00C
|
||||
cipher_TLS_ECDH_RSA_WITH_3DES_EDE_CBC_SHA uint16 = 0xC00D
|
||||
cipher_TLS_ECDH_RSA_WITH_AES_128_CBC_SHA uint16 = 0xC00E
|
||||
cipher_TLS_ECDH_RSA_WITH_AES_256_CBC_SHA uint16 = 0xC00F
|
||||
cipher_TLS_ECDHE_RSA_WITH_NULL_SHA uint16 = 0xC010
|
||||
cipher_TLS_ECDHE_RSA_WITH_RC4_128_SHA uint16 = 0xC011
|
||||
cipher_TLS_ECDHE_RSA_WITH_3DES_EDE_CBC_SHA uint16 = 0xC012
|
||||
cipher_TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA uint16 = 0xC013
|
||||
cipher_TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA uint16 = 0xC014
|
||||
cipher_TLS_ECDH_anon_WITH_NULL_SHA uint16 = 0xC015
|
||||
cipher_TLS_ECDH_anon_WITH_RC4_128_SHA uint16 = 0xC016
|
||||
cipher_TLS_ECDH_anon_WITH_3DES_EDE_CBC_SHA uint16 = 0xC017
|
||||
cipher_TLS_ECDH_anon_WITH_AES_128_CBC_SHA uint16 = 0xC018
|
||||
cipher_TLS_ECDH_anon_WITH_AES_256_CBC_SHA uint16 = 0xC019
|
||||
cipher_TLS_SRP_SHA_WITH_3DES_EDE_CBC_SHA uint16 = 0xC01A
|
||||
cipher_TLS_SRP_SHA_RSA_WITH_3DES_EDE_CBC_SHA uint16 = 0xC01B
|
||||
cipher_TLS_SRP_SHA_DSS_WITH_3DES_EDE_CBC_SHA uint16 = 0xC01C
|
||||
cipher_TLS_SRP_SHA_WITH_AES_128_CBC_SHA uint16 = 0xC01D
|
||||
cipher_TLS_SRP_SHA_RSA_WITH_AES_128_CBC_SHA uint16 = 0xC01E
|
||||
cipher_TLS_SRP_SHA_DSS_WITH_AES_128_CBC_SHA uint16 = 0xC01F
|
||||
cipher_TLS_SRP_SHA_WITH_AES_256_CBC_SHA uint16 = 0xC020
|
||||
cipher_TLS_SRP_SHA_RSA_WITH_AES_256_CBC_SHA uint16 = 0xC021
|
||||
cipher_TLS_SRP_SHA_DSS_WITH_AES_256_CBC_SHA uint16 = 0xC022
|
||||
cipher_TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256 uint16 = 0xC023
|
||||
cipher_TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA384 uint16 = 0xC024
|
||||
cipher_TLS_ECDH_ECDSA_WITH_AES_128_CBC_SHA256 uint16 = 0xC025
|
||||
cipher_TLS_ECDH_ECDSA_WITH_AES_256_CBC_SHA384 uint16 = 0xC026
|
||||
cipher_TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256 uint16 = 0xC027
|
||||
cipher_TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA384 uint16 = 0xC028
|
||||
cipher_TLS_ECDH_RSA_WITH_AES_128_CBC_SHA256 uint16 = 0xC029
|
||||
cipher_TLS_ECDH_RSA_WITH_AES_256_CBC_SHA384 uint16 = 0xC02A
|
||||
cipher_TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256 uint16 = 0xC02B
|
||||
cipher_TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384 uint16 = 0xC02C
|
||||
cipher_TLS_ECDH_ECDSA_WITH_AES_128_GCM_SHA256 uint16 = 0xC02D
|
||||
cipher_TLS_ECDH_ECDSA_WITH_AES_256_GCM_SHA384 uint16 = 0xC02E
|
||||
cipher_TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256 uint16 = 0xC02F
|
||||
cipher_TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384 uint16 = 0xC030
|
||||
cipher_TLS_ECDH_RSA_WITH_AES_128_GCM_SHA256 uint16 = 0xC031
|
||||
cipher_TLS_ECDH_RSA_WITH_AES_256_GCM_SHA384 uint16 = 0xC032
|
||||
cipher_TLS_ECDHE_PSK_WITH_RC4_128_SHA uint16 = 0xC033
|
||||
cipher_TLS_ECDHE_PSK_WITH_3DES_EDE_CBC_SHA uint16 = 0xC034
|
||||
cipher_TLS_ECDHE_PSK_WITH_AES_128_CBC_SHA uint16 = 0xC035
|
||||
cipher_TLS_ECDHE_PSK_WITH_AES_256_CBC_SHA uint16 = 0xC036
|
||||
cipher_TLS_ECDHE_PSK_WITH_AES_128_CBC_SHA256 uint16 = 0xC037
|
||||
cipher_TLS_ECDHE_PSK_WITH_AES_256_CBC_SHA384 uint16 = 0xC038
|
||||
cipher_TLS_ECDHE_PSK_WITH_NULL_SHA uint16 = 0xC039
|
||||
cipher_TLS_ECDHE_PSK_WITH_NULL_SHA256 uint16 = 0xC03A
|
||||
cipher_TLS_ECDHE_PSK_WITH_NULL_SHA384 uint16 = 0xC03B
|
||||
cipher_TLS_RSA_WITH_ARIA_128_CBC_SHA256 uint16 = 0xC03C
|
||||
cipher_TLS_RSA_WITH_ARIA_256_CBC_SHA384 uint16 = 0xC03D
|
||||
cipher_TLS_DH_DSS_WITH_ARIA_128_CBC_SHA256 uint16 = 0xC03E
|
||||
cipher_TLS_DH_DSS_WITH_ARIA_256_CBC_SHA384 uint16 = 0xC03F
|
||||
cipher_TLS_DH_RSA_WITH_ARIA_128_CBC_SHA256 uint16 = 0xC040
|
||||
cipher_TLS_DH_RSA_WITH_ARIA_256_CBC_SHA384 uint16 = 0xC041
|
||||
cipher_TLS_DHE_DSS_WITH_ARIA_128_CBC_SHA256 uint16 = 0xC042
|
||||
cipher_TLS_DHE_DSS_WITH_ARIA_256_CBC_SHA384 uint16 = 0xC043
|
||||
cipher_TLS_DHE_RSA_WITH_ARIA_128_CBC_SHA256 uint16 = 0xC044
|
||||
cipher_TLS_DHE_RSA_WITH_ARIA_256_CBC_SHA384 uint16 = 0xC045
|
||||
cipher_TLS_DH_anon_WITH_ARIA_128_CBC_SHA256 uint16 = 0xC046
|
||||
cipher_TLS_DH_anon_WITH_ARIA_256_CBC_SHA384 uint16 = 0xC047
|
||||
cipher_TLS_ECDHE_ECDSA_WITH_ARIA_128_CBC_SHA256 uint16 = 0xC048
|
||||
cipher_TLS_ECDHE_ECDSA_WITH_ARIA_256_CBC_SHA384 uint16 = 0xC049
|
||||
cipher_TLS_ECDH_ECDSA_WITH_ARIA_128_CBC_SHA256 uint16 = 0xC04A
|
||||
cipher_TLS_ECDH_ECDSA_WITH_ARIA_256_CBC_SHA384 uint16 = 0xC04B
|
||||
cipher_TLS_ECDHE_RSA_WITH_ARIA_128_CBC_SHA256 uint16 = 0xC04C
|
||||
cipher_TLS_ECDHE_RSA_WITH_ARIA_256_CBC_SHA384 uint16 = 0xC04D
|
||||
cipher_TLS_ECDH_RSA_WITH_ARIA_128_CBC_SHA256 uint16 = 0xC04E
|
||||
cipher_TLS_ECDH_RSA_WITH_ARIA_256_CBC_SHA384 uint16 = 0xC04F
|
||||
cipher_TLS_RSA_WITH_ARIA_128_GCM_SHA256 uint16 = 0xC050
|
||||
cipher_TLS_RSA_WITH_ARIA_256_GCM_SHA384 uint16 = 0xC051
|
||||
cipher_TLS_DHE_RSA_WITH_ARIA_128_GCM_SHA256 uint16 = 0xC052
|
||||
cipher_TLS_DHE_RSA_WITH_ARIA_256_GCM_SHA384 uint16 = 0xC053
|
||||
cipher_TLS_DH_RSA_WITH_ARIA_128_GCM_SHA256 uint16 = 0xC054
|
||||
cipher_TLS_DH_RSA_WITH_ARIA_256_GCM_SHA384 uint16 = 0xC055
|
||||
cipher_TLS_DHE_DSS_WITH_ARIA_128_GCM_SHA256 uint16 = 0xC056
|
||||
cipher_TLS_DHE_DSS_WITH_ARIA_256_GCM_SHA384 uint16 = 0xC057
|
||||
cipher_TLS_DH_DSS_WITH_ARIA_128_GCM_SHA256 uint16 = 0xC058
|
||||
cipher_TLS_DH_DSS_WITH_ARIA_256_GCM_SHA384 uint16 = 0xC059
|
||||
cipher_TLS_DH_anon_WITH_ARIA_128_GCM_SHA256 uint16 = 0xC05A
|
||||
cipher_TLS_DH_anon_WITH_ARIA_256_GCM_SHA384 uint16 = 0xC05B
|
||||
cipher_TLS_ECDHE_ECDSA_WITH_ARIA_128_GCM_SHA256 uint16 = 0xC05C
|
||||
cipher_TLS_ECDHE_ECDSA_WITH_ARIA_256_GCM_SHA384 uint16 = 0xC05D
|
||||
cipher_TLS_ECDH_ECDSA_WITH_ARIA_128_GCM_SHA256 uint16 = 0xC05E
|
||||
cipher_TLS_ECDH_ECDSA_WITH_ARIA_256_GCM_SHA384 uint16 = 0xC05F
|
||||
cipher_TLS_ECDHE_RSA_WITH_ARIA_128_GCM_SHA256 uint16 = 0xC060
|
||||
cipher_TLS_ECDHE_RSA_WITH_ARIA_256_GCM_SHA384 uint16 = 0xC061
|
||||
cipher_TLS_ECDH_RSA_WITH_ARIA_128_GCM_SHA256 uint16 = 0xC062
|
||||
cipher_TLS_ECDH_RSA_WITH_ARIA_256_GCM_SHA384 uint16 = 0xC063
|
||||
cipher_TLS_PSK_WITH_ARIA_128_CBC_SHA256 uint16 = 0xC064
|
||||
cipher_TLS_PSK_WITH_ARIA_256_CBC_SHA384 uint16 = 0xC065
|
||||
cipher_TLS_DHE_PSK_WITH_ARIA_128_CBC_SHA256 uint16 = 0xC066
|
||||
cipher_TLS_DHE_PSK_WITH_ARIA_256_CBC_SHA384 uint16 = 0xC067
|
||||
cipher_TLS_RSA_PSK_WITH_ARIA_128_CBC_SHA256 uint16 = 0xC068
|
||||
cipher_TLS_RSA_PSK_WITH_ARIA_256_CBC_SHA384 uint16 = 0xC069
|
||||
cipher_TLS_PSK_WITH_ARIA_128_GCM_SHA256 uint16 = 0xC06A
|
||||
cipher_TLS_PSK_WITH_ARIA_256_GCM_SHA384 uint16 = 0xC06B
|
||||
cipher_TLS_DHE_PSK_WITH_ARIA_128_GCM_SHA256 uint16 = 0xC06C
|
||||
cipher_TLS_DHE_PSK_WITH_ARIA_256_GCM_SHA384 uint16 = 0xC06D
|
||||
cipher_TLS_RSA_PSK_WITH_ARIA_128_GCM_SHA256 uint16 = 0xC06E
|
||||
cipher_TLS_RSA_PSK_WITH_ARIA_256_GCM_SHA384 uint16 = 0xC06F
|
||||
cipher_TLS_ECDHE_PSK_WITH_ARIA_128_CBC_SHA256 uint16 = 0xC070
|
||||
cipher_TLS_ECDHE_PSK_WITH_ARIA_256_CBC_SHA384 uint16 = 0xC071
|
||||
cipher_TLS_ECDHE_ECDSA_WITH_CAMELLIA_128_CBC_SHA256 uint16 = 0xC072
|
||||
cipher_TLS_ECDHE_ECDSA_WITH_CAMELLIA_256_CBC_SHA384 uint16 = 0xC073
|
||||
cipher_TLS_ECDH_ECDSA_WITH_CAMELLIA_128_CBC_SHA256 uint16 = 0xC074
|
||||
cipher_TLS_ECDH_ECDSA_WITH_CAMELLIA_256_CBC_SHA384 uint16 = 0xC075
|
||||
cipher_TLS_ECDHE_RSA_WITH_CAMELLIA_128_CBC_SHA256 uint16 = 0xC076
|
||||
cipher_TLS_ECDHE_RSA_WITH_CAMELLIA_256_CBC_SHA384 uint16 = 0xC077
|
||||
cipher_TLS_ECDH_RSA_WITH_CAMELLIA_128_CBC_SHA256 uint16 = 0xC078
|
||||
cipher_TLS_ECDH_RSA_WITH_CAMELLIA_256_CBC_SHA384 uint16 = 0xC079
|
||||
cipher_TLS_RSA_WITH_CAMELLIA_128_GCM_SHA256 uint16 = 0xC07A
|
||||
cipher_TLS_RSA_WITH_CAMELLIA_256_GCM_SHA384 uint16 = 0xC07B
|
||||
cipher_TLS_DHE_RSA_WITH_CAMELLIA_128_GCM_SHA256 uint16 = 0xC07C
|
||||
cipher_TLS_DHE_RSA_WITH_CAMELLIA_256_GCM_SHA384 uint16 = 0xC07D
|
||||
cipher_TLS_DH_RSA_WITH_CAMELLIA_128_GCM_SHA256 uint16 = 0xC07E
|
||||
cipher_TLS_DH_RSA_WITH_CAMELLIA_256_GCM_SHA384 uint16 = 0xC07F
|
||||
cipher_TLS_DHE_DSS_WITH_CAMELLIA_128_GCM_SHA256 uint16 = 0xC080
|
||||
cipher_TLS_DHE_DSS_WITH_CAMELLIA_256_GCM_SHA384 uint16 = 0xC081
|
||||
cipher_TLS_DH_DSS_WITH_CAMELLIA_128_GCM_SHA256 uint16 = 0xC082
|
||||
cipher_TLS_DH_DSS_WITH_CAMELLIA_256_GCM_SHA384 uint16 = 0xC083
|
||||
cipher_TLS_DH_anon_WITH_CAMELLIA_128_GCM_SHA256 uint16 = 0xC084
|
||||
cipher_TLS_DH_anon_WITH_CAMELLIA_256_GCM_SHA384 uint16 = 0xC085
|
||||
cipher_TLS_ECDHE_ECDSA_WITH_CAMELLIA_128_GCM_SHA256 uint16 = 0xC086
|
||||
cipher_TLS_ECDHE_ECDSA_WITH_CAMELLIA_256_GCM_SHA384 uint16 = 0xC087
|
||||
cipher_TLS_ECDH_ECDSA_WITH_CAMELLIA_128_GCM_SHA256 uint16 = 0xC088
|
||||
cipher_TLS_ECDH_ECDSA_WITH_CAMELLIA_256_GCM_SHA384 uint16 = 0xC089
|
||||
cipher_TLS_ECDHE_RSA_WITH_CAMELLIA_128_GCM_SHA256 uint16 = 0xC08A
|
||||
cipher_TLS_ECDHE_RSA_WITH_CAMELLIA_256_GCM_SHA384 uint16 = 0xC08B
|
||||
cipher_TLS_ECDH_RSA_WITH_CAMELLIA_128_GCM_SHA256 uint16 = 0xC08C
|
||||
cipher_TLS_ECDH_RSA_WITH_CAMELLIA_256_GCM_SHA384 uint16 = 0xC08D
|
||||
cipher_TLS_PSK_WITH_CAMELLIA_128_GCM_SHA256 uint16 = 0xC08E
|
||||
cipher_TLS_PSK_WITH_CAMELLIA_256_GCM_SHA384 uint16 = 0xC08F
|
||||
cipher_TLS_DHE_PSK_WITH_CAMELLIA_128_GCM_SHA256 uint16 = 0xC090
|
||||
cipher_TLS_DHE_PSK_WITH_CAMELLIA_256_GCM_SHA384 uint16 = 0xC091
|
||||
cipher_TLS_RSA_PSK_WITH_CAMELLIA_128_GCM_SHA256 uint16 = 0xC092
|
||||
cipher_TLS_RSA_PSK_WITH_CAMELLIA_256_GCM_SHA384 uint16 = 0xC093
|
||||
cipher_TLS_PSK_WITH_CAMELLIA_128_CBC_SHA256 uint16 = 0xC094
|
||||
cipher_TLS_PSK_WITH_CAMELLIA_256_CBC_SHA384 uint16 = 0xC095
|
||||
cipher_TLS_DHE_PSK_WITH_CAMELLIA_128_CBC_SHA256 uint16 = 0xC096
|
||||
cipher_TLS_DHE_PSK_WITH_CAMELLIA_256_CBC_SHA384 uint16 = 0xC097
|
||||
cipher_TLS_RSA_PSK_WITH_CAMELLIA_128_CBC_SHA256 uint16 = 0xC098
|
||||
cipher_TLS_RSA_PSK_WITH_CAMELLIA_256_CBC_SHA384 uint16 = 0xC099
|
||||
cipher_TLS_ECDHE_PSK_WITH_CAMELLIA_128_CBC_SHA256 uint16 = 0xC09A
|
||||
cipher_TLS_ECDHE_PSK_WITH_CAMELLIA_256_CBC_SHA384 uint16 = 0xC09B
|
||||
cipher_TLS_RSA_WITH_AES_128_CCM uint16 = 0xC09C
|
||||
cipher_TLS_RSA_WITH_AES_256_CCM uint16 = 0xC09D
|
||||
cipher_TLS_DHE_RSA_WITH_AES_128_CCM uint16 = 0xC09E
|
||||
cipher_TLS_DHE_RSA_WITH_AES_256_CCM uint16 = 0xC09F
|
||||
cipher_TLS_RSA_WITH_AES_128_CCM_8 uint16 = 0xC0A0
|
||||
cipher_TLS_RSA_WITH_AES_256_CCM_8 uint16 = 0xC0A1
|
||||
cipher_TLS_DHE_RSA_WITH_AES_128_CCM_8 uint16 = 0xC0A2
|
||||
cipher_TLS_DHE_RSA_WITH_AES_256_CCM_8 uint16 = 0xC0A3
|
||||
cipher_TLS_PSK_WITH_AES_128_CCM uint16 = 0xC0A4
|
||||
cipher_TLS_PSK_WITH_AES_256_CCM uint16 = 0xC0A5
|
||||
cipher_TLS_DHE_PSK_WITH_AES_128_CCM uint16 = 0xC0A6
|
||||
cipher_TLS_DHE_PSK_WITH_AES_256_CCM uint16 = 0xC0A7
|
||||
cipher_TLS_PSK_WITH_AES_128_CCM_8 uint16 = 0xC0A8
|
||||
cipher_TLS_PSK_WITH_AES_256_CCM_8 uint16 = 0xC0A9
|
||||
cipher_TLS_PSK_DHE_WITH_AES_128_CCM_8 uint16 = 0xC0AA
|
||||
cipher_TLS_PSK_DHE_WITH_AES_256_CCM_8 uint16 = 0xC0AB
|
||||
cipher_TLS_ECDHE_ECDSA_WITH_AES_128_CCM uint16 = 0xC0AC
|
||||
cipher_TLS_ECDHE_ECDSA_WITH_AES_256_CCM uint16 = 0xC0AD
|
||||
cipher_TLS_ECDHE_ECDSA_WITH_AES_128_CCM_8 uint16 = 0xC0AE
|
||||
cipher_TLS_ECDHE_ECDSA_WITH_AES_256_CCM_8 uint16 = 0xC0AF
|
||||
// Unassigned uint16 = 0xC0B0-FF
|
||||
// Unassigned uint16 = 0xC1-CB,*
|
||||
// Unassigned uint16 = 0xCC00-A7
|
||||
cipher_TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305_SHA256 uint16 = 0xCCA8
|
||||
cipher_TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256 uint16 = 0xCCA9
|
||||
cipher_TLS_DHE_RSA_WITH_CHACHA20_POLY1305_SHA256 uint16 = 0xCCAA
|
||||
cipher_TLS_PSK_WITH_CHACHA20_POLY1305_SHA256 uint16 = 0xCCAB
|
||||
cipher_TLS_ECDHE_PSK_WITH_CHACHA20_POLY1305_SHA256 uint16 = 0xCCAC
|
||||
cipher_TLS_DHE_PSK_WITH_CHACHA20_POLY1305_SHA256 uint16 = 0xCCAD
|
||||
cipher_TLS_RSA_PSK_WITH_CHACHA20_POLY1305_SHA256 uint16 = 0xCCAE
|
||||
)
|
||||
|
||||
// isBadCipher reports whether the cipher is blacklisted by the HTTP/2 spec.
|
||||
// References:
|
||||
// https://tools.ietf.org/html/rfc7540#appendix-A
|
||||
// Reject cipher suites from Appendix A.
|
||||
// "This list includes those cipher suites that do not
|
||||
// offer an ephemeral key exchange and those that are
|
||||
// based on the TLS null, stream or block cipher type"
|
||||
func isBadCipher(cipher uint16) bool {
|
||||
switch cipher {
|
||||
case cipher_TLS_NULL_WITH_NULL_NULL,
|
||||
cipher_TLS_RSA_WITH_NULL_MD5,
|
||||
cipher_TLS_RSA_WITH_NULL_SHA,
|
||||
cipher_TLS_RSA_EXPORT_WITH_RC4_40_MD5,
|
||||
cipher_TLS_RSA_WITH_RC4_128_MD5,
|
||||
cipher_TLS_RSA_WITH_RC4_128_SHA,
|
||||
cipher_TLS_RSA_EXPORT_WITH_RC2_CBC_40_MD5,
|
||||
cipher_TLS_RSA_WITH_IDEA_CBC_SHA,
|
||||
cipher_TLS_RSA_EXPORT_WITH_DES40_CBC_SHA,
|
||||
cipher_TLS_RSA_WITH_DES_CBC_SHA,
|
||||
cipher_TLS_RSA_WITH_3DES_EDE_CBC_SHA,
|
||||
cipher_TLS_DH_DSS_EXPORT_WITH_DES40_CBC_SHA,
|
||||
cipher_TLS_DH_DSS_WITH_DES_CBC_SHA,
|
||||
cipher_TLS_DH_DSS_WITH_3DES_EDE_CBC_SHA,
|
||||
cipher_TLS_DH_RSA_EXPORT_WITH_DES40_CBC_SHA,
|
||||
cipher_TLS_DH_RSA_WITH_DES_CBC_SHA,
|
||||
cipher_TLS_DH_RSA_WITH_3DES_EDE_CBC_SHA,
|
||||
cipher_TLS_DHE_DSS_EXPORT_WITH_DES40_CBC_SHA,
|
||||
cipher_TLS_DHE_DSS_WITH_DES_CBC_SHA,
|
||||
cipher_TLS_DHE_DSS_WITH_3DES_EDE_CBC_SHA,
|
||||
cipher_TLS_DHE_RSA_EXPORT_WITH_DES40_CBC_SHA,
|
||||
cipher_TLS_DHE_RSA_WITH_DES_CBC_SHA,
|
||||
cipher_TLS_DHE_RSA_WITH_3DES_EDE_CBC_SHA,
|
||||
cipher_TLS_DH_anon_EXPORT_WITH_RC4_40_MD5,
|
||||
cipher_TLS_DH_anon_WITH_RC4_128_MD5,
|
||||
cipher_TLS_DH_anon_EXPORT_WITH_DES40_CBC_SHA,
|
||||
cipher_TLS_DH_anon_WITH_DES_CBC_SHA,
|
||||
cipher_TLS_DH_anon_WITH_3DES_EDE_CBC_SHA,
|
||||
cipher_TLS_KRB5_WITH_DES_CBC_SHA,
|
||||
cipher_TLS_KRB5_WITH_3DES_EDE_CBC_SHA,
|
||||
cipher_TLS_KRB5_WITH_RC4_128_SHA,
|
||||
cipher_TLS_KRB5_WITH_IDEA_CBC_SHA,
|
||||
cipher_TLS_KRB5_WITH_DES_CBC_MD5,
|
||||
cipher_TLS_KRB5_WITH_3DES_EDE_CBC_MD5,
|
||||
cipher_TLS_KRB5_WITH_RC4_128_MD5,
|
||||
cipher_TLS_KRB5_WITH_IDEA_CBC_MD5,
|
||||
cipher_TLS_KRB5_EXPORT_WITH_DES_CBC_40_SHA,
|
||||
cipher_TLS_KRB5_EXPORT_WITH_RC2_CBC_40_SHA,
|
||||
cipher_TLS_KRB5_EXPORT_WITH_RC4_40_SHA,
|
||||
cipher_TLS_KRB5_EXPORT_WITH_DES_CBC_40_MD5,
|
||||
cipher_TLS_KRB5_EXPORT_WITH_RC2_CBC_40_MD5,
|
||||
cipher_TLS_KRB5_EXPORT_WITH_RC4_40_MD5,
|
||||
cipher_TLS_PSK_WITH_NULL_SHA,
|
||||
cipher_TLS_DHE_PSK_WITH_NULL_SHA,
|
||||
cipher_TLS_RSA_PSK_WITH_NULL_SHA,
|
||||
cipher_TLS_RSA_WITH_AES_128_CBC_SHA,
|
||||
cipher_TLS_DH_DSS_WITH_AES_128_CBC_SHA,
|
||||
cipher_TLS_DH_RSA_WITH_AES_128_CBC_SHA,
|
||||
cipher_TLS_DHE_DSS_WITH_AES_128_CBC_SHA,
|
||||
cipher_TLS_DHE_RSA_WITH_AES_128_CBC_SHA,
|
||||
cipher_TLS_DH_anon_WITH_AES_128_CBC_SHA,
|
||||
cipher_TLS_RSA_WITH_AES_256_CBC_SHA,
|
||||
cipher_TLS_DH_DSS_WITH_AES_256_CBC_SHA,
|
||||
cipher_TLS_DH_RSA_WITH_AES_256_CBC_SHA,
|
||||
cipher_TLS_DHE_DSS_WITH_AES_256_CBC_SHA,
|
||||
cipher_TLS_DHE_RSA_WITH_AES_256_CBC_SHA,
|
||||
cipher_TLS_DH_anon_WITH_AES_256_CBC_SHA,
|
||||
cipher_TLS_RSA_WITH_NULL_SHA256,
|
||||
cipher_TLS_RSA_WITH_AES_128_CBC_SHA256,
|
||||
cipher_TLS_RSA_WITH_AES_256_CBC_SHA256,
|
||||
cipher_TLS_DH_DSS_WITH_AES_128_CBC_SHA256,
|
||||
cipher_TLS_DH_RSA_WITH_AES_128_CBC_SHA256,
|
||||
cipher_TLS_DHE_DSS_WITH_AES_128_CBC_SHA256,
|
||||
cipher_TLS_RSA_WITH_CAMELLIA_128_CBC_SHA,
|
||||
cipher_TLS_DH_DSS_WITH_CAMELLIA_128_CBC_SHA,
|
||||
cipher_TLS_DH_RSA_WITH_CAMELLIA_128_CBC_SHA,
|
||||
cipher_TLS_DHE_DSS_WITH_CAMELLIA_128_CBC_SHA,
|
||||
cipher_TLS_DHE_RSA_WITH_CAMELLIA_128_CBC_SHA,
|
||||
cipher_TLS_DH_anon_WITH_CAMELLIA_128_CBC_SHA,
|
||||
cipher_TLS_DHE_RSA_WITH_AES_128_CBC_SHA256,
|
||||
cipher_TLS_DH_DSS_WITH_AES_256_CBC_SHA256,
|
||||
cipher_TLS_DH_RSA_WITH_AES_256_CBC_SHA256,
|
||||
cipher_TLS_DHE_DSS_WITH_AES_256_CBC_SHA256,
|
||||
cipher_TLS_DHE_RSA_WITH_AES_256_CBC_SHA256,
|
||||
cipher_TLS_DH_anon_WITH_AES_128_CBC_SHA256,
|
||||
cipher_TLS_DH_anon_WITH_AES_256_CBC_SHA256,
|
||||
cipher_TLS_RSA_WITH_CAMELLIA_256_CBC_SHA,
|
||||
cipher_TLS_DH_DSS_WITH_CAMELLIA_256_CBC_SHA,
|
||||
cipher_TLS_DH_RSA_WITH_CAMELLIA_256_CBC_SHA,
|
||||
cipher_TLS_DHE_DSS_WITH_CAMELLIA_256_CBC_SHA,
|
||||
cipher_TLS_DHE_RSA_WITH_CAMELLIA_256_CBC_SHA,
|
||||
cipher_TLS_DH_anon_WITH_CAMELLIA_256_CBC_SHA,
|
||||
cipher_TLS_PSK_WITH_RC4_128_SHA,
|
||||
cipher_TLS_PSK_WITH_3DES_EDE_CBC_SHA,
|
||||
cipher_TLS_PSK_WITH_AES_128_CBC_SHA,
|
||||
cipher_TLS_PSK_WITH_AES_256_CBC_SHA,
|
||||
cipher_TLS_DHE_PSK_WITH_RC4_128_SHA,
|
||||
cipher_TLS_DHE_PSK_WITH_3DES_EDE_CBC_SHA,
|
||||
cipher_TLS_DHE_PSK_WITH_AES_128_CBC_SHA,
|
||||
cipher_TLS_DHE_PSK_WITH_AES_256_CBC_SHA,
|
||||
cipher_TLS_RSA_PSK_WITH_RC4_128_SHA,
|
||||
cipher_TLS_RSA_PSK_WITH_3DES_EDE_CBC_SHA,
|
||||
cipher_TLS_RSA_PSK_WITH_AES_128_CBC_SHA,
|
||||
cipher_TLS_RSA_PSK_WITH_AES_256_CBC_SHA,
|
||||
cipher_TLS_RSA_WITH_SEED_CBC_SHA,
|
||||
cipher_TLS_DH_DSS_WITH_SEED_CBC_SHA,
|
||||
cipher_TLS_DH_RSA_WITH_SEED_CBC_SHA,
|
||||
cipher_TLS_DHE_DSS_WITH_SEED_CBC_SHA,
|
||||
cipher_TLS_DHE_RSA_WITH_SEED_CBC_SHA,
|
||||
cipher_TLS_DH_anon_WITH_SEED_CBC_SHA,
|
||||
cipher_TLS_RSA_WITH_AES_128_GCM_SHA256,
|
||||
cipher_TLS_RSA_WITH_AES_256_GCM_SHA384,
|
||||
cipher_TLS_DH_RSA_WITH_AES_128_GCM_SHA256,
|
||||
cipher_TLS_DH_RSA_WITH_AES_256_GCM_SHA384,
|
||||
cipher_TLS_DH_DSS_WITH_AES_128_GCM_SHA256,
|
||||
cipher_TLS_DH_DSS_WITH_AES_256_GCM_SHA384,
|
||||
cipher_TLS_DH_anon_WITH_AES_128_GCM_SHA256,
|
||||
cipher_TLS_DH_anon_WITH_AES_256_GCM_SHA384,
|
||||
cipher_TLS_PSK_WITH_AES_128_GCM_SHA256,
|
||||
cipher_TLS_PSK_WITH_AES_256_GCM_SHA384,
|
||||
cipher_TLS_RSA_PSK_WITH_AES_128_GCM_SHA256,
|
||||
cipher_TLS_RSA_PSK_WITH_AES_256_GCM_SHA384,
|
||||
cipher_TLS_PSK_WITH_AES_128_CBC_SHA256,
|
||||
cipher_TLS_PSK_WITH_AES_256_CBC_SHA384,
|
||||
cipher_TLS_PSK_WITH_NULL_SHA256,
|
||||
cipher_TLS_PSK_WITH_NULL_SHA384,
|
||||
cipher_TLS_DHE_PSK_WITH_AES_128_CBC_SHA256,
|
||||
cipher_TLS_DHE_PSK_WITH_AES_256_CBC_SHA384,
|
||||
cipher_TLS_DHE_PSK_WITH_NULL_SHA256,
|
||||
cipher_TLS_DHE_PSK_WITH_NULL_SHA384,
|
||||
cipher_TLS_RSA_PSK_WITH_AES_128_CBC_SHA256,
|
||||
cipher_TLS_RSA_PSK_WITH_AES_256_CBC_SHA384,
|
||||
cipher_TLS_RSA_PSK_WITH_NULL_SHA256,
|
||||
cipher_TLS_RSA_PSK_WITH_NULL_SHA384,
|
||||
cipher_TLS_RSA_WITH_CAMELLIA_128_CBC_SHA256,
|
||||
cipher_TLS_DH_DSS_WITH_CAMELLIA_128_CBC_SHA256,
|
||||
cipher_TLS_DH_RSA_WITH_CAMELLIA_128_CBC_SHA256,
|
||||
cipher_TLS_DHE_DSS_WITH_CAMELLIA_128_CBC_SHA256,
|
||||
cipher_TLS_DHE_RSA_WITH_CAMELLIA_128_CBC_SHA256,
|
||||
cipher_TLS_DH_anon_WITH_CAMELLIA_128_CBC_SHA256,
|
||||
cipher_TLS_RSA_WITH_CAMELLIA_256_CBC_SHA256,
|
||||
cipher_TLS_DH_DSS_WITH_CAMELLIA_256_CBC_SHA256,
|
||||
cipher_TLS_DH_RSA_WITH_CAMELLIA_256_CBC_SHA256,
|
||||
cipher_TLS_DHE_DSS_WITH_CAMELLIA_256_CBC_SHA256,
|
||||
cipher_TLS_DHE_RSA_WITH_CAMELLIA_256_CBC_SHA256,
|
||||
cipher_TLS_DH_anon_WITH_CAMELLIA_256_CBC_SHA256,
|
||||
cipher_TLS_EMPTY_RENEGOTIATION_INFO_SCSV,
|
||||
cipher_TLS_ECDH_ECDSA_WITH_NULL_SHA,
|
||||
cipher_TLS_ECDH_ECDSA_WITH_RC4_128_SHA,
|
||||
cipher_TLS_ECDH_ECDSA_WITH_3DES_EDE_CBC_SHA,
|
||||
cipher_TLS_ECDH_ECDSA_WITH_AES_128_CBC_SHA,
|
||||
cipher_TLS_ECDH_ECDSA_WITH_AES_256_CBC_SHA,
|
||||
cipher_TLS_ECDHE_ECDSA_WITH_NULL_SHA,
|
||||
cipher_TLS_ECDHE_ECDSA_WITH_RC4_128_SHA,
|
||||
cipher_TLS_ECDHE_ECDSA_WITH_3DES_EDE_CBC_SHA,
|
||||
cipher_TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA,
|
||||
cipher_TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA,
|
||||
cipher_TLS_ECDH_RSA_WITH_NULL_SHA,
|
||||
cipher_TLS_ECDH_RSA_WITH_RC4_128_SHA,
|
||||
cipher_TLS_ECDH_RSA_WITH_3DES_EDE_CBC_SHA,
|
||||
cipher_TLS_ECDH_RSA_WITH_AES_128_CBC_SHA,
|
||||
cipher_TLS_ECDH_RSA_WITH_AES_256_CBC_SHA,
|
||||
cipher_TLS_ECDHE_RSA_WITH_NULL_SHA,
|
||||
cipher_TLS_ECDHE_RSA_WITH_RC4_128_SHA,
|
||||
cipher_TLS_ECDHE_RSA_WITH_3DES_EDE_CBC_SHA,
|
||||
cipher_TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA,
|
||||
cipher_TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA,
|
||||
cipher_TLS_ECDH_anon_WITH_NULL_SHA,
|
||||
cipher_TLS_ECDH_anon_WITH_RC4_128_SHA,
|
||||
cipher_TLS_ECDH_anon_WITH_3DES_EDE_CBC_SHA,
|
||||
cipher_TLS_ECDH_anon_WITH_AES_128_CBC_SHA,
|
||||
cipher_TLS_ECDH_anon_WITH_AES_256_CBC_SHA,
|
||||
cipher_TLS_SRP_SHA_WITH_3DES_EDE_CBC_SHA,
|
||||
cipher_TLS_SRP_SHA_RSA_WITH_3DES_EDE_CBC_SHA,
|
||||
cipher_TLS_SRP_SHA_DSS_WITH_3DES_EDE_CBC_SHA,
|
||||
cipher_TLS_SRP_SHA_WITH_AES_128_CBC_SHA,
|
||||
cipher_TLS_SRP_SHA_RSA_WITH_AES_128_CBC_SHA,
|
||||
cipher_TLS_SRP_SHA_DSS_WITH_AES_128_CBC_SHA,
|
||||
cipher_TLS_SRP_SHA_WITH_AES_256_CBC_SHA,
|
||||
cipher_TLS_SRP_SHA_RSA_WITH_AES_256_CBC_SHA,
|
||||
cipher_TLS_SRP_SHA_DSS_WITH_AES_256_CBC_SHA,
|
||||
cipher_TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,
|
||||
cipher_TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA384,
|
||||
cipher_TLS_ECDH_ECDSA_WITH_AES_128_CBC_SHA256,
|
||||
cipher_TLS_ECDH_ECDSA_WITH_AES_256_CBC_SHA384,
|
||||
cipher_TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256,
|
||||
cipher_TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA384,
|
||||
cipher_TLS_ECDH_RSA_WITH_AES_128_CBC_SHA256,
|
||||
cipher_TLS_ECDH_RSA_WITH_AES_256_CBC_SHA384,
|
||||
cipher_TLS_ECDH_ECDSA_WITH_AES_128_GCM_SHA256,
|
||||
cipher_TLS_ECDH_ECDSA_WITH_AES_256_GCM_SHA384,
|
||||
cipher_TLS_ECDH_RSA_WITH_AES_128_GCM_SHA256,
|
||||
cipher_TLS_ECDH_RSA_WITH_AES_256_GCM_SHA384,
|
||||
cipher_TLS_ECDHE_PSK_WITH_RC4_128_SHA,
|
||||
cipher_TLS_ECDHE_PSK_WITH_3DES_EDE_CBC_SHA,
|
||||
cipher_TLS_ECDHE_PSK_WITH_AES_128_CBC_SHA,
|
||||
cipher_TLS_ECDHE_PSK_WITH_AES_256_CBC_SHA,
|
||||
cipher_TLS_ECDHE_PSK_WITH_AES_128_CBC_SHA256,
|
||||
cipher_TLS_ECDHE_PSK_WITH_AES_256_CBC_SHA384,
|
||||
cipher_TLS_ECDHE_PSK_WITH_NULL_SHA,
|
||||
cipher_TLS_ECDHE_PSK_WITH_NULL_SHA256,
|
||||
cipher_TLS_ECDHE_PSK_WITH_NULL_SHA384,
|
||||
cipher_TLS_RSA_WITH_ARIA_128_CBC_SHA256,
|
||||
cipher_TLS_RSA_WITH_ARIA_256_CBC_SHA384,
|
||||
cipher_TLS_DH_DSS_WITH_ARIA_128_CBC_SHA256,
|
||||
cipher_TLS_DH_DSS_WITH_ARIA_256_CBC_SHA384,
|
||||
cipher_TLS_DH_RSA_WITH_ARIA_128_CBC_SHA256,
|
||||
cipher_TLS_DH_RSA_WITH_ARIA_256_CBC_SHA384,
|
||||
cipher_TLS_DHE_DSS_WITH_ARIA_128_CBC_SHA256,
|
||||
cipher_TLS_DHE_DSS_WITH_ARIA_256_CBC_SHA384,
|
||||
cipher_TLS_DHE_RSA_WITH_ARIA_128_CBC_SHA256,
|
||||
cipher_TLS_DHE_RSA_WITH_ARIA_256_CBC_SHA384,
|
||||
cipher_TLS_DH_anon_WITH_ARIA_128_CBC_SHA256,
|
||||
cipher_TLS_DH_anon_WITH_ARIA_256_CBC_SHA384,
|
||||
cipher_TLS_ECDHE_ECDSA_WITH_ARIA_128_CBC_SHA256,
|
||||
cipher_TLS_ECDHE_ECDSA_WITH_ARIA_256_CBC_SHA384,
|
||||
cipher_TLS_ECDH_ECDSA_WITH_ARIA_128_CBC_SHA256,
|
||||
cipher_TLS_ECDH_ECDSA_WITH_ARIA_256_CBC_SHA384,
|
||||
cipher_TLS_ECDHE_RSA_WITH_ARIA_128_CBC_SHA256,
|
||||
cipher_TLS_ECDHE_RSA_WITH_ARIA_256_CBC_SHA384,
|
||||
cipher_TLS_ECDH_RSA_WITH_ARIA_128_CBC_SHA256,
|
||||
cipher_TLS_ECDH_RSA_WITH_ARIA_256_CBC_SHA384,
|
||||
cipher_TLS_RSA_WITH_ARIA_128_GCM_SHA256,
|
||||
cipher_TLS_RSA_WITH_ARIA_256_GCM_SHA384,
|
||||
cipher_TLS_DH_RSA_WITH_ARIA_128_GCM_SHA256,
|
||||
cipher_TLS_DH_RSA_WITH_ARIA_256_GCM_SHA384,
|
||||
cipher_TLS_DH_DSS_WITH_ARIA_128_GCM_SHA256,
|
||||
cipher_TLS_DH_DSS_WITH_ARIA_256_GCM_SHA384,
|
||||
cipher_TLS_DH_anon_WITH_ARIA_128_GCM_SHA256,
|
||||
cipher_TLS_DH_anon_WITH_ARIA_256_GCM_SHA384,
|
||||
cipher_TLS_ECDH_ECDSA_WITH_ARIA_128_GCM_SHA256,
|
||||
cipher_TLS_ECDH_ECDSA_WITH_ARIA_256_GCM_SHA384,
|
||||
cipher_TLS_ECDH_RSA_WITH_ARIA_128_GCM_SHA256,
|
||||
cipher_TLS_ECDH_RSA_WITH_ARIA_256_GCM_SHA384,
|
||||
cipher_TLS_PSK_WITH_ARIA_128_CBC_SHA256,
|
||||
cipher_TLS_PSK_WITH_ARIA_256_CBC_SHA384,
|
||||
cipher_TLS_DHE_PSK_WITH_ARIA_128_CBC_SHA256,
|
||||
cipher_TLS_DHE_PSK_WITH_ARIA_256_CBC_SHA384,
|
||||
cipher_TLS_RSA_PSK_WITH_ARIA_128_CBC_SHA256,
|
||||
cipher_TLS_RSA_PSK_WITH_ARIA_256_CBC_SHA384,
|
||||
cipher_TLS_PSK_WITH_ARIA_128_GCM_SHA256,
|
||||
cipher_TLS_PSK_WITH_ARIA_256_GCM_SHA384,
|
||||
cipher_TLS_RSA_PSK_WITH_ARIA_128_GCM_SHA256,
|
||||
cipher_TLS_RSA_PSK_WITH_ARIA_256_GCM_SHA384,
|
||||
cipher_TLS_ECDHE_PSK_WITH_ARIA_128_CBC_SHA256,
|
||||
cipher_TLS_ECDHE_PSK_WITH_ARIA_256_CBC_SHA384,
|
||||
cipher_TLS_ECDHE_ECDSA_WITH_CAMELLIA_128_CBC_SHA256,
|
||||
cipher_TLS_ECDHE_ECDSA_WITH_CAMELLIA_256_CBC_SHA384,
|
||||
cipher_TLS_ECDH_ECDSA_WITH_CAMELLIA_128_CBC_SHA256,
|
||||
cipher_TLS_ECDH_ECDSA_WITH_CAMELLIA_256_CBC_SHA384,
|
||||
cipher_TLS_ECDHE_RSA_WITH_CAMELLIA_128_CBC_SHA256,
|
||||
cipher_TLS_ECDHE_RSA_WITH_CAMELLIA_256_CBC_SHA384,
|
||||
cipher_TLS_ECDH_RSA_WITH_CAMELLIA_128_CBC_SHA256,
|
||||
cipher_TLS_ECDH_RSA_WITH_CAMELLIA_256_CBC_SHA384,
|
||||
cipher_TLS_RSA_WITH_CAMELLIA_128_GCM_SHA256,
|
||||
cipher_TLS_RSA_WITH_CAMELLIA_256_GCM_SHA384,
|
||||
cipher_TLS_DH_RSA_WITH_CAMELLIA_128_GCM_SHA256,
|
||||
cipher_TLS_DH_RSA_WITH_CAMELLIA_256_GCM_SHA384,
|
||||
cipher_TLS_DH_DSS_WITH_CAMELLIA_128_GCM_SHA256,
|
||||
cipher_TLS_DH_DSS_WITH_CAMELLIA_256_GCM_SHA384,
|
||||
cipher_TLS_DH_anon_WITH_CAMELLIA_128_GCM_SHA256,
|
||||
cipher_TLS_DH_anon_WITH_CAMELLIA_256_GCM_SHA384,
|
||||
cipher_TLS_ECDH_ECDSA_WITH_CAMELLIA_128_GCM_SHA256,
|
||||
cipher_TLS_ECDH_ECDSA_WITH_CAMELLIA_256_GCM_SHA384,
|
||||
cipher_TLS_ECDH_RSA_WITH_CAMELLIA_128_GCM_SHA256,
|
||||
cipher_TLS_ECDH_RSA_WITH_CAMELLIA_256_GCM_SHA384,
|
||||
cipher_TLS_PSK_WITH_CAMELLIA_128_GCM_SHA256,
|
||||
cipher_TLS_PSK_WITH_CAMELLIA_256_GCM_SHA384,
|
||||
cipher_TLS_RSA_PSK_WITH_CAMELLIA_128_GCM_SHA256,
|
||||
cipher_TLS_RSA_PSK_WITH_CAMELLIA_256_GCM_SHA384,
|
||||
cipher_TLS_PSK_WITH_CAMELLIA_128_CBC_SHA256,
|
||||
cipher_TLS_PSK_WITH_CAMELLIA_256_CBC_SHA384,
|
||||
cipher_TLS_DHE_PSK_WITH_CAMELLIA_128_CBC_SHA256,
|
||||
cipher_TLS_DHE_PSK_WITH_CAMELLIA_256_CBC_SHA384,
|
||||
cipher_TLS_RSA_PSK_WITH_CAMELLIA_128_CBC_SHA256,
|
||||
cipher_TLS_RSA_PSK_WITH_CAMELLIA_256_CBC_SHA384,
|
||||
cipher_TLS_ECDHE_PSK_WITH_CAMELLIA_128_CBC_SHA256,
|
||||
cipher_TLS_ECDHE_PSK_WITH_CAMELLIA_256_CBC_SHA384,
|
||||
cipher_TLS_RSA_WITH_AES_128_CCM,
|
||||
cipher_TLS_RSA_WITH_AES_256_CCM,
|
||||
cipher_TLS_RSA_WITH_AES_128_CCM_8,
|
||||
cipher_TLS_RSA_WITH_AES_256_CCM_8,
|
||||
cipher_TLS_PSK_WITH_AES_128_CCM,
|
||||
cipher_TLS_PSK_WITH_AES_256_CCM,
|
||||
cipher_TLS_PSK_WITH_AES_128_CCM_8,
|
||||
cipher_TLS_PSK_WITH_AES_256_CCM_8:
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
|
@ -0,0 +1,256 @@
|
|||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Transport code's client connection pooling.
|
||||
|
||||
package http2
|
||||
|
||||
import (
|
||||
"crypto/tls"
|
||||
"net/http"
|
||||
"sync"
|
||||
)
|
||||
|
||||
// ClientConnPool manages a pool of HTTP/2 client connections.
|
||||
type ClientConnPool interface {
|
||||
GetClientConn(req *http.Request, addr string) (*ClientConn, error)
|
||||
MarkDead(*ClientConn)
|
||||
}
|
||||
|
||||
// clientConnPoolIdleCloser is the interface implemented by ClientConnPool
|
||||
// implementations which can close their idle connections.
|
||||
type clientConnPoolIdleCloser interface {
|
||||
ClientConnPool
|
||||
closeIdleConnections()
|
||||
}
|
||||
|
||||
var (
|
||||
_ clientConnPoolIdleCloser = (*clientConnPool)(nil)
|
||||
_ clientConnPoolIdleCloser = noDialClientConnPool{}
|
||||
)
|
||||
|
||||
// TODO: use singleflight for dialing and addConnCalls?
|
||||
type clientConnPool struct {
|
||||
t *Transport
|
||||
|
||||
mu sync.Mutex // TODO: maybe switch to RWMutex
|
||||
// TODO: add support for sharing conns based on cert names
|
||||
// (e.g. share conn for googleapis.com and appspot.com)
|
||||
conns map[string][]*ClientConn // key is host:port
|
||||
dialing map[string]*dialCall // currently in-flight dials
|
||||
keys map[*ClientConn][]string
|
||||
addConnCalls map[string]*addConnCall // in-flight addConnIfNeede calls
|
||||
}
|
||||
|
||||
func (p *clientConnPool) GetClientConn(req *http.Request, addr string) (*ClientConn, error) {
|
||||
return p.getClientConn(req, addr, dialOnMiss)
|
||||
}
|
||||
|
||||
const (
|
||||
dialOnMiss = true
|
||||
noDialOnMiss = false
|
||||
)
|
||||
|
||||
func (p *clientConnPool) getClientConn(req *http.Request, addr string, dialOnMiss bool) (*ClientConn, error) {
|
||||
if isConnectionCloseRequest(req) && dialOnMiss {
|
||||
// It gets its own connection.
|
||||
const singleUse = true
|
||||
cc, err := p.t.dialClientConn(addr, singleUse)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return cc, nil
|
||||
}
|
||||
p.mu.Lock()
|
||||
for _, cc := range p.conns[addr] {
|
||||
if cc.CanTakeNewRequest() {
|
||||
p.mu.Unlock()
|
||||
return cc, nil
|
||||
}
|
||||
}
|
||||
if !dialOnMiss {
|
||||
p.mu.Unlock()
|
||||
return nil, ErrNoCachedConn
|
||||
}
|
||||
call := p.getStartDialLocked(addr)
|
||||
p.mu.Unlock()
|
||||
<-call.done
|
||||
return call.res, call.err
|
||||
}
|
||||
|
||||
// dialCall is an in-flight Transport dial call to a host.
|
||||
type dialCall struct {
|
||||
p *clientConnPool
|
||||
done chan struct{} // closed when done
|
||||
res *ClientConn // valid after done is closed
|
||||
err error // valid after done is closed
|
||||
}
|
||||
|
||||
// requires p.mu is held.
|
||||
func (p *clientConnPool) getStartDialLocked(addr string) *dialCall {
|
||||
if call, ok := p.dialing[addr]; ok {
|
||||
// A dial is already in-flight. Don't start another.
|
||||
return call
|
||||
}
|
||||
call := &dialCall{p: p, done: make(chan struct{})}
|
||||
if p.dialing == nil {
|
||||
p.dialing = make(map[string]*dialCall)
|
||||
}
|
||||
p.dialing[addr] = call
|
||||
go call.dial(addr)
|
||||
return call
|
||||
}
|
||||
|
||||
// run in its own goroutine.
|
||||
func (c *dialCall) dial(addr string) {
|
||||
const singleUse = false // shared conn
|
||||
c.res, c.err = c.p.t.dialClientConn(addr, singleUse)
|
||||
close(c.done)
|
||||
|
||||
c.p.mu.Lock()
|
||||
delete(c.p.dialing, addr)
|
||||
if c.err == nil {
|
||||
c.p.addConnLocked(addr, c.res)
|
||||
}
|
||||
c.p.mu.Unlock()
|
||||
}
|
||||
|
||||
// addConnIfNeeded makes a NewClientConn out of c if a connection for key doesn't
|
||||
// already exist. It coalesces concurrent calls with the same key.
|
||||
// This is used by the http1 Transport code when it creates a new connection. Because
|
||||
// the http1 Transport doesn't de-dup TCP dials to outbound hosts (because it doesn't know
|
||||
// the protocol), it can get into a situation where it has multiple TLS connections.
|
||||
// This code decides which ones live or die.
|
||||
// The return value used is whether c was used.
|
||||
// c is never closed.
|
||||
func (p *clientConnPool) addConnIfNeeded(key string, t *Transport, c *tls.Conn) (used bool, err error) {
|
||||
p.mu.Lock()
|
||||
for _, cc := range p.conns[key] {
|
||||
if cc.CanTakeNewRequest() {
|
||||
p.mu.Unlock()
|
||||
return false, nil
|
||||
}
|
||||
}
|
||||
call, dup := p.addConnCalls[key]
|
||||
if !dup {
|
||||
if p.addConnCalls == nil {
|
||||
p.addConnCalls = make(map[string]*addConnCall)
|
||||
}
|
||||
call = &addConnCall{
|
||||
p: p,
|
||||
done: make(chan struct{}),
|
||||
}
|
||||
p.addConnCalls[key] = call
|
||||
go call.run(t, key, c)
|
||||
}
|
||||
p.mu.Unlock()
|
||||
|
||||
<-call.done
|
||||
if call.err != nil {
|
||||
return false, call.err
|
||||
}
|
||||
return !dup, nil
|
||||
}
|
||||
|
||||
type addConnCall struct {
|
||||
p *clientConnPool
|
||||
done chan struct{} // closed when done
|
||||
err error
|
||||
}
|
||||
|
||||
func (c *addConnCall) run(t *Transport, key string, tc *tls.Conn) {
|
||||
cc, err := t.NewClientConn(tc)
|
||||
|
||||
p := c.p
|
||||
p.mu.Lock()
|
||||
if err != nil {
|
||||
c.err = err
|
||||
} else {
|
||||
p.addConnLocked(key, cc)
|
||||
}
|
||||
delete(p.addConnCalls, key)
|
||||
p.mu.Unlock()
|
||||
close(c.done)
|
||||
}
|
||||
|
||||
func (p *clientConnPool) addConn(key string, cc *ClientConn) {
|
||||
p.mu.Lock()
|
||||
p.addConnLocked(key, cc)
|
||||
p.mu.Unlock()
|
||||
}
|
||||
|
||||
// p.mu must be held
|
||||
func (p *clientConnPool) addConnLocked(key string, cc *ClientConn) {
|
||||
for _, v := range p.conns[key] {
|
||||
if v == cc {
|
||||
return
|
||||
}
|
||||
}
|
||||
if p.conns == nil {
|
||||
p.conns = make(map[string][]*ClientConn)
|
||||
}
|
||||
if p.keys == nil {
|
||||
p.keys = make(map[*ClientConn][]string)
|
||||
}
|
||||
p.conns[key] = append(p.conns[key], cc)
|
||||
p.keys[cc] = append(p.keys[cc], key)
|
||||
}
|
||||
|
||||
func (p *clientConnPool) MarkDead(cc *ClientConn) {
|
||||
p.mu.Lock()
|
||||
defer p.mu.Unlock()
|
||||
for _, key := range p.keys[cc] {
|
||||
vv, ok := p.conns[key]
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
newList := filterOutClientConn(vv, cc)
|
||||
if len(newList) > 0 {
|
||||
p.conns[key] = newList
|
||||
} else {
|
||||
delete(p.conns, key)
|
||||
}
|
||||
}
|
||||
delete(p.keys, cc)
|
||||
}
|
||||
|
||||
func (p *clientConnPool) closeIdleConnections() {
|
||||
p.mu.Lock()
|
||||
defer p.mu.Unlock()
|
||||
// TODO: don't close a cc if it was just added to the pool
|
||||
// milliseconds ago and has never been used. There's currently
|
||||
// a small race window with the HTTP/1 Transport's integration
|
||||
// where it can add an idle conn just before using it, and
|
||||
// somebody else can concurrently call CloseIdleConns and
|
||||
// break some caller's RoundTrip.
|
||||
for _, vv := range p.conns {
|
||||
for _, cc := range vv {
|
||||
cc.closeIfIdle()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func filterOutClientConn(in []*ClientConn, exclude *ClientConn) []*ClientConn {
|
||||
out := in[:0]
|
||||
for _, v := range in {
|
||||
if v != exclude {
|
||||
out = append(out, v)
|
||||
}
|
||||
}
|
||||
// If we filtered it out, zero out the last item to prevent
|
||||
// the GC from seeing it.
|
||||
if len(in) != len(out) {
|
||||
in[len(in)-1] = nil
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// noDialClientConnPool is an implementation of http2.ClientConnPool
|
||||
// which never dials. We let the HTTP/1.1 client dial and use its TLS
|
||||
// connection instead.
|
||||
type noDialClientConnPool struct{ *clientConnPool }
|
||||
|
||||
func (p noDialClientConnPool) GetClientConn(req *http.Request, addr string) (*ClientConn, error) {
|
||||
return p.getClientConn(req, addr, noDialOnMiss)
|
||||
}
|
|
@ -0,0 +1,80 @@
|
|||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build go1.6
|
||||
|
||||
package http2
|
||||
|
||||
import (
|
||||
"crypto/tls"
|
||||
"fmt"
|
||||
"net/http"
|
||||
)
|
||||
|
||||
func configureTransport(t1 *http.Transport) (*Transport, error) {
|
||||
connPool := new(clientConnPool)
|
||||
t2 := &Transport{
|
||||
ConnPool: noDialClientConnPool{connPool},
|
||||
t1: t1,
|
||||
}
|
||||
connPool.t = t2
|
||||
if err := registerHTTPSProtocol(t1, noDialH2RoundTripper{t2}); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if t1.TLSClientConfig == nil {
|
||||
t1.TLSClientConfig = new(tls.Config)
|
||||
}
|
||||
if !strSliceContains(t1.TLSClientConfig.NextProtos, "h2") {
|
||||
t1.TLSClientConfig.NextProtos = append([]string{"h2"}, t1.TLSClientConfig.NextProtos...)
|
||||
}
|
||||
if !strSliceContains(t1.TLSClientConfig.NextProtos, "http/1.1") {
|
||||
t1.TLSClientConfig.NextProtos = append(t1.TLSClientConfig.NextProtos, "http/1.1")
|
||||
}
|
||||
upgradeFn := func(authority string, c *tls.Conn) http.RoundTripper {
|
||||
addr := authorityAddr("https", authority)
|
||||
if used, err := connPool.addConnIfNeeded(addr, t2, c); err != nil {
|
||||
go c.Close()
|
||||
return erringRoundTripper{err}
|
||||
} else if !used {
|
||||
// Turns out we don't need this c.
|
||||
// For example, two goroutines made requests to the same host
|
||||
// at the same time, both kicking off TCP dials. (since protocol
|
||||
// was unknown)
|
||||
go c.Close()
|
||||
}
|
||||
return t2
|
||||
}
|
||||
if m := t1.TLSNextProto; len(m) == 0 {
|
||||
t1.TLSNextProto = map[string]func(string, *tls.Conn) http.RoundTripper{
|
||||
"h2": upgradeFn,
|
||||
}
|
||||
} else {
|
||||
m["h2"] = upgradeFn
|
||||
}
|
||||
return t2, nil
|
||||
}
|
||||
|
||||
// registerHTTPSProtocol calls Transport.RegisterProtocol but
|
||||
// converting panics into errors.
|
||||
func registerHTTPSProtocol(t *http.Transport, rt http.RoundTripper) (err error) {
|
||||
defer func() {
|
||||
if e := recover(); e != nil {
|
||||
err = fmt.Errorf("%v", e)
|
||||
}
|
||||
}()
|
||||
t.RegisterProtocol("https", rt)
|
||||
return nil
|
||||
}
|
||||
|
||||
// noDialH2RoundTripper is a RoundTripper which only tries to complete the request
|
||||
// if there's already has a cached connection to the host.
|
||||
type noDialH2RoundTripper struct{ t *Transport }
|
||||
|
||||
func (rt noDialH2RoundTripper) RoundTrip(req *http.Request) (*http.Response, error) {
|
||||
res, err := rt.t.RoundTrip(req)
|
||||
if err == ErrNoCachedConn {
|
||||
return nil, http.ErrSkipAltProtocol
|
||||
}
|
||||
return res, err
|
||||
}
|
|
@ -0,0 +1,146 @@
|
|||
// Copyright 2014 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package http2
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"sync"
|
||||
)
|
||||
|
||||
// Buffer chunks are allocated from a pool to reduce pressure on GC.
|
||||
// The maximum wasted space per dataBuffer is 2x the largest size class,
|
||||
// which happens when the dataBuffer has multiple chunks and there is
|
||||
// one unread byte in both the first and last chunks. We use a few size
|
||||
// classes to minimize overheads for servers that typically receive very
|
||||
// small request bodies.
|
||||
//
|
||||
// TODO: Benchmark to determine if the pools are necessary. The GC may have
|
||||
// improved enough that we can instead allocate chunks like this:
|
||||
// make([]byte, max(16<<10, expectedBytesRemaining))
|
||||
var (
|
||||
dataChunkSizeClasses = []int{
|
||||
1 << 10,
|
||||
2 << 10,
|
||||
4 << 10,
|
||||
8 << 10,
|
||||
16 << 10,
|
||||
}
|
||||
dataChunkPools = [...]sync.Pool{
|
||||
{New: func() interface{} { return make([]byte, 1<<10) }},
|
||||
{New: func() interface{} { return make([]byte, 2<<10) }},
|
||||
{New: func() interface{} { return make([]byte, 4<<10) }},
|
||||
{New: func() interface{} { return make([]byte, 8<<10) }},
|
||||
{New: func() interface{} { return make([]byte, 16<<10) }},
|
||||
}
|
||||
)
|
||||
|
||||
func getDataBufferChunk(size int64) []byte {
|
||||
i := 0
|
||||
for ; i < len(dataChunkSizeClasses)-1; i++ {
|
||||
if size <= int64(dataChunkSizeClasses[i]) {
|
||||
break
|
||||
}
|
||||
}
|
||||
return dataChunkPools[i].Get().([]byte)
|
||||
}
|
||||
|
||||
func putDataBufferChunk(p []byte) {
|
||||
for i, n := range dataChunkSizeClasses {
|
||||
if len(p) == n {
|
||||
dataChunkPools[i].Put(p)
|
||||
return
|
||||
}
|
||||
}
|
||||
panic(fmt.Sprintf("unexpected buffer len=%v", len(p)))
|
||||
}
|
||||
|
||||
// dataBuffer is an io.ReadWriter backed by a list of data chunks.
|
||||
// Each dataBuffer is used to read DATA frames on a single stream.
|
||||
// The buffer is divided into chunks so the server can limit the
|
||||
// total memory used by a single connection without limiting the
|
||||
// request body size on any single stream.
|
||||
type dataBuffer struct {
|
||||
chunks [][]byte
|
||||
r int // next byte to read is chunks[0][r]
|
||||
w int // next byte to write is chunks[len(chunks)-1][w]
|
||||
size int // total buffered bytes
|
||||
expected int64 // we expect at least this many bytes in future Write calls (ignored if <= 0)
|
||||
}
|
||||
|
||||
var errReadEmpty = errors.New("read from empty dataBuffer")
|
||||
|
||||
// Read copies bytes from the buffer into p.
|
||||
// It is an error to read when no data is available.
|
||||
func (b *dataBuffer) Read(p []byte) (int, error) {
|
||||
if b.size == 0 {
|
||||
return 0, errReadEmpty
|
||||
}
|
||||
var ntotal int
|
||||
for len(p) > 0 && b.size > 0 {
|
||||
readFrom := b.bytesFromFirstChunk()
|
||||
n := copy(p, readFrom)
|
||||
p = p[n:]
|
||||
ntotal += n
|
||||
b.r += n
|
||||
b.size -= n
|
||||
// If the first chunk has been consumed, advance to the next chunk.
|
||||
if b.r == len(b.chunks[0]) {
|
||||
putDataBufferChunk(b.chunks[0])
|
||||
end := len(b.chunks) - 1
|
||||
copy(b.chunks[:end], b.chunks[1:])
|
||||
b.chunks[end] = nil
|
||||
b.chunks = b.chunks[:end]
|
||||
b.r = 0
|
||||
}
|
||||
}
|
||||
return ntotal, nil
|
||||
}
|
||||
|
||||
func (b *dataBuffer) bytesFromFirstChunk() []byte {
|
||||
if len(b.chunks) == 1 {
|
||||
return b.chunks[0][b.r:b.w]
|
||||
}
|
||||
return b.chunks[0][b.r:]
|
||||
}
|
||||
|
||||
// Len returns the number of bytes of the unread portion of the buffer.
|
||||
func (b *dataBuffer) Len() int {
|
||||
return b.size
|
||||
}
|
||||
|
||||
// Write appends p to the buffer.
|
||||
func (b *dataBuffer) Write(p []byte) (int, error) {
|
||||
ntotal := len(p)
|
||||
for len(p) > 0 {
|
||||
// If the last chunk is empty, allocate a new chunk. Try to allocate
|
||||
// enough to fully copy p plus any additional bytes we expect to
|
||||
// receive. However, this may allocate less than len(p).
|
||||
want := int64(len(p))
|
||||
if b.expected > want {
|
||||
want = b.expected
|
||||
}
|
||||
chunk := b.lastChunkOrAlloc(want)
|
||||
n := copy(chunk[b.w:], p)
|
||||
p = p[n:]
|
||||
b.w += n
|
||||
b.size += n
|
||||
b.expected -= int64(n)
|
||||
}
|
||||
return ntotal, nil
|
||||
}
|
||||
|
||||
func (b *dataBuffer) lastChunkOrAlloc(want int64) []byte {
|
||||
if len(b.chunks) != 0 {
|
||||
last := b.chunks[len(b.chunks)-1]
|
||||
if b.w < len(last) {
|
||||
return last
|
||||
}
|
||||
}
|
||||
chunk := getDataBufferChunk(want)
|
||||
b.chunks = append(b.chunks, chunk)
|
||||
b.w = 0
|
||||
return chunk
|
||||
}
|
|
@ -0,0 +1,133 @@
|
|||
// Copyright 2014 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package http2
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
)
|
||||
|
||||
// An ErrCode is an unsigned 32-bit error code as defined in the HTTP/2 spec.
|
||||
type ErrCode uint32
|
||||
|
||||
const (
|
||||
ErrCodeNo ErrCode = 0x0
|
||||
ErrCodeProtocol ErrCode = 0x1
|
||||
ErrCodeInternal ErrCode = 0x2
|
||||
ErrCodeFlowControl ErrCode = 0x3
|
||||
ErrCodeSettingsTimeout ErrCode = 0x4
|
||||
ErrCodeStreamClosed ErrCode = 0x5
|
||||
ErrCodeFrameSize ErrCode = 0x6
|
||||
ErrCodeRefusedStream ErrCode = 0x7
|
||||
ErrCodeCancel ErrCode = 0x8
|
||||
ErrCodeCompression ErrCode = 0x9
|
||||
ErrCodeConnect ErrCode = 0xa
|
||||
ErrCodeEnhanceYourCalm ErrCode = 0xb
|
||||
ErrCodeInadequateSecurity ErrCode = 0xc
|
||||
ErrCodeHTTP11Required ErrCode = 0xd
|
||||
)
|
||||
|
||||
var errCodeName = map[ErrCode]string{
|
||||
ErrCodeNo: "NO_ERROR",
|
||||
ErrCodeProtocol: "PROTOCOL_ERROR",
|
||||
ErrCodeInternal: "INTERNAL_ERROR",
|
||||
ErrCodeFlowControl: "FLOW_CONTROL_ERROR",
|
||||
ErrCodeSettingsTimeout: "SETTINGS_TIMEOUT",
|
||||
ErrCodeStreamClosed: "STREAM_CLOSED",
|
||||
ErrCodeFrameSize: "FRAME_SIZE_ERROR",
|
||||
ErrCodeRefusedStream: "REFUSED_STREAM",
|
||||
ErrCodeCancel: "CANCEL",
|
||||
ErrCodeCompression: "COMPRESSION_ERROR",
|
||||
ErrCodeConnect: "CONNECT_ERROR",
|
||||
ErrCodeEnhanceYourCalm: "ENHANCE_YOUR_CALM",
|
||||
ErrCodeInadequateSecurity: "INADEQUATE_SECURITY",
|
||||
ErrCodeHTTP11Required: "HTTP_1_1_REQUIRED",
|
||||
}
|
||||
|
||||
func (e ErrCode) String() string {
|
||||
if s, ok := errCodeName[e]; ok {
|
||||
return s
|
||||
}
|
||||
return fmt.Sprintf("unknown error code 0x%x", uint32(e))
|
||||
}
|
||||
|
||||
// ConnectionError is an error that results in the termination of the
|
||||
// entire connection.
|
||||
type ConnectionError ErrCode
|
||||
|
||||
func (e ConnectionError) Error() string { return fmt.Sprintf("connection error: %s", ErrCode(e)) }
|
||||
|
||||
// StreamError is an error that only affects one stream within an
|
||||
// HTTP/2 connection.
|
||||
type StreamError struct {
|
||||
StreamID uint32
|
||||
Code ErrCode
|
||||
Cause error // optional additional detail
|
||||
}
|
||||
|
||||
func streamError(id uint32, code ErrCode) StreamError {
|
||||
return StreamError{StreamID: id, Code: code}
|
||||
}
|
||||
|
||||
func (e StreamError) Error() string {
|
||||
if e.Cause != nil {
|
||||
return fmt.Sprintf("stream error: stream ID %d; %v; %v", e.StreamID, e.Code, e.Cause)
|
||||
}
|
||||
return fmt.Sprintf("stream error: stream ID %d; %v", e.StreamID, e.Code)
|
||||
}
|
||||
|
||||
// 6.9.1 The Flow Control Window
|
||||
// "If a sender receives a WINDOW_UPDATE that causes a flow control
|
||||
// window to exceed this maximum it MUST terminate either the stream
|
||||
// or the connection, as appropriate. For streams, [...]; for the
|
||||
// connection, a GOAWAY frame with a FLOW_CONTROL_ERROR code."
|
||||
type goAwayFlowError struct{}
|
||||
|
||||
func (goAwayFlowError) Error() string { return "connection exceeded flow control window size" }
|
||||
|
||||
// connError represents an HTTP/2 ConnectionError error code, along
|
||||
// with a string (for debugging) explaining why.
|
||||
//
|
||||
// Errors of this type are only returned by the frame parser functions
|
||||
// and converted into ConnectionError(Code), after stashing away
|
||||
// the Reason into the Framer's errDetail field, accessible via
|
||||
// the (*Framer).ErrorDetail method.
|
||||
type connError struct {
|
||||
Code ErrCode // the ConnectionError error code
|
||||
Reason string // additional reason
|
||||
}
|
||||
|
||||
func (e connError) Error() string {
|
||||
return fmt.Sprintf("http2: connection error: %v: %v", e.Code, e.Reason)
|
||||
}
|
||||
|
||||
type pseudoHeaderError string
|
||||
|
||||
func (e pseudoHeaderError) Error() string {
|
||||
return fmt.Sprintf("invalid pseudo-header %q", string(e))
|
||||
}
|
||||
|
||||
type duplicatePseudoHeaderError string
|
||||
|
||||
func (e duplicatePseudoHeaderError) Error() string {
|
||||
return fmt.Sprintf("duplicate pseudo-header %q", string(e))
|
||||
}
|
||||
|
||||
type headerFieldNameError string
|
||||
|
||||
func (e headerFieldNameError) Error() string {
|
||||
return fmt.Sprintf("invalid header field name %q", string(e))
|
||||
}
|
||||
|
||||
type headerFieldValueError string
|
||||
|
||||
func (e headerFieldValueError) Error() string {
|
||||
return fmt.Sprintf("invalid header field value %q", string(e))
|
||||
}
|
||||
|
||||
var (
|
||||
errMixPseudoHeaderTypes = errors.New("mix of request and response pseudo headers")
|
||||
errPseudoAfterRegular = errors.New("pseudo header field after regular")
|
||||
)
|
|
@ -0,0 +1,50 @@
|
|||
// Copyright 2014 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Flow control
|
||||
|
||||
package http2
|
||||
|
||||
// flow is the flow control window's size.
|
||||
type flow struct {
|
||||
// n is the number of DATA bytes we're allowed to send.
|
||||
// A flow is kept both on a conn and a per-stream.
|
||||
n int32
|
||||
|
||||
// conn points to the shared connection-level flow that is
|
||||
// shared by all streams on that conn. It is nil for the flow
|
||||
// that's on the conn directly.
|
||||
conn *flow
|
||||
}
|
||||
|
||||
func (f *flow) setConnFlow(cf *flow) { f.conn = cf }
|
||||
|
||||
func (f *flow) available() int32 {
|
||||
n := f.n
|
||||
if f.conn != nil && f.conn.n < n {
|
||||
n = f.conn.n
|
||||
}
|
||||
return n
|
||||
}
|
||||
|
||||
func (f *flow) take(n int32) {
|
||||
if n > f.available() {
|
||||
panic("internal error: took too much")
|
||||
}
|
||||
f.n -= n
|
||||
if f.conn != nil {
|
||||
f.conn.n -= n
|
||||
}
|
||||
}
|
||||
|
||||
// add adds n bytes (positive or negative) to the flow control window.
|
||||
// It returns false if the sum would exceed 2^31-1.
|
||||
func (f *flow) add(n int32) bool {
|
||||
remain := (1<<31 - 1) - f.n
|
||||
if n > remain {
|
||||
return false
|
||||
}
|
||||
f.n += n
|
||||
return true
|
||||
}
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,16 @@
|
|||
// Copyright 2016 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build go1.6
|
||||
|
||||
package http2
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
"time"
|
||||
)
|
||||
|
||||
func transportExpectContinueTimeout(t1 *http.Transport) time.Duration {
|
||||
return t1.ExpectContinueTimeout
|
||||
}
|
|
@ -0,0 +1,106 @@
|
|||
// Copyright 2016 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build go1.7
|
||||
|
||||
package http2
|
||||
|
||||
import (
|
||||
"context"
|
||||
"net"
|
||||
"net/http"
|
||||
"net/http/httptrace"
|
||||
"time"
|
||||
)
|
||||
|
||||
type contextContext interface {
|
||||
context.Context
|
||||
}
|
||||
|
||||
func serverConnBaseContext(c net.Conn, opts *ServeConnOpts) (ctx contextContext, cancel func()) {
|
||||
ctx, cancel = context.WithCancel(context.Background())
|
||||
ctx = context.WithValue(ctx, http.LocalAddrContextKey, c.LocalAddr())
|
||||
if hs := opts.baseConfig(); hs != nil {
|
||||
ctx = context.WithValue(ctx, http.ServerContextKey, hs)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func contextWithCancel(ctx contextContext) (_ contextContext, cancel func()) {
|
||||
return context.WithCancel(ctx)
|
||||
}
|
||||
|
||||
func requestWithContext(req *http.Request, ctx contextContext) *http.Request {
|
||||
return req.WithContext(ctx)
|
||||
}
|
||||
|
||||
type clientTrace httptrace.ClientTrace
|
||||
|
||||
func reqContext(r *http.Request) context.Context { return r.Context() }
|
||||
|
||||
func (t *Transport) idleConnTimeout() time.Duration {
|
||||
if t.t1 != nil {
|
||||
return t.t1.IdleConnTimeout
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func setResponseUncompressed(res *http.Response) { res.Uncompressed = true }
|
||||
|
||||
func traceGotConn(req *http.Request, cc *ClientConn) {
|
||||
trace := httptrace.ContextClientTrace(req.Context())
|
||||
if trace == nil || trace.GotConn == nil {
|
||||
return
|
||||
}
|
||||
ci := httptrace.GotConnInfo{Conn: cc.tconn}
|
||||
cc.mu.Lock()
|
||||
ci.Reused = cc.nextStreamID > 1
|
||||
ci.WasIdle = len(cc.streams) == 0 && ci.Reused
|
||||
if ci.WasIdle && !cc.lastActive.IsZero() {
|
||||
ci.IdleTime = time.Now().Sub(cc.lastActive)
|
||||
}
|
||||
cc.mu.Unlock()
|
||||
|
||||
trace.GotConn(ci)
|
||||
}
|
||||
|
||||
func traceWroteHeaders(trace *clientTrace) {
|
||||
if trace != nil && trace.WroteHeaders != nil {
|
||||
trace.WroteHeaders()
|
||||
}
|
||||
}
|
||||
|
||||
func traceGot100Continue(trace *clientTrace) {
|
||||
if trace != nil && trace.Got100Continue != nil {
|
||||
trace.Got100Continue()
|
||||
}
|
||||
}
|
||||
|
||||
func traceWait100Continue(trace *clientTrace) {
|
||||
if trace != nil && trace.Wait100Continue != nil {
|
||||
trace.Wait100Continue()
|
||||
}
|
||||
}
|
||||
|
||||
func traceWroteRequest(trace *clientTrace, err error) {
|
||||
if trace != nil && trace.WroteRequest != nil {
|
||||
trace.WroteRequest(httptrace.WroteRequestInfo{Err: err})
|
||||
}
|
||||
}
|
||||
|
||||
func traceFirstResponseByte(trace *clientTrace) {
|
||||
if trace != nil && trace.GotFirstResponseByte != nil {
|
||||
trace.GotFirstResponseByte()
|
||||
}
|
||||
}
|
||||
|
||||
func requestTrace(req *http.Request) *clientTrace {
|
||||
trace := httptrace.ContextClientTrace(req.Context())
|
||||
return (*clientTrace)(trace)
|
||||
}
|
||||
|
||||
// Ping sends a PING frame to the server and waits for the ack.
|
||||
func (cc *ClientConn) Ping(ctx context.Context) error {
|
||||
return cc.ping(ctx)
|
||||
}
|
|
@ -0,0 +1,36 @@
|
|||
// Copyright 2016 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build go1.7,!go1.8
|
||||
|
||||
package http2
|
||||
|
||||
import "crypto/tls"
|
||||
|
||||
// temporary copy of Go 1.7's private tls.Config.clone:
|
||||
func cloneTLSConfig(c *tls.Config) *tls.Config {
|
||||
return &tls.Config{
|
||||
Rand: c.Rand,
|
||||
Time: c.Time,
|
||||
Certificates: c.Certificates,
|
||||
NameToCertificate: c.NameToCertificate,
|
||||
GetCertificate: c.GetCertificate,
|
||||
RootCAs: c.RootCAs,
|
||||
NextProtos: c.NextProtos,
|
||||
ServerName: c.ServerName,
|
||||
ClientAuth: c.ClientAuth,
|
||||
ClientCAs: c.ClientCAs,
|
||||
InsecureSkipVerify: c.InsecureSkipVerify,
|
||||
CipherSuites: c.CipherSuites,
|
||||
PreferServerCipherSuites: c.PreferServerCipherSuites,
|
||||
SessionTicketsDisabled: c.SessionTicketsDisabled,
|
||||
SessionTicketKey: c.SessionTicketKey,
|
||||
ClientSessionCache: c.ClientSessionCache,
|
||||
MinVersion: c.MinVersion,
|
||||
MaxVersion: c.MaxVersion,
|
||||
CurvePreferences: c.CurvePreferences,
|
||||
DynamicRecordSizingDisabled: c.DynamicRecordSizingDisabled,
|
||||
Renegotiation: c.Renegotiation,
|
||||
}
|
||||
}
|
|
@ -0,0 +1,56 @@
|
|||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build go1.8
|
||||
|
||||
package http2
|
||||
|
||||
import (
|
||||
"crypto/tls"
|
||||
"io"
|
||||
"net/http"
|
||||
)
|
||||
|
||||
func cloneTLSConfig(c *tls.Config) *tls.Config {
|
||||
c2 := c.Clone()
|
||||
c2.GetClientCertificate = c.GetClientCertificate // golang.org/issue/19264
|
||||
return c2
|
||||
}
|
||||
|
||||
var _ http.Pusher = (*responseWriter)(nil)
|
||||
|
||||
// Push implements http.Pusher.
|
||||
func (w *responseWriter) Push(target string, opts *http.PushOptions) error {
|
||||
internalOpts := pushOptions{}
|
||||
if opts != nil {
|
||||
internalOpts.Method = opts.Method
|
||||
internalOpts.Header = opts.Header
|
||||
}
|
||||
return w.push(target, internalOpts)
|
||||
}
|
||||
|
||||
func configureServer18(h1 *http.Server, h2 *Server) error {
|
||||
if h2.IdleTimeout == 0 {
|
||||
if h1.IdleTimeout != 0 {
|
||||
h2.IdleTimeout = h1.IdleTimeout
|
||||
} else {
|
||||
h2.IdleTimeout = h1.ReadTimeout
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func shouldLogPanic(panicValue interface{}) bool {
|
||||
return panicValue != nil && panicValue != http.ErrAbortHandler
|
||||
}
|
||||
|
||||
func reqGetBody(req *http.Request) func() (io.ReadCloser, error) {
|
||||
return req.GetBody
|
||||
}
|
||||
|
||||
func reqBodyIsNoBody(body io.ReadCloser) bool {
|
||||
return body == http.NoBody
|
||||
}
|
||||
|
||||
func go18httpNoBody() io.ReadCloser { return http.NoBody } // for tests only
|
|
@ -0,0 +1,16 @@
|
|||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build go1.9
|
||||
|
||||
package http2
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
)
|
||||
|
||||
func configureServer19(s *http.Server, conf *Server) error {
|
||||
s.RegisterOnShutdown(conf.state.startGracefulShutdown)
|
||||
return nil
|
||||
}
|
|
@ -0,0 +1,170 @@
|
|||
// Copyright 2014 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Defensive debug-only utility to track that functions run on the
|
||||
// goroutine that they're supposed to.
|
||||
|
||||
package http2
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"runtime"
|
||||
"strconv"
|
||||
"sync"
|
||||
)
|
||||
|
||||
var DebugGoroutines = os.Getenv("DEBUG_HTTP2_GOROUTINES") == "1"
|
||||
|
||||
type goroutineLock uint64
|
||||
|
||||
func newGoroutineLock() goroutineLock {
|
||||
if !DebugGoroutines {
|
||||
return 0
|
||||
}
|
||||
return goroutineLock(curGoroutineID())
|
||||
}
|
||||
|
||||
func (g goroutineLock) check() {
|
||||
if !DebugGoroutines {
|
||||
return
|
||||
}
|
||||
if curGoroutineID() != uint64(g) {
|
||||
panic("running on the wrong goroutine")
|
||||
}
|
||||
}
|
||||
|
||||
func (g goroutineLock) checkNotOn() {
|
||||
if !DebugGoroutines {
|
||||
return
|
||||
}
|
||||
if curGoroutineID() == uint64(g) {
|
||||
panic("running on the wrong goroutine")
|
||||
}
|
||||
}
|
||||
|
||||
var goroutineSpace = []byte("goroutine ")
|
||||
|
||||
func curGoroutineID() uint64 {
|
||||
bp := littleBuf.Get().(*[]byte)
|
||||
defer littleBuf.Put(bp)
|
||||
b := *bp
|
||||
b = b[:runtime.Stack(b, false)]
|
||||
// Parse the 4707 out of "goroutine 4707 ["
|
||||
b = bytes.TrimPrefix(b, goroutineSpace)
|
||||
i := bytes.IndexByte(b, ' ')
|
||||
if i < 0 {
|
||||
panic(fmt.Sprintf("No space found in %q", b))
|
||||
}
|
||||
b = b[:i]
|
||||
n, err := parseUintBytes(b, 10, 64)
|
||||
if err != nil {
|
||||
panic(fmt.Sprintf("Failed to parse goroutine ID out of %q: %v", b, err))
|
||||
}
|
||||
return n
|
||||
}
|
||||
|
||||
var littleBuf = sync.Pool{
|
||||
New: func() interface{} {
|
||||
buf := make([]byte, 64)
|
||||
return &buf
|
||||
},
|
||||
}
|
||||
|
||||
// parseUintBytes is like strconv.ParseUint, but using a []byte.
|
||||
func parseUintBytes(s []byte, base int, bitSize int) (n uint64, err error) {
|
||||
var cutoff, maxVal uint64
|
||||
|
||||
if bitSize == 0 {
|
||||
bitSize = int(strconv.IntSize)
|
||||
}
|
||||
|
||||
s0 := s
|
||||
switch {
|
||||
case len(s) < 1:
|
||||
err = strconv.ErrSyntax
|
||||
goto Error
|
||||
|
||||
case 2 <= base && base <= 36:
|
||||
// valid base; nothing to do
|
||||
|
||||
case base == 0:
|
||||
// Look for octal, hex prefix.
|
||||
switch {
|
||||
case s[0] == '0' && len(s) > 1 && (s[1] == 'x' || s[1] == 'X'):
|
||||
base = 16
|
||||
s = s[2:]
|
||||
if len(s) < 1 {
|
||||
err = strconv.ErrSyntax
|
||||
goto Error
|
||||
}
|
||||
case s[0] == '0':
|
||||
base = 8
|
||||
default:
|
||||
base = 10
|
||||
}
|
||||
|
||||
default:
|
||||
err = errors.New("invalid base " + strconv.Itoa(base))
|
||||
goto Error
|
||||
}
|
||||
|
||||
n = 0
|
||||
cutoff = cutoff64(base)
|
||||
maxVal = 1<<uint(bitSize) - 1
|
||||
|
||||
for i := 0; i < len(s); i++ {
|
||||
var v byte
|
||||
d := s[i]
|
||||
switch {
|
||||
case '0' <= d && d <= '9':
|
||||
v = d - '0'
|
||||
case 'a' <= d && d <= 'z':
|
||||
v = d - 'a' + 10
|
||||
case 'A' <= d && d <= 'Z':
|
||||
v = d - 'A' + 10
|
||||
default:
|
||||
n = 0
|
||||
err = strconv.ErrSyntax
|
||||
goto Error
|
||||
}
|
||||
if int(v) >= base {
|
||||
n = 0
|
||||
err = strconv.ErrSyntax
|
||||
goto Error
|
||||
}
|
||||
|
||||
if n >= cutoff {
|
||||
// n*base overflows
|
||||
n = 1<<64 - 1
|
||||
err = strconv.ErrRange
|
||||
goto Error
|
||||
}
|
||||
n *= uint64(base)
|
||||
|
||||
n1 := n + uint64(v)
|
||||
if n1 < n || n1 > maxVal {
|
||||
// n+v overflows
|
||||
n = 1<<64 - 1
|
||||
err = strconv.ErrRange
|
||||
goto Error
|
||||
}
|
||||
n = n1
|
||||
}
|
||||
|
||||
return n, nil
|
||||
|
||||
Error:
|
||||
return n, &strconv.NumError{Func: "ParseUint", Num: string(s0), Err: err}
|
||||
}
|
||||
|
||||
// Return the first number n such that n*base >= 1<<64.
|
||||
func cutoff64(base int) uint64 {
|
||||
if base < 2 {
|
||||
return 0
|
||||
}
|
||||
return (1<<64-1)/uint64(base) + 1
|
||||
}
|
|
@ -0,0 +1,78 @@
|
|||
// Copyright 2014 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package http2
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
"strings"
|
||||
)
|
||||
|
||||
var (
|
||||
commonLowerHeader = map[string]string{} // Go-Canonical-Case -> lower-case
|
||||
commonCanonHeader = map[string]string{} // lower-case -> Go-Canonical-Case
|
||||
)
|
||||
|
||||
func init() {
|
||||
for _, v := range []string{
|
||||
"accept",
|
||||
"accept-charset",
|
||||
"accept-encoding",
|
||||
"accept-language",
|
||||
"accept-ranges",
|
||||
"age",
|
||||
"access-control-allow-origin",
|
||||
"allow",
|
||||
"authorization",
|
||||
"cache-control",
|
||||
"content-disposition",
|
||||
"content-encoding",
|
||||
"content-language",
|
||||
"content-length",
|
||||
"content-location",
|
||||
"content-range",
|
||||
"content-type",
|
||||
"cookie",
|
||||
"date",
|
||||
"etag",
|
||||
"expect",
|
||||
"expires",
|
||||
"from",
|
||||
"host",
|
||||
"if-match",
|
||||
"if-modified-since",
|
||||
"if-none-match",
|
||||
"if-unmodified-since",
|
||||
"last-modified",
|
||||
"link",
|
||||
"location",
|
||||
"max-forwards",
|
||||
"proxy-authenticate",
|
||||
"proxy-authorization",
|
||||
"range",
|
||||
"referer",
|
||||
"refresh",
|
||||
"retry-after",
|
||||
"server",
|
||||
"set-cookie",
|
||||
"strict-transport-security",
|
||||
"trailer",
|
||||
"transfer-encoding",
|
||||
"user-agent",
|
||||
"vary",
|
||||
"via",
|
||||
"www-authenticate",
|
||||
} {
|
||||
chk := http.CanonicalHeaderKey(v)
|
||||
commonLowerHeader[chk] = v
|
||||
commonCanonHeader[v] = chk
|
||||
}
|
||||
}
|
||||
|
||||
func lowerHeader(v string) string {
|
||||
if s, ok := commonLowerHeader[v]; ok {
|
||||
return s
|
||||
}
|
||||
return strings.ToLower(v)
|
||||
}
|
|
@ -0,0 +1,240 @@
|
|||
// Copyright 2014 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package hpack
|
||||
|
||||
import (
|
||||
"io"
|
||||
)
|
||||
|
||||
const (
|
||||
uint32Max = ^uint32(0)
|
||||
initialHeaderTableSize = 4096
|
||||
)
|
||||
|
||||
type Encoder struct {
|
||||
dynTab dynamicTable
|
||||
// minSize is the minimum table size set by
|
||||
// SetMaxDynamicTableSize after the previous Header Table Size
|
||||
// Update.
|
||||
minSize uint32
|
||||
// maxSizeLimit is the maximum table size this encoder
|
||||
// supports. This will protect the encoder from too large
|
||||
// size.
|
||||
maxSizeLimit uint32
|
||||
// tableSizeUpdate indicates whether "Header Table Size
|
||||
// Update" is required.
|
||||
tableSizeUpdate bool
|
||||
w io.Writer
|
||||
buf []byte
|
||||
}
|
||||
|
||||
// NewEncoder returns a new Encoder which performs HPACK encoding. An
|
||||
// encoded data is written to w.
|
||||
func NewEncoder(w io.Writer) *Encoder {
|
||||
e := &Encoder{
|
||||
minSize: uint32Max,
|
||||
maxSizeLimit: initialHeaderTableSize,
|
||||
tableSizeUpdate: false,
|
||||
w: w,
|
||||
}
|
||||
e.dynTab.table.init()
|
||||
e.dynTab.setMaxSize(initialHeaderTableSize)
|
||||
return e
|
||||
}
|
||||
|
||||
// WriteField encodes f into a single Write to e's underlying Writer.
|
||||
// This function may also produce bytes for "Header Table Size Update"
|
||||
// if necessary. If produced, it is done before encoding f.
|
||||
func (e *Encoder) WriteField(f HeaderField) error {
|
||||
e.buf = e.buf[:0]
|
||||
|
||||
if e.tableSizeUpdate {
|
||||
e.tableSizeUpdate = false
|
||||
if e.minSize < e.dynTab.maxSize {
|
||||
e.buf = appendTableSize(e.buf, e.minSize)
|
||||
}
|
||||
e.minSize = uint32Max
|
||||
e.buf = appendTableSize(e.buf, e.dynTab.maxSize)
|
||||
}
|
||||
|
||||
idx, nameValueMatch := e.searchTable(f)
|
||||
if nameValueMatch {
|
||||
e.buf = appendIndexed(e.buf, idx)
|
||||
} else {
|
||||
indexing := e.shouldIndex(f)
|
||||
if indexing {
|
||||
e.dynTab.add(f)
|
||||
}
|
||||
|
||||
if idx == 0 {
|
||||
e.buf = appendNewName(e.buf, f, indexing)
|
||||
} else {
|
||||
e.buf = appendIndexedName(e.buf, f, idx, indexing)
|
||||
}
|
||||
}
|
||||
n, err := e.w.Write(e.buf)
|
||||
if err == nil && n != len(e.buf) {
|
||||
err = io.ErrShortWrite
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
// searchTable searches f in both stable and dynamic header tables.
|
||||
// The static header table is searched first. Only when there is no
|
||||
// exact match for both name and value, the dynamic header table is
|
||||
// then searched. If there is no match, i is 0. If both name and value
|
||||
// match, i is the matched index and nameValueMatch becomes true. If
|
||||
// only name matches, i points to that index and nameValueMatch
|
||||
// becomes false.
|
||||
func (e *Encoder) searchTable(f HeaderField) (i uint64, nameValueMatch bool) {
|
||||
i, nameValueMatch = staticTable.search(f)
|
||||
if nameValueMatch {
|
||||
return i, true
|
||||
}
|
||||
|
||||
j, nameValueMatch := e.dynTab.table.search(f)
|
||||
if nameValueMatch || (i == 0 && j != 0) {
|
||||
return j + uint64(staticTable.len()), nameValueMatch
|
||||
}
|
||||
|
||||
return i, false
|
||||
}
|
||||
|
||||
// SetMaxDynamicTableSize changes the dynamic header table size to v.
|
||||
// The actual size is bounded by the value passed to
|
||||
// SetMaxDynamicTableSizeLimit.
|
||||
func (e *Encoder) SetMaxDynamicTableSize(v uint32) {
|
||||
if v > e.maxSizeLimit {
|
||||
v = e.maxSizeLimit
|
||||
}
|
||||
if v < e.minSize {
|
||||
e.minSize = v
|
||||
}
|
||||
e.tableSizeUpdate = true
|
||||
e.dynTab.setMaxSize(v)
|
||||
}
|
||||
|
||||
// SetMaxDynamicTableSizeLimit changes the maximum value that can be
|
||||
// specified in SetMaxDynamicTableSize to v. By default, it is set to
|
||||
// 4096, which is the same size of the default dynamic header table
|
||||
// size described in HPACK specification. If the current maximum
|
||||
// dynamic header table size is strictly greater than v, "Header Table
|
||||
// Size Update" will be done in the next WriteField call and the
|
||||
// maximum dynamic header table size is truncated to v.
|
||||
func (e *Encoder) SetMaxDynamicTableSizeLimit(v uint32) {
|
||||
e.maxSizeLimit = v
|
||||
if e.dynTab.maxSize > v {
|
||||
e.tableSizeUpdate = true
|
||||
e.dynTab.setMaxSize(v)
|
||||
}
|
||||
}
|
||||
|
||||
// shouldIndex reports whether f should be indexed.
|
||||
func (e *Encoder) shouldIndex(f HeaderField) bool {
|
||||
return !f.Sensitive && f.Size() <= e.dynTab.maxSize
|
||||
}
|
||||
|
||||
// appendIndexed appends index i, as encoded in "Indexed Header Field"
|
||||
// representation, to dst and returns the extended buffer.
|
||||
func appendIndexed(dst []byte, i uint64) []byte {
|
||||
first := len(dst)
|
||||
dst = appendVarInt(dst, 7, i)
|
||||
dst[first] |= 0x80
|
||||
return dst
|
||||
}
|
||||
|
||||
// appendNewName appends f, as encoded in one of "Literal Header field
|
||||
// - New Name" representation variants, to dst and returns the
|
||||
// extended buffer.
|
||||
//
|
||||
// If f.Sensitive is true, "Never Indexed" representation is used. If
|
||||
// f.Sensitive is false and indexing is true, "Inremental Indexing"
|
||||
// representation is used.
|
||||
func appendNewName(dst []byte, f HeaderField, indexing bool) []byte {
|
||||
dst = append(dst, encodeTypeByte(indexing, f.Sensitive))
|
||||
dst = appendHpackString(dst, f.Name)
|
||||
return appendHpackString(dst, f.Value)
|
||||
}
|
||||
|
||||
// appendIndexedName appends f and index i referring indexed name
|
||||
// entry, as encoded in one of "Literal Header field - Indexed Name"
|
||||
// representation variants, to dst and returns the extended buffer.
|
||||
//
|
||||
// If f.Sensitive is true, "Never Indexed" representation is used. If
|
||||
// f.Sensitive is false and indexing is true, "Incremental Indexing"
|
||||
// representation is used.
|
||||
func appendIndexedName(dst []byte, f HeaderField, i uint64, indexing bool) []byte {
|
||||
first := len(dst)
|
||||
var n byte
|
||||
if indexing {
|
||||
n = 6
|
||||
} else {
|
||||
n = 4
|
||||
}
|
||||
dst = appendVarInt(dst, n, i)
|
||||
dst[first] |= encodeTypeByte(indexing, f.Sensitive)
|
||||
return appendHpackString(dst, f.Value)
|
||||
}
|
||||
|
||||
// appendTableSize appends v, as encoded in "Header Table Size Update"
|
||||
// representation, to dst and returns the extended buffer.
|
||||
func appendTableSize(dst []byte, v uint32) []byte {
|
||||
first := len(dst)
|
||||
dst = appendVarInt(dst, 5, uint64(v))
|
||||
dst[first] |= 0x20
|
||||
return dst
|
||||
}
|
||||
|
||||
// appendVarInt appends i, as encoded in variable integer form using n
|
||||
// bit prefix, to dst and returns the extended buffer.
|
||||
//
|
||||
// See
|
||||
// http://http2.github.io/http2-spec/compression.html#integer.representation
|
||||
func appendVarInt(dst []byte, n byte, i uint64) []byte {
|
||||
k := uint64((1 << n) - 1)
|
||||
if i < k {
|
||||
return append(dst, byte(i))
|
||||
}
|
||||
dst = append(dst, byte(k))
|
||||
i -= k
|
||||
for ; i >= 128; i >>= 7 {
|
||||
dst = append(dst, byte(0x80|(i&0x7f)))
|
||||
}
|
||||
return append(dst, byte(i))
|
||||
}
|
||||
|
||||
// appendHpackString appends s, as encoded in "String Literal"
|
||||
// representation, to dst and returns the the extended buffer.
|
||||
//
|
||||
// s will be encoded in Huffman codes only when it produces strictly
|
||||
// shorter byte string.
|
||||
func appendHpackString(dst []byte, s string) []byte {
|
||||
huffmanLength := HuffmanEncodeLength(s)
|
||||
if huffmanLength < uint64(len(s)) {
|
||||
first := len(dst)
|
||||
dst = appendVarInt(dst, 7, huffmanLength)
|
||||
dst = AppendHuffmanString(dst, s)
|
||||
dst[first] |= 0x80
|
||||
} else {
|
||||
dst = appendVarInt(dst, 7, uint64(len(s)))
|
||||
dst = append(dst, s...)
|
||||
}
|
||||
return dst
|
||||
}
|
||||
|
||||
// encodeTypeByte returns type byte. If sensitive is true, type byte
|
||||
// for "Never Indexed" representation is returned. If sensitive is
|
||||
// false and indexing is true, type byte for "Incremental Indexing"
|
||||
// representation is returned. Otherwise, type byte for "Without
|
||||
// Indexing" is returned.
|
||||
func encodeTypeByte(indexing, sensitive bool) byte {
|
||||
if sensitive {
|
||||
return 0x10
|
||||
}
|
||||
if indexing {
|
||||
return 0x40
|
||||
}
|
||||
return 0
|
||||
}
|
|
@ -0,0 +1,490 @@
|
|||
// Copyright 2014 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package hpack implements HPACK, a compression format for
|
||||
// efficiently representing HTTP header fields in the context of HTTP/2.
|
||||
//
|
||||
// See http://tools.ietf.org/html/draft-ietf-httpbis-header-compression-09
|
||||
package hpack
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"errors"
|
||||
"fmt"
|
||||
)
|
||||
|
||||
// A DecodingError is something the spec defines as a decoding error.
|
||||
type DecodingError struct {
|
||||
Err error
|
||||
}
|
||||
|
||||
func (de DecodingError) Error() string {
|
||||
return fmt.Sprintf("decoding error: %v", de.Err)
|
||||
}
|
||||
|
||||
// An InvalidIndexError is returned when an encoder references a table
|
||||
// entry before the static table or after the end of the dynamic table.
|
||||
type InvalidIndexError int
|
||||
|
||||
func (e InvalidIndexError) Error() string {
|
||||
return fmt.Sprintf("invalid indexed representation index %d", int(e))
|
||||
}
|
||||
|
||||
// A HeaderField is a name-value pair. Both the name and value are
|
||||
// treated as opaque sequences of octets.
|
||||
type HeaderField struct {
|
||||
Name, Value string
|
||||
|
||||
// Sensitive means that this header field should never be
|
||||
// indexed.
|
||||
Sensitive bool
|
||||
}
|
||||
|
||||
// IsPseudo reports whether the header field is an http2 pseudo header.
|
||||
// That is, it reports whether it starts with a colon.
|
||||
// It is not otherwise guaranteed to be a valid pseudo header field,
|
||||
// though.
|
||||
func (hf HeaderField) IsPseudo() bool {
|
||||
return len(hf.Name) != 0 && hf.Name[0] == ':'
|
||||
}
|
||||
|
||||
func (hf HeaderField) String() string {
|
||||
var suffix string
|
||||
if hf.Sensitive {
|
||||
suffix = " (sensitive)"
|
||||
}
|
||||
return fmt.Sprintf("header field %q = %q%s", hf.Name, hf.Value, suffix)
|
||||
}
|
||||
|
||||
// Size returns the size of an entry per RFC 7541 section 4.1.
|
||||
func (hf HeaderField) Size() uint32 {
|
||||
// http://http2.github.io/http2-spec/compression.html#rfc.section.4.1
|
||||
// "The size of the dynamic table is the sum of the size of
|
||||
// its entries. The size of an entry is the sum of its name's
|
||||
// length in octets (as defined in Section 5.2), its value's
|
||||
// length in octets (see Section 5.2), plus 32. The size of
|
||||
// an entry is calculated using the length of the name and
|
||||
// value without any Huffman encoding applied."
|
||||
|
||||
// This can overflow if somebody makes a large HeaderField
|
||||
// Name and/or Value by hand, but we don't care, because that
|
||||
// won't happen on the wire because the encoding doesn't allow
|
||||
// it.
|
||||
return uint32(len(hf.Name) + len(hf.Value) + 32)
|
||||
}
|
||||
|
||||
// A Decoder is the decoding context for incremental processing of
|
||||
// header blocks.
|
||||
type Decoder struct {
|
||||
dynTab dynamicTable
|
||||
emit func(f HeaderField)
|
||||
|
||||
emitEnabled bool // whether calls to emit are enabled
|
||||
maxStrLen int // 0 means unlimited
|
||||
|
||||
// buf is the unparsed buffer. It's only written to
|
||||
// saveBuf if it was truncated in the middle of a header
|
||||
// block. Because it's usually not owned, we can only
|
||||
// process it under Write.
|
||||
buf []byte // not owned; only valid during Write
|
||||
|
||||
// saveBuf is previous data passed to Write which we weren't able
|
||||
// to fully parse before. Unlike buf, we own this data.
|
||||
saveBuf bytes.Buffer
|
||||
}
|
||||
|
||||
// NewDecoder returns a new decoder with the provided maximum dynamic
|
||||
// table size. The emitFunc will be called for each valid field
|
||||
// parsed, in the same goroutine as calls to Write, before Write returns.
|
||||
func NewDecoder(maxDynamicTableSize uint32, emitFunc func(f HeaderField)) *Decoder {
|
||||
d := &Decoder{
|
||||
emit: emitFunc,
|
||||
emitEnabled: true,
|
||||
}
|
||||
d.dynTab.table.init()
|
||||
d.dynTab.allowedMaxSize = maxDynamicTableSize
|
||||
d.dynTab.setMaxSize(maxDynamicTableSize)
|
||||
return d
|
||||
}
|
||||
|
||||
// ErrStringLength is returned by Decoder.Write when the max string length
|
||||
// (as configured by Decoder.SetMaxStringLength) would be violated.
|
||||
var ErrStringLength = errors.New("hpack: string too long")
|
||||
|
||||
// SetMaxStringLength sets the maximum size of a HeaderField name or
|
||||
// value string. If a string exceeds this length (even after any
|
||||
// decompression), Write will return ErrStringLength.
|
||||
// A value of 0 means unlimited and is the default from NewDecoder.
|
||||
func (d *Decoder) SetMaxStringLength(n int) {
|
||||
d.maxStrLen = n
|
||||
}
|
||||
|
||||
// SetEmitFunc changes the callback used when new header fields
|
||||
// are decoded.
|
||||
// It must be non-nil. It does not affect EmitEnabled.
|
||||
func (d *Decoder) SetEmitFunc(emitFunc func(f HeaderField)) {
|
||||
d.emit = emitFunc
|
||||
}
|
||||
|
||||
// SetEmitEnabled controls whether the emitFunc provided to NewDecoder
|
||||
// should be called. The default is true.
|
||||
//
|
||||
// This facility exists to let servers enforce MAX_HEADER_LIST_SIZE
|
||||
// while still decoding and keeping in-sync with decoder state, but
|
||||
// without doing unnecessary decompression or generating unnecessary
|
||||
// garbage for header fields past the limit.
|
||||
func (d *Decoder) SetEmitEnabled(v bool) { d.emitEnabled = v }
|
||||
|
||||
// EmitEnabled reports whether calls to the emitFunc provided to NewDecoder
|
||||
// are currently enabled. The default is true.
|
||||
func (d *Decoder) EmitEnabled() bool { return d.emitEnabled }
|
||||
|
||||
// TODO: add method *Decoder.Reset(maxSize, emitFunc) to let callers re-use Decoders and their
|
||||
// underlying buffers for garbage reasons.
|
||||
|
||||
func (d *Decoder) SetMaxDynamicTableSize(v uint32) {
|
||||
d.dynTab.setMaxSize(v)
|
||||
}
|
||||
|
||||
// SetAllowedMaxDynamicTableSize sets the upper bound that the encoded
|
||||
// stream (via dynamic table size updates) may set the maximum size
|
||||
// to.
|
||||
func (d *Decoder) SetAllowedMaxDynamicTableSize(v uint32) {
|
||||
d.dynTab.allowedMaxSize = v
|
||||
}
|
||||
|
||||
type dynamicTable struct {
|
||||
// http://http2.github.io/http2-spec/compression.html#rfc.section.2.3.2
|
||||
table headerFieldTable
|
||||
size uint32 // in bytes
|
||||
maxSize uint32 // current maxSize
|
||||
allowedMaxSize uint32 // maxSize may go up to this, inclusive
|
||||
}
|
||||
|
||||
func (dt *dynamicTable) setMaxSize(v uint32) {
|
||||
dt.maxSize = v
|
||||
dt.evict()
|
||||
}
|
||||
|
||||
func (dt *dynamicTable) add(f HeaderField) {
|
||||
dt.table.addEntry(f)
|
||||
dt.size += f.Size()
|
||||
dt.evict()
|
||||
}
|
||||
|
||||
// If we're too big, evict old stuff.
|
||||
func (dt *dynamicTable) evict() {
|
||||
var n int
|
||||
for dt.size > dt.maxSize && n < dt.table.len() {
|
||||
dt.size -= dt.table.ents[n].Size()
|
||||
n++
|
||||
}
|
||||
dt.table.evictOldest(n)
|
||||
}
|
||||
|
||||
func (d *Decoder) maxTableIndex() int {
|
||||
// This should never overflow. RFC 7540 Section 6.5.2 limits the size of
|
||||
// the dynamic table to 2^32 bytes, where each entry will occupy more than
|
||||
// one byte. Further, the staticTable has a fixed, small length.
|
||||
return d.dynTab.table.len() + staticTable.len()
|
||||
}
|
||||
|
||||
func (d *Decoder) at(i uint64) (hf HeaderField, ok bool) {
|
||||
// See Section 2.3.3.
|
||||
if i == 0 {
|
||||
return
|
||||
}
|
||||
if i <= uint64(staticTable.len()) {
|
||||
return staticTable.ents[i-1], true
|
||||
}
|
||||
if i > uint64(d.maxTableIndex()) {
|
||||
return
|
||||
}
|
||||
// In the dynamic table, newer entries have lower indices.
|
||||
// However, dt.ents[0] is the oldest entry. Hence, dt.ents is
|
||||
// the reversed dynamic table.
|
||||
dt := d.dynTab.table
|
||||
return dt.ents[dt.len()-(int(i)-staticTable.len())], true
|
||||
}
|
||||
|
||||
// Decode decodes an entire block.
|
||||
//
|
||||
// TODO: remove this method and make it incremental later? This is
|
||||
// easier for debugging now.
|
||||
func (d *Decoder) DecodeFull(p []byte) ([]HeaderField, error) {
|
||||
var hf []HeaderField
|
||||
saveFunc := d.emit
|
||||
defer func() { d.emit = saveFunc }()
|
||||
d.emit = func(f HeaderField) { hf = append(hf, f) }
|
||||
if _, err := d.Write(p); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if err := d.Close(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return hf, nil
|
||||
}
|
||||
|
||||
func (d *Decoder) Close() error {
|
||||
if d.saveBuf.Len() > 0 {
|
||||
d.saveBuf.Reset()
|
||||
return DecodingError{errors.New("truncated headers")}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (d *Decoder) Write(p []byte) (n int, err error) {
|
||||
if len(p) == 0 {
|
||||
// Prevent state machine CPU attacks (making us redo
|
||||
// work up to the point of finding out we don't have
|
||||
// enough data)
|
||||
return
|
||||
}
|
||||
// Only copy the data if we have to. Optimistically assume
|
||||
// that p will contain a complete header block.
|
||||
if d.saveBuf.Len() == 0 {
|
||||
d.buf = p
|
||||
} else {
|
||||
d.saveBuf.Write(p)
|
||||
d.buf = d.saveBuf.Bytes()
|
||||
d.saveBuf.Reset()
|
||||
}
|
||||
|
||||
for len(d.buf) > 0 {
|
||||
err = d.parseHeaderFieldRepr()
|
||||
if err == errNeedMore {
|
||||
// Extra paranoia, making sure saveBuf won't
|
||||
// get too large. All the varint and string
|
||||
// reading code earlier should already catch
|
||||
// overlong things and return ErrStringLength,
|
||||
// but keep this as a last resort.
|
||||
const varIntOverhead = 8 // conservative
|
||||
if d.maxStrLen != 0 && int64(len(d.buf)) > 2*(int64(d.maxStrLen)+varIntOverhead) {
|
||||
return 0, ErrStringLength
|
||||
}
|
||||
d.saveBuf.Write(d.buf)
|
||||
return len(p), nil
|
||||
}
|
||||
if err != nil {
|
||||
break
|
||||
}
|
||||
}
|
||||
return len(p), err
|
||||
}
|
||||
|
||||
// errNeedMore is an internal sentinel error value that means the
|
||||
// buffer is truncated and we need to read more data before we can
|
||||
// continue parsing.
|
||||
var errNeedMore = errors.New("need more data")
|
||||
|
||||
type indexType int
|
||||
|
||||
const (
|
||||
indexedTrue indexType = iota
|
||||
indexedFalse
|
||||
indexedNever
|
||||
)
|
||||
|
||||
func (v indexType) indexed() bool { return v == indexedTrue }
|
||||
func (v indexType) sensitive() bool { return v == indexedNever }
|
||||
|
||||
// returns errNeedMore if there isn't enough data available.
|
||||
// any other error is fatal.
|
||||
// consumes d.buf iff it returns nil.
|
||||
// precondition: must be called with len(d.buf) > 0
|
||||
func (d *Decoder) parseHeaderFieldRepr() error {
|
||||
b := d.buf[0]
|
||||
switch {
|
||||
case b&128 != 0:
|
||||
// Indexed representation.
|
||||
// High bit set?
|
||||
// http://http2.github.io/http2-spec/compression.html#rfc.section.6.1
|
||||
return d.parseFieldIndexed()
|
||||
case b&192 == 64:
|
||||
// 6.2.1 Literal Header Field with Incremental Indexing
|
||||
// 0b10xxxxxx: top two bits are 10
|
||||
// http://http2.github.io/http2-spec/compression.html#rfc.section.6.2.1
|
||||
return d.parseFieldLiteral(6, indexedTrue)
|
||||
case b&240 == 0:
|
||||
// 6.2.2 Literal Header Field without Indexing
|
||||
// 0b0000xxxx: top four bits are 0000
|
||||
// http://http2.github.io/http2-spec/compression.html#rfc.section.6.2.2
|
||||
return d.parseFieldLiteral(4, indexedFalse)
|
||||
case b&240 == 16:
|
||||
// 6.2.3 Literal Header Field never Indexed
|
||||
// 0b0001xxxx: top four bits are 0001
|
||||
// http://http2.github.io/http2-spec/compression.html#rfc.section.6.2.3
|
||||
return d.parseFieldLiteral(4, indexedNever)
|
||||
case b&224 == 32:
|
||||
// 6.3 Dynamic Table Size Update
|
||||
// Top three bits are '001'.
|
||||
// http://http2.github.io/http2-spec/compression.html#rfc.section.6.3
|
||||
return d.parseDynamicTableSizeUpdate()
|
||||
}
|
||||
|
||||
return DecodingError{errors.New("invalid encoding")}
|
||||
}
|
||||
|
||||
// (same invariants and behavior as parseHeaderFieldRepr)
|
||||
func (d *Decoder) parseFieldIndexed() error {
|
||||
buf := d.buf
|
||||
idx, buf, err := readVarInt(7, buf)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
hf, ok := d.at(idx)
|
||||
if !ok {
|
||||
return DecodingError{InvalidIndexError(idx)}
|
||||
}
|
||||
d.buf = buf
|
||||
return d.callEmit(HeaderField{Name: hf.Name, Value: hf.Value})
|
||||
}
|
||||
|
||||
// (same invariants and behavior as parseHeaderFieldRepr)
|
||||
func (d *Decoder) parseFieldLiteral(n uint8, it indexType) error {
|
||||
buf := d.buf
|
||||
nameIdx, buf, err := readVarInt(n, buf)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
var hf HeaderField
|
||||
wantStr := d.emitEnabled || it.indexed()
|
||||
if nameIdx > 0 {
|
||||
ihf, ok := d.at(nameIdx)
|
||||
if !ok {
|
||||
return DecodingError{InvalidIndexError(nameIdx)}
|
||||
}
|
||||
hf.Name = ihf.Name
|
||||
} else {
|
||||
hf.Name, buf, err = d.readString(buf, wantStr)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
hf.Value, buf, err = d.readString(buf, wantStr)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
d.buf = buf
|
||||
if it.indexed() {
|
||||
d.dynTab.add(hf)
|
||||
}
|
||||
hf.Sensitive = it.sensitive()
|
||||
return d.callEmit(hf)
|
||||
}
|
||||
|
||||
func (d *Decoder) callEmit(hf HeaderField) error {
|
||||
if d.maxStrLen != 0 {
|
||||
if len(hf.Name) > d.maxStrLen || len(hf.Value) > d.maxStrLen {
|
||||
return ErrStringLength
|
||||
}
|
||||
}
|
||||
if d.emitEnabled {
|
||||
d.emit(hf)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// (same invariants and behavior as parseHeaderFieldRepr)
|
||||
func (d *Decoder) parseDynamicTableSizeUpdate() error {
|
||||
buf := d.buf
|
||||
size, buf, err := readVarInt(5, buf)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if size > uint64(d.dynTab.allowedMaxSize) {
|
||||
return DecodingError{errors.New("dynamic table size update too large")}
|
||||
}
|
||||
d.dynTab.setMaxSize(uint32(size))
|
||||
d.buf = buf
|
||||
return nil
|
||||
}
|
||||
|
||||
var errVarintOverflow = DecodingError{errors.New("varint integer overflow")}
|
||||
|
||||
// readVarInt reads an unsigned variable length integer off the
|
||||
// beginning of p. n is the parameter as described in
|
||||
// http://http2.github.io/http2-spec/compression.html#rfc.section.5.1.
|
||||
//
|
||||
// n must always be between 1 and 8.
|
||||
//
|
||||
// The returned remain buffer is either a smaller suffix of p, or err != nil.
|
||||
// The error is errNeedMore if p doesn't contain a complete integer.
|
||||
func readVarInt(n byte, p []byte) (i uint64, remain []byte, err error) {
|
||||
if n < 1 || n > 8 {
|
||||
panic("bad n")
|
||||
}
|
||||
if len(p) == 0 {
|
||||
return 0, p, errNeedMore
|
||||
}
|
||||
i = uint64(p[0])
|
||||
if n < 8 {
|
||||
i &= (1 << uint64(n)) - 1
|
||||
}
|
||||
if i < (1<<uint64(n))-1 {
|
||||
return i, p[1:], nil
|
||||
}
|
||||
|
||||
origP := p
|
||||
p = p[1:]
|
||||
var m uint64
|
||||
for len(p) > 0 {
|
||||
b := p[0]
|
||||
p = p[1:]
|
||||
i += uint64(b&127) << m
|
||||
if b&128 == 0 {
|
||||
return i, p, nil
|
||||
}
|
||||
m += 7
|
||||
if m >= 63 { // TODO: proper overflow check. making this up.
|
||||
return 0, origP, errVarintOverflow
|
||||
}
|
||||
}
|
||||
return 0, origP, errNeedMore
|
||||
}
|
||||
|
||||
// readString decodes an hpack string from p.
|
||||
//
|
||||
// wantStr is whether s will be used. If false, decompression and
|
||||
// []byte->string garbage are skipped if s will be ignored
|
||||
// anyway. This does mean that huffman decoding errors for non-indexed
|
||||
// strings past the MAX_HEADER_LIST_SIZE are ignored, but the server
|
||||
// is returning an error anyway, and because they're not indexed, the error
|
||||
// won't affect the decoding state.
|
||||
func (d *Decoder) readString(p []byte, wantStr bool) (s string, remain []byte, err error) {
|
||||
if len(p) == 0 {
|
||||
return "", p, errNeedMore
|
||||
}
|
||||
isHuff := p[0]&128 != 0
|
||||
strLen, p, err := readVarInt(7, p)
|
||||
if err != nil {
|
||||
return "", p, err
|
||||
}
|
||||
if d.maxStrLen != 0 && strLen > uint64(d.maxStrLen) {
|
||||
return "", nil, ErrStringLength
|
||||
}
|
||||
if uint64(len(p)) < strLen {
|
||||
return "", p, errNeedMore
|
||||
}
|
||||
if !isHuff {
|
||||
if wantStr {
|
||||
s = string(p[:strLen])
|
||||
}
|
||||
return s, p[strLen:], nil
|
||||
}
|
||||
|
||||
if wantStr {
|
||||
buf := bufPool.Get().(*bytes.Buffer)
|
||||
buf.Reset() // don't trust others
|
||||
defer bufPool.Put(buf)
|
||||
if err := huffmanDecode(buf, d.maxStrLen, p[:strLen]); err != nil {
|
||||
buf.Reset()
|
||||
return "", nil, err
|
||||
}
|
||||
s = buf.String()
|
||||
buf.Reset() // be nice to GC
|
||||
}
|
||||
return s, p[strLen:], nil
|
||||
}
|
|
@ -0,0 +1,212 @@
|
|||
// Copyright 2014 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package hpack
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"errors"
|
||||
"io"
|
||||
"sync"
|
||||
)
|
||||
|
||||
var bufPool = sync.Pool{
|
||||
New: func() interface{} { return new(bytes.Buffer) },
|
||||
}
|
||||
|
||||
// HuffmanDecode decodes the string in v and writes the expanded
|
||||
// result to w, returning the number of bytes written to w and the
|
||||
// Write call's return value. At most one Write call is made.
|
||||
func HuffmanDecode(w io.Writer, v []byte) (int, error) {
|
||||
buf := bufPool.Get().(*bytes.Buffer)
|
||||
buf.Reset()
|
||||
defer bufPool.Put(buf)
|
||||
if err := huffmanDecode(buf, 0, v); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
return w.Write(buf.Bytes())
|
||||
}
|
||||
|
||||
// HuffmanDecodeToString decodes the string in v.
|
||||
func HuffmanDecodeToString(v []byte) (string, error) {
|
||||
buf := bufPool.Get().(*bytes.Buffer)
|
||||
buf.Reset()
|
||||
defer bufPool.Put(buf)
|
||||
if err := huffmanDecode(buf, 0, v); err != nil {
|
||||
return "", err
|
||||
}
|
||||
return buf.String(), nil
|
||||
}
|
||||
|
||||
// ErrInvalidHuffman is returned for errors found decoding
|
||||
// Huffman-encoded strings.
|
||||
var ErrInvalidHuffman = errors.New("hpack: invalid Huffman-encoded data")
|
||||
|
||||
// huffmanDecode decodes v to buf.
|
||||
// If maxLen is greater than 0, attempts to write more to buf than
|
||||
// maxLen bytes will return ErrStringLength.
|
||||
func huffmanDecode(buf *bytes.Buffer, maxLen int, v []byte) error {
|
||||
n := rootHuffmanNode
|
||||
// cur is the bit buffer that has not been fed into n.
|
||||
// cbits is the number of low order bits in cur that are valid.
|
||||
// sbits is the number of bits of the symbol prefix being decoded.
|
||||
cur, cbits, sbits := uint(0), uint8(0), uint8(0)
|
||||
for _, b := range v {
|
||||
cur = cur<<8 | uint(b)
|
||||
cbits += 8
|
||||
sbits += 8
|
||||
for cbits >= 8 {
|
||||
idx := byte(cur >> (cbits - 8))
|
||||
n = n.children[idx]
|
||||
if n == nil {
|
||||
return ErrInvalidHuffman
|
||||
}
|
||||
if n.children == nil {
|
||||
if maxLen != 0 && buf.Len() == maxLen {
|
||||
return ErrStringLength
|
||||
}
|
||||
buf.WriteByte(n.sym)
|
||||
cbits -= n.codeLen
|
||||
n = rootHuffmanNode
|
||||
sbits = cbits
|
||||
} else {
|
||||
cbits -= 8
|
||||
}
|
||||
}
|
||||
}
|
||||
for cbits > 0 {
|
||||
n = n.children[byte(cur<<(8-cbits))]
|
||||
if n == nil {
|
||||
return ErrInvalidHuffman
|
||||
}
|
||||
if n.children != nil || n.codeLen > cbits {
|
||||
break
|
||||
}
|
||||
if maxLen != 0 && buf.Len() == maxLen {
|
||||
return ErrStringLength
|
||||
}
|
||||
buf.WriteByte(n.sym)
|
||||
cbits -= n.codeLen
|
||||
n = rootHuffmanNode
|
||||
sbits = cbits
|
||||
}
|
||||
if sbits > 7 {
|
||||
// Either there was an incomplete symbol, or overlong padding.
|
||||
// Both are decoding errors per RFC 7541 section 5.2.
|
||||
return ErrInvalidHuffman
|
||||
}
|
||||
if mask := uint(1<<cbits - 1); cur&mask != mask {
|
||||
// Trailing bits must be a prefix of EOS per RFC 7541 section 5.2.
|
||||
return ErrInvalidHuffman
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
type node struct {
|
||||
// children is non-nil for internal nodes
|
||||
children []*node
|
||||
|
||||
// The following are only valid if children is nil:
|
||||
codeLen uint8 // number of bits that led to the output of sym
|
||||
sym byte // output symbol
|
||||
}
|
||||
|
||||
func newInternalNode() *node {
|
||||
return &node{children: make([]*node, 256)}
|
||||
}
|
||||
|
||||
var rootHuffmanNode = newInternalNode()
|
||||
|
||||
func init() {
|
||||
if len(huffmanCodes) != 256 {
|
||||
panic("unexpected size")
|
||||
}
|
||||
for i, code := range huffmanCodes {
|
||||
addDecoderNode(byte(i), code, huffmanCodeLen[i])
|
||||
}
|
||||
}
|
||||
|
||||
func addDecoderNode(sym byte, code uint32, codeLen uint8) {
|
||||
cur := rootHuffmanNode
|
||||
for codeLen > 8 {
|
||||
codeLen -= 8
|
||||
i := uint8(code >> codeLen)
|
||||
if cur.children[i] == nil {
|
||||
cur.children[i] = newInternalNode()
|
||||
}
|
||||
cur = cur.children[i]
|
||||
}
|
||||
shift := 8 - codeLen
|
||||
start, end := int(uint8(code<<shift)), int(1<<shift)
|
||||
for i := start; i < start+end; i++ {
|
||||
cur.children[i] = &node{sym: sym, codeLen: codeLen}
|
||||
}
|
||||
}
|
||||
|
||||
// AppendHuffmanString appends s, as encoded in Huffman codes, to dst
|
||||
// and returns the extended buffer.
|
||||
func AppendHuffmanString(dst []byte, s string) []byte {
|
||||
rembits := uint8(8)
|
||||
|
||||
for i := 0; i < len(s); i++ {
|
||||
if rembits == 8 {
|
||||
dst = append(dst, 0)
|
||||
}
|
||||
dst, rembits = appendByteToHuffmanCode(dst, rembits, s[i])
|
||||
}
|
||||
|
||||
if rembits < 8 {
|
||||
// special EOS symbol
|
||||
code := uint32(0x3fffffff)
|
||||
nbits := uint8(30)
|
||||
|
||||
t := uint8(code >> (nbits - rembits))
|
||||
dst[len(dst)-1] |= t
|
||||
}
|
||||
|
||||
return dst
|
||||
}
|
||||
|
||||
// HuffmanEncodeLength returns the number of bytes required to encode
|
||||
// s in Huffman codes. The result is round up to byte boundary.
|
||||
func HuffmanEncodeLength(s string) uint64 {
|
||||
n := uint64(0)
|
||||
for i := 0; i < len(s); i++ {
|
||||
n += uint64(huffmanCodeLen[s[i]])
|
||||
}
|
||||
return (n + 7) / 8
|
||||
}
|
||||
|
||||
// appendByteToHuffmanCode appends Huffman code for c to dst and
|
||||
// returns the extended buffer and the remaining bits in the last
|
||||
// element. The appending is not byte aligned and the remaining bits
|
||||
// in the last element of dst is given in rembits.
|
||||
func appendByteToHuffmanCode(dst []byte, rembits uint8, c byte) ([]byte, uint8) {
|
||||
code := huffmanCodes[c]
|
||||
nbits := huffmanCodeLen[c]
|
||||
|
||||
for {
|
||||
if rembits > nbits {
|
||||
t := uint8(code << (rembits - nbits))
|
||||
dst[len(dst)-1] |= t
|
||||
rembits -= nbits
|
||||
break
|
||||
}
|
||||
|
||||
t := uint8(code >> (nbits - rembits))
|
||||
dst[len(dst)-1] |= t
|
||||
|
||||
nbits -= rembits
|
||||
rembits = 8
|
||||
|
||||
if nbits == 0 {
|
||||
break
|
||||
}
|
||||
|
||||
dst = append(dst, 0)
|
||||
}
|
||||
|
||||
return dst, rembits
|
||||
}
|
|
@ -0,0 +1,479 @@
|
|||
// Copyright 2014 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package hpack
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
)
|
||||
|
||||
// headerFieldTable implements a list of HeaderFields.
|
||||
// This is used to implement the static and dynamic tables.
|
||||
type headerFieldTable struct {
|
||||
// For static tables, entries are never evicted.
|
||||
//
|
||||
// For dynamic tables, entries are evicted from ents[0] and added to the end.
|
||||
// Each entry has a unique id that starts at one and increments for each
|
||||
// entry that is added. This unique id is stable across evictions, meaning
|
||||
// it can be used as a pointer to a specific entry. As in hpack, unique ids
|
||||
// are 1-based. The unique id for ents[k] is k + evictCount + 1.
|
||||
//
|
||||
// Zero is not a valid unique id.
|
||||
//
|
||||
// evictCount should not overflow in any remotely practical situation. In
|
||||
// practice, we will have one dynamic table per HTTP/2 connection. If we
|
||||
// assume a very powerful server that handles 1M QPS per connection and each
|
||||
// request adds (then evicts) 100 entries from the table, it would still take
|
||||
// 2M years for evictCount to overflow.
|
||||
ents []HeaderField
|
||||
evictCount uint64
|
||||
|
||||
// byName maps a HeaderField name to the unique id of the newest entry with
|
||||
// the same name. See above for a definition of "unique id".
|
||||
byName map[string]uint64
|
||||
|
||||
// byNameValue maps a HeaderField name/value pair to the unique id of the newest
|
||||
// entry with the same name and value. See above for a definition of "unique id".
|
||||
byNameValue map[pairNameValue]uint64
|
||||
}
|
||||
|
||||
type pairNameValue struct {
|
||||
name, value string
|
||||
}
|
||||
|
||||
func (t *headerFieldTable) init() {
|
||||
t.byName = make(map[string]uint64)
|
||||
t.byNameValue = make(map[pairNameValue]uint64)
|
||||
}
|
||||
|
||||
// len reports the number of entries in the table.
|
||||
func (t *headerFieldTable) len() int {
|
||||
return len(t.ents)
|
||||
}
|
||||
|
||||
// addEntry adds a new entry.
|
||||
func (t *headerFieldTable) addEntry(f HeaderField) {
|
||||
id := uint64(t.len()) + t.evictCount + 1
|
||||
t.byName[f.Name] = id
|
||||
t.byNameValue[pairNameValue{f.Name, f.Value}] = id
|
||||
t.ents = append(t.ents, f)
|
||||
}
|
||||
|
||||
// evictOldest evicts the n oldest entries in the table.
|
||||
func (t *headerFieldTable) evictOldest(n int) {
|
||||
if n > t.len() {
|
||||
panic(fmt.Sprintf("evictOldest(%v) on table with %v entries", n, t.len()))
|
||||
}
|
||||
for k := 0; k < n; k++ {
|
||||
f := t.ents[k]
|
||||
id := t.evictCount + uint64(k) + 1
|
||||
if t.byName[f.Name] == id {
|
||||
delete(t.byName, f.Name)
|
||||
}
|
||||
if p := (pairNameValue{f.Name, f.Value}); t.byNameValue[p] == id {
|
||||
delete(t.byNameValue, p)
|
||||
}
|
||||
}
|
||||
copy(t.ents, t.ents[n:])
|
||||
for k := t.len() - n; k < t.len(); k++ {
|
||||
t.ents[k] = HeaderField{} // so strings can be garbage collected
|
||||
}
|
||||
t.ents = t.ents[:t.len()-n]
|
||||
if t.evictCount+uint64(n) < t.evictCount {
|
||||
panic("evictCount overflow")
|
||||
}
|
||||
t.evictCount += uint64(n)
|
||||
}
|
||||
|
||||
// search finds f in the table. If there is no match, i is 0.
|
||||
// If both name and value match, i is the matched index and nameValueMatch
|
||||
// becomes true. If only name matches, i points to that index and
|
||||
// nameValueMatch becomes false.
|
||||
//
|
||||
// The returned index is a 1-based HPACK index. For dynamic tables, HPACK says
|
||||
// that index 1 should be the newest entry, but t.ents[0] is the oldest entry,
|
||||
// meaning t.ents is reversed for dynamic tables. Hence, when t is a dynamic
|
||||
// table, the return value i actually refers to the entry t.ents[t.len()-i].
|
||||
//
|
||||
// All tables are assumed to be a dynamic tables except for the global
|
||||
// staticTable pointer.
|
||||
//
|
||||
// See Section 2.3.3.
|
||||
func (t *headerFieldTable) search(f HeaderField) (i uint64, nameValueMatch bool) {
|
||||
if !f.Sensitive {
|
||||
if id := t.byNameValue[pairNameValue{f.Name, f.Value}]; id != 0 {
|
||||
return t.idToIndex(id), true
|
||||
}
|
||||
}
|
||||
if id := t.byName[f.Name]; id != 0 {
|
||||
return t.idToIndex(id), false
|
||||
}
|
||||
return 0, false
|
||||
}
|
||||
|
||||
// idToIndex converts a unique id to an HPACK index.
|
||||
// See Section 2.3.3.
|
||||
func (t *headerFieldTable) idToIndex(id uint64) uint64 {
|
||||
if id <= t.evictCount {
|
||||
panic(fmt.Sprintf("id (%v) <= evictCount (%v)", id, t.evictCount))
|
||||
}
|
||||
k := id - t.evictCount - 1 // convert id to an index t.ents[k]
|
||||
if t != staticTable {
|
||||
return uint64(t.len()) - k // dynamic table
|
||||
}
|
||||
return k + 1
|
||||
}
|
||||
|
||||
// http://tools.ietf.org/html/draft-ietf-httpbis-header-compression-07#appendix-B
|
||||
var staticTable = newStaticTable()
|
||||
var staticTableEntries = [...]HeaderField{
|
||||
{Name: ":authority"},
|
||||
{Name: ":method", Value: "GET"},
|
||||
{Name: ":method", Value: "POST"},
|
||||
{Name: ":path", Value: "/"},
|
||||
{Name: ":path", Value: "/index.html"},
|
||||
{Name: ":scheme", Value: "http"},
|
||||
{Name: ":scheme", Value: "https"},
|
||||
{Name: ":status", Value: "200"},
|
||||
{Name: ":status", Value: "204"},
|
||||
{Name: ":status", Value: "206"},
|
||||
{Name: ":status", Value: "304"},
|
||||
{Name: ":status", Value: "400"},
|
||||
{Name: ":status", Value: "404"},
|
||||
{Name: ":status", Value: "500"},
|
||||
{Name: "accept-charset"},
|
||||
{Name: "accept-encoding", Value: "gzip, deflate"},
|
||||
{Name: "accept-language"},
|
||||
{Name: "accept-ranges"},
|
||||
{Name: "accept"},
|
||||
{Name: "access-control-allow-origin"},
|
||||
{Name: "age"},
|
||||
{Name: "allow"},
|
||||
{Name: "authorization"},
|
||||
{Name: "cache-control"},
|
||||
{Name: "content-disposition"},
|
||||
{Name: "content-encoding"},
|
||||
{Name: "content-language"},
|
||||
{Name: "content-length"},
|
||||
{Name: "content-location"},
|
||||
{Name: "content-range"},
|
||||
{Name: "content-type"},
|
||||
{Name: "cookie"},
|
||||
{Name: "date"},
|
||||
{Name: "etag"},
|
||||
{Name: "expect"},
|
||||
{Name: "expires"},
|
||||
{Name: "from"},
|
||||
{Name: "host"},
|
||||
{Name: "if-match"},
|
||||
{Name: "if-modified-since"},
|
||||
{Name: "if-none-match"},
|
||||
{Name: "if-range"},
|
||||
{Name: "if-unmodified-since"},
|
||||
{Name: "last-modified"},
|
||||
{Name: "link"},
|
||||
{Name: "location"},
|
||||
{Name: "max-forwards"},
|
||||
{Name: "proxy-authenticate"},
|
||||
{Name: "proxy-authorization"},
|
||||
{Name: "range"},
|
||||
{Name: "referer"},
|
||||
{Name: "refresh"},
|
||||
{Name: "retry-after"},
|
||||
{Name: "server"},
|
||||
{Name: "set-cookie"},
|
||||
{Name: "strict-transport-security"},
|
||||
{Name: "transfer-encoding"},
|
||||
{Name: "user-agent"},
|
||||
{Name: "vary"},
|
||||
{Name: "via"},
|
||||
{Name: "www-authenticate"},
|
||||
}
|
||||
|
||||
func newStaticTable() *headerFieldTable {
|
||||
t := &headerFieldTable{}
|
||||
t.init()
|
||||
for _, e := range staticTableEntries[:] {
|
||||
t.addEntry(e)
|
||||
}
|
||||
return t
|
||||
}
|
||||
|
||||
var huffmanCodes = [256]uint32{
|
||||
0x1ff8,
|
||||
0x7fffd8,
|
||||
0xfffffe2,
|
||||
0xfffffe3,
|
||||
0xfffffe4,
|
||||
0xfffffe5,
|
||||
0xfffffe6,
|
||||
0xfffffe7,
|
||||
0xfffffe8,
|
||||
0xffffea,
|
||||
0x3ffffffc,
|
||||
0xfffffe9,
|
||||
0xfffffea,
|
||||
0x3ffffffd,
|
||||
0xfffffeb,
|
||||
0xfffffec,
|
||||
0xfffffed,
|
||||
0xfffffee,
|
||||
0xfffffef,
|
||||
0xffffff0,
|
||||
0xffffff1,
|
||||
0xffffff2,
|
||||
0x3ffffffe,
|
||||
0xffffff3,
|
||||
0xffffff4,
|
||||
0xffffff5,
|
||||
0xffffff6,
|
||||
0xffffff7,
|
||||
0xffffff8,
|
||||
0xffffff9,
|
||||
0xffffffa,
|
||||
0xffffffb,
|
||||
0x14,
|
||||
0x3f8,
|
||||
0x3f9,
|
||||
0xffa,
|
||||
0x1ff9,
|
||||
0x15,
|
||||
0xf8,
|
||||
0x7fa,
|
||||
0x3fa,
|
||||
0x3fb,
|
||||
0xf9,
|
||||
0x7fb,
|
||||
0xfa,
|
||||
0x16,
|
||||
0x17,
|
||||
0x18,
|
||||
0x0,
|
||||
0x1,
|
||||
0x2,
|
||||
0x19,
|
||||
0x1a,
|
||||
0x1b,
|
||||
0x1c,
|
||||
0x1d,
|
||||
0x1e,
|
||||
0x1f,
|
||||
0x5c,
|
||||
0xfb,
|
||||
0x7ffc,
|
||||
0x20,
|
||||
0xffb,
|
||||
0x3fc,
|
||||
0x1ffa,
|
||||
0x21,
|
||||
0x5d,
|
||||
0x5e,
|
||||
0x5f,
|
||||
0x60,
|
||||
0x61,
|
||||
0x62,
|
||||
0x63,
|
||||
0x64,
|
||||
0x65,
|
||||
0x66,
|
||||
0x67,
|
||||
0x68,
|
||||
0x69,
|
||||
0x6a,
|
||||
0x6b,
|
||||
0x6c,
|
||||
0x6d,
|
||||
0x6e,
|
||||
0x6f,
|
||||
0x70,
|
||||
0x71,
|
||||
0x72,
|
||||
0xfc,
|
||||
0x73,
|
||||
0xfd,
|
||||
0x1ffb,
|
||||
0x7fff0,
|
||||
0x1ffc,
|
||||
0x3ffc,
|
||||
0x22,
|
||||
0x7ffd,
|
||||
0x3,
|
||||
0x23,
|
||||
0x4,
|
||||
0x24,
|
||||
0x5,
|
||||
0x25,
|
||||
0x26,
|
||||
0x27,
|
||||
0x6,
|
||||
0x74,
|
||||
0x75,
|
||||
0x28,
|
||||
0x29,
|
||||
0x2a,
|
||||
0x7,
|
||||
0x2b,
|
||||
0x76,
|
||||
0x2c,
|
||||
0x8,
|
||||
0x9,
|
||||
0x2d,
|
||||
0x77,
|
||||
0x78,
|
||||
0x79,
|
||||
0x7a,
|
||||
0x7b,
|
||||
0x7ffe,
|
||||
0x7fc,
|
||||
0x3ffd,
|
||||
0x1ffd,
|
||||
0xffffffc,
|
||||
0xfffe6,
|
||||
0x3fffd2,
|
||||
0xfffe7,
|
||||
0xfffe8,
|
||||
0x3fffd3,
|
||||
0x3fffd4,
|
||||
0x3fffd5,
|
||||
0x7fffd9,
|
||||
0x3fffd6,
|
||||
0x7fffda,
|
||||
0x7fffdb,
|
||||
0x7fffdc,
|
||||
0x7fffdd,
|
||||
0x7fffde,
|
||||
0xffffeb,
|
||||
0x7fffdf,
|
||||
0xffffec,
|
||||
0xffffed,
|
||||
0x3fffd7,
|
||||
0x7fffe0,
|
||||
0xffffee,
|
||||
0x7fffe1,
|
||||
0x7fffe2,
|
||||
0x7fffe3,
|
||||
0x7fffe4,
|
||||
0x1fffdc,
|
||||
0x3fffd8,
|
||||
0x7fffe5,
|
||||
0x3fffd9,
|
||||
0x7fffe6,
|
||||
0x7fffe7,
|
||||
0xffffef,
|
||||
0x3fffda,
|
||||
0x1fffdd,
|
||||
0xfffe9,
|
||||
0x3fffdb,
|
||||
0x3fffdc,
|
||||
0x7fffe8,
|
||||
0x7fffe9,
|
||||
0x1fffde,
|
||||
0x7fffea,
|
||||
0x3fffdd,
|
||||
0x3fffde,
|
||||
0xfffff0,
|
||||
0x1fffdf,
|
||||
0x3fffdf,
|
||||
0x7fffeb,
|
||||
0x7fffec,
|
||||
0x1fffe0,
|
||||
0x1fffe1,
|
||||
0x3fffe0,
|
||||
0x1fffe2,
|
||||
0x7fffed,
|
||||
0x3fffe1,
|
||||
0x7fffee,
|
||||
0x7fffef,
|
||||
0xfffea,
|
||||
0x3fffe2,
|
||||
0x3fffe3,
|
||||
0x3fffe4,
|
||||
0x7ffff0,
|
||||
0x3fffe5,
|
||||
0x3fffe6,
|
||||
0x7ffff1,
|
||||
0x3ffffe0,
|
||||
0x3ffffe1,
|
||||
0xfffeb,
|
||||
0x7fff1,
|
||||
0x3fffe7,
|
||||
0x7ffff2,
|
||||
0x3fffe8,
|
||||
0x1ffffec,
|
||||
0x3ffffe2,
|
||||
0x3ffffe3,
|
||||
0x3ffffe4,
|
||||
0x7ffffde,
|
||||
0x7ffffdf,
|
||||
0x3ffffe5,
|
||||
0xfffff1,
|
||||
0x1ffffed,
|
||||
0x7fff2,
|
||||
0x1fffe3,
|
||||
0x3ffffe6,
|
||||
0x7ffffe0,
|
||||
0x7ffffe1,
|
||||
0x3ffffe7,
|
||||
0x7ffffe2,
|
||||
0xfffff2,
|
||||
0x1fffe4,
|
||||
0x1fffe5,
|
||||
0x3ffffe8,
|
||||
0x3ffffe9,
|
||||
0xffffffd,
|
||||
0x7ffffe3,
|
||||
0x7ffffe4,
|
||||
0x7ffffe5,
|
||||
0xfffec,
|
||||
0xfffff3,
|
||||
0xfffed,
|
||||
0x1fffe6,
|
||||
0x3fffe9,
|
||||
0x1fffe7,
|
||||
0x1fffe8,
|
||||
0x7ffff3,
|
||||
0x3fffea,
|
||||
0x3fffeb,
|
||||
0x1ffffee,
|
||||
0x1ffffef,
|
||||
0xfffff4,
|
||||
0xfffff5,
|
||||
0x3ffffea,
|
||||
0x7ffff4,
|
||||
0x3ffffeb,
|
||||
0x7ffffe6,
|
||||
0x3ffffec,
|
||||
0x3ffffed,
|
||||
0x7ffffe7,
|
||||
0x7ffffe8,
|
||||
0x7ffffe9,
|
||||
0x7ffffea,
|
||||
0x7ffffeb,
|
||||
0xffffffe,
|
||||
0x7ffffec,
|
||||
0x7ffffed,
|
||||
0x7ffffee,
|
||||
0x7ffffef,
|
||||
0x7fffff0,
|
||||
0x3ffffee,
|
||||
}
|
||||
|
||||
var huffmanCodeLen = [256]uint8{
|
||||
13, 23, 28, 28, 28, 28, 28, 28, 28, 24, 30, 28, 28, 30, 28, 28,
|
||||
28, 28, 28, 28, 28, 28, 30, 28, 28, 28, 28, 28, 28, 28, 28, 28,
|
||||
6, 10, 10, 12, 13, 6, 8, 11, 10, 10, 8, 11, 8, 6, 6, 6,
|
||||
5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 7, 8, 15, 6, 12, 10,
|
||||
13, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
|
||||
7, 7, 7, 7, 7, 7, 7, 7, 8, 7, 8, 13, 19, 13, 14, 6,
|
||||
15, 5, 6, 5, 6, 5, 6, 6, 6, 5, 7, 7, 6, 6, 6, 5,
|
||||
6, 7, 6, 5, 5, 6, 7, 7, 7, 7, 7, 15, 11, 14, 13, 28,
|
||||
20, 22, 20, 20, 22, 22, 22, 23, 22, 23, 23, 23, 23, 23, 24, 23,
|
||||
24, 24, 22, 23, 24, 23, 23, 23, 23, 21, 22, 23, 22, 23, 23, 24,
|
||||
22, 21, 20, 22, 22, 23, 23, 21, 23, 22, 22, 24, 21, 22, 23, 23,
|
||||
21, 21, 22, 21, 23, 22, 23, 23, 20, 22, 22, 22, 23, 22, 22, 23,
|
||||
26, 26, 20, 19, 22, 23, 22, 25, 26, 26, 26, 27, 27, 26, 24, 25,
|
||||
19, 21, 26, 27, 27, 26, 27, 24, 21, 21, 26, 26, 28, 27, 27, 27,
|
||||
20, 24, 20, 21, 22, 21, 21, 23, 22, 22, 25, 25, 24, 24, 26, 23,
|
||||
26, 27, 26, 26, 27, 27, 27, 27, 27, 28, 27, 27, 27, 27, 27, 26,
|
||||
}
|
|
@ -0,0 +1,391 @@
|
|||
// Copyright 2014 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package http2 implements the HTTP/2 protocol.
|
||||
//
|
||||
// This package is low-level and intended to be used directly by very
|
||||
// few people. Most users will use it indirectly through the automatic
|
||||
// use by the net/http package (from Go 1.6 and later).
|
||||
// For use in earlier Go versions see ConfigureServer. (Transport support
|
||||
// requires Go 1.6 or later)
|
||||
//
|
||||
// See https://http2.github.io/ for more information on HTTP/2.
|
||||
//
|
||||
// See https://http2.golang.org/ for a test server running this code.
|
||||
//
|
||||
package http2 // import "golang.org/x/net/http2"
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"crypto/tls"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"os"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"golang.org/x/net/lex/httplex"
|
||||
)
|
||||
|
||||
var (
|
||||
VerboseLogs bool
|
||||
logFrameWrites bool
|
||||
logFrameReads bool
|
||||
inTests bool
|
||||
)
|
||||
|
||||
func init() {
|
||||
e := os.Getenv("GODEBUG")
|
||||
if strings.Contains(e, "http2debug=1") {
|
||||
VerboseLogs = true
|
||||
}
|
||||
if strings.Contains(e, "http2debug=2") {
|
||||
VerboseLogs = true
|
||||
logFrameWrites = true
|
||||
logFrameReads = true
|
||||
}
|
||||
}
|
||||
|
||||
const (
|
||||
// ClientPreface is the string that must be sent by new
|
||||
// connections from clients.
|
||||
ClientPreface = "PRI * HTTP/2.0\r\n\r\nSM\r\n\r\n"
|
||||
|
||||
// SETTINGS_MAX_FRAME_SIZE default
|
||||
// http://http2.github.io/http2-spec/#rfc.section.6.5.2
|
||||
initialMaxFrameSize = 16384
|
||||
|
||||
// NextProtoTLS is the NPN/ALPN protocol negotiated during
|
||||
// HTTP/2's TLS setup.
|
||||
NextProtoTLS = "h2"
|
||||
|
||||
// http://http2.github.io/http2-spec/#SettingValues
|
||||
initialHeaderTableSize = 4096
|
||||
|
||||
initialWindowSize = 65535 // 6.9.2 Initial Flow Control Window Size
|
||||
|
||||
defaultMaxReadFrameSize = 1 << 20
|
||||
)
|
||||
|
||||
var (
|
||||
clientPreface = []byte(ClientPreface)
|
||||
)
|
||||
|
||||
type streamState int
|
||||
|
||||
// HTTP/2 stream states.
|
||||
//
|
||||
// See http://tools.ietf.org/html/rfc7540#section-5.1.
|
||||
//
|
||||
// For simplicity, the server code merges "reserved (local)" into
|
||||
// "half-closed (remote)". This is one less state transition to track.
|
||||
// The only downside is that we send PUSH_PROMISEs slightly less
|
||||
// liberally than allowable. More discussion here:
|
||||
// https://lists.w3.org/Archives/Public/ietf-http-wg/2016JulSep/0599.html
|
||||
//
|
||||
// "reserved (remote)" is omitted since the client code does not
|
||||
// support server push.
|
||||
const (
|
||||
stateIdle streamState = iota
|
||||
stateOpen
|
||||
stateHalfClosedLocal
|
||||
stateHalfClosedRemote
|
||||
stateClosed
|
||||
)
|
||||
|
||||
var stateName = [...]string{
|
||||
stateIdle: "Idle",
|
||||
stateOpen: "Open",
|
||||
stateHalfClosedLocal: "HalfClosedLocal",
|
||||
stateHalfClosedRemote: "HalfClosedRemote",
|
||||
stateClosed: "Closed",
|
||||
}
|
||||
|
||||
func (st streamState) String() string {
|
||||
return stateName[st]
|
||||
}
|
||||
|
||||
// Setting is a setting parameter: which setting it is, and its value.
|
||||
type Setting struct {
|
||||
// ID is which setting is being set.
|
||||
// See http://http2.github.io/http2-spec/#SettingValues
|
||||
ID SettingID
|
||||
|
||||
// Val is the value.
|
||||
Val uint32
|
||||
}
|
||||
|
||||
func (s Setting) String() string {
|
||||
return fmt.Sprintf("[%v = %d]", s.ID, s.Val)
|
||||
}
|
||||
|
||||
// Valid reports whether the setting is valid.
|
||||
func (s Setting) Valid() error {
|
||||
// Limits and error codes from 6.5.2 Defined SETTINGS Parameters
|
||||
switch s.ID {
|
||||
case SettingEnablePush:
|
||||
if s.Val != 1 && s.Val != 0 {
|
||||
return ConnectionError(ErrCodeProtocol)
|
||||
}
|
||||
case SettingInitialWindowSize:
|
||||
if s.Val > 1<<31-1 {
|
||||
return ConnectionError(ErrCodeFlowControl)
|
||||
}
|
||||
case SettingMaxFrameSize:
|
||||
if s.Val < 16384 || s.Val > 1<<24-1 {
|
||||
return ConnectionError(ErrCodeProtocol)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// A SettingID is an HTTP/2 setting as defined in
|
||||
// http://http2.github.io/http2-spec/#iana-settings
|
||||
type SettingID uint16
|
||||
|
||||
const (
|
||||
SettingHeaderTableSize SettingID = 0x1
|
||||
SettingEnablePush SettingID = 0x2
|
||||
SettingMaxConcurrentStreams SettingID = 0x3
|
||||
SettingInitialWindowSize SettingID = 0x4
|
||||
SettingMaxFrameSize SettingID = 0x5
|
||||
SettingMaxHeaderListSize SettingID = 0x6
|
||||
)
|
||||
|
||||
var settingName = map[SettingID]string{
|
||||
SettingHeaderTableSize: "HEADER_TABLE_SIZE",
|
||||
SettingEnablePush: "ENABLE_PUSH",
|
||||
SettingMaxConcurrentStreams: "MAX_CONCURRENT_STREAMS",
|
||||
SettingInitialWindowSize: "INITIAL_WINDOW_SIZE",
|
||||
SettingMaxFrameSize: "MAX_FRAME_SIZE",
|
||||
SettingMaxHeaderListSize: "MAX_HEADER_LIST_SIZE",
|
||||
}
|
||||
|
||||
func (s SettingID) String() string {
|
||||
if v, ok := settingName[s]; ok {
|
||||
return v
|
||||
}
|
||||
return fmt.Sprintf("UNKNOWN_SETTING_%d", uint16(s))
|
||||
}
|
||||
|
||||
var (
|
||||
errInvalidHeaderFieldName = errors.New("http2: invalid header field name")
|
||||
errInvalidHeaderFieldValue = errors.New("http2: invalid header field value")
|
||||
)
|
||||
|
||||
// validWireHeaderFieldName reports whether v is a valid header field
|
||||
// name (key). See httplex.ValidHeaderName for the base rules.
|
||||
//
|
||||
// Further, http2 says:
|
||||
// "Just as in HTTP/1.x, header field names are strings of ASCII
|
||||
// characters that are compared in a case-insensitive
|
||||
// fashion. However, header field names MUST be converted to
|
||||
// lowercase prior to their encoding in HTTP/2. "
|
||||
func validWireHeaderFieldName(v string) bool {
|
||||
if len(v) == 0 {
|
||||
return false
|
||||
}
|
||||
for _, r := range v {
|
||||
if !httplex.IsTokenRune(r) {
|
||||
return false
|
||||
}
|
||||
if 'A' <= r && r <= 'Z' {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
var httpCodeStringCommon = map[int]string{} // n -> strconv.Itoa(n)
|
||||
|
||||
func init() {
|
||||
for i := 100; i <= 999; i++ {
|
||||
if v := http.StatusText(i); v != "" {
|
||||
httpCodeStringCommon[i] = strconv.Itoa(i)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func httpCodeString(code int) string {
|
||||
if s, ok := httpCodeStringCommon[code]; ok {
|
||||
return s
|
||||
}
|
||||
return strconv.Itoa(code)
|
||||
}
|
||||
|
||||
// from pkg io
|
||||
type stringWriter interface {
|
||||
WriteString(s string) (n int, err error)
|
||||
}
|
||||
|
||||
// A gate lets two goroutines coordinate their activities.
|
||||
type gate chan struct{}
|
||||
|
||||
func (g gate) Done() { g <- struct{}{} }
|
||||
func (g gate) Wait() { <-g }
|
||||
|
||||
// A closeWaiter is like a sync.WaitGroup but only goes 1 to 0 (open to closed).
|
||||
type closeWaiter chan struct{}
|
||||
|
||||
// Init makes a closeWaiter usable.
|
||||
// It exists because so a closeWaiter value can be placed inside a
|
||||
// larger struct and have the Mutex and Cond's memory in the same
|
||||
// allocation.
|
||||
func (cw *closeWaiter) Init() {
|
||||
*cw = make(chan struct{})
|
||||
}
|
||||
|
||||
// Close marks the closeWaiter as closed and unblocks any waiters.
|
||||
func (cw closeWaiter) Close() {
|
||||
close(cw)
|
||||
}
|
||||
|
||||
// Wait waits for the closeWaiter to become closed.
|
||||
func (cw closeWaiter) Wait() {
|
||||
<-cw
|
||||
}
|
||||
|
||||
// bufferedWriter is a buffered writer that writes to w.
|
||||
// Its buffered writer is lazily allocated as needed, to minimize
|
||||
// idle memory usage with many connections.
|
||||
type bufferedWriter struct {
|
||||
w io.Writer // immutable
|
||||
bw *bufio.Writer // non-nil when data is buffered
|
||||
}
|
||||
|
||||
func newBufferedWriter(w io.Writer) *bufferedWriter {
|
||||
return &bufferedWriter{w: w}
|
||||
}
|
||||
|
||||
// bufWriterPoolBufferSize is the size of bufio.Writer's
|
||||
// buffers created using bufWriterPool.
|
||||
//
|
||||
// TODO: pick a less arbitrary value? this is a bit under
|
||||
// (3 x typical 1500 byte MTU) at least. Other than that,
|
||||
// not much thought went into it.
|
||||
const bufWriterPoolBufferSize = 4 << 10
|
||||
|
||||
var bufWriterPool = sync.Pool{
|
||||
New: func() interface{} {
|
||||
return bufio.NewWriterSize(nil, bufWriterPoolBufferSize)
|
||||
},
|
||||
}
|
||||
|
||||
func (w *bufferedWriter) Available() int {
|
||||
if w.bw == nil {
|
||||
return bufWriterPoolBufferSize
|
||||
}
|
||||
return w.bw.Available()
|
||||
}
|
||||
|
||||
func (w *bufferedWriter) Write(p []byte) (n int, err error) {
|
||||
if w.bw == nil {
|
||||
bw := bufWriterPool.Get().(*bufio.Writer)
|
||||
bw.Reset(w.w)
|
||||
w.bw = bw
|
||||
}
|
||||
return w.bw.Write(p)
|
||||
}
|
||||
|
||||
func (w *bufferedWriter) Flush() error {
|
||||
bw := w.bw
|
||||
if bw == nil {
|
||||
return nil
|
||||
}
|
||||
err := bw.Flush()
|
||||
bw.Reset(nil)
|
||||
bufWriterPool.Put(bw)
|
||||
w.bw = nil
|
||||
return err
|
||||
}
|
||||
|
||||
func mustUint31(v int32) uint32 {
|
||||
if v < 0 || v > 2147483647 {
|
||||
panic("out of range")
|
||||
}
|
||||
return uint32(v)
|
||||
}
|
||||
|
||||
// bodyAllowedForStatus reports whether a given response status code
|
||||
// permits a body. See RFC 2616, section 4.4.
|
||||
func bodyAllowedForStatus(status int) bool {
|
||||
switch {
|
||||
case status >= 100 && status <= 199:
|
||||
return false
|
||||
case status == 204:
|
||||
return false
|
||||
case status == 304:
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
type httpError struct {
|
||||
msg string
|
||||
timeout bool
|
||||
}
|
||||
|
||||
func (e *httpError) Error() string { return e.msg }
|
||||
func (e *httpError) Timeout() bool { return e.timeout }
|
||||
func (e *httpError) Temporary() bool { return true }
|
||||
|
||||
var errTimeout error = &httpError{msg: "http2: timeout awaiting response headers", timeout: true}
|
||||
|
||||
type connectionStater interface {
|
||||
ConnectionState() tls.ConnectionState
|
||||
}
|
||||
|
||||
var sorterPool = sync.Pool{New: func() interface{} { return new(sorter) }}
|
||||
|
||||
type sorter struct {
|
||||
v []string // owned by sorter
|
||||
}
|
||||
|
||||
func (s *sorter) Len() int { return len(s.v) }
|
||||
func (s *sorter) Swap(i, j int) { s.v[i], s.v[j] = s.v[j], s.v[i] }
|
||||
func (s *sorter) Less(i, j int) bool { return s.v[i] < s.v[j] }
|
||||
|
||||
// Keys returns the sorted keys of h.
|
||||
//
|
||||
// The returned slice is only valid until s used again or returned to
|
||||
// its pool.
|
||||
func (s *sorter) Keys(h http.Header) []string {
|
||||
keys := s.v[:0]
|
||||
for k := range h {
|
||||
keys = append(keys, k)
|
||||
}
|
||||
s.v = keys
|
||||
sort.Sort(s)
|
||||
return keys
|
||||
}
|
||||
|
||||
func (s *sorter) SortStrings(ss []string) {
|
||||
// Our sorter works on s.v, which sorter owns, so
|
||||
// stash it away while we sort the user's buffer.
|
||||
save := s.v
|
||||
s.v = ss
|
||||
sort.Sort(s)
|
||||
s.v = save
|
||||
}
|
||||
|
||||
// validPseudoPath reports whether v is a valid :path pseudo-header
|
||||
// value. It must be either:
|
||||
//
|
||||
// *) a non-empty string starting with '/'
|
||||
// *) the string '*', for OPTIONS requests.
|
||||
//
|
||||
// For now this is only used a quick check for deciding when to clean
|
||||
// up Opaque URLs before sending requests from the Transport.
|
||||
// See golang.org/issue/16847
|
||||
//
|
||||
// We used to enforce that the path also didn't start with "//", but
|
||||
// Google's GFE accepts such paths and Chrome sends them, so ignore
|
||||
// that part of the spec. See golang.org/issue/19103.
|
||||
func validPseudoPath(v string) bool {
|
||||
return (len(v) > 0 && v[0] == '/') || v == "*"
|
||||
}
|
|
@ -0,0 +1,21 @@
|
|||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build !go1.6
|
||||
|
||||
package http2
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
"time"
|
||||
)
|
||||
|
||||
func configureTransport(t1 *http.Transport) (*Transport, error) {
|
||||
return nil, errTransportVersion
|
||||
}
|
||||
|
||||
func transportExpectContinueTimeout(t1 *http.Transport) time.Duration {
|
||||
return 0
|
||||
|
||||
}
|
|
@ -0,0 +1,87 @@
|
|||
// Copyright 2016 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build !go1.7
|
||||
|
||||
package http2
|
||||
|
||||
import (
|
||||
"crypto/tls"
|
||||
"net"
|
||||
"net/http"
|
||||
"time"
|
||||
)
|
||||
|
||||
type contextContext interface {
|
||||
Done() <-chan struct{}
|
||||
Err() error
|
||||
}
|
||||
|
||||
type fakeContext struct{}
|
||||
|
||||
func (fakeContext) Done() <-chan struct{} { return nil }
|
||||
func (fakeContext) Err() error { panic("should not be called") }
|
||||
|
||||
func reqContext(r *http.Request) fakeContext {
|
||||
return fakeContext{}
|
||||
}
|
||||
|
||||
func setResponseUncompressed(res *http.Response) {
|
||||
// Nothing.
|
||||
}
|
||||
|
||||
type clientTrace struct{}
|
||||
|
||||
func requestTrace(*http.Request) *clientTrace { return nil }
|
||||
func traceGotConn(*http.Request, *ClientConn) {}
|
||||
func traceFirstResponseByte(*clientTrace) {}
|
||||
func traceWroteHeaders(*clientTrace) {}
|
||||
func traceWroteRequest(*clientTrace, error) {}
|
||||
func traceGot100Continue(trace *clientTrace) {}
|
||||
func traceWait100Continue(trace *clientTrace) {}
|
||||
|
||||
func nop() {}
|
||||
|
||||
func serverConnBaseContext(c net.Conn, opts *ServeConnOpts) (ctx contextContext, cancel func()) {
|
||||
return nil, nop
|
||||
}
|
||||
|
||||
func contextWithCancel(ctx contextContext) (_ contextContext, cancel func()) {
|
||||
return ctx, nop
|
||||
}
|
||||
|
||||
func requestWithContext(req *http.Request, ctx contextContext) *http.Request {
|
||||
return req
|
||||
}
|
||||
|
||||
// temporary copy of Go 1.6's private tls.Config.clone:
|
||||
func cloneTLSConfig(c *tls.Config) *tls.Config {
|
||||
return &tls.Config{
|
||||
Rand: c.Rand,
|
||||
Time: c.Time,
|
||||
Certificates: c.Certificates,
|
||||
NameToCertificate: c.NameToCertificate,
|
||||
GetCertificate: c.GetCertificate,
|
||||
RootCAs: c.RootCAs,
|
||||
NextProtos: c.NextProtos,
|
||||
ServerName: c.ServerName,
|
||||
ClientAuth: c.ClientAuth,
|
||||
ClientCAs: c.ClientCAs,
|
||||
InsecureSkipVerify: c.InsecureSkipVerify,
|
||||
CipherSuites: c.CipherSuites,
|
||||
PreferServerCipherSuites: c.PreferServerCipherSuites,
|
||||
SessionTicketsDisabled: c.SessionTicketsDisabled,
|
||||
SessionTicketKey: c.SessionTicketKey,
|
||||
ClientSessionCache: c.ClientSessionCache,
|
||||
MinVersion: c.MinVersion,
|
||||
MaxVersion: c.MaxVersion,
|
||||
CurvePreferences: c.CurvePreferences,
|
||||
}
|
||||
}
|
||||
|
||||
func (cc *ClientConn) Ping(ctx contextContext) error {
|
||||
return cc.ping(ctx)
|
||||
}
|
||||
|
||||
func (t *Transport) idleConnTimeout() time.Duration { return 0 }
|
|
@ -0,0 +1,29 @@
|
|||
// Copyright 2016 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build !go1.8
|
||||
|
||||
package http2
|
||||
|
||||
import (
|
||||
"io"
|
||||
"net/http"
|
||||
)
|
||||
|
||||
func configureServer18(h1 *http.Server, h2 *Server) error {
|
||||
// No IdleTimeout to sync prior to Go 1.8.
|
||||
return nil
|
||||
}
|
||||
|
||||
func shouldLogPanic(panicValue interface{}) bool {
|
||||
return panicValue != nil
|
||||
}
|
||||
|
||||
func reqGetBody(req *http.Request) func() (io.ReadCloser, error) {
|
||||
return nil
|
||||
}
|
||||
|
||||
func reqBodyIsNoBody(io.ReadCloser) bool { return false }
|
||||
|
||||
func go18httpNoBody() io.ReadCloser { return nil } // for tests only
|
|
@ -0,0 +1,16 @@
|
|||
// Copyright 2016 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build !go1.9
|
||||
|
||||
package http2
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
)
|
||||
|
||||
func configureServer19(s *http.Server, conf *Server) error {
|
||||
// not supported prior to go1.9
|
||||
return nil
|
||||
}
|
|
@ -0,0 +1,163 @@
|
|||
// Copyright 2014 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package http2
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"io"
|
||||
"sync"
|
||||
)
|
||||
|
||||
// pipe is a goroutine-safe io.Reader/io.Writer pair. It's like
|
||||
// io.Pipe except there are no PipeReader/PipeWriter halves, and the
|
||||
// underlying buffer is an interface. (io.Pipe is always unbuffered)
|
||||
type pipe struct {
|
||||
mu sync.Mutex
|
||||
c sync.Cond // c.L lazily initialized to &p.mu
|
||||
b pipeBuffer // nil when done reading
|
||||
err error // read error once empty. non-nil means closed.
|
||||
breakErr error // immediate read error (caller doesn't see rest of b)
|
||||
donec chan struct{} // closed on error
|
||||
readFn func() // optional code to run in Read before error
|
||||
}
|
||||
|
||||
type pipeBuffer interface {
|
||||
Len() int
|
||||
io.Writer
|
||||
io.Reader
|
||||
}
|
||||
|
||||
func (p *pipe) Len() int {
|
||||
p.mu.Lock()
|
||||
defer p.mu.Unlock()
|
||||
if p.b == nil {
|
||||
return 0
|
||||
}
|
||||
return p.b.Len()
|
||||
}
|
||||
|
||||
// Read waits until data is available and copies bytes
|
||||
// from the buffer into p.
|
||||
func (p *pipe) Read(d []byte) (n int, err error) {
|
||||
p.mu.Lock()
|
||||
defer p.mu.Unlock()
|
||||
if p.c.L == nil {
|
||||
p.c.L = &p.mu
|
||||
}
|
||||
for {
|
||||
if p.breakErr != nil {
|
||||
return 0, p.breakErr
|
||||
}
|
||||
if p.b != nil && p.b.Len() > 0 {
|
||||
return p.b.Read(d)
|
||||
}
|
||||
if p.err != nil {
|
||||
if p.readFn != nil {
|
||||
p.readFn() // e.g. copy trailers
|
||||
p.readFn = nil // not sticky like p.err
|
||||
}
|
||||
p.b = nil
|
||||
return 0, p.err
|
||||
}
|
||||
p.c.Wait()
|
||||
}
|
||||
}
|
||||
|
||||
var errClosedPipeWrite = errors.New("write on closed buffer")
|
||||
|
||||
// Write copies bytes from p into the buffer and wakes a reader.
|
||||
// It is an error to write more data than the buffer can hold.
|
||||
func (p *pipe) Write(d []byte) (n int, err error) {
|
||||
p.mu.Lock()
|
||||
defer p.mu.Unlock()
|
||||
if p.c.L == nil {
|
||||
p.c.L = &p.mu
|
||||
}
|
||||
defer p.c.Signal()
|
||||
if p.err != nil {
|
||||
return 0, errClosedPipeWrite
|
||||
}
|
||||
if p.breakErr != nil {
|
||||
return len(d), nil // discard when there is no reader
|
||||
}
|
||||
return p.b.Write(d)
|
||||
}
|
||||
|
||||
// CloseWithError causes the next Read (waking up a current blocked
|
||||
// Read if needed) to return the provided err after all data has been
|
||||
// read.
|
||||
//
|
||||
// The error must be non-nil.
|
||||
func (p *pipe) CloseWithError(err error) { p.closeWithError(&p.err, err, nil) }
|
||||
|
||||
// BreakWithError causes the next Read (waking up a current blocked
|
||||
// Read if needed) to return the provided err immediately, without
|
||||
// waiting for unread data.
|
||||
func (p *pipe) BreakWithError(err error) { p.closeWithError(&p.breakErr, err, nil) }
|
||||
|
||||
// closeWithErrorAndCode is like CloseWithError but also sets some code to run
|
||||
// in the caller's goroutine before returning the error.
|
||||
func (p *pipe) closeWithErrorAndCode(err error, fn func()) { p.closeWithError(&p.err, err, fn) }
|
||||
|
||||
func (p *pipe) closeWithError(dst *error, err error, fn func()) {
|
||||
if err == nil {
|
||||
panic("err must be non-nil")
|
||||
}
|
||||
p.mu.Lock()
|
||||
defer p.mu.Unlock()
|
||||
if p.c.L == nil {
|
||||
p.c.L = &p.mu
|
||||
}
|
||||
defer p.c.Signal()
|
||||
if *dst != nil {
|
||||
// Already been done.
|
||||
return
|
||||
}
|
||||
p.readFn = fn
|
||||
if dst == &p.breakErr {
|
||||
p.b = nil
|
||||
}
|
||||
*dst = err
|
||||
p.closeDoneLocked()
|
||||
}
|
||||
|
||||
// requires p.mu be held.
|
||||
func (p *pipe) closeDoneLocked() {
|
||||
if p.donec == nil {
|
||||
return
|
||||
}
|
||||
// Close if unclosed. This isn't racy since we always
|
||||
// hold p.mu while closing.
|
||||
select {
|
||||
case <-p.donec:
|
||||
default:
|
||||
close(p.donec)
|
||||
}
|
||||
}
|
||||
|
||||
// Err returns the error (if any) first set by BreakWithError or CloseWithError.
|
||||
func (p *pipe) Err() error {
|
||||
p.mu.Lock()
|
||||
defer p.mu.Unlock()
|
||||
if p.breakErr != nil {
|
||||
return p.breakErr
|
||||
}
|
||||
return p.err
|
||||
}
|
||||
|
||||
// Done returns a channel which is closed if and when this pipe is closed
|
||||
// with CloseWithError.
|
||||
func (p *pipe) Done() <-chan struct{} {
|
||||
p.mu.Lock()
|
||||
defer p.mu.Unlock()
|
||||
if p.donec == nil {
|
||||
p.donec = make(chan struct{})
|
||||
if p.err != nil || p.breakErr != nil {
|
||||
// Already hit an error.
|
||||
p.closeDoneLocked()
|
||||
}
|
||||
}
|
||||
return p.donec
|
||||
}
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,365 @@
|
|||
// Copyright 2014 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package http2
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"log"
|
||||
"net/http"
|
||||
"net/url"
|
||||
|
||||
"golang.org/x/net/http2/hpack"
|
||||
"golang.org/x/net/lex/httplex"
|
||||
)
|
||||
|
||||
// writeFramer is implemented by any type that is used to write frames.
|
||||
type writeFramer interface {
|
||||
writeFrame(writeContext) error
|
||||
|
||||
// staysWithinBuffer reports whether this writer promises that
|
||||
// it will only write less than or equal to size bytes, and it
|
||||
// won't Flush the write context.
|
||||
staysWithinBuffer(size int) bool
|
||||
}
|
||||
|
||||
// writeContext is the interface needed by the various frame writer
|
||||
// types below. All the writeFrame methods below are scheduled via the
|
||||
// frame writing scheduler (see writeScheduler in writesched.go).
|
||||
//
|
||||
// This interface is implemented by *serverConn.
|
||||
//
|
||||
// TODO: decide whether to a) use this in the client code (which didn't
|
||||
// end up using this yet, because it has a simpler design, not
|
||||
// currently implementing priorities), or b) delete this and
|
||||
// make the server code a bit more concrete.
|
||||
type writeContext interface {
|
||||
Framer() *Framer
|
||||
Flush() error
|
||||
CloseConn() error
|
||||
// HeaderEncoder returns an HPACK encoder that writes to the
|
||||
// returned buffer.
|
||||
HeaderEncoder() (*hpack.Encoder, *bytes.Buffer)
|
||||
}
|
||||
|
||||
// writeEndsStream reports whether w writes a frame that will transition
|
||||
// the stream to a half-closed local state. This returns false for RST_STREAM,
|
||||
// which closes the entire stream (not just the local half).
|
||||
func writeEndsStream(w writeFramer) bool {
|
||||
switch v := w.(type) {
|
||||
case *writeData:
|
||||
return v.endStream
|
||||
case *writeResHeaders:
|
||||
return v.endStream
|
||||
case nil:
|
||||
// This can only happen if the caller reuses w after it's
|
||||
// been intentionally nil'ed out to prevent use. Keep this
|
||||
// here to catch future refactoring breaking it.
|
||||
panic("writeEndsStream called on nil writeFramer")
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
type flushFrameWriter struct{}
|
||||
|
||||
func (flushFrameWriter) writeFrame(ctx writeContext) error {
|
||||
return ctx.Flush()
|
||||
}
|
||||
|
||||
func (flushFrameWriter) staysWithinBuffer(max int) bool { return false }
|
||||
|
||||
type writeSettings []Setting
|
||||
|
||||
func (s writeSettings) staysWithinBuffer(max int) bool {
|
||||
const settingSize = 6 // uint16 + uint32
|
||||
return frameHeaderLen+settingSize*len(s) <= max
|
||||
|
||||
}
|
||||
|
||||
func (s writeSettings) writeFrame(ctx writeContext) error {
|
||||
return ctx.Framer().WriteSettings([]Setting(s)...)
|
||||
}
|
||||
|
||||
type writeGoAway struct {
|
||||
maxStreamID uint32
|
||||
code ErrCode
|
||||
}
|
||||
|
||||
func (p *writeGoAway) writeFrame(ctx writeContext) error {
|
||||
err := ctx.Framer().WriteGoAway(p.maxStreamID, p.code, nil)
|
||||
ctx.Flush() // ignore error: we're hanging up on them anyway
|
||||
return err
|
||||
}
|
||||
|
||||
func (*writeGoAway) staysWithinBuffer(max int) bool { return false } // flushes
|
||||
|
||||
type writeData struct {
|
||||
streamID uint32
|
||||
p []byte
|
||||
endStream bool
|
||||
}
|
||||
|
||||
func (w *writeData) String() string {
|
||||
return fmt.Sprintf("writeData(stream=%d, p=%d, endStream=%v)", w.streamID, len(w.p), w.endStream)
|
||||
}
|
||||
|
||||
func (w *writeData) writeFrame(ctx writeContext) error {
|
||||
return ctx.Framer().WriteData(w.streamID, w.endStream, w.p)
|
||||
}
|
||||
|
||||
func (w *writeData) staysWithinBuffer(max int) bool {
|
||||
return frameHeaderLen+len(w.p) <= max
|
||||
}
|
||||
|
||||
// handlerPanicRST is the message sent from handler goroutines when
|
||||
// the handler panics.
|
||||
type handlerPanicRST struct {
|
||||
StreamID uint32
|
||||
}
|
||||
|
||||
func (hp handlerPanicRST) writeFrame(ctx writeContext) error {
|
||||
return ctx.Framer().WriteRSTStream(hp.StreamID, ErrCodeInternal)
|
||||
}
|
||||
|
||||
func (hp handlerPanicRST) staysWithinBuffer(max int) bool { return frameHeaderLen+4 <= max }
|
||||
|
||||
func (se StreamError) writeFrame(ctx writeContext) error {
|
||||
return ctx.Framer().WriteRSTStream(se.StreamID, se.Code)
|
||||
}
|
||||
|
||||
func (se StreamError) staysWithinBuffer(max int) bool { return frameHeaderLen+4 <= max }
|
||||
|
||||
type writePingAck struct{ pf *PingFrame }
|
||||
|
||||
func (w writePingAck) writeFrame(ctx writeContext) error {
|
||||
return ctx.Framer().WritePing(true, w.pf.Data)
|
||||
}
|
||||
|
||||
func (w writePingAck) staysWithinBuffer(max int) bool { return frameHeaderLen+len(w.pf.Data) <= max }
|
||||
|
||||
type writeSettingsAck struct{}
|
||||
|
||||
func (writeSettingsAck) writeFrame(ctx writeContext) error {
|
||||
return ctx.Framer().WriteSettingsAck()
|
||||
}
|
||||
|
||||
func (writeSettingsAck) staysWithinBuffer(max int) bool { return frameHeaderLen <= max }
|
||||
|
||||
// splitHeaderBlock splits headerBlock into fragments so that each fragment fits
|
||||
// in a single frame, then calls fn for each fragment. firstFrag/lastFrag are true
|
||||
// for the first/last fragment, respectively.
|
||||
func splitHeaderBlock(ctx writeContext, headerBlock []byte, fn func(ctx writeContext, frag []byte, firstFrag, lastFrag bool) error) error {
|
||||
// For now we're lazy and just pick the minimum MAX_FRAME_SIZE
|
||||
// that all peers must support (16KB). Later we could care
|
||||
// more and send larger frames if the peer advertised it, but
|
||||
// there's little point. Most headers are small anyway (so we
|
||||
// generally won't have CONTINUATION frames), and extra frames
|
||||
// only waste 9 bytes anyway.
|
||||
const maxFrameSize = 16384
|
||||
|
||||
first := true
|
||||
for len(headerBlock) > 0 {
|
||||
frag := headerBlock
|
||||
if len(frag) > maxFrameSize {
|
||||
frag = frag[:maxFrameSize]
|
||||
}
|
||||
headerBlock = headerBlock[len(frag):]
|
||||
if err := fn(ctx, frag, first, len(headerBlock) == 0); err != nil {
|
||||
return err
|
||||
}
|
||||
first = false
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// writeResHeaders is a request to write a HEADERS and 0+ CONTINUATION frames
|
||||
// for HTTP response headers or trailers from a server handler.
|
||||
type writeResHeaders struct {
|
||||
streamID uint32
|
||||
httpResCode int // 0 means no ":status" line
|
||||
h http.Header // may be nil
|
||||
trailers []string // if non-nil, which keys of h to write. nil means all.
|
||||
endStream bool
|
||||
|
||||
date string
|
||||
contentType string
|
||||
contentLength string
|
||||
}
|
||||
|
||||
func encKV(enc *hpack.Encoder, k, v string) {
|
||||
if VerboseLogs {
|
||||
log.Printf("http2: server encoding header %q = %q", k, v)
|
||||
}
|
||||
enc.WriteField(hpack.HeaderField{Name: k, Value: v})
|
||||
}
|
||||
|
||||
func (w *writeResHeaders) staysWithinBuffer(max int) bool {
|
||||
// TODO: this is a common one. It'd be nice to return true
|
||||
// here and get into the fast path if we could be clever and
|
||||
// calculate the size fast enough, or at least a conservative
|
||||
// uppper bound that usually fires. (Maybe if w.h and
|
||||
// w.trailers are nil, so we don't need to enumerate it.)
|
||||
// Otherwise I'm afraid that just calculating the length to
|
||||
// answer this question would be slower than the ~2µs benefit.
|
||||
return false
|
||||
}
|
||||
|
||||
func (w *writeResHeaders) writeFrame(ctx writeContext) error {
|
||||
enc, buf := ctx.HeaderEncoder()
|
||||
buf.Reset()
|
||||
|
||||
if w.httpResCode != 0 {
|
||||
encKV(enc, ":status", httpCodeString(w.httpResCode))
|
||||
}
|
||||
|
||||
encodeHeaders(enc, w.h, w.trailers)
|
||||
|
||||
if w.contentType != "" {
|
||||
encKV(enc, "content-type", w.contentType)
|
||||
}
|
||||
if w.contentLength != "" {
|
||||
encKV(enc, "content-length", w.contentLength)
|
||||
}
|
||||
if w.date != "" {
|
||||
encKV(enc, "date", w.date)
|
||||
}
|
||||
|
||||
headerBlock := buf.Bytes()
|
||||
if len(headerBlock) == 0 && w.trailers == nil {
|
||||
panic("unexpected empty hpack")
|
||||
}
|
||||
|
||||
return splitHeaderBlock(ctx, headerBlock, w.writeHeaderBlock)
|
||||
}
|
||||
|
||||
func (w *writeResHeaders) writeHeaderBlock(ctx writeContext, frag []byte, firstFrag, lastFrag bool) error {
|
||||
if firstFrag {
|
||||
return ctx.Framer().WriteHeaders(HeadersFrameParam{
|
||||
StreamID: w.streamID,
|
||||
BlockFragment: frag,
|
||||
EndStream: w.endStream,
|
||||
EndHeaders: lastFrag,
|
||||
})
|
||||
} else {
|
||||
return ctx.Framer().WriteContinuation(w.streamID, lastFrag, frag)
|
||||
}
|
||||
}
|
||||
|
||||
// writePushPromise is a request to write a PUSH_PROMISE and 0+ CONTINUATION frames.
|
||||
type writePushPromise struct {
|
||||
streamID uint32 // pusher stream
|
||||
method string // for :method
|
||||
url *url.URL // for :scheme, :authority, :path
|
||||
h http.Header
|
||||
|
||||
// Creates an ID for a pushed stream. This runs on serveG just before
|
||||
// the frame is written. The returned ID is copied to promisedID.
|
||||
allocatePromisedID func() (uint32, error)
|
||||
promisedID uint32
|
||||
}
|
||||
|
||||
func (w *writePushPromise) staysWithinBuffer(max int) bool {
|
||||
// TODO: see writeResHeaders.staysWithinBuffer
|
||||
return false
|
||||
}
|
||||
|
||||
func (w *writePushPromise) writeFrame(ctx writeContext) error {
|
||||
enc, buf := ctx.HeaderEncoder()
|
||||
buf.Reset()
|
||||
|
||||
encKV(enc, ":method", w.method)
|
||||
encKV(enc, ":scheme", w.url.Scheme)
|
||||
encKV(enc, ":authority", w.url.Host)
|
||||
encKV(enc, ":path", w.url.RequestURI())
|
||||
encodeHeaders(enc, w.h, nil)
|
||||
|
||||
headerBlock := buf.Bytes()
|
||||
if len(headerBlock) == 0 {
|
||||
panic("unexpected empty hpack")
|
||||
}
|
||||
|
||||
return splitHeaderBlock(ctx, headerBlock, w.writeHeaderBlock)
|
||||
}
|
||||
|
||||
func (w *writePushPromise) writeHeaderBlock(ctx writeContext, frag []byte, firstFrag, lastFrag bool) error {
|
||||
if firstFrag {
|
||||
return ctx.Framer().WritePushPromise(PushPromiseParam{
|
||||
StreamID: w.streamID,
|
||||
PromiseID: w.promisedID,
|
||||
BlockFragment: frag,
|
||||
EndHeaders: lastFrag,
|
||||
})
|
||||
} else {
|
||||
return ctx.Framer().WriteContinuation(w.streamID, lastFrag, frag)
|
||||
}
|
||||
}
|
||||
|
||||
type write100ContinueHeadersFrame struct {
|
||||
streamID uint32
|
||||
}
|
||||
|
||||
func (w write100ContinueHeadersFrame) writeFrame(ctx writeContext) error {
|
||||
enc, buf := ctx.HeaderEncoder()
|
||||
buf.Reset()
|
||||
encKV(enc, ":status", "100")
|
||||
return ctx.Framer().WriteHeaders(HeadersFrameParam{
|
||||
StreamID: w.streamID,
|
||||
BlockFragment: buf.Bytes(),
|
||||
EndStream: false,
|
||||
EndHeaders: true,
|
||||
})
|
||||
}
|
||||
|
||||
func (w write100ContinueHeadersFrame) staysWithinBuffer(max int) bool {
|
||||
// Sloppy but conservative:
|
||||
return 9+2*(len(":status")+len("100")) <= max
|
||||
}
|
||||
|
||||
type writeWindowUpdate struct {
|
||||
streamID uint32 // or 0 for conn-level
|
||||
n uint32
|
||||
}
|
||||
|
||||
func (wu writeWindowUpdate) staysWithinBuffer(max int) bool { return frameHeaderLen+4 <= max }
|
||||
|
||||
func (wu writeWindowUpdate) writeFrame(ctx writeContext) error {
|
||||
return ctx.Framer().WriteWindowUpdate(wu.streamID, wu.n)
|
||||
}
|
||||
|
||||
// encodeHeaders encodes an http.Header. If keys is not nil, then (k, h[k])
|
||||
// is encoded only only if k is in keys.
|
||||
func encodeHeaders(enc *hpack.Encoder, h http.Header, keys []string) {
|
||||
if keys == nil {
|
||||
sorter := sorterPool.Get().(*sorter)
|
||||
// Using defer here, since the returned keys from the
|
||||
// sorter.Keys method is only valid until the sorter
|
||||
// is returned:
|
||||
defer sorterPool.Put(sorter)
|
||||
keys = sorter.Keys(h)
|
||||
}
|
||||
for _, k := range keys {
|
||||
vv := h[k]
|
||||
k = lowerHeader(k)
|
||||
if !validWireHeaderFieldName(k) {
|
||||
// Skip it as backup paranoia. Per
|
||||
// golang.org/issue/14048, these should
|
||||
// already be rejected at a higher level.
|
||||
continue
|
||||
}
|
||||
isTE := k == "transfer-encoding"
|
||||
for _, v := range vv {
|
||||
if !httplex.ValidHeaderFieldValue(v) {
|
||||
// TODO: return an error? golang.org/issue/14048
|
||||
// For now just omit it.
|
||||
continue
|
||||
}
|
||||
// TODO: more of "8.1.2.2 Connection-Specific Header Fields"
|
||||
if isTE && v != "trailers" {
|
||||
continue
|
||||
}
|
||||
encKV(enc, k, v)
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,242 @@
|
|||
// Copyright 2014 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package http2
|
||||
|
||||
import "fmt"
|
||||
|
||||
// WriteScheduler is the interface implemented by HTTP/2 write schedulers.
|
||||
// Methods are never called concurrently.
|
||||
type WriteScheduler interface {
|
||||
// OpenStream opens a new stream in the write scheduler.
|
||||
// It is illegal to call this with streamID=0 or with a streamID that is
|
||||
// already open -- the call may panic.
|
||||
OpenStream(streamID uint32, options OpenStreamOptions)
|
||||
|
||||
// CloseStream closes a stream in the write scheduler. Any frames queued on
|
||||
// this stream should be discarded. It is illegal to call this on a stream
|
||||
// that is not open -- the call may panic.
|
||||
CloseStream(streamID uint32)
|
||||
|
||||
// AdjustStream adjusts the priority of the given stream. This may be called
|
||||
// on a stream that has not yet been opened or has been closed. Note that
|
||||
// RFC 7540 allows PRIORITY frames to be sent on streams in any state. See:
|
||||
// https://tools.ietf.org/html/rfc7540#section-5.1
|
||||
AdjustStream(streamID uint32, priority PriorityParam)
|
||||
|
||||
// Push queues a frame in the scheduler. In most cases, this will not be
|
||||
// called with wr.StreamID()!=0 unless that stream is currently open. The one
|
||||
// exception is RST_STREAM frames, which may be sent on idle or closed streams.
|
||||
Push(wr FrameWriteRequest)
|
||||
|
||||
// Pop dequeues the next frame to write. Returns false if no frames can
|
||||
// be written. Frames with a given wr.StreamID() are Pop'd in the same
|
||||
// order they are Push'd.
|
||||
Pop() (wr FrameWriteRequest, ok bool)
|
||||
}
|
||||
|
||||
// OpenStreamOptions specifies extra options for WriteScheduler.OpenStream.
|
||||
type OpenStreamOptions struct {
|
||||
// PusherID is zero if the stream was initiated by the client. Otherwise,
|
||||
// PusherID names the stream that pushed the newly opened stream.
|
||||
PusherID uint32
|
||||
}
|
||||
|
||||
// FrameWriteRequest is a request to write a frame.
|
||||
type FrameWriteRequest struct {
|
||||
// write is the interface value that does the writing, once the
|
||||
// WriteScheduler has selected this frame to write. The write
|
||||
// functions are all defined in write.go.
|
||||
write writeFramer
|
||||
|
||||
// stream is the stream on which this frame will be written.
|
||||
// nil for non-stream frames like PING and SETTINGS.
|
||||
stream *stream
|
||||
|
||||
// done, if non-nil, must be a buffered channel with space for
|
||||
// 1 message and is sent the return value from write (or an
|
||||
// earlier error) when the frame has been written.
|
||||
done chan error
|
||||
}
|
||||
|
||||
// StreamID returns the id of the stream this frame will be written to.
|
||||
// 0 is used for non-stream frames such as PING and SETTINGS.
|
||||
func (wr FrameWriteRequest) StreamID() uint32 {
|
||||
if wr.stream == nil {
|
||||
if se, ok := wr.write.(StreamError); ok {
|
||||
// (*serverConn).resetStream doesn't set
|
||||
// stream because it doesn't necessarily have
|
||||
// one. So special case this type of write
|
||||
// message.
|
||||
return se.StreamID
|
||||
}
|
||||
return 0
|
||||
}
|
||||
return wr.stream.id
|
||||
}
|
||||
|
||||
// DataSize returns the number of flow control bytes that must be consumed
|
||||
// to write this entire frame. This is 0 for non-DATA frames.
|
||||
func (wr FrameWriteRequest) DataSize() int {
|
||||
if wd, ok := wr.write.(*writeData); ok {
|
||||
return len(wd.p)
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
// Consume consumes min(n, available) bytes from this frame, where available
|
||||
// is the number of flow control bytes available on the stream. Consume returns
|
||||
// 0, 1, or 2 frames, where the integer return value gives the number of frames
|
||||
// returned.
|
||||
//
|
||||
// If flow control prevents consuming any bytes, this returns (_, _, 0). If
|
||||
// the entire frame was consumed, this returns (wr, _, 1). Otherwise, this
|
||||
// returns (consumed, rest, 2), where 'consumed' contains the consumed bytes and
|
||||
// 'rest' contains the remaining bytes. The consumed bytes are deducted from the
|
||||
// underlying stream's flow control budget.
|
||||
func (wr FrameWriteRequest) Consume(n int32) (FrameWriteRequest, FrameWriteRequest, int) {
|
||||
var empty FrameWriteRequest
|
||||
|
||||
// Non-DATA frames are always consumed whole.
|
||||
wd, ok := wr.write.(*writeData)
|
||||
if !ok || len(wd.p) == 0 {
|
||||
return wr, empty, 1
|
||||
}
|
||||
|
||||
// Might need to split after applying limits.
|
||||
allowed := wr.stream.flow.available()
|
||||
if n < allowed {
|
||||
allowed = n
|
||||
}
|
||||
if wr.stream.sc.maxFrameSize < allowed {
|
||||
allowed = wr.stream.sc.maxFrameSize
|
||||
}
|
||||
if allowed <= 0 {
|
||||
return empty, empty, 0
|
||||
}
|
||||
if len(wd.p) > int(allowed) {
|
||||
wr.stream.flow.take(allowed)
|
||||
consumed := FrameWriteRequest{
|
||||
stream: wr.stream,
|
||||
write: &writeData{
|
||||
streamID: wd.streamID,
|
||||
p: wd.p[:allowed],
|
||||
// Even if the original had endStream set, there
|
||||
// are bytes remaining because len(wd.p) > allowed,
|
||||
// so we know endStream is false.
|
||||
endStream: false,
|
||||
},
|
||||
// Our caller is blocking on the final DATA frame, not
|
||||
// this intermediate frame, so no need to wait.
|
||||
done: nil,
|
||||
}
|
||||
rest := FrameWriteRequest{
|
||||
stream: wr.stream,
|
||||
write: &writeData{
|
||||
streamID: wd.streamID,
|
||||
p: wd.p[allowed:],
|
||||
endStream: wd.endStream,
|
||||
},
|
||||
done: wr.done,
|
||||
}
|
||||
return consumed, rest, 2
|
||||
}
|
||||
|
||||
// The frame is consumed whole.
|
||||
// NB: This cast cannot overflow because allowed is <= math.MaxInt32.
|
||||
wr.stream.flow.take(int32(len(wd.p)))
|
||||
return wr, empty, 1
|
||||
}
|
||||
|
||||
// String is for debugging only.
|
||||
func (wr FrameWriteRequest) String() string {
|
||||
var des string
|
||||
if s, ok := wr.write.(fmt.Stringer); ok {
|
||||
des = s.String()
|
||||
} else {
|
||||
des = fmt.Sprintf("%T", wr.write)
|
||||
}
|
||||
return fmt.Sprintf("[FrameWriteRequest stream=%d, ch=%v, writer=%v]", wr.StreamID(), wr.done != nil, des)
|
||||
}
|
||||
|
||||
// replyToWriter sends err to wr.done and panics if the send must block
|
||||
// This does nothing if wr.done is nil.
|
||||
func (wr *FrameWriteRequest) replyToWriter(err error) {
|
||||
if wr.done == nil {
|
||||
return
|
||||
}
|
||||
select {
|
||||
case wr.done <- err:
|
||||
default:
|
||||
panic(fmt.Sprintf("unbuffered done channel passed in for type %T", wr.write))
|
||||
}
|
||||
wr.write = nil // prevent use (assume it's tainted after wr.done send)
|
||||
}
|
||||
|
||||
// writeQueue is used by implementations of WriteScheduler.
|
||||
type writeQueue struct {
|
||||
s []FrameWriteRequest
|
||||
}
|
||||
|
||||
func (q *writeQueue) empty() bool { return len(q.s) == 0 }
|
||||
|
||||
func (q *writeQueue) push(wr FrameWriteRequest) {
|
||||
q.s = append(q.s, wr)
|
||||
}
|
||||
|
||||
func (q *writeQueue) shift() FrameWriteRequest {
|
||||
if len(q.s) == 0 {
|
||||
panic("invalid use of queue")
|
||||
}
|
||||
wr := q.s[0]
|
||||
// TODO: less copy-happy queue.
|
||||
copy(q.s, q.s[1:])
|
||||
q.s[len(q.s)-1] = FrameWriteRequest{}
|
||||
q.s = q.s[:len(q.s)-1]
|
||||
return wr
|
||||
}
|
||||
|
||||
// consume consumes up to n bytes from q.s[0]. If the frame is
|
||||
// entirely consumed, it is removed from the queue. If the frame
|
||||
// is partially consumed, the frame is kept with the consumed
|
||||
// bytes removed. Returns true iff any bytes were consumed.
|
||||
func (q *writeQueue) consume(n int32) (FrameWriteRequest, bool) {
|
||||
if len(q.s) == 0 {
|
||||
return FrameWriteRequest{}, false
|
||||
}
|
||||
consumed, rest, numresult := q.s[0].Consume(n)
|
||||
switch numresult {
|
||||
case 0:
|
||||
return FrameWriteRequest{}, false
|
||||
case 1:
|
||||
q.shift()
|
||||
case 2:
|
||||
q.s[0] = rest
|
||||
}
|
||||
return consumed, true
|
||||
}
|
||||
|
||||
type writeQueuePool []*writeQueue
|
||||
|
||||
// put inserts an unused writeQueue into the pool.
|
||||
func (p *writeQueuePool) put(q *writeQueue) {
|
||||
for i := range q.s {
|
||||
q.s[i] = FrameWriteRequest{}
|
||||
}
|
||||
q.s = q.s[:0]
|
||||
*p = append(*p, q)
|
||||
}
|
||||
|
||||
// get returns an empty writeQueue.
|
||||
func (p *writeQueuePool) get() *writeQueue {
|
||||
ln := len(*p)
|
||||
if ln == 0 {
|
||||
return new(writeQueue)
|
||||
}
|
||||
x := ln - 1
|
||||
q := (*p)[x]
|
||||
(*p)[x] = nil
|
||||
*p = (*p)[:x]
|
||||
return q
|
||||
}
|
|
@ -0,0 +1,452 @@
|
|||
// Copyright 2016 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package http2
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"math"
|
||||
"sort"
|
||||
)
|
||||
|
||||
// RFC 7540, Section 5.3.5: the default weight is 16.
|
||||
const priorityDefaultWeight = 15 // 16 = 15 + 1
|
||||
|
||||
// PriorityWriteSchedulerConfig configures a priorityWriteScheduler.
|
||||
type PriorityWriteSchedulerConfig struct {
|
||||
// MaxClosedNodesInTree controls the maximum number of closed streams to
|
||||
// retain in the priority tree. Setting this to zero saves a small amount
|
||||
// of memory at the cost of performance.
|
||||
//
|
||||
// See RFC 7540, Section 5.3.4:
|
||||
// "It is possible for a stream to become closed while prioritization
|
||||
// information ... is in transit. ... This potentially creates suboptimal
|
||||
// prioritization, since the stream could be given a priority that is
|
||||
// different from what is intended. To avoid these problems, an endpoint
|
||||
// SHOULD retain stream prioritization state for a period after streams
|
||||
// become closed. The longer state is retained, the lower the chance that
|
||||
// streams are assigned incorrect or default priority values."
|
||||
MaxClosedNodesInTree int
|
||||
|
||||
// MaxIdleNodesInTree controls the maximum number of idle streams to
|
||||
// retain in the priority tree. Setting this to zero saves a small amount
|
||||
// of memory at the cost of performance.
|
||||
//
|
||||
// See RFC 7540, Section 5.3.4:
|
||||
// Similarly, streams that are in the "idle" state can be assigned
|
||||
// priority or become a parent of other streams. This allows for the
|
||||
// creation of a grouping node in the dependency tree, which enables
|
||||
// more flexible expressions of priority. Idle streams begin with a
|
||||
// default priority (Section 5.3.5).
|
||||
MaxIdleNodesInTree int
|
||||
|
||||
// ThrottleOutOfOrderWrites enables write throttling to help ensure that
|
||||
// data is delivered in priority order. This works around a race where
|
||||
// stream B depends on stream A and both streams are about to call Write
|
||||
// to queue DATA frames. If B wins the race, a naive scheduler would eagerly
|
||||
// write as much data from B as possible, but this is suboptimal because A
|
||||
// is a higher-priority stream. With throttling enabled, we write a small
|
||||
// amount of data from B to minimize the amount of bandwidth that B can
|
||||
// steal from A.
|
||||
ThrottleOutOfOrderWrites bool
|
||||
}
|
||||
|
||||
// NewPriorityWriteScheduler constructs a WriteScheduler that schedules
|
||||
// frames by following HTTP/2 priorities as described in RFC 7540 Section 5.3.
|
||||
// If cfg is nil, default options are used.
|
||||
func NewPriorityWriteScheduler(cfg *PriorityWriteSchedulerConfig) WriteScheduler {
|
||||
if cfg == nil {
|
||||
// For justification of these defaults, see:
|
||||
// https://docs.google.com/document/d/1oLhNg1skaWD4_DtaoCxdSRN5erEXrH-KnLrMwEpOtFY
|
||||
cfg = &PriorityWriteSchedulerConfig{
|
||||
MaxClosedNodesInTree: 10,
|
||||
MaxIdleNodesInTree: 10,
|
||||
ThrottleOutOfOrderWrites: false,
|
||||
}
|
||||
}
|
||||
|
||||
ws := &priorityWriteScheduler{
|
||||
nodes: make(map[uint32]*priorityNode),
|
||||
maxClosedNodesInTree: cfg.MaxClosedNodesInTree,
|
||||
maxIdleNodesInTree: cfg.MaxIdleNodesInTree,
|
||||
enableWriteThrottle: cfg.ThrottleOutOfOrderWrites,
|
||||
}
|
||||
ws.nodes[0] = &ws.root
|
||||
if cfg.ThrottleOutOfOrderWrites {
|
||||
ws.writeThrottleLimit = 1024
|
||||
} else {
|
||||
ws.writeThrottleLimit = math.MaxInt32
|
||||
}
|
||||
return ws
|
||||
}
|
||||
|
||||
type priorityNodeState int
|
||||
|
||||
const (
|
||||
priorityNodeOpen priorityNodeState = iota
|
||||
priorityNodeClosed
|
||||
priorityNodeIdle
|
||||
)
|
||||
|
||||
// priorityNode is a node in an HTTP/2 priority tree.
|
||||
// Each node is associated with a single stream ID.
|
||||
// See RFC 7540, Section 5.3.
|
||||
type priorityNode struct {
|
||||
q writeQueue // queue of pending frames to write
|
||||
id uint32 // id of the stream, or 0 for the root of the tree
|
||||
weight uint8 // the actual weight is weight+1, so the value is in [1,256]
|
||||
state priorityNodeState // open | closed | idle
|
||||
bytes int64 // number of bytes written by this node, or 0 if closed
|
||||
subtreeBytes int64 // sum(node.bytes) of all nodes in this subtree
|
||||
|
||||
// These links form the priority tree.
|
||||
parent *priorityNode
|
||||
kids *priorityNode // start of the kids list
|
||||
prev, next *priorityNode // doubly-linked list of siblings
|
||||
}
|
||||
|
||||
func (n *priorityNode) setParent(parent *priorityNode) {
|
||||
if n == parent {
|
||||
panic("setParent to self")
|
||||
}
|
||||
if n.parent == parent {
|
||||
return
|
||||
}
|
||||
// Unlink from current parent.
|
||||
if parent := n.parent; parent != nil {
|
||||
if n.prev == nil {
|
||||
parent.kids = n.next
|
||||
} else {
|
||||
n.prev.next = n.next
|
||||
}
|
||||
if n.next != nil {
|
||||
n.next.prev = n.prev
|
||||
}
|
||||
}
|
||||
// Link to new parent.
|
||||
// If parent=nil, remove n from the tree.
|
||||
// Always insert at the head of parent.kids (this is assumed by walkReadyInOrder).
|
||||
n.parent = parent
|
||||
if parent == nil {
|
||||
n.next = nil
|
||||
n.prev = nil
|
||||
} else {
|
||||
n.next = parent.kids
|
||||
n.prev = nil
|
||||
if n.next != nil {
|
||||
n.next.prev = n
|
||||
}
|
||||
parent.kids = n
|
||||
}
|
||||
}
|
||||
|
||||
func (n *priorityNode) addBytes(b int64) {
|
||||
n.bytes += b
|
||||
for ; n != nil; n = n.parent {
|
||||
n.subtreeBytes += b
|
||||
}
|
||||
}
|
||||
|
||||
// walkReadyInOrder iterates over the tree in priority order, calling f for each node
|
||||
// with a non-empty write queue. When f returns true, this funcion returns true and the
|
||||
// walk halts. tmp is used as scratch space for sorting.
|
||||
//
|
||||
// f(n, openParent) takes two arguments: the node to visit, n, and a bool that is true
|
||||
// if any ancestor p of n is still open (ignoring the root node).
|
||||
func (n *priorityNode) walkReadyInOrder(openParent bool, tmp *[]*priorityNode, f func(*priorityNode, bool) bool) bool {
|
||||
if !n.q.empty() && f(n, openParent) {
|
||||
return true
|
||||
}
|
||||
if n.kids == nil {
|
||||
return false
|
||||
}
|
||||
|
||||
// Don't consider the root "open" when updating openParent since
|
||||
// we can't send data frames on the root stream (only control frames).
|
||||
if n.id != 0 {
|
||||
openParent = openParent || (n.state == priorityNodeOpen)
|
||||
}
|
||||
|
||||
// Common case: only one kid or all kids have the same weight.
|
||||
// Some clients don't use weights; other clients (like web browsers)
|
||||
// use mostly-linear priority trees.
|
||||
w := n.kids.weight
|
||||
needSort := false
|
||||
for k := n.kids.next; k != nil; k = k.next {
|
||||
if k.weight != w {
|
||||
needSort = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !needSort {
|
||||
for k := n.kids; k != nil; k = k.next {
|
||||
if k.walkReadyInOrder(openParent, tmp, f) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// Uncommon case: sort the child nodes. We remove the kids from the parent,
|
||||
// then re-insert after sorting so we can reuse tmp for future sort calls.
|
||||
*tmp = (*tmp)[:0]
|
||||
for n.kids != nil {
|
||||
*tmp = append(*tmp, n.kids)
|
||||
n.kids.setParent(nil)
|
||||
}
|
||||
sort.Sort(sortPriorityNodeSiblings(*tmp))
|
||||
for i := len(*tmp) - 1; i >= 0; i-- {
|
||||
(*tmp)[i].setParent(n) // setParent inserts at the head of n.kids
|
||||
}
|
||||
for k := n.kids; k != nil; k = k.next {
|
||||
if k.walkReadyInOrder(openParent, tmp, f) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
type sortPriorityNodeSiblings []*priorityNode
|
||||
|
||||
func (z sortPriorityNodeSiblings) Len() int { return len(z) }
|
||||
func (z sortPriorityNodeSiblings) Swap(i, k int) { z[i], z[k] = z[k], z[i] }
|
||||
func (z sortPriorityNodeSiblings) Less(i, k int) bool {
|
||||
// Prefer the subtree that has sent fewer bytes relative to its weight.
|
||||
// See sections 5.3.2 and 5.3.4.
|
||||
wi, bi := float64(z[i].weight+1), float64(z[i].subtreeBytes)
|
||||
wk, bk := float64(z[k].weight+1), float64(z[k].subtreeBytes)
|
||||
if bi == 0 && bk == 0 {
|
||||
return wi >= wk
|
||||
}
|
||||
if bk == 0 {
|
||||
return false
|
||||
}
|
||||
return bi/bk <= wi/wk
|
||||
}
|
||||
|
||||
type priorityWriteScheduler struct {
|
||||
// root is the root of the priority tree, where root.id = 0.
|
||||
// The root queues control frames that are not associated with any stream.
|
||||
root priorityNode
|
||||
|
||||
// nodes maps stream ids to priority tree nodes.
|
||||
nodes map[uint32]*priorityNode
|
||||
|
||||
// maxID is the maximum stream id in nodes.
|
||||
maxID uint32
|
||||
|
||||
// lists of nodes that have been closed or are idle, but are kept in
|
||||
// the tree for improved prioritization. When the lengths exceed either
|
||||
// maxClosedNodesInTree or maxIdleNodesInTree, old nodes are discarded.
|
||||
closedNodes, idleNodes []*priorityNode
|
||||
|
||||
// From the config.
|
||||
maxClosedNodesInTree int
|
||||
maxIdleNodesInTree int
|
||||
writeThrottleLimit int32
|
||||
enableWriteThrottle bool
|
||||
|
||||
// tmp is scratch space for priorityNode.walkReadyInOrder to reduce allocations.
|
||||
tmp []*priorityNode
|
||||
|
||||
// pool of empty queues for reuse.
|
||||
queuePool writeQueuePool
|
||||
}
|
||||
|
||||
func (ws *priorityWriteScheduler) OpenStream(streamID uint32, options OpenStreamOptions) {
|
||||
// The stream may be currently idle but cannot be opened or closed.
|
||||
if curr := ws.nodes[streamID]; curr != nil {
|
||||
if curr.state != priorityNodeIdle {
|
||||
panic(fmt.Sprintf("stream %d already opened", streamID))
|
||||
}
|
||||
curr.state = priorityNodeOpen
|
||||
return
|
||||
}
|
||||
|
||||
// RFC 7540, Section 5.3.5:
|
||||
// "All streams are initially assigned a non-exclusive dependency on stream 0x0.
|
||||
// Pushed streams initially depend on their associated stream. In both cases,
|
||||
// streams are assigned a default weight of 16."
|
||||
parent := ws.nodes[options.PusherID]
|
||||
if parent == nil {
|
||||
parent = &ws.root
|
||||
}
|
||||
n := &priorityNode{
|
||||
q: *ws.queuePool.get(),
|
||||
id: streamID,
|
||||
weight: priorityDefaultWeight,
|
||||
state: priorityNodeOpen,
|
||||
}
|
||||
n.setParent(parent)
|
||||
ws.nodes[streamID] = n
|
||||
if streamID > ws.maxID {
|
||||
ws.maxID = streamID
|
||||
}
|
||||
}
|
||||
|
||||
func (ws *priorityWriteScheduler) CloseStream(streamID uint32) {
|
||||
if streamID == 0 {
|
||||
panic("violation of WriteScheduler interface: cannot close stream 0")
|
||||
}
|
||||
if ws.nodes[streamID] == nil {
|
||||
panic(fmt.Sprintf("violation of WriteScheduler interface: unknown stream %d", streamID))
|
||||
}
|
||||
if ws.nodes[streamID].state != priorityNodeOpen {
|
||||
panic(fmt.Sprintf("violation of WriteScheduler interface: stream %d already closed", streamID))
|
||||
}
|
||||
|
||||
n := ws.nodes[streamID]
|
||||
n.state = priorityNodeClosed
|
||||
n.addBytes(-n.bytes)
|
||||
|
||||
q := n.q
|
||||
ws.queuePool.put(&q)
|
||||
n.q.s = nil
|
||||
if ws.maxClosedNodesInTree > 0 {
|
||||
ws.addClosedOrIdleNode(&ws.closedNodes, ws.maxClosedNodesInTree, n)
|
||||
} else {
|
||||
ws.removeNode(n)
|
||||
}
|
||||
}
|
||||
|
||||
func (ws *priorityWriteScheduler) AdjustStream(streamID uint32, priority PriorityParam) {
|
||||
if streamID == 0 {
|
||||
panic("adjustPriority on root")
|
||||
}
|
||||
|
||||
// If streamID does not exist, there are two cases:
|
||||
// - A closed stream that has been removed (this will have ID <= maxID)
|
||||
// - An idle stream that is being used for "grouping" (this will have ID > maxID)
|
||||
n := ws.nodes[streamID]
|
||||
if n == nil {
|
||||
if streamID <= ws.maxID || ws.maxIdleNodesInTree == 0 {
|
||||
return
|
||||
}
|
||||
ws.maxID = streamID
|
||||
n = &priorityNode{
|
||||
q: *ws.queuePool.get(),
|
||||
id: streamID,
|
||||
weight: priorityDefaultWeight,
|
||||
state: priorityNodeIdle,
|
||||
}
|
||||
n.setParent(&ws.root)
|
||||
ws.nodes[streamID] = n
|
||||
ws.addClosedOrIdleNode(&ws.idleNodes, ws.maxIdleNodesInTree, n)
|
||||
}
|
||||
|
||||
// Section 5.3.1: A dependency on a stream that is not currently in the tree
|
||||
// results in that stream being given a default priority (Section 5.3.5).
|
||||
parent := ws.nodes[priority.StreamDep]
|
||||
if parent == nil {
|
||||
n.setParent(&ws.root)
|
||||
n.weight = priorityDefaultWeight
|
||||
return
|
||||
}
|
||||
|
||||
// Ignore if the client tries to make a node its own parent.
|
||||
if n == parent {
|
||||
return
|
||||
}
|
||||
|
||||
// Section 5.3.3:
|
||||
// "If a stream is made dependent on one of its own dependencies, the
|
||||
// formerly dependent stream is first moved to be dependent on the
|
||||
// reprioritized stream's previous parent. The moved dependency retains
|
||||
// its weight."
|
||||
//
|
||||
// That is: if parent depends on n, move parent to depend on n.parent.
|
||||
for x := parent.parent; x != nil; x = x.parent {
|
||||
if x == n {
|
||||
parent.setParent(n.parent)
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// Section 5.3.3: The exclusive flag causes the stream to become the sole
|
||||
// dependency of its parent stream, causing other dependencies to become
|
||||
// dependent on the exclusive stream.
|
||||
if priority.Exclusive {
|
||||
k := parent.kids
|
||||
for k != nil {
|
||||
next := k.next
|
||||
if k != n {
|
||||
k.setParent(n)
|
||||
}
|
||||
k = next
|
||||
}
|
||||
}
|
||||
|
||||
n.setParent(parent)
|
||||
n.weight = priority.Weight
|
||||
}
|
||||
|
||||
func (ws *priorityWriteScheduler) Push(wr FrameWriteRequest) {
|
||||
var n *priorityNode
|
||||
if id := wr.StreamID(); id == 0 {
|
||||
n = &ws.root
|
||||
} else {
|
||||
n = ws.nodes[id]
|
||||
if n == nil {
|
||||
// id is an idle or closed stream. wr should not be a HEADERS or
|
||||
// DATA frame. However, wr can be a RST_STREAM. In this case, we
|
||||
// push wr onto the root, rather than creating a new priorityNode,
|
||||
// since RST_STREAM is tiny and the stream's priority is unknown
|
||||
// anyway. See issue #17919.
|
||||
if wr.DataSize() > 0 {
|
||||
panic("add DATA on non-open stream")
|
||||
}
|
||||
n = &ws.root
|
||||
}
|
||||
}
|
||||
n.q.push(wr)
|
||||
}
|
||||
|
||||
func (ws *priorityWriteScheduler) Pop() (wr FrameWriteRequest, ok bool) {
|
||||
ws.root.walkReadyInOrder(false, &ws.tmp, func(n *priorityNode, openParent bool) bool {
|
||||
limit := int32(math.MaxInt32)
|
||||
if openParent {
|
||||
limit = ws.writeThrottleLimit
|
||||
}
|
||||
wr, ok = n.q.consume(limit)
|
||||
if !ok {
|
||||
return false
|
||||
}
|
||||
n.addBytes(int64(wr.DataSize()))
|
||||
// If B depends on A and B continuously has data available but A
|
||||
// does not, gradually increase the throttling limit to allow B to
|
||||
// steal more and more bandwidth from A.
|
||||
if openParent {
|
||||
ws.writeThrottleLimit += 1024
|
||||
if ws.writeThrottleLimit < 0 {
|
||||
ws.writeThrottleLimit = math.MaxInt32
|
||||
}
|
||||
} else if ws.enableWriteThrottle {
|
||||
ws.writeThrottleLimit = 1024
|
||||
}
|
||||
return true
|
||||
})
|
||||
return wr, ok
|
||||
}
|
||||
|
||||
func (ws *priorityWriteScheduler) addClosedOrIdleNode(list *[]*priorityNode, maxSize int, n *priorityNode) {
|
||||
if maxSize == 0 {
|
||||
return
|
||||
}
|
||||
if len(*list) == maxSize {
|
||||
// Remove the oldest node, then shift left.
|
||||
ws.removeNode((*list)[0])
|
||||
x := (*list)[1:]
|
||||
copy(*list, x)
|
||||
*list = (*list)[:len(x)]
|
||||
}
|
||||
*list = append(*list, n)
|
||||
}
|
||||
|
||||
func (ws *priorityWriteScheduler) removeNode(n *priorityNode) {
|
||||
for k := n.kids; k != nil; k = k.next {
|
||||
k.setParent(n.parent)
|
||||
}
|
||||
n.setParent(nil)
|
||||
delete(ws.nodes, n.id)
|
||||
}
|
|
@ -0,0 +1,72 @@
|
|||
// Copyright 2014 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package http2
|
||||
|
||||
import "math"
|
||||
|
||||
// NewRandomWriteScheduler constructs a WriteScheduler that ignores HTTP/2
|
||||
// priorities. Control frames like SETTINGS and PING are written before DATA
|
||||
// frames, but if no control frames are queued and multiple streams have queued
|
||||
// HEADERS or DATA frames, Pop selects a ready stream arbitrarily.
|
||||
func NewRandomWriteScheduler() WriteScheduler {
|
||||
return &randomWriteScheduler{sq: make(map[uint32]*writeQueue)}
|
||||
}
|
||||
|
||||
type randomWriteScheduler struct {
|
||||
// zero are frames not associated with a specific stream.
|
||||
zero writeQueue
|
||||
|
||||
// sq contains the stream-specific queues, keyed by stream ID.
|
||||
// When a stream is idle or closed, it's deleted from the map.
|
||||
sq map[uint32]*writeQueue
|
||||
|
||||
// pool of empty queues for reuse.
|
||||
queuePool writeQueuePool
|
||||
}
|
||||
|
||||
func (ws *randomWriteScheduler) OpenStream(streamID uint32, options OpenStreamOptions) {
|
||||
// no-op: idle streams are not tracked
|
||||
}
|
||||
|
||||
func (ws *randomWriteScheduler) CloseStream(streamID uint32) {
|
||||
q, ok := ws.sq[streamID]
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
delete(ws.sq, streamID)
|
||||
ws.queuePool.put(q)
|
||||
}
|
||||
|
||||
func (ws *randomWriteScheduler) AdjustStream(streamID uint32, priority PriorityParam) {
|
||||
// no-op: priorities are ignored
|
||||
}
|
||||
|
||||
func (ws *randomWriteScheduler) Push(wr FrameWriteRequest) {
|
||||
id := wr.StreamID()
|
||||
if id == 0 {
|
||||
ws.zero.push(wr)
|
||||
return
|
||||
}
|
||||
q, ok := ws.sq[id]
|
||||
if !ok {
|
||||
q = ws.queuePool.get()
|
||||
ws.sq[id] = q
|
||||
}
|
||||
q.push(wr)
|
||||
}
|
||||
|
||||
func (ws *randomWriteScheduler) Pop() (FrameWriteRequest, bool) {
|
||||
// Control frames first.
|
||||
if !ws.zero.empty() {
|
||||
return ws.zero.shift(), true
|
||||
}
|
||||
// Iterate over all non-idle streams until finding one that can be consumed.
|
||||
for _, q := range ws.sq {
|
||||
if wr, ok := q.consume(math.MaxInt32); ok {
|
||||
return wr, true
|
||||
}
|
||||
}
|
||||
return FrameWriteRequest{}, false
|
||||
}
|
|
@ -0,0 +1,732 @@
|
|||
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
|
||||
|
||||
// Copyright 2016 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package idna implements IDNA2008 using the compatibility processing
|
||||
// defined by UTS (Unicode Technical Standard) #46, which defines a standard to
|
||||
// deal with the transition from IDNA2003.
|
||||
//
|
||||
// IDNA2008 (Internationalized Domain Names for Applications), is defined in RFC
|
||||
// 5890, RFC 5891, RFC 5892, RFC 5893 and RFC 5894.
|
||||
// UTS #46 is defined in http://www.unicode.org/reports/tr46.
|
||||
// See http://unicode.org/cldr/utility/idna.jsp for a visualization of the
|
||||
// differences between these two standards.
|
||||
package idna // import "golang.org/x/net/idna"
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
|
||||
"golang.org/x/text/secure/bidirule"
|
||||
"golang.org/x/text/unicode/bidi"
|
||||
"golang.org/x/text/unicode/norm"
|
||||
)
|
||||
|
||||
// NOTE: Unlike common practice in Go APIs, the functions will return a
|
||||
// sanitized domain name in case of errors. Browsers sometimes use a partially
|
||||
// evaluated string as lookup.
|
||||
// TODO: the current error handling is, in my opinion, the least opinionated.
|
||||
// Other strategies are also viable, though:
|
||||
// Option 1) Return an empty string in case of error, but allow the user to
|
||||
// specify explicitly which errors to ignore.
|
||||
// Option 2) Return the partially evaluated string if it is itself a valid
|
||||
// string, otherwise return the empty string in case of error.
|
||||
// Option 3) Option 1 and 2.
|
||||
// Option 4) Always return an empty string for now and implement Option 1 as
|
||||
// needed, and document that the return string may not be empty in case of
|
||||
// error in the future.
|
||||
// I think Option 1 is best, but it is quite opinionated.
|
||||
|
||||
// ToASCII is a wrapper for Punycode.ToASCII.
|
||||
func ToASCII(s string) (string, error) {
|
||||
return Punycode.process(s, true)
|
||||
}
|
||||
|
||||
// ToUnicode is a wrapper for Punycode.ToUnicode.
|
||||
func ToUnicode(s string) (string, error) {
|
||||
return Punycode.process(s, false)
|
||||
}
|
||||
|
||||
// An Option configures a Profile at creation time.
|
||||
type Option func(*options)
|
||||
|
||||
// Transitional sets a Profile to use the Transitional mapping as defined in UTS
|
||||
// #46. This will cause, for example, "ß" to be mapped to "ss". Using the
|
||||
// transitional mapping provides a compromise between IDNA2003 and IDNA2008
|
||||
// compatibility. It is used by most browsers when resolving domain names. This
|
||||
// option is only meaningful if combined with MapForLookup.
|
||||
func Transitional(transitional bool) Option {
|
||||
return func(o *options) { o.transitional = true }
|
||||
}
|
||||
|
||||
// VerifyDNSLength sets whether a Profile should fail if any of the IDN parts
|
||||
// are longer than allowed by the RFC.
|
||||
func VerifyDNSLength(verify bool) Option {
|
||||
return func(o *options) { o.verifyDNSLength = verify }
|
||||
}
|
||||
|
||||
// RemoveLeadingDots removes leading label separators. Leading runes that map to
|
||||
// dots, such as U+3002 IDEOGRAPHIC FULL STOP, are removed as well.
|
||||
//
|
||||
// This is the behavior suggested by the UTS #46 and is adopted by some
|
||||
// browsers.
|
||||
func RemoveLeadingDots(remove bool) Option {
|
||||
return func(o *options) { o.removeLeadingDots = remove }
|
||||
}
|
||||
|
||||
// ValidateLabels sets whether to check the mandatory label validation criteria
|
||||
// as defined in Section 5.4 of RFC 5891. This includes testing for correct use
|
||||
// of hyphens ('-'), normalization, validity of runes, and the context rules.
|
||||
func ValidateLabels(enable bool) Option {
|
||||
return func(o *options) {
|
||||
// Don't override existing mappings, but set one that at least checks
|
||||
// normalization if it is not set.
|
||||
if o.mapping == nil && enable {
|
||||
o.mapping = normalize
|
||||
}
|
||||
o.trie = trie
|
||||
o.validateLabels = enable
|
||||
o.fromPuny = validateFromPunycode
|
||||
}
|
||||
}
|
||||
|
||||
// StrictDomainName limits the set of permissible ASCII characters to those
|
||||
// allowed in domain names as defined in RFC 1034 (A-Z, a-z, 0-9 and the
|
||||
// hyphen). This is set by default for MapForLookup and ValidateForRegistration.
|
||||
//
|
||||
// This option is useful, for instance, for browsers that allow characters
|
||||
// outside this range, for example a '_' (U+005F LOW LINE). See
|
||||
// http://www.rfc-editor.org/std/std3.txt for more details This option
|
||||
// corresponds to the UseSTD3ASCIIRules option in UTS #46.
|
||||
func StrictDomainName(use bool) Option {
|
||||
return func(o *options) {
|
||||
o.trie = trie
|
||||
o.useSTD3Rules = use
|
||||
o.fromPuny = validateFromPunycode
|
||||
}
|
||||
}
|
||||
|
||||
// NOTE: the following options pull in tables. The tables should not be linked
|
||||
// in as long as the options are not used.
|
||||
|
||||
// BidiRule enables the Bidi rule as defined in RFC 5893. Any application
|
||||
// that relies on proper validation of labels should include this rule.
|
||||
func BidiRule() Option {
|
||||
return func(o *options) { o.bidirule = bidirule.ValidString }
|
||||
}
|
||||
|
||||
// ValidateForRegistration sets validation options to verify that a given IDN is
|
||||
// properly formatted for registration as defined by Section 4 of RFC 5891.
|
||||
func ValidateForRegistration() Option {
|
||||
return func(o *options) {
|
||||
o.mapping = validateRegistration
|
||||
StrictDomainName(true)(o)
|
||||
ValidateLabels(true)(o)
|
||||
VerifyDNSLength(true)(o)
|
||||
BidiRule()(o)
|
||||
}
|
||||
}
|
||||
|
||||
// MapForLookup sets validation and mapping options such that a given IDN is
|
||||
// transformed for domain name lookup according to the requirements set out in
|
||||
// Section 5 of RFC 5891. The mappings follow the recommendations of RFC 5894,
|
||||
// RFC 5895 and UTS 46. It does not add the Bidi Rule. Use the BidiRule option
|
||||
// to add this check.
|
||||
//
|
||||
// The mappings include normalization and mapping case, width and other
|
||||
// compatibility mappings.
|
||||
func MapForLookup() Option {
|
||||
return func(o *options) {
|
||||
o.mapping = validateAndMap
|
||||
StrictDomainName(true)(o)
|
||||
ValidateLabels(true)(o)
|
||||
}
|
||||
}
|
||||
|
||||
type options struct {
|
||||
transitional bool
|
||||
useSTD3Rules bool
|
||||
validateLabels bool
|
||||
verifyDNSLength bool
|
||||
removeLeadingDots bool
|
||||
|
||||
trie *idnaTrie
|
||||
|
||||
// fromPuny calls validation rules when converting A-labels to U-labels.
|
||||
fromPuny func(p *Profile, s string) error
|
||||
|
||||
// mapping implements a validation and mapping step as defined in RFC 5895
|
||||
// or UTS 46, tailored to, for example, domain registration or lookup.
|
||||
mapping func(p *Profile, s string) (mapped string, isBidi bool, err error)
|
||||
|
||||
// bidirule, if specified, checks whether s conforms to the Bidi Rule
|
||||
// defined in RFC 5893.
|
||||
bidirule func(s string) bool
|
||||
}
|
||||
|
||||
// A Profile defines the configuration of an IDNA mapper.
|
||||
type Profile struct {
|
||||
options
|
||||
}
|
||||
|
||||
func apply(o *options, opts []Option) {
|
||||
for _, f := range opts {
|
||||
f(o)
|
||||
}
|
||||
}
|
||||
|
||||
// New creates a new Profile.
|
||||
//
|
||||
// With no options, the returned Profile is the most permissive and equals the
|
||||
// Punycode Profile. Options can be passed to further restrict the Profile. The
|
||||
// MapForLookup and ValidateForRegistration options set a collection of options,
|
||||
// for lookup and registration purposes respectively, which can be tailored by
|
||||
// adding more fine-grained options, where later options override earlier
|
||||
// options.
|
||||
func New(o ...Option) *Profile {
|
||||
p := &Profile{}
|
||||
apply(&p.options, o)
|
||||
return p
|
||||
}
|
||||
|
||||
// ToASCII converts a domain or domain label to its ASCII form. For example,
|
||||
// ToASCII("bücher.example.com") is "xn--bcher-kva.example.com", and
|
||||
// ToASCII("golang") is "golang". If an error is encountered it will return
|
||||
// an error and a (partially) processed result.
|
||||
func (p *Profile) ToASCII(s string) (string, error) {
|
||||
return p.process(s, true)
|
||||
}
|
||||
|
||||
// ToUnicode converts a domain or domain label to its Unicode form. For example,
|
||||
// ToUnicode("xn--bcher-kva.example.com") is "bücher.example.com", and
|
||||
// ToUnicode("golang") is "golang". If an error is encountered it will return
|
||||
// an error and a (partially) processed result.
|
||||
func (p *Profile) ToUnicode(s string) (string, error) {
|
||||
pp := *p
|
||||
pp.transitional = false
|
||||
return pp.process(s, false)
|
||||
}
|
||||
|
||||
// String reports a string with a description of the profile for debugging
|
||||
// purposes. The string format may change with different versions.
|
||||
func (p *Profile) String() string {
|
||||
s := ""
|
||||
if p.transitional {
|
||||
s = "Transitional"
|
||||
} else {
|
||||
s = "NonTransitional"
|
||||
}
|
||||
if p.useSTD3Rules {
|
||||
s += ":UseSTD3Rules"
|
||||
}
|
||||
if p.validateLabels {
|
||||
s += ":ValidateLabels"
|
||||
}
|
||||
if p.verifyDNSLength {
|
||||
s += ":VerifyDNSLength"
|
||||
}
|
||||
return s
|
||||
}
|
||||
|
||||
var (
|
||||
// Punycode is a Profile that does raw punycode processing with a minimum
|
||||
// of validation.
|
||||
Punycode *Profile = punycode
|
||||
|
||||
// Lookup is the recommended profile for looking up domain names, according
|
||||
// to Section 5 of RFC 5891. The exact configuration of this profile may
|
||||
// change over time.
|
||||
Lookup *Profile = lookup
|
||||
|
||||
// Display is the recommended profile for displaying domain names.
|
||||
// The configuration of this profile may change over time.
|
||||
Display *Profile = display
|
||||
|
||||
// Registration is the recommended profile for checking whether a given
|
||||
// IDN is valid for registration, according to Section 4 of RFC 5891.
|
||||
Registration *Profile = registration
|
||||
|
||||
punycode = &Profile{}
|
||||
lookup = &Profile{options{
|
||||
transitional: true,
|
||||
useSTD3Rules: true,
|
||||
validateLabels: true,
|
||||
trie: trie,
|
||||
fromPuny: validateFromPunycode,
|
||||
mapping: validateAndMap,
|
||||
bidirule: bidirule.ValidString,
|
||||
}}
|
||||
display = &Profile{options{
|
||||
useSTD3Rules: true,
|
||||
validateLabels: true,
|
||||
trie: trie,
|
||||
fromPuny: validateFromPunycode,
|
||||
mapping: validateAndMap,
|
||||
bidirule: bidirule.ValidString,
|
||||
}}
|
||||
registration = &Profile{options{
|
||||
useSTD3Rules: true,
|
||||
validateLabels: true,
|
||||
verifyDNSLength: true,
|
||||
trie: trie,
|
||||
fromPuny: validateFromPunycode,
|
||||
mapping: validateRegistration,
|
||||
bidirule: bidirule.ValidString,
|
||||
}}
|
||||
|
||||
// TODO: profiles
|
||||
// Register: recommended for approving domain names: don't do any mappings
|
||||
// but rather reject on invalid input. Bundle or block deviation characters.
|
||||
)
|
||||
|
||||
type labelError struct{ label, code_ string }
|
||||
|
||||
func (e labelError) code() string { return e.code_ }
|
||||
func (e labelError) Error() string {
|
||||
return fmt.Sprintf("idna: invalid label %q", e.label)
|
||||
}
|
||||
|
||||
type runeError rune
|
||||
|
||||
func (e runeError) code() string { return "P1" }
|
||||
func (e runeError) Error() string {
|
||||
return fmt.Sprintf("idna: disallowed rune %U", e)
|
||||
}
|
||||
|
||||
// process implements the algorithm described in section 4 of UTS #46,
|
||||
// see http://www.unicode.org/reports/tr46.
|
||||
func (p *Profile) process(s string, toASCII bool) (string, error) {
|
||||
var err error
|
||||
var isBidi bool
|
||||
if p.mapping != nil {
|
||||
s, isBidi, err = p.mapping(p, s)
|
||||
}
|
||||
// Remove leading empty labels.
|
||||
if p.removeLeadingDots {
|
||||
for ; len(s) > 0 && s[0] == '.'; s = s[1:] {
|
||||
}
|
||||
}
|
||||
// TODO: allow for a quick check of the tables data.
|
||||
// It seems like we should only create this error on ToASCII, but the
|
||||
// UTS 46 conformance tests suggests we should always check this.
|
||||
if err == nil && p.verifyDNSLength && s == "" {
|
||||
err = &labelError{s, "A4"}
|
||||
}
|
||||
labels := labelIter{orig: s}
|
||||
for ; !labels.done(); labels.next() {
|
||||
label := labels.label()
|
||||
if label == "" {
|
||||
// Empty labels are not okay. The label iterator skips the last
|
||||
// label if it is empty.
|
||||
if err == nil && p.verifyDNSLength {
|
||||
err = &labelError{s, "A4"}
|
||||
}
|
||||
continue
|
||||
}
|
||||
if strings.HasPrefix(label, acePrefix) {
|
||||
u, err2 := decode(label[len(acePrefix):])
|
||||
if err2 != nil {
|
||||
if err == nil {
|
||||
err = err2
|
||||
}
|
||||
// Spec says keep the old label.
|
||||
continue
|
||||
}
|
||||
isBidi = isBidi || bidirule.DirectionString(u) != bidi.LeftToRight
|
||||
labels.set(u)
|
||||
if err == nil && p.validateLabels {
|
||||
err = p.fromPuny(p, u)
|
||||
}
|
||||
if err == nil {
|
||||
// This should be called on NonTransitional, according to the
|
||||
// spec, but that currently does not have any effect. Use the
|
||||
// original profile to preserve options.
|
||||
err = p.validateLabel(u)
|
||||
}
|
||||
} else if err == nil {
|
||||
err = p.validateLabel(label)
|
||||
}
|
||||
}
|
||||
if isBidi && p.bidirule != nil && err == nil {
|
||||
for labels.reset(); !labels.done(); labels.next() {
|
||||
if !p.bidirule(labels.label()) {
|
||||
err = &labelError{s, "B"}
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
if toASCII {
|
||||
for labels.reset(); !labels.done(); labels.next() {
|
||||
label := labels.label()
|
||||
if !ascii(label) {
|
||||
a, err2 := encode(acePrefix, label)
|
||||
if err == nil {
|
||||
err = err2
|
||||
}
|
||||
label = a
|
||||
labels.set(a)
|
||||
}
|
||||
n := len(label)
|
||||
if p.verifyDNSLength && err == nil && (n == 0 || n > 63) {
|
||||
err = &labelError{label, "A4"}
|
||||
}
|
||||
}
|
||||
}
|
||||
s = labels.result()
|
||||
if toASCII && p.verifyDNSLength && err == nil {
|
||||
// Compute the length of the domain name minus the root label and its dot.
|
||||
n := len(s)
|
||||
if n > 0 && s[n-1] == '.' {
|
||||
n--
|
||||
}
|
||||
if len(s) < 1 || n > 253 {
|
||||
err = &labelError{s, "A4"}
|
||||
}
|
||||
}
|
||||
return s, err
|
||||
}
|
||||
|
||||
func normalize(p *Profile, s string) (mapped string, isBidi bool, err error) {
|
||||
// TODO: consider first doing a quick check to see if any of these checks
|
||||
// need to be done. This will make it slower in the general case, but
|
||||
// faster in the common case.
|
||||
mapped = norm.NFC.String(s)
|
||||
isBidi = bidirule.DirectionString(mapped) == bidi.RightToLeft
|
||||
return mapped, isBidi, nil
|
||||
}
|
||||
|
||||
func validateRegistration(p *Profile, s string) (idem string, bidi bool, err error) {
|
||||
// TODO: filter need for normalization in loop below.
|
||||
if !norm.NFC.IsNormalString(s) {
|
||||
return s, false, &labelError{s, "V1"}
|
||||
}
|
||||
for i := 0; i < len(s); {
|
||||
v, sz := trie.lookupString(s[i:])
|
||||
if sz == 0 {
|
||||
return s, bidi, runeError(utf8.RuneError)
|
||||
}
|
||||
bidi = bidi || info(v).isBidi(s[i:])
|
||||
// Copy bytes not copied so far.
|
||||
switch p.simplify(info(v).category()) {
|
||||
// TODO: handle the NV8 defined in the Unicode idna data set to allow
|
||||
// for strict conformance to IDNA2008.
|
||||
case valid, deviation:
|
||||
case disallowed, mapped, unknown, ignored:
|
||||
r, _ := utf8.DecodeRuneInString(s[i:])
|
||||
return s, bidi, runeError(r)
|
||||
}
|
||||
i += sz
|
||||
}
|
||||
return s, bidi, nil
|
||||
}
|
||||
|
||||
func (c info) isBidi(s string) bool {
|
||||
if !c.isMapped() {
|
||||
return c&attributesMask == rtl
|
||||
}
|
||||
// TODO: also store bidi info for mapped data. This is possible, but a bit
|
||||
// cumbersome and not for the common case.
|
||||
p, _ := bidi.LookupString(s)
|
||||
switch p.Class() {
|
||||
case bidi.R, bidi.AL, bidi.AN:
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func validateAndMap(p *Profile, s string) (vm string, bidi bool, err error) {
|
||||
var (
|
||||
b []byte
|
||||
k int
|
||||
)
|
||||
// combinedInfoBits contains the or-ed bits of all runes. We use this
|
||||
// to derive the mayNeedNorm bit later. This may trigger normalization
|
||||
// overeagerly, but it will not do so in the common case. The end result
|
||||
// is another 10% saving on BenchmarkProfile for the common case.
|
||||
var combinedInfoBits info
|
||||
for i := 0; i < len(s); {
|
||||
v, sz := trie.lookupString(s[i:])
|
||||
if sz == 0 {
|
||||
b = append(b, s[k:i]...)
|
||||
b = append(b, "\ufffd"...)
|
||||
k = len(s)
|
||||
if err == nil {
|
||||
err = runeError(utf8.RuneError)
|
||||
}
|
||||
break
|
||||
}
|
||||
combinedInfoBits |= info(v)
|
||||
bidi = bidi || info(v).isBidi(s[i:])
|
||||
start := i
|
||||
i += sz
|
||||
// Copy bytes not copied so far.
|
||||
switch p.simplify(info(v).category()) {
|
||||
case valid:
|
||||
continue
|
||||
case disallowed:
|
||||
if err == nil {
|
||||
r, _ := utf8.DecodeRuneInString(s[start:])
|
||||
err = runeError(r)
|
||||
}
|
||||
continue
|
||||
case mapped, deviation:
|
||||
b = append(b, s[k:start]...)
|
||||
b = info(v).appendMapping(b, s[start:i])
|
||||
case ignored:
|
||||
b = append(b, s[k:start]...)
|
||||
// drop the rune
|
||||
case unknown:
|
||||
b = append(b, s[k:start]...)
|
||||
b = append(b, "\ufffd"...)
|
||||
}
|
||||
k = i
|
||||
}
|
||||
if k == 0 {
|
||||
// No changes so far.
|
||||
if combinedInfoBits&mayNeedNorm != 0 {
|
||||
s = norm.NFC.String(s)
|
||||
}
|
||||
} else {
|
||||
b = append(b, s[k:]...)
|
||||
if norm.NFC.QuickSpan(b) != len(b) {
|
||||
b = norm.NFC.Bytes(b)
|
||||
}
|
||||
// TODO: the punycode converters require strings as input.
|
||||
s = string(b)
|
||||
}
|
||||
return s, bidi, err
|
||||
}
|
||||
|
||||
// A labelIter allows iterating over domain name labels.
|
||||
type labelIter struct {
|
||||
orig string
|
||||
slice []string
|
||||
curStart int
|
||||
curEnd int
|
||||
i int
|
||||
}
|
||||
|
||||
func (l *labelIter) reset() {
|
||||
l.curStart = 0
|
||||
l.curEnd = 0
|
||||
l.i = 0
|
||||
}
|
||||
|
||||
func (l *labelIter) done() bool {
|
||||
return l.curStart >= len(l.orig)
|
||||
}
|
||||
|
||||
func (l *labelIter) result() string {
|
||||
if l.slice != nil {
|
||||
return strings.Join(l.slice, ".")
|
||||
}
|
||||
return l.orig
|
||||
}
|
||||
|
||||
func (l *labelIter) label() string {
|
||||
if l.slice != nil {
|
||||
return l.slice[l.i]
|
||||
}
|
||||
p := strings.IndexByte(l.orig[l.curStart:], '.')
|
||||
l.curEnd = l.curStart + p
|
||||
if p == -1 {
|
||||
l.curEnd = len(l.orig)
|
||||
}
|
||||
return l.orig[l.curStart:l.curEnd]
|
||||
}
|
||||
|
||||
// next sets the value to the next label. It skips the last label if it is empty.
|
||||
func (l *labelIter) next() {
|
||||
l.i++
|
||||
if l.slice != nil {
|
||||
if l.i >= len(l.slice) || l.i == len(l.slice)-1 && l.slice[l.i] == "" {
|
||||
l.curStart = len(l.orig)
|
||||
}
|
||||
} else {
|
||||
l.curStart = l.curEnd + 1
|
||||
if l.curStart == len(l.orig)-1 && l.orig[l.curStart] == '.' {
|
||||
l.curStart = len(l.orig)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (l *labelIter) set(s string) {
|
||||
if l.slice == nil {
|
||||
l.slice = strings.Split(l.orig, ".")
|
||||
}
|
||||
l.slice[l.i] = s
|
||||
}
|
||||
|
||||
// acePrefix is the ASCII Compatible Encoding prefix.
|
||||
const acePrefix = "xn--"
|
||||
|
||||
func (p *Profile) simplify(cat category) category {
|
||||
switch cat {
|
||||
case disallowedSTD3Mapped:
|
||||
if p.useSTD3Rules {
|
||||
cat = disallowed
|
||||
} else {
|
||||
cat = mapped
|
||||
}
|
||||
case disallowedSTD3Valid:
|
||||
if p.useSTD3Rules {
|
||||
cat = disallowed
|
||||
} else {
|
||||
cat = valid
|
||||
}
|
||||
case deviation:
|
||||
if !p.transitional {
|
||||
cat = valid
|
||||
}
|
||||
case validNV8, validXV8:
|
||||
// TODO: handle V2008
|
||||
cat = valid
|
||||
}
|
||||
return cat
|
||||
}
|
||||
|
||||
func validateFromPunycode(p *Profile, s string) error {
|
||||
if !norm.NFC.IsNormalString(s) {
|
||||
return &labelError{s, "V1"}
|
||||
}
|
||||
// TODO: detect whether string may have to be normalized in the following
|
||||
// loop.
|
||||
for i := 0; i < len(s); {
|
||||
v, sz := trie.lookupString(s[i:])
|
||||
if sz == 0 {
|
||||
return runeError(utf8.RuneError)
|
||||
}
|
||||
if c := p.simplify(info(v).category()); c != valid && c != deviation {
|
||||
return &labelError{s, "V6"}
|
||||
}
|
||||
i += sz
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
const (
|
||||
zwnj = "\u200c"
|
||||
zwj = "\u200d"
|
||||
)
|
||||
|
||||
type joinState int8
|
||||
|
||||
const (
|
||||
stateStart joinState = iota
|
||||
stateVirama
|
||||
stateBefore
|
||||
stateBeforeVirama
|
||||
stateAfter
|
||||
stateFAIL
|
||||
)
|
||||
|
||||
var joinStates = [][numJoinTypes]joinState{
|
||||
stateStart: {
|
||||
joiningL: stateBefore,
|
||||
joiningD: stateBefore,
|
||||
joinZWNJ: stateFAIL,
|
||||
joinZWJ: stateFAIL,
|
||||
joinVirama: stateVirama,
|
||||
},
|
||||
stateVirama: {
|
||||
joiningL: stateBefore,
|
||||
joiningD: stateBefore,
|
||||
},
|
||||
stateBefore: {
|
||||
joiningL: stateBefore,
|
||||
joiningD: stateBefore,
|
||||
joiningT: stateBefore,
|
||||
joinZWNJ: stateAfter,
|
||||
joinZWJ: stateFAIL,
|
||||
joinVirama: stateBeforeVirama,
|
||||
},
|
||||
stateBeforeVirama: {
|
||||
joiningL: stateBefore,
|
||||
joiningD: stateBefore,
|
||||
joiningT: stateBefore,
|
||||
},
|
||||
stateAfter: {
|
||||
joiningL: stateFAIL,
|
||||
joiningD: stateBefore,
|
||||
joiningT: stateAfter,
|
||||
joiningR: stateStart,
|
||||
joinZWNJ: stateFAIL,
|
||||
joinZWJ: stateFAIL,
|
||||
joinVirama: stateAfter, // no-op as we can't accept joiners here
|
||||
},
|
||||
stateFAIL: {
|
||||
0: stateFAIL,
|
||||
joiningL: stateFAIL,
|
||||
joiningD: stateFAIL,
|
||||
joiningT: stateFAIL,
|
||||
joiningR: stateFAIL,
|
||||
joinZWNJ: stateFAIL,
|
||||
joinZWJ: stateFAIL,
|
||||
joinVirama: stateFAIL,
|
||||
},
|
||||
}
|
||||
|
||||
// validateLabel validates the criteria from Section 4.1. Item 1, 4, and 6 are
|
||||
// already implicitly satisfied by the overall implementation.
|
||||
func (p *Profile) validateLabel(s string) (err error) {
|
||||
if s == "" {
|
||||
if p.verifyDNSLength {
|
||||
return &labelError{s, "A4"}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
if !p.validateLabels {
|
||||
return nil
|
||||
}
|
||||
trie := p.trie // p.validateLabels is only set if trie is set.
|
||||
if len(s) > 4 && s[2] == '-' && s[3] == '-' {
|
||||
return &labelError{s, "V2"}
|
||||
}
|
||||
if s[0] == '-' || s[len(s)-1] == '-' {
|
||||
return &labelError{s, "V3"}
|
||||
}
|
||||
// TODO: merge the use of this in the trie.
|
||||
v, sz := trie.lookupString(s)
|
||||
x := info(v)
|
||||
if x.isModifier() {
|
||||
return &labelError{s, "V5"}
|
||||
}
|
||||
// Quickly return in the absence of zero-width (non) joiners.
|
||||
if strings.Index(s, zwj) == -1 && strings.Index(s, zwnj) == -1 {
|
||||
return nil
|
||||
}
|
||||
st := stateStart
|
||||
for i := 0; ; {
|
||||
jt := x.joinType()
|
||||
if s[i:i+sz] == zwj {
|
||||
jt = joinZWJ
|
||||
} else if s[i:i+sz] == zwnj {
|
||||
jt = joinZWNJ
|
||||
}
|
||||
st = joinStates[st][jt]
|
||||
if x.isViramaModifier() {
|
||||
st = joinStates[st][joinVirama]
|
||||
}
|
||||
if i += sz; i == len(s) {
|
||||
break
|
||||
}
|
||||
v, sz = trie.lookupString(s[i:])
|
||||
x = info(v)
|
||||
}
|
||||
if st == stateFAIL || st == stateAfter {
|
||||
return &labelError{s, "C"}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func ascii(s string) bool {
|
||||
for i := 0; i < len(s); i++ {
|
||||
if s[i] >= utf8.RuneSelf {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
|
@ -0,0 +1,203 @@
|
|||
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
|
||||
|
||||
// Copyright 2016 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package idna
|
||||
|
||||
// This file implements the Punycode algorithm from RFC 3492.
|
||||
|
||||
import (
|
||||
"math"
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
// These parameter values are specified in section 5.
|
||||
//
|
||||
// All computation is done with int32s, so that overflow behavior is identical
|
||||
// regardless of whether int is 32-bit or 64-bit.
|
||||
const (
|
||||
base int32 = 36
|
||||
damp int32 = 700
|
||||
initialBias int32 = 72
|
||||
initialN int32 = 128
|
||||
skew int32 = 38
|
||||
tmax int32 = 26
|
||||
tmin int32 = 1
|
||||
)
|
||||
|
||||
func punyError(s string) error { return &labelError{s, "A3"} }
|
||||
|
||||
// decode decodes a string as specified in section 6.2.
|
||||
func decode(encoded string) (string, error) {
|
||||
if encoded == "" {
|
||||
return "", nil
|
||||
}
|
||||
pos := 1 + strings.LastIndex(encoded, "-")
|
||||
if pos == 1 {
|
||||
return "", punyError(encoded)
|
||||
}
|
||||
if pos == len(encoded) {
|
||||
return encoded[:len(encoded)-1], nil
|
||||
}
|
||||
output := make([]rune, 0, len(encoded))
|
||||
if pos != 0 {
|
||||
for _, r := range encoded[:pos-1] {
|
||||
output = append(output, r)
|
||||
}
|
||||
}
|
||||
i, n, bias := int32(0), initialN, initialBias
|
||||
for pos < len(encoded) {
|
||||
oldI, w := i, int32(1)
|
||||
for k := base; ; k += base {
|
||||
if pos == len(encoded) {
|
||||
return "", punyError(encoded)
|
||||
}
|
||||
digit, ok := decodeDigit(encoded[pos])
|
||||
if !ok {
|
||||
return "", punyError(encoded)
|
||||
}
|
||||
pos++
|
||||
i += digit * w
|
||||
if i < 0 {
|
||||
return "", punyError(encoded)
|
||||
}
|
||||
t := k - bias
|
||||
if t < tmin {
|
||||
t = tmin
|
||||
} else if t > tmax {
|
||||
t = tmax
|
||||
}
|
||||
if digit < t {
|
||||
break
|
||||
}
|
||||
w *= base - t
|
||||
if w >= math.MaxInt32/base {
|
||||
return "", punyError(encoded)
|
||||
}
|
||||
}
|
||||
x := int32(len(output) + 1)
|
||||
bias = adapt(i-oldI, x, oldI == 0)
|
||||
n += i / x
|
||||
i %= x
|
||||
if n > utf8.MaxRune || len(output) >= 1024 {
|
||||
return "", punyError(encoded)
|
||||
}
|
||||
output = append(output, 0)
|
||||
copy(output[i+1:], output[i:])
|
||||
output[i] = n
|
||||
i++
|
||||
}
|
||||
return string(output), nil
|
||||
}
|
||||
|
||||
// encode encodes a string as specified in section 6.3 and prepends prefix to
|
||||
// the result.
|
||||
//
|
||||
// The "while h < length(input)" line in the specification becomes "for
|
||||
// remaining != 0" in the Go code, because len(s) in Go is in bytes, not runes.
|
||||
func encode(prefix, s string) (string, error) {
|
||||
output := make([]byte, len(prefix), len(prefix)+1+2*len(s))
|
||||
copy(output, prefix)
|
||||
delta, n, bias := int32(0), initialN, initialBias
|
||||
b, remaining := int32(0), int32(0)
|
||||
for _, r := range s {
|
||||
if r < 0x80 {
|
||||
b++
|
||||
output = append(output, byte(r))
|
||||
} else {
|
||||
remaining++
|
||||
}
|
||||
}
|
||||
h := b
|
||||
if b > 0 {
|
||||
output = append(output, '-')
|
||||
}
|
||||
for remaining != 0 {
|
||||
m := int32(0x7fffffff)
|
||||
for _, r := range s {
|
||||
if m > r && r >= n {
|
||||
m = r
|
||||
}
|
||||
}
|
||||
delta += (m - n) * (h + 1)
|
||||
if delta < 0 {
|
||||
return "", punyError(s)
|
||||
}
|
||||
n = m
|
||||
for _, r := range s {
|
||||
if r < n {
|
||||
delta++
|
||||
if delta < 0 {
|
||||
return "", punyError(s)
|
||||
}
|
||||
continue
|
||||
}
|
||||
if r > n {
|
||||
continue
|
||||
}
|
||||
q := delta
|
||||
for k := base; ; k += base {
|
||||
t := k - bias
|
||||
if t < tmin {
|
||||
t = tmin
|
||||
} else if t > tmax {
|
||||
t = tmax
|
||||
}
|
||||
if q < t {
|
||||
break
|
||||
}
|
||||
output = append(output, encodeDigit(t+(q-t)%(base-t)))
|
||||
q = (q - t) / (base - t)
|
||||
}
|
||||
output = append(output, encodeDigit(q))
|
||||
bias = adapt(delta, h+1, h == b)
|
||||
delta = 0
|
||||
h++
|
||||
remaining--
|
||||
}
|
||||
delta++
|
||||
n++
|
||||
}
|
||||
return string(output), nil
|
||||
}
|
||||
|
||||
func decodeDigit(x byte) (digit int32, ok bool) {
|
||||
switch {
|
||||
case '0' <= x && x <= '9':
|
||||
return int32(x - ('0' - 26)), true
|
||||
case 'A' <= x && x <= 'Z':
|
||||
return int32(x - 'A'), true
|
||||
case 'a' <= x && x <= 'z':
|
||||
return int32(x - 'a'), true
|
||||
}
|
||||
return 0, false
|
||||
}
|
||||
|
||||
func encodeDigit(digit int32) byte {
|
||||
switch {
|
||||
case 0 <= digit && digit < 26:
|
||||
return byte(digit + 'a')
|
||||
case 26 <= digit && digit < 36:
|
||||
return byte(digit + ('0' - 26))
|
||||
}
|
||||
panic("idna: internal error in punycode encoding")
|
||||
}
|
||||
|
||||
// adapt is the bias adaptation function specified in section 6.1.
|
||||
func adapt(delta, numPoints int32, firstTime bool) int32 {
|
||||
if firstTime {
|
||||
delta /= damp
|
||||
} else {
|
||||
delta /= 2
|
||||
}
|
||||
delta += delta / numPoints
|
||||
k := int32(0)
|
||||
for delta > ((base-tmin)*tmax)/2 {
|
||||
delta /= base - tmin
|
||||
k += base
|
||||
}
|
||||
return k + (base-tmin+1)*delta/(delta+skew)
|
||||
}
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,72 @@
|
|||
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
|
||||
|
||||
// Copyright 2016 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package idna
|
||||
|
||||
// appendMapping appends the mapping for the respective rune. isMapped must be
|
||||
// true. A mapping is a categorization of a rune as defined in UTS #46.
|
||||
func (c info) appendMapping(b []byte, s string) []byte {
|
||||
index := int(c >> indexShift)
|
||||
if c&xorBit == 0 {
|
||||
s := mappings[index:]
|
||||
return append(b, s[1:s[0]+1]...)
|
||||
}
|
||||
b = append(b, s...)
|
||||
if c&inlineXOR == inlineXOR {
|
||||
// TODO: support and handle two-byte inline masks
|
||||
b[len(b)-1] ^= byte(index)
|
||||
} else {
|
||||
for p := len(b) - int(xorData[index]); p < len(b); p++ {
|
||||
index++
|
||||
b[p] ^= xorData[index]
|
||||
}
|
||||
}
|
||||
return b
|
||||
}
|
||||
|
||||
// Sparse block handling code.
|
||||
|
||||
type valueRange struct {
|
||||
value uint16 // header: value:stride
|
||||
lo, hi byte // header: lo:n
|
||||
}
|
||||
|
||||
type sparseBlocks struct {
|
||||
values []valueRange
|
||||
offset []uint16
|
||||
}
|
||||
|
||||
var idnaSparse = sparseBlocks{
|
||||
values: idnaSparseValues[:],
|
||||
offset: idnaSparseOffset[:],
|
||||
}
|
||||
|
||||
// Don't use newIdnaTrie to avoid unconditional linking in of the table.
|
||||
var trie = &idnaTrie{}
|
||||
|
||||
// lookup determines the type of block n and looks up the value for b.
|
||||
// For n < t.cutoff, the block is a simple lookup table. Otherwise, the block
|
||||
// is a list of ranges with an accompanying value. Given a matching range r,
|
||||
// the value for b is by r.value + (b - r.lo) * stride.
|
||||
func (t *sparseBlocks) lookup(n uint32, b byte) uint16 {
|
||||
offset := t.offset[n]
|
||||
header := t.values[offset]
|
||||
lo := offset + 1
|
||||
hi := lo + uint16(header.lo)
|
||||
for lo < hi {
|
||||
m := lo + (hi-lo)/2
|
||||
r := t.values[m]
|
||||
if r.lo <= b && b <= r.hi {
|
||||
return r.value + uint16(b-r.lo)*header.value
|
||||
}
|
||||
if b < r.lo {
|
||||
hi = m
|
||||
} else {
|
||||
lo = m + 1
|
||||
}
|
||||
}
|
||||
return 0
|
||||
}
|
|
@ -0,0 +1,119 @@
|
|||
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
|
||||
|
||||
package idna
|
||||
|
||||
// This file contains definitions for interpreting the trie value of the idna
|
||||
// trie generated by "go run gen*.go". It is shared by both the generator
|
||||
// program and the resultant package. Sharing is achieved by the generator
|
||||
// copying gen_trieval.go to trieval.go and changing what's above this comment.
|
||||
|
||||
// info holds information from the IDNA mapping table for a single rune. It is
|
||||
// the value returned by a trie lookup. In most cases, all information fits in
|
||||
// a 16-bit value. For mappings, this value may contain an index into a slice
|
||||
// with the mapped string. Such mappings can consist of the actual mapped value
|
||||
// or an XOR pattern to be applied to the bytes of the UTF8 encoding of the
|
||||
// input rune. This technique is used by the cases packages and reduces the
|
||||
// table size significantly.
|
||||
//
|
||||
// The per-rune values have the following format:
|
||||
//
|
||||
// if mapped {
|
||||
// if inlinedXOR {
|
||||
// 15..13 inline XOR marker
|
||||
// 12..11 unused
|
||||
// 10..3 inline XOR mask
|
||||
// } else {
|
||||
// 15..3 index into xor or mapping table
|
||||
// }
|
||||
// } else {
|
||||
// 15..14 unused
|
||||
// 13 mayNeedNorm
|
||||
// 12..11 attributes
|
||||
// 10..8 joining type
|
||||
// 7..3 category type
|
||||
// }
|
||||
// 2 use xor pattern
|
||||
// 1..0 mapped category
|
||||
//
|
||||
// See the definitions below for a more detailed description of the various
|
||||
// bits.
|
||||
type info uint16
|
||||
|
||||
const (
|
||||
catSmallMask = 0x3
|
||||
catBigMask = 0xF8
|
||||
indexShift = 3
|
||||
xorBit = 0x4 // interpret the index as an xor pattern
|
||||
inlineXOR = 0xE000 // These bits are set if the XOR pattern is inlined.
|
||||
|
||||
joinShift = 8
|
||||
joinMask = 0x07
|
||||
|
||||
// Attributes
|
||||
attributesMask = 0x1800
|
||||
viramaModifier = 0x1800
|
||||
modifier = 0x1000
|
||||
rtl = 0x0800
|
||||
|
||||
mayNeedNorm = 0x2000
|
||||
)
|
||||
|
||||
// A category corresponds to a category defined in the IDNA mapping table.
|
||||
type category uint16
|
||||
|
||||
const (
|
||||
unknown category = 0 // not currently defined in unicode.
|
||||
mapped category = 1
|
||||
disallowedSTD3Mapped category = 2
|
||||
deviation category = 3
|
||||
)
|
||||
|
||||
const (
|
||||
valid category = 0x08
|
||||
validNV8 category = 0x18
|
||||
validXV8 category = 0x28
|
||||
disallowed category = 0x40
|
||||
disallowedSTD3Valid category = 0x80
|
||||
ignored category = 0xC0
|
||||
)
|
||||
|
||||
// join types and additional rune information
|
||||
const (
|
||||
joiningL = (iota + 1)
|
||||
joiningD
|
||||
joiningT
|
||||
joiningR
|
||||
|
||||
//the following types are derived during processing
|
||||
joinZWJ
|
||||
joinZWNJ
|
||||
joinVirama
|
||||
numJoinTypes
|
||||
)
|
||||
|
||||
func (c info) isMapped() bool {
|
||||
return c&0x3 != 0
|
||||
}
|
||||
|
||||
func (c info) category() category {
|
||||
small := c & catSmallMask
|
||||
if small != 0 {
|
||||
return category(small)
|
||||
}
|
||||
return category(c & catBigMask)
|
||||
}
|
||||
|
||||
func (c info) joinType() info {
|
||||
if c.isMapped() {
|
||||
return 0
|
||||
}
|
||||
return (c >> joinShift) & joinMask
|
||||
}
|
||||
|
||||
func (c info) isModifier() bool {
|
||||
return c&(modifier|catSmallMask) == modifier
|
||||
}
|
||||
|
||||
func (c info) isViramaModifier() bool {
|
||||
return c&(attributesMask|catSmallMask) == viramaModifier
|
||||
}
|
|
@ -0,0 +1,351 @@
|
|||
// Copyright 2016 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package httplex contains rules around lexical matters of various
|
||||
// HTTP-related specifications.
|
||||
//
|
||||
// This package is shared by the standard library (which vendors it)
|
||||
// and x/net/http2. It comes with no API stability promise.
|
||||
package httplex
|
||||
|
||||
import (
|
||||
"net"
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
|
||||
"golang.org/x/net/idna"
|
||||
)
|
||||
|
||||
var isTokenTable = [127]bool{
|
||||
'!': true,
|
||||
'#': true,
|
||||
'$': true,
|
||||
'%': true,
|
||||
'&': true,
|
||||
'\'': true,
|
||||
'*': true,
|
||||
'+': true,
|
||||
'-': true,
|
||||
'.': true,
|
||||
'0': true,
|
||||
'1': true,
|
||||
'2': true,
|
||||
'3': true,
|
||||
'4': true,
|
||||
'5': true,
|
||||
'6': true,
|
||||
'7': true,
|
||||
'8': true,
|
||||
'9': true,
|
||||
'A': true,
|
||||
'B': true,
|
||||
'C': true,
|
||||
'D': true,
|
||||
'E': true,
|
||||
'F': true,
|
||||
'G': true,
|
||||
'H': true,
|
||||
'I': true,
|
||||
'J': true,
|
||||
'K': true,
|
||||
'L': true,
|
||||
'M': true,
|
||||
'N': true,
|
||||
'O': true,
|
||||
'P': true,
|
||||
'Q': true,
|
||||
'R': true,
|
||||
'S': true,
|
||||
'T': true,
|
||||
'U': true,
|
||||
'W': true,
|
||||
'V': true,
|
||||
'X': true,
|
||||
'Y': true,
|
||||
'Z': true,
|
||||
'^': true,
|
||||
'_': true,
|
||||
'`': true,
|
||||
'a': true,
|
||||
'b': true,
|
||||
'c': true,
|
||||
'd': true,
|
||||
'e': true,
|
||||
'f': true,
|
||||
'g': true,
|
||||
'h': true,
|
||||
'i': true,
|
||||
'j': true,
|
||||
'k': true,
|
||||
'l': true,
|
||||
'm': true,
|
||||
'n': true,
|
||||
'o': true,
|
||||
'p': true,
|
||||
'q': true,
|
||||
'r': true,
|
||||
's': true,
|
||||
't': true,
|
||||
'u': true,
|
||||
'v': true,
|
||||
'w': true,
|
||||
'x': true,
|
||||
'y': true,
|
||||
'z': true,
|
||||
'|': true,
|
||||
'~': true,
|
||||
}
|
||||
|
||||
func IsTokenRune(r rune) bool {
|
||||
i := int(r)
|
||||
return i < len(isTokenTable) && isTokenTable[i]
|
||||
}
|
||||
|
||||
func isNotToken(r rune) bool {
|
||||
return !IsTokenRune(r)
|
||||
}
|
||||
|
||||
// HeaderValuesContainsToken reports whether any string in values
|
||||
// contains the provided token, ASCII case-insensitively.
|
||||
func HeaderValuesContainsToken(values []string, token string) bool {
|
||||
for _, v := range values {
|
||||
if headerValueContainsToken(v, token) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// isOWS reports whether b is an optional whitespace byte, as defined
|
||||
// by RFC 7230 section 3.2.3.
|
||||
func isOWS(b byte) bool { return b == ' ' || b == '\t' }
|
||||
|
||||
// trimOWS returns x with all optional whitespace removes from the
|
||||
// beginning and end.
|
||||
func trimOWS(x string) string {
|
||||
// TODO: consider using strings.Trim(x, " \t") instead,
|
||||
// if and when it's fast enough. See issue 10292.
|
||||
// But this ASCII-only code will probably always beat UTF-8
|
||||
// aware code.
|
||||
for len(x) > 0 && isOWS(x[0]) {
|
||||
x = x[1:]
|
||||
}
|
||||
for len(x) > 0 && isOWS(x[len(x)-1]) {
|
||||
x = x[:len(x)-1]
|
||||
}
|
||||
return x
|
||||
}
|
||||
|
||||
// headerValueContainsToken reports whether v (assumed to be a
|
||||
// 0#element, in the ABNF extension described in RFC 7230 section 7)
|
||||
// contains token amongst its comma-separated tokens, ASCII
|
||||
// case-insensitively.
|
||||
func headerValueContainsToken(v string, token string) bool {
|
||||
v = trimOWS(v)
|
||||
if comma := strings.IndexByte(v, ','); comma != -1 {
|
||||
return tokenEqual(trimOWS(v[:comma]), token) || headerValueContainsToken(v[comma+1:], token)
|
||||
}
|
||||
return tokenEqual(v, token)
|
||||
}
|
||||
|
||||
// lowerASCII returns the ASCII lowercase version of b.
|
||||
func lowerASCII(b byte) byte {
|
||||
if 'A' <= b && b <= 'Z' {
|
||||
return b + ('a' - 'A')
|
||||
}
|
||||
return b
|
||||
}
|
||||
|
||||
// tokenEqual reports whether t1 and t2 are equal, ASCII case-insensitively.
|
||||
func tokenEqual(t1, t2 string) bool {
|
||||
if len(t1) != len(t2) {
|
||||
return false
|
||||
}
|
||||
for i, b := range t1 {
|
||||
if b >= utf8.RuneSelf {
|
||||
// No UTF-8 or non-ASCII allowed in tokens.
|
||||
return false
|
||||
}
|
||||
if lowerASCII(byte(b)) != lowerASCII(t2[i]) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// isLWS reports whether b is linear white space, according
|
||||
// to http://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2
|
||||
// LWS = [CRLF] 1*( SP | HT )
|
||||
func isLWS(b byte) bool { return b == ' ' || b == '\t' }
|
||||
|
||||
// isCTL reports whether b is a control byte, according
|
||||
// to http://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2
|
||||
// CTL = <any US-ASCII control character
|
||||
// (octets 0 - 31) and DEL (127)>
|
||||
func isCTL(b byte) bool {
|
||||
const del = 0x7f // a CTL
|
||||
return b < ' ' || b == del
|
||||
}
|
||||
|
||||
// ValidHeaderFieldName reports whether v is a valid HTTP/1.x header name.
|
||||
// HTTP/2 imposes the additional restriction that uppercase ASCII
|
||||
// letters are not allowed.
|
||||
//
|
||||
// RFC 7230 says:
|
||||
// header-field = field-name ":" OWS field-value OWS
|
||||
// field-name = token
|
||||
// token = 1*tchar
|
||||
// tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." /
|
||||
// "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA
|
||||
func ValidHeaderFieldName(v string) bool {
|
||||
if len(v) == 0 {
|
||||
return false
|
||||
}
|
||||
for _, r := range v {
|
||||
if !IsTokenRune(r) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// ValidHostHeader reports whether h is a valid host header.
|
||||
func ValidHostHeader(h string) bool {
|
||||
// The latest spec is actually this:
|
||||
//
|
||||
// http://tools.ietf.org/html/rfc7230#section-5.4
|
||||
// Host = uri-host [ ":" port ]
|
||||
//
|
||||
// Where uri-host is:
|
||||
// http://tools.ietf.org/html/rfc3986#section-3.2.2
|
||||
//
|
||||
// But we're going to be much more lenient for now and just
|
||||
// search for any byte that's not a valid byte in any of those
|
||||
// expressions.
|
||||
for i := 0; i < len(h); i++ {
|
||||
if !validHostByte[h[i]] {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// See the validHostHeader comment.
|
||||
var validHostByte = [256]bool{
|
||||
'0': true, '1': true, '2': true, '3': true, '4': true, '5': true, '6': true, '7': true,
|
||||
'8': true, '9': true,
|
||||
|
||||
'a': true, 'b': true, 'c': true, 'd': true, 'e': true, 'f': true, 'g': true, 'h': true,
|
||||
'i': true, 'j': true, 'k': true, 'l': true, 'm': true, 'n': true, 'o': true, 'p': true,
|
||||
'q': true, 'r': true, 's': true, 't': true, 'u': true, 'v': true, 'w': true, 'x': true,
|
||||
'y': true, 'z': true,
|
||||
|
||||
'A': true, 'B': true, 'C': true, 'D': true, 'E': true, 'F': true, 'G': true, 'H': true,
|
||||
'I': true, 'J': true, 'K': true, 'L': true, 'M': true, 'N': true, 'O': true, 'P': true,
|
||||
'Q': true, 'R': true, 'S': true, 'T': true, 'U': true, 'V': true, 'W': true, 'X': true,
|
||||
'Y': true, 'Z': true,
|
||||
|
||||
'!': true, // sub-delims
|
||||
'$': true, // sub-delims
|
||||
'%': true, // pct-encoded (and used in IPv6 zones)
|
||||
'&': true, // sub-delims
|
||||
'(': true, // sub-delims
|
||||
')': true, // sub-delims
|
||||
'*': true, // sub-delims
|
||||
'+': true, // sub-delims
|
||||
',': true, // sub-delims
|
||||
'-': true, // unreserved
|
||||
'.': true, // unreserved
|
||||
':': true, // IPv6address + Host expression's optional port
|
||||
';': true, // sub-delims
|
||||
'=': true, // sub-delims
|
||||
'[': true,
|
||||
'\'': true, // sub-delims
|
||||
']': true,
|
||||
'_': true, // unreserved
|
||||
'~': true, // unreserved
|
||||
}
|
||||
|
||||
// ValidHeaderFieldValue reports whether v is a valid "field-value" according to
|
||||
// http://www.w3.org/Protocols/rfc2616/rfc2616-sec4.html#sec4.2 :
|
||||
//
|
||||
// message-header = field-name ":" [ field-value ]
|
||||
// field-value = *( field-content | LWS )
|
||||
// field-content = <the OCTETs making up the field-value
|
||||
// and consisting of either *TEXT or combinations
|
||||
// of token, separators, and quoted-string>
|
||||
//
|
||||
// http://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2 :
|
||||
//
|
||||
// TEXT = <any OCTET except CTLs,
|
||||
// but including LWS>
|
||||
// LWS = [CRLF] 1*( SP | HT )
|
||||
// CTL = <any US-ASCII control character
|
||||
// (octets 0 - 31) and DEL (127)>
|
||||
//
|
||||
// RFC 7230 says:
|
||||
// field-value = *( field-content / obs-fold )
|
||||
// obj-fold = N/A to http2, and deprecated
|
||||
// field-content = field-vchar [ 1*( SP / HTAB ) field-vchar ]
|
||||
// field-vchar = VCHAR / obs-text
|
||||
// obs-text = %x80-FF
|
||||
// VCHAR = "any visible [USASCII] character"
|
||||
//
|
||||
// http2 further says: "Similarly, HTTP/2 allows header field values
|
||||
// that are not valid. While most of the values that can be encoded
|
||||
// will not alter header field parsing, carriage return (CR, ASCII
|
||||
// 0xd), line feed (LF, ASCII 0xa), and the zero character (NUL, ASCII
|
||||
// 0x0) might be exploited by an attacker if they are translated
|
||||
// verbatim. Any request or response that contains a character not
|
||||
// permitted in a header field value MUST be treated as malformed
|
||||
// (Section 8.1.2.6). Valid characters are defined by the
|
||||
// field-content ABNF rule in Section 3.2 of [RFC7230]."
|
||||
//
|
||||
// This function does not (yet?) properly handle the rejection of
|
||||
// strings that begin or end with SP or HTAB.
|
||||
func ValidHeaderFieldValue(v string) bool {
|
||||
for i := 0; i < len(v); i++ {
|
||||
b := v[i]
|
||||
if isCTL(b) && !isLWS(b) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func isASCII(s string) bool {
|
||||
for i := 0; i < len(s); i++ {
|
||||
if s[i] >= utf8.RuneSelf {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// PunycodeHostPort returns the IDNA Punycode version
|
||||
// of the provided "host" or "host:port" string.
|
||||
func PunycodeHostPort(v string) (string, error) {
|
||||
if isASCII(v) {
|
||||
return v, nil
|
||||
}
|
||||
|
||||
host, port, err := net.SplitHostPort(v)
|
||||
if err != nil {
|
||||
// The input 'v' argument was just a "host" argument,
|
||||
// without a port. This error should not be returned
|
||||
// to the caller.
|
||||
host = v
|
||||
port = ""
|
||||
}
|
||||
host, err = idna.ToASCII(host)
|
||||
if err != nil {
|
||||
// Non-UTF-8? Not representable in Punycode, in any
|
||||
// case.
|
||||
return "", err
|
||||
}
|
||||
if port == "" {
|
||||
return host, nil
|
||||
}
|
||||
return net.JoinHostPort(host, port), nil
|
||||
}
|
|
@ -0,0 +1,27 @@
|
|||
Copyright (c) 2009 The Go Authors. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following disclaimer
|
||||
in the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Google Inc. nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
@ -0,0 +1,22 @@
|
|||
Additional IP Rights Grant (Patents)
|
||||
|
||||
"This implementation" means the copyrightable works distributed by
|
||||
Google as part of the Go project.
|
||||
|
||||
Google hereby grants to You a perpetual, worldwide, non-exclusive,
|
||||
no-charge, royalty-free, irrevocable (except as stated in this section)
|
||||
patent license to make, have made, use, offer to sell, sell, import,
|
||||
transfer and otherwise run, modify and propagate the contents of this
|
||||
implementation of Go, where such license applies only to those patent
|
||||
claims, both currently owned or controlled by Google and acquired in
|
||||
the future, licensable by Google that are necessarily infringed by this
|
||||
implementation of Go. This grant does not include claims that would be
|
||||
infringed only as a consequence of further modification of this
|
||||
implementation. If you or your agent or exclusive licensee institute or
|
||||
order or agree to the institution of patent litigation against any
|
||||
entity (including a cross-claim or counterclaim in a lawsuit) alleging
|
||||
that this implementation of Go or any code incorporated within this
|
||||
implementation of Go constitutes direct or contributory patent
|
||||
infringement, or inducement of patent infringement, then any patent
|
||||
rights granted to you under this License for this implementation of Go
|
||||
shall terminate as of the date such litigation is filed.
|
|
@ -0,0 +1,340 @@
|
|||
// Copyright 2016 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package bidirule implements the Bidi Rule defined by RFC 5893.
|
||||
//
|
||||
// This package is under development. The API may change without notice and
|
||||
// without preserving backward compatibility.
|
||||
package bidirule
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"unicode/utf8"
|
||||
|
||||
"golang.org/x/text/transform"
|
||||
"golang.org/x/text/unicode/bidi"
|
||||
)
|
||||
|
||||
// This file contains an implementation of RFC 5893: Right-to-Left Scripts for
|
||||
// Internationalized Domain Names for Applications (IDNA)
|
||||
//
|
||||
// A label is an individual component of a domain name. Labels are usually
|
||||
// shown separated by dots; for example, the domain name "www.example.com" is
|
||||
// composed of three labels: "www", "example", and "com".
|
||||
//
|
||||
// An RTL label is a label that contains at least one character of class R, AL,
|
||||
// or AN. An LTR label is any label that is not an RTL label.
|
||||
//
|
||||
// A "Bidi domain name" is a domain name that contains at least one RTL label.
|
||||
//
|
||||
// The following guarantees can be made based on the above:
|
||||
//
|
||||
// o In a domain name consisting of only labels that satisfy the rule,
|
||||
// the requirements of Section 3 are satisfied. Note that even LTR
|
||||
// labels and pure ASCII labels have to be tested.
|
||||
//
|
||||
// o In a domain name consisting of only LDH labels (as defined in the
|
||||
// Definitions document [RFC5890]) and labels that satisfy the rule,
|
||||
// the requirements of Section 3 are satisfied as long as a label
|
||||
// that starts with an ASCII digit does not come after a
|
||||
// right-to-left label.
|
||||
//
|
||||
// No guarantee is given for other combinations.
|
||||
|
||||
// ErrInvalid indicates a label is invalid according to the Bidi Rule.
|
||||
var ErrInvalid = errors.New("bidirule: failed Bidi Rule")
|
||||
|
||||
type ruleState uint8
|
||||
|
||||
const (
|
||||
ruleInitial ruleState = iota
|
||||
ruleLTR
|
||||
ruleLTRFinal
|
||||
ruleRTL
|
||||
ruleRTLFinal
|
||||
ruleInvalid
|
||||
)
|
||||
|
||||
type ruleTransition struct {
|
||||
next ruleState
|
||||
mask uint16
|
||||
}
|
||||
|
||||
var transitions = [...][2]ruleTransition{
|
||||
// [2.1] The first character must be a character with Bidi property L, R, or
|
||||
// AL. If it has the R or AL property, it is an RTL label; if it has the L
|
||||
// property, it is an LTR label.
|
||||
ruleInitial: {
|
||||
{ruleLTRFinal, 1 << bidi.L},
|
||||
{ruleRTLFinal, 1<<bidi.R | 1<<bidi.AL},
|
||||
},
|
||||
ruleRTL: {
|
||||
// [2.3] In an RTL label, the end of the label must be a character with
|
||||
// Bidi property R, AL, EN, or AN, followed by zero or more characters
|
||||
// with Bidi property NSM.
|
||||
{ruleRTLFinal, 1<<bidi.R | 1<<bidi.AL | 1<<bidi.EN | 1<<bidi.AN},
|
||||
|
||||
// [2.2] In an RTL label, only characters with the Bidi properties R,
|
||||
// AL, AN, EN, ES, CS, ET, ON, BN, or NSM are allowed.
|
||||
// We exclude the entries from [2.3]
|
||||
{ruleRTL, 1<<bidi.ES | 1<<bidi.CS | 1<<bidi.ET | 1<<bidi.ON | 1<<bidi.BN | 1<<bidi.NSM},
|
||||
},
|
||||
ruleRTLFinal: {
|
||||
// [2.3] In an RTL label, the end of the label must be a character with
|
||||
// Bidi property R, AL, EN, or AN, followed by zero or more characters
|
||||
// with Bidi property NSM.
|
||||
{ruleRTLFinal, 1<<bidi.R | 1<<bidi.AL | 1<<bidi.EN | 1<<bidi.AN | 1<<bidi.NSM},
|
||||
|
||||
// [2.2] In an RTL label, only characters with the Bidi properties R,
|
||||
// AL, AN, EN, ES, CS, ET, ON, BN, or NSM are allowed.
|
||||
// We exclude the entries from [2.3] and NSM.
|
||||
{ruleRTL, 1<<bidi.ES | 1<<bidi.CS | 1<<bidi.ET | 1<<bidi.ON | 1<<bidi.BN},
|
||||
},
|
||||
ruleLTR: {
|
||||
// [2.6] In an LTR label, the end of the label must be a character with
|
||||
// Bidi property L or EN, followed by zero or more characters with Bidi
|
||||
// property NSM.
|
||||
{ruleLTRFinal, 1<<bidi.L | 1<<bidi.EN},
|
||||
|
||||
// [2.5] In an LTR label, only characters with the Bidi properties L,
|
||||
// EN, ES, CS, ET, ON, BN, or NSM are allowed.
|
||||
// We exclude the entries from [2.6].
|
||||
{ruleLTR, 1<<bidi.ES | 1<<bidi.CS | 1<<bidi.ET | 1<<bidi.ON | 1<<bidi.BN | 1<<bidi.NSM},
|
||||
},
|
||||
ruleLTRFinal: {
|
||||
// [2.6] In an LTR label, the end of the label must be a character with
|
||||
// Bidi property L or EN, followed by zero or more characters with Bidi
|
||||
// property NSM.
|
||||
{ruleLTRFinal, 1<<bidi.L | 1<<bidi.EN | 1<<bidi.NSM},
|
||||
|
||||
// [2.5] In an LTR label, only characters with the Bidi properties L,
|
||||
// EN, ES, CS, ET, ON, BN, or NSM are allowed.
|
||||
// We exclude the entries from [2.6].
|
||||
{ruleLTR, 1<<bidi.ES | 1<<bidi.CS | 1<<bidi.ET | 1<<bidi.ON | 1<<bidi.BN},
|
||||
},
|
||||
ruleInvalid: {
|
||||
{ruleInvalid, 0},
|
||||
{ruleInvalid, 0},
|
||||
},
|
||||
}
|
||||
|
||||
// [2.4] In an RTL label, if an EN is present, no AN may be present, and
|
||||
// vice versa.
|
||||
const exclusiveRTL = uint16(1<<bidi.EN | 1<<bidi.AN)
|
||||
|
||||
// From RFC 5893
|
||||
// An RTL label is a label that contains at least one character of type
|
||||
// R, AL, or AN.
|
||||
//
|
||||
// An LTR label is any label that is not an RTL label.
|
||||
|
||||
// Direction reports the direction of the given label as defined by RFC 5893.
|
||||
// The Bidi Rule does not have to be applied to labels of the category
|
||||
// LeftToRight.
|
||||
func Direction(b []byte) bidi.Direction {
|
||||
for i := 0; i < len(b); {
|
||||
e, sz := bidi.Lookup(b[i:])
|
||||
if sz == 0 {
|
||||
i++
|
||||
}
|
||||
c := e.Class()
|
||||
if c == bidi.R || c == bidi.AL || c == bidi.AN {
|
||||
return bidi.RightToLeft
|
||||
}
|
||||
i += sz
|
||||
}
|
||||
return bidi.LeftToRight
|
||||
}
|
||||
|
||||
// DirectionString reports the direction of the given label as defined by RFC
|
||||
// 5893. The Bidi Rule does not have to be applied to labels of the category
|
||||
// LeftToRight.
|
||||
func DirectionString(s string) bidi.Direction {
|
||||
for i := 0; i < len(s); {
|
||||
e, sz := bidi.LookupString(s[i:])
|
||||
if sz == 0 {
|
||||
i++
|
||||
continue
|
||||
}
|
||||
c := e.Class()
|
||||
if c == bidi.R || c == bidi.AL || c == bidi.AN {
|
||||
return bidi.RightToLeft
|
||||
}
|
||||
i += sz
|
||||
}
|
||||
return bidi.LeftToRight
|
||||
}
|
||||
|
||||
// Valid reports whether b conforms to the BiDi rule.
|
||||
func Valid(b []byte) bool {
|
||||
var t Transformer
|
||||
if n, ok := t.advance(b); !ok || n < len(b) {
|
||||
return false
|
||||
}
|
||||
return t.isFinal()
|
||||
}
|
||||
|
||||
// ValidString reports whether s conforms to the BiDi rule.
|
||||
func ValidString(s string) bool {
|
||||
var t Transformer
|
||||
if n, ok := t.advanceString(s); !ok || n < len(s) {
|
||||
return false
|
||||
}
|
||||
return t.isFinal()
|
||||
}
|
||||
|
||||
// New returns a Transformer that verifies that input adheres to the Bidi Rule.
|
||||
func New() *Transformer {
|
||||
return &Transformer{}
|
||||
}
|
||||
|
||||
// Transformer implements transform.Transform.
|
||||
type Transformer struct {
|
||||
state ruleState
|
||||
hasRTL bool
|
||||
seen uint16
|
||||
}
|
||||
|
||||
// A rule can only be violated for "Bidi Domain names", meaning if one of the
|
||||
// following categories has been observed.
|
||||
func (t *Transformer) isRTL() bool {
|
||||
const isRTL = 1<<bidi.R | 1<<bidi.AL | 1<<bidi.AN
|
||||
return t.seen&isRTL != 0
|
||||
}
|
||||
|
||||
func (t *Transformer) isFinal() bool {
|
||||
return t.state == ruleLTRFinal || t.state == ruleRTLFinal || t.state == ruleInitial
|
||||
}
|
||||
|
||||
// Reset implements transform.Transformer.
|
||||
func (t *Transformer) Reset() { *t = Transformer{} }
|
||||
|
||||
// Transform implements transform.Transformer. This Transformer has state and
|
||||
// needs to be reset between uses.
|
||||
func (t *Transformer) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
if len(dst) < len(src) {
|
||||
src = src[:len(dst)]
|
||||
atEOF = false
|
||||
err = transform.ErrShortDst
|
||||
}
|
||||
n, err1 := t.Span(src, atEOF)
|
||||
copy(dst, src[:n])
|
||||
if err == nil || err1 != nil && err1 != transform.ErrShortSrc {
|
||||
err = err1
|
||||
}
|
||||
return n, n, err
|
||||
}
|
||||
|
||||
// Span returns the first n bytes of src that conform to the Bidi rule.
|
||||
func (t *Transformer) Span(src []byte, atEOF bool) (n int, err error) {
|
||||
if t.state == ruleInvalid && t.isRTL() {
|
||||
return 0, ErrInvalid
|
||||
}
|
||||
n, ok := t.advance(src)
|
||||
switch {
|
||||
case !ok:
|
||||
err = ErrInvalid
|
||||
case n < len(src):
|
||||
if !atEOF {
|
||||
err = transform.ErrShortSrc
|
||||
break
|
||||
}
|
||||
err = ErrInvalid
|
||||
case !t.isFinal():
|
||||
err = ErrInvalid
|
||||
}
|
||||
return n, err
|
||||
}
|
||||
|
||||
// Precomputing the ASCII values decreases running time for the ASCII fast path
|
||||
// by about 30%.
|
||||
var asciiTable [128]bidi.Properties
|
||||
|
||||
func init() {
|
||||
for i := range asciiTable {
|
||||
p, _ := bidi.LookupRune(rune(i))
|
||||
asciiTable[i] = p
|
||||
}
|
||||
}
|
||||
|
||||
func (t *Transformer) advance(s []byte) (n int, ok bool) {
|
||||
var e bidi.Properties
|
||||
var sz int
|
||||
for n < len(s) {
|
||||
if s[n] < utf8.RuneSelf {
|
||||
e, sz = asciiTable[s[n]], 1
|
||||
} else {
|
||||
e, sz = bidi.Lookup(s[n:])
|
||||
if sz <= 1 {
|
||||
if sz == 1 {
|
||||
// We always consider invalid UTF-8 to be invalid, even if
|
||||
// the string has not yet been determined to be RTL.
|
||||
// TODO: is this correct?
|
||||
return n, false
|
||||
}
|
||||
return n, true // incomplete UTF-8 encoding
|
||||
}
|
||||
}
|
||||
// TODO: using CompactClass would result in noticeable speedup.
|
||||
// See unicode/bidi/prop.go:Properties.CompactClass.
|
||||
c := uint16(1 << e.Class())
|
||||
t.seen |= c
|
||||
if t.seen&exclusiveRTL == exclusiveRTL {
|
||||
t.state = ruleInvalid
|
||||
return n, false
|
||||
}
|
||||
switch tr := transitions[t.state]; {
|
||||
case tr[0].mask&c != 0:
|
||||
t.state = tr[0].next
|
||||
case tr[1].mask&c != 0:
|
||||
t.state = tr[1].next
|
||||
default:
|
||||
t.state = ruleInvalid
|
||||
if t.isRTL() {
|
||||
return n, false
|
||||
}
|
||||
}
|
||||
n += sz
|
||||
}
|
||||
return n, true
|
||||
}
|
||||
|
||||
func (t *Transformer) advanceString(s string) (n int, ok bool) {
|
||||
var e bidi.Properties
|
||||
var sz int
|
||||
for n < len(s) {
|
||||
if s[n] < utf8.RuneSelf {
|
||||
e, sz = asciiTable[s[n]], 1
|
||||
} else {
|
||||
e, sz = bidi.LookupString(s[n:])
|
||||
if sz <= 1 {
|
||||
if sz == 1 {
|
||||
return n, false // invalid UTF-8
|
||||
}
|
||||
return n, true // incomplete UTF-8 encoding
|
||||
}
|
||||
}
|
||||
// TODO: using CompactClass results in noticeable speedup.
|
||||
// See unicode/bidi/prop.go:Properties.CompactClass.
|
||||
c := uint16(1 << e.Class())
|
||||
t.seen |= c
|
||||
if t.seen&exclusiveRTL == exclusiveRTL {
|
||||
t.state = ruleInvalid
|
||||
return n, false
|
||||
}
|
||||
switch tr := transitions[t.state]; {
|
||||
case tr[0].mask&c != 0:
|
||||
t.state = tr[0].next
|
||||
case tr[1].mask&c != 0:
|
||||
t.state = tr[1].next
|
||||
default:
|
||||
t.state = ruleInvalid
|
||||
if t.isRTL() {
|
||||
return n, false
|
||||
}
|
||||
}
|
||||
n += sz
|
||||
}
|
||||
return n, true
|
||||
}
|
|
@ -0,0 +1,705 @@
|
|||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package transform provides reader and writer wrappers that transform the
|
||||
// bytes passing through as well as various transformations. Example
|
||||
// transformations provided by other packages include normalization and
|
||||
// conversion between character sets.
|
||||
package transform // import "golang.org/x/text/transform"
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"errors"
|
||||
"io"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
var (
|
||||
// ErrShortDst means that the destination buffer was too short to
|
||||
// receive all of the transformed bytes.
|
||||
ErrShortDst = errors.New("transform: short destination buffer")
|
||||
|
||||
// ErrShortSrc means that the source buffer has insufficient data to
|
||||
// complete the transformation.
|
||||
ErrShortSrc = errors.New("transform: short source buffer")
|
||||
|
||||
// ErrEndOfSpan means that the input and output (the transformed input)
|
||||
// are not identical.
|
||||
ErrEndOfSpan = errors.New("transform: input and output are not identical")
|
||||
|
||||
// errInconsistentByteCount means that Transform returned success (nil
|
||||
// error) but also returned nSrc inconsistent with the src argument.
|
||||
errInconsistentByteCount = errors.New("transform: inconsistent byte count returned")
|
||||
|
||||
// errShortInternal means that an internal buffer is not large enough
|
||||
// to make progress and the Transform operation must be aborted.
|
||||
errShortInternal = errors.New("transform: short internal buffer")
|
||||
)
|
||||
|
||||
// Transformer transforms bytes.
|
||||
type Transformer interface {
|
||||
// Transform writes to dst the transformed bytes read from src, and
|
||||
// returns the number of dst bytes written and src bytes read. The
|
||||
// atEOF argument tells whether src represents the last bytes of the
|
||||
// input.
|
||||
//
|
||||
// Callers should always process the nDst bytes produced and account
|
||||
// for the nSrc bytes consumed before considering the error err.
|
||||
//
|
||||
// A nil error means that all of the transformed bytes (whether freshly
|
||||
// transformed from src or left over from previous Transform calls)
|
||||
// were written to dst. A nil error can be returned regardless of
|
||||
// whether atEOF is true. If err is nil then nSrc must equal len(src);
|
||||
// the converse is not necessarily true.
|
||||
//
|
||||
// ErrShortDst means that dst was too short to receive all of the
|
||||
// transformed bytes. ErrShortSrc means that src had insufficient data
|
||||
// to complete the transformation. If both conditions apply, then
|
||||
// either error may be returned. Other than the error conditions listed
|
||||
// here, implementations are free to report other errors that arise.
|
||||
Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error)
|
||||
|
||||
// Reset resets the state and allows a Transformer to be reused.
|
||||
Reset()
|
||||
}
|
||||
|
||||
// SpanningTransformer extends the Transformer interface with a Span method
|
||||
// that determines how much of the input already conforms to the Transformer.
|
||||
type SpanningTransformer interface {
|
||||
Transformer
|
||||
|
||||
// Span returns a position in src such that transforming src[:n] results in
|
||||
// identical output src[:n] for these bytes. It does not necessarily return
|
||||
// the largest such n. The atEOF argument tells whether src represents the
|
||||
// last bytes of the input.
|
||||
//
|
||||
// Callers should always account for the n bytes consumed before
|
||||
// considering the error err.
|
||||
//
|
||||
// A nil error means that all input bytes are known to be identical to the
|
||||
// output produced by the Transformer. A nil error can be be returned
|
||||
// regardless of whether atEOF is true. If err is nil, then then n must
|
||||
// equal len(src); the converse is not necessarily true.
|
||||
//
|
||||
// ErrEndOfSpan means that the Transformer output may differ from the
|
||||
// input after n bytes. Note that n may be len(src), meaning that the output
|
||||
// would contain additional bytes after otherwise identical output.
|
||||
// ErrShortSrc means that src had insufficient data to determine whether the
|
||||
// remaining bytes would change. Other than the error conditions listed
|
||||
// here, implementations are free to report other errors that arise.
|
||||
//
|
||||
// Calling Span can modify the Transformer state as a side effect. In
|
||||
// effect, it does the transformation just as calling Transform would, only
|
||||
// without copying to a destination buffer and only up to a point it can
|
||||
// determine the input and output bytes are the same. This is obviously more
|
||||
// limited than calling Transform, but can be more efficient in terms of
|
||||
// copying and allocating buffers. Calls to Span and Transform may be
|
||||
// interleaved.
|
||||
Span(src []byte, atEOF bool) (n int, err error)
|
||||
}
|
||||
|
||||
// NopResetter can be embedded by implementations of Transformer to add a nop
|
||||
// Reset method.
|
||||
type NopResetter struct{}
|
||||
|
||||
// Reset implements the Reset method of the Transformer interface.
|
||||
func (NopResetter) Reset() {}
|
||||
|
||||
// Reader wraps another io.Reader by transforming the bytes read.
|
||||
type Reader struct {
|
||||
r io.Reader
|
||||
t Transformer
|
||||
err error
|
||||
|
||||
// dst[dst0:dst1] contains bytes that have been transformed by t but
|
||||
// not yet copied out via Read.
|
||||
dst []byte
|
||||
dst0, dst1 int
|
||||
|
||||
// src[src0:src1] contains bytes that have been read from r but not
|
||||
// yet transformed through t.
|
||||
src []byte
|
||||
src0, src1 int
|
||||
|
||||
// transformComplete is whether the transformation is complete,
|
||||
// regardless of whether or not it was successful.
|
||||
transformComplete bool
|
||||
}
|
||||
|
||||
const defaultBufSize = 4096
|
||||
|
||||
// NewReader returns a new Reader that wraps r by transforming the bytes read
|
||||
// via t. It calls Reset on t.
|
||||
func NewReader(r io.Reader, t Transformer) *Reader {
|
||||
t.Reset()
|
||||
return &Reader{
|
||||
r: r,
|
||||
t: t,
|
||||
dst: make([]byte, defaultBufSize),
|
||||
src: make([]byte, defaultBufSize),
|
||||
}
|
||||
}
|
||||
|
||||
// Read implements the io.Reader interface.
|
||||
func (r *Reader) Read(p []byte) (int, error) {
|
||||
n, err := 0, error(nil)
|
||||
for {
|
||||
// Copy out any transformed bytes and return the final error if we are done.
|
||||
if r.dst0 != r.dst1 {
|
||||
n = copy(p, r.dst[r.dst0:r.dst1])
|
||||
r.dst0 += n
|
||||
if r.dst0 == r.dst1 && r.transformComplete {
|
||||
return n, r.err
|
||||
}
|
||||
return n, nil
|
||||
} else if r.transformComplete {
|
||||
return 0, r.err
|
||||
}
|
||||
|
||||
// Try to transform some source bytes, or to flush the transformer if we
|
||||
// are out of source bytes. We do this even if r.r.Read returned an error.
|
||||
// As the io.Reader documentation says, "process the n > 0 bytes returned
|
||||
// before considering the error".
|
||||
if r.src0 != r.src1 || r.err != nil {
|
||||
r.dst0 = 0
|
||||
r.dst1, n, err = r.t.Transform(r.dst, r.src[r.src0:r.src1], r.err == io.EOF)
|
||||
r.src0 += n
|
||||
|
||||
switch {
|
||||
case err == nil:
|
||||
if r.src0 != r.src1 {
|
||||
r.err = errInconsistentByteCount
|
||||
}
|
||||
// The Transform call was successful; we are complete if we
|
||||
// cannot read more bytes into src.
|
||||
r.transformComplete = r.err != nil
|
||||
continue
|
||||
case err == ErrShortDst && (r.dst1 != 0 || n != 0):
|
||||
// Make room in dst by copying out, and try again.
|
||||
continue
|
||||
case err == ErrShortSrc && r.src1-r.src0 != len(r.src) && r.err == nil:
|
||||
// Read more bytes into src via the code below, and try again.
|
||||
default:
|
||||
r.transformComplete = true
|
||||
// The reader error (r.err) takes precedence over the
|
||||
// transformer error (err) unless r.err is nil or io.EOF.
|
||||
if r.err == nil || r.err == io.EOF {
|
||||
r.err = err
|
||||
}
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
// Move any untransformed source bytes to the start of the buffer
|
||||
// and read more bytes.
|
||||
if r.src0 != 0 {
|
||||
r.src0, r.src1 = 0, copy(r.src, r.src[r.src0:r.src1])
|
||||
}
|
||||
n, r.err = r.r.Read(r.src[r.src1:])
|
||||
r.src1 += n
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: implement ReadByte (and ReadRune??).
|
||||
|
||||
// Writer wraps another io.Writer by transforming the bytes read.
|
||||
// The user needs to call Close to flush unwritten bytes that may
|
||||
// be buffered.
|
||||
type Writer struct {
|
||||
w io.Writer
|
||||
t Transformer
|
||||
dst []byte
|
||||
|
||||
// src[:n] contains bytes that have not yet passed through t.
|
||||
src []byte
|
||||
n int
|
||||
}
|
||||
|
||||
// NewWriter returns a new Writer that wraps w by transforming the bytes written
|
||||
// via t. It calls Reset on t.
|
||||
func NewWriter(w io.Writer, t Transformer) *Writer {
|
||||
t.Reset()
|
||||
return &Writer{
|
||||
w: w,
|
||||
t: t,
|
||||
dst: make([]byte, defaultBufSize),
|
||||
src: make([]byte, defaultBufSize),
|
||||
}
|
||||
}
|
||||
|
||||
// Write implements the io.Writer interface. If there are not enough
|
||||
// bytes available to complete a Transform, the bytes will be buffered
|
||||
// for the next write. Call Close to convert the remaining bytes.
|
||||
func (w *Writer) Write(data []byte) (n int, err error) {
|
||||
src := data
|
||||
if w.n > 0 {
|
||||
// Append bytes from data to the last remainder.
|
||||
// TODO: limit the amount copied on first try.
|
||||
n = copy(w.src[w.n:], data)
|
||||
w.n += n
|
||||
src = w.src[:w.n]
|
||||
}
|
||||
for {
|
||||
nDst, nSrc, err := w.t.Transform(w.dst, src, false)
|
||||
if _, werr := w.w.Write(w.dst[:nDst]); werr != nil {
|
||||
return n, werr
|
||||
}
|
||||
src = src[nSrc:]
|
||||
if w.n == 0 {
|
||||
n += nSrc
|
||||
} else if len(src) <= n {
|
||||
// Enough bytes from w.src have been consumed. We make src point
|
||||
// to data instead to reduce the copying.
|
||||
w.n = 0
|
||||
n -= len(src)
|
||||
src = data[n:]
|
||||
if n < len(data) && (err == nil || err == ErrShortSrc) {
|
||||
continue
|
||||
}
|
||||
}
|
||||
switch err {
|
||||
case ErrShortDst:
|
||||
// This error is okay as long as we are making progress.
|
||||
if nDst > 0 || nSrc > 0 {
|
||||
continue
|
||||
}
|
||||
case ErrShortSrc:
|
||||
if len(src) < len(w.src) {
|
||||
m := copy(w.src, src)
|
||||
// If w.n > 0, bytes from data were already copied to w.src and n
|
||||
// was already set to the number of bytes consumed.
|
||||
if w.n == 0 {
|
||||
n += m
|
||||
}
|
||||
w.n = m
|
||||
err = nil
|
||||
} else if nDst > 0 || nSrc > 0 {
|
||||
// Not enough buffer to store the remainder. Keep processing as
|
||||
// long as there is progress. Without this case, transforms that
|
||||
// require a lookahead larger than the buffer may result in an
|
||||
// error. This is not something one may expect to be common in
|
||||
// practice, but it may occur when buffers are set to small
|
||||
// sizes during testing.
|
||||
continue
|
||||
}
|
||||
case nil:
|
||||
if w.n > 0 {
|
||||
err = errInconsistentByteCount
|
||||
}
|
||||
}
|
||||
return n, err
|
||||
}
|
||||
}
|
||||
|
||||
// Close implements the io.Closer interface.
|
||||
func (w *Writer) Close() error {
|
||||
src := w.src[:w.n]
|
||||
for {
|
||||
nDst, nSrc, err := w.t.Transform(w.dst, src, true)
|
||||
if _, werr := w.w.Write(w.dst[:nDst]); werr != nil {
|
||||
return werr
|
||||
}
|
||||
if err != ErrShortDst {
|
||||
return err
|
||||
}
|
||||
src = src[nSrc:]
|
||||
}
|
||||
}
|
||||
|
||||
type nop struct{ NopResetter }
|
||||
|
||||
func (nop) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
n := copy(dst, src)
|
||||
if n < len(src) {
|
||||
err = ErrShortDst
|
||||
}
|
||||
return n, n, err
|
||||
}
|
||||
|
||||
func (nop) Span(src []byte, atEOF bool) (n int, err error) {
|
||||
return len(src), nil
|
||||
}
|
||||
|
||||
type discard struct{ NopResetter }
|
||||
|
||||
func (discard) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
return 0, len(src), nil
|
||||
}
|
||||
|
||||
var (
|
||||
// Discard is a Transformer for which all Transform calls succeed
|
||||
// by consuming all bytes and writing nothing.
|
||||
Discard Transformer = discard{}
|
||||
|
||||
// Nop is a SpanningTransformer that copies src to dst.
|
||||
Nop SpanningTransformer = nop{}
|
||||
)
|
||||
|
||||
// chain is a sequence of links. A chain with N Transformers has N+1 links and
|
||||
// N+1 buffers. Of those N+1 buffers, the first and last are the src and dst
|
||||
// buffers given to chain.Transform and the middle N-1 buffers are intermediate
|
||||
// buffers owned by the chain. The i'th link transforms bytes from the i'th
|
||||
// buffer chain.link[i].b at read offset chain.link[i].p to the i+1'th buffer
|
||||
// chain.link[i+1].b at write offset chain.link[i+1].n, for i in [0, N).
|
||||
type chain struct {
|
||||
link []link
|
||||
err error
|
||||
// errStart is the index at which the error occurred plus 1. Processing
|
||||
// errStart at this level at the next call to Transform. As long as
|
||||
// errStart > 0, chain will not consume any more source bytes.
|
||||
errStart int
|
||||
}
|
||||
|
||||
func (c *chain) fatalError(errIndex int, err error) {
|
||||
if i := errIndex + 1; i > c.errStart {
|
||||
c.errStart = i
|
||||
c.err = err
|
||||
}
|
||||
}
|
||||
|
||||
type link struct {
|
||||
t Transformer
|
||||
// b[p:n] holds the bytes to be transformed by t.
|
||||
b []byte
|
||||
p int
|
||||
n int
|
||||
}
|
||||
|
||||
func (l *link) src() []byte {
|
||||
return l.b[l.p:l.n]
|
||||
}
|
||||
|
||||
func (l *link) dst() []byte {
|
||||
return l.b[l.n:]
|
||||
}
|
||||
|
||||
// Chain returns a Transformer that applies t in sequence.
|
||||
func Chain(t ...Transformer) Transformer {
|
||||
if len(t) == 0 {
|
||||
return nop{}
|
||||
}
|
||||
c := &chain{link: make([]link, len(t)+1)}
|
||||
for i, tt := range t {
|
||||
c.link[i].t = tt
|
||||
}
|
||||
// Allocate intermediate buffers.
|
||||
b := make([][defaultBufSize]byte, len(t)-1)
|
||||
for i := range b {
|
||||
c.link[i+1].b = b[i][:]
|
||||
}
|
||||
return c
|
||||
}
|
||||
|
||||
// Reset resets the state of Chain. It calls Reset on all the Transformers.
|
||||
func (c *chain) Reset() {
|
||||
for i, l := range c.link {
|
||||
if l.t != nil {
|
||||
l.t.Reset()
|
||||
}
|
||||
c.link[i].p, c.link[i].n = 0, 0
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: make chain use Span (is going to be fun to implement!)
|
||||
|
||||
// Transform applies the transformers of c in sequence.
|
||||
func (c *chain) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
// Set up src and dst in the chain.
|
||||
srcL := &c.link[0]
|
||||
dstL := &c.link[len(c.link)-1]
|
||||
srcL.b, srcL.p, srcL.n = src, 0, len(src)
|
||||
dstL.b, dstL.n = dst, 0
|
||||
var lastFull, needProgress bool // for detecting progress
|
||||
|
||||
// i is the index of the next Transformer to apply, for i in [low, high].
|
||||
// low is the lowest index for which c.link[low] may still produce bytes.
|
||||
// high is the highest index for which c.link[high] has a Transformer.
|
||||
// The error returned by Transform determines whether to increase or
|
||||
// decrease i. We try to completely fill a buffer before converting it.
|
||||
for low, i, high := c.errStart, c.errStart, len(c.link)-2; low <= i && i <= high; {
|
||||
in, out := &c.link[i], &c.link[i+1]
|
||||
nDst, nSrc, err0 := in.t.Transform(out.dst(), in.src(), atEOF && low == i)
|
||||
out.n += nDst
|
||||
in.p += nSrc
|
||||
if i > 0 && in.p == in.n {
|
||||
in.p, in.n = 0, 0
|
||||
}
|
||||
needProgress, lastFull = lastFull, false
|
||||
switch err0 {
|
||||
case ErrShortDst:
|
||||
// Process the destination buffer next. Return if we are already
|
||||
// at the high index.
|
||||
if i == high {
|
||||
return dstL.n, srcL.p, ErrShortDst
|
||||
}
|
||||
if out.n != 0 {
|
||||
i++
|
||||
// If the Transformer at the next index is not able to process any
|
||||
// source bytes there is nothing that can be done to make progress
|
||||
// and the bytes will remain unprocessed. lastFull is used to
|
||||
// detect this and break out of the loop with a fatal error.
|
||||
lastFull = true
|
||||
continue
|
||||
}
|
||||
// The destination buffer was too small, but is completely empty.
|
||||
// Return a fatal error as this transformation can never complete.
|
||||
c.fatalError(i, errShortInternal)
|
||||
case ErrShortSrc:
|
||||
if i == 0 {
|
||||
// Save ErrShortSrc in err. All other errors take precedence.
|
||||
err = ErrShortSrc
|
||||
break
|
||||
}
|
||||
// Source bytes were depleted before filling up the destination buffer.
|
||||
// Verify we made some progress, move the remaining bytes to the errStart
|
||||
// and try to get more source bytes.
|
||||
if needProgress && nSrc == 0 || in.n-in.p == len(in.b) {
|
||||
// There were not enough source bytes to proceed while the source
|
||||
// buffer cannot hold any more bytes. Return a fatal error as this
|
||||
// transformation can never complete.
|
||||
c.fatalError(i, errShortInternal)
|
||||
break
|
||||
}
|
||||
// in.b is an internal buffer and we can make progress.
|
||||
in.p, in.n = 0, copy(in.b, in.src())
|
||||
fallthrough
|
||||
case nil:
|
||||
// if i == low, we have depleted the bytes at index i or any lower levels.
|
||||
// In that case we increase low and i. In all other cases we decrease i to
|
||||
// fetch more bytes before proceeding to the next index.
|
||||
if i > low {
|
||||
i--
|
||||
continue
|
||||
}
|
||||
default:
|
||||
c.fatalError(i, err0)
|
||||
}
|
||||
// Exhausted level low or fatal error: increase low and continue
|
||||
// to process the bytes accepted so far.
|
||||
i++
|
||||
low = i
|
||||
}
|
||||
|
||||
// If c.errStart > 0, this means we found a fatal error. We will clear
|
||||
// all upstream buffers. At this point, no more progress can be made
|
||||
// downstream, as Transform would have bailed while handling ErrShortDst.
|
||||
if c.errStart > 0 {
|
||||
for i := 1; i < c.errStart; i++ {
|
||||
c.link[i].p, c.link[i].n = 0, 0
|
||||
}
|
||||
err, c.errStart, c.err = c.err, 0, nil
|
||||
}
|
||||
return dstL.n, srcL.p, err
|
||||
}
|
||||
|
||||
// Deprecated: use runes.Remove instead.
|
||||
func RemoveFunc(f func(r rune) bool) Transformer {
|
||||
return removeF(f)
|
||||
}
|
||||
|
||||
type removeF func(r rune) bool
|
||||
|
||||
func (removeF) Reset() {}
|
||||
|
||||
// Transform implements the Transformer interface.
|
||||
func (t removeF) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
for r, sz := rune(0), 0; len(src) > 0; src = src[sz:] {
|
||||
|
||||
if r = rune(src[0]); r < utf8.RuneSelf {
|
||||
sz = 1
|
||||
} else {
|
||||
r, sz = utf8.DecodeRune(src)
|
||||
|
||||
if sz == 1 {
|
||||
// Invalid rune.
|
||||
if !atEOF && !utf8.FullRune(src) {
|
||||
err = ErrShortSrc
|
||||
break
|
||||
}
|
||||
// We replace illegal bytes with RuneError. Not doing so might
|
||||
// otherwise turn a sequence of invalid UTF-8 into valid UTF-8.
|
||||
// The resulting byte sequence may subsequently contain runes
|
||||
// for which t(r) is true that were passed unnoticed.
|
||||
if !t(r) {
|
||||
if nDst+3 > len(dst) {
|
||||
err = ErrShortDst
|
||||
break
|
||||
}
|
||||
nDst += copy(dst[nDst:], "\uFFFD")
|
||||
}
|
||||
nSrc++
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
if !t(r) {
|
||||
if nDst+sz > len(dst) {
|
||||
err = ErrShortDst
|
||||
break
|
||||
}
|
||||
nDst += copy(dst[nDst:], src[:sz])
|
||||
}
|
||||
nSrc += sz
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// grow returns a new []byte that is longer than b, and copies the first n bytes
|
||||
// of b to the start of the new slice.
|
||||
func grow(b []byte, n int) []byte {
|
||||
m := len(b)
|
||||
if m <= 32 {
|
||||
m = 64
|
||||
} else if m <= 256 {
|
||||
m *= 2
|
||||
} else {
|
||||
m += m >> 1
|
||||
}
|
||||
buf := make([]byte, m)
|
||||
copy(buf, b[:n])
|
||||
return buf
|
||||
}
|
||||
|
||||
const initialBufSize = 128
|
||||
|
||||
// String returns a string with the result of converting s[:n] using t, where
|
||||
// n <= len(s). If err == nil, n will be len(s). It calls Reset on t.
|
||||
func String(t Transformer, s string) (result string, n int, err error) {
|
||||
t.Reset()
|
||||
if s == "" {
|
||||
// Fast path for the common case for empty input. Results in about a
|
||||
// 86% reduction of running time for BenchmarkStringLowerEmpty.
|
||||
if _, _, err := t.Transform(nil, nil, true); err == nil {
|
||||
return "", 0, nil
|
||||
}
|
||||
}
|
||||
|
||||
// Allocate only once. Note that both dst and src escape when passed to
|
||||
// Transform.
|
||||
buf := [2 * initialBufSize]byte{}
|
||||
dst := buf[:initialBufSize:initialBufSize]
|
||||
src := buf[initialBufSize : 2*initialBufSize]
|
||||
|
||||
// The input string s is transformed in multiple chunks (starting with a
|
||||
// chunk size of initialBufSize). nDst and nSrc are per-chunk (or
|
||||
// per-Transform-call) indexes, pDst and pSrc are overall indexes.
|
||||
nDst, nSrc := 0, 0
|
||||
pDst, pSrc := 0, 0
|
||||
|
||||
// pPrefix is the length of a common prefix: the first pPrefix bytes of the
|
||||
// result will equal the first pPrefix bytes of s. It is not guaranteed to
|
||||
// be the largest such value, but if pPrefix, len(result) and len(s) are
|
||||
// all equal after the final transform (i.e. calling Transform with atEOF
|
||||
// being true returned nil error) then we don't need to allocate a new
|
||||
// result string.
|
||||
pPrefix := 0
|
||||
for {
|
||||
// Invariant: pDst == pPrefix && pSrc == pPrefix.
|
||||
|
||||
n := copy(src, s[pSrc:])
|
||||
nDst, nSrc, err = t.Transform(dst, src[:n], pSrc+n == len(s))
|
||||
pDst += nDst
|
||||
pSrc += nSrc
|
||||
|
||||
// TODO: let transformers implement an optional Spanner interface, akin
|
||||
// to norm's QuickSpan. This would even allow us to avoid any allocation.
|
||||
if !bytes.Equal(dst[:nDst], src[:nSrc]) {
|
||||
break
|
||||
}
|
||||
pPrefix = pSrc
|
||||
if err == ErrShortDst {
|
||||
// A buffer can only be short if a transformer modifies its input.
|
||||
break
|
||||
} else if err == ErrShortSrc {
|
||||
if nSrc == 0 {
|
||||
// No progress was made.
|
||||
break
|
||||
}
|
||||
// Equal so far and !atEOF, so continue checking.
|
||||
} else if err != nil || pPrefix == len(s) {
|
||||
return string(s[:pPrefix]), pPrefix, err
|
||||
}
|
||||
}
|
||||
// Post-condition: pDst == pPrefix + nDst && pSrc == pPrefix + nSrc.
|
||||
|
||||
// We have transformed the first pSrc bytes of the input s to become pDst
|
||||
// transformed bytes. Those transformed bytes are discontiguous: the first
|
||||
// pPrefix of them equal s[:pPrefix] and the last nDst of them equal
|
||||
// dst[:nDst]. We copy them around, into a new dst buffer if necessary, so
|
||||
// that they become one contiguous slice: dst[:pDst].
|
||||
if pPrefix != 0 {
|
||||
newDst := dst
|
||||
if pDst > len(newDst) {
|
||||
newDst = make([]byte, len(s)+nDst-nSrc)
|
||||
}
|
||||
copy(newDst[pPrefix:pDst], dst[:nDst])
|
||||
copy(newDst[:pPrefix], s[:pPrefix])
|
||||
dst = newDst
|
||||
}
|
||||
|
||||
// Prevent duplicate Transform calls with atEOF being true at the end of
|
||||
// the input. Also return if we have an unrecoverable error.
|
||||
if (err == nil && pSrc == len(s)) ||
|
||||
(err != nil && err != ErrShortDst && err != ErrShortSrc) {
|
||||
return string(dst[:pDst]), pSrc, err
|
||||
}
|
||||
|
||||
// Transform the remaining input, growing dst and src buffers as necessary.
|
||||
for {
|
||||
n := copy(src, s[pSrc:])
|
||||
nDst, nSrc, err := t.Transform(dst[pDst:], src[:n], pSrc+n == len(s))
|
||||
pDst += nDst
|
||||
pSrc += nSrc
|
||||
|
||||
// If we got ErrShortDst or ErrShortSrc, do not grow as long as we can
|
||||
// make progress. This may avoid excessive allocations.
|
||||
if err == ErrShortDst {
|
||||
if nDst == 0 {
|
||||
dst = grow(dst, pDst)
|
||||
}
|
||||
} else if err == ErrShortSrc {
|
||||
if nSrc == 0 {
|
||||
src = grow(src, 0)
|
||||
}
|
||||
} else if err != nil || pSrc == len(s) {
|
||||
return string(dst[:pDst]), pSrc, err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Bytes returns a new byte slice with the result of converting b[:n] using t,
|
||||
// where n <= len(b). If err == nil, n will be len(b). It calls Reset on t.
|
||||
func Bytes(t Transformer, b []byte) (result []byte, n int, err error) {
|
||||
return doAppend(t, 0, make([]byte, len(b)), b)
|
||||
}
|
||||
|
||||
// Append appends the result of converting src[:n] using t to dst, where
|
||||
// n <= len(src), If err == nil, n will be len(src). It calls Reset on t.
|
||||
func Append(t Transformer, dst, src []byte) (result []byte, n int, err error) {
|
||||
if len(dst) == cap(dst) {
|
||||
n := len(src) + len(dst) // It is okay for this to be 0.
|
||||
b := make([]byte, n)
|
||||
dst = b[:copy(b, dst)]
|
||||
}
|
||||
return doAppend(t, len(dst), dst[:cap(dst)], src)
|
||||
}
|
||||
|
||||
func doAppend(t Transformer, pDst int, dst, src []byte) (result []byte, n int, err error) {
|
||||
t.Reset()
|
||||
pSrc := 0
|
||||
for {
|
||||
nDst, nSrc, err := t.Transform(dst[pDst:], src[pSrc:], true)
|
||||
pDst += nDst
|
||||
pSrc += nSrc
|
||||
if err != ErrShortDst {
|
||||
return dst[:pDst], pSrc, err
|
||||
}
|
||||
|
||||
// Grow the destination buffer, but do not grow as long as we can make
|
||||
// progress. This may avoid excessive allocations.
|
||||
if nDst == 0 {
|
||||
dst = grow(dst, pDst)
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,198 @@
|
|||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:generate go run gen.go gen_trieval.go gen_ranges.go
|
||||
|
||||
// Package bidi contains functionality for bidirectional text support.
|
||||
//
|
||||
// See http://www.unicode.org/reports/tr9.
|
||||
//
|
||||
// NOTE: UNDER CONSTRUCTION. This API may change in backwards incompatible ways
|
||||
// and without notice.
|
||||
package bidi // import "golang.org/x/text/unicode/bidi"
|
||||
|
||||
// TODO:
|
||||
// The following functionality would not be hard to implement, but hinges on
|
||||
// the definition of a Segmenter interface. For now this is up to the user.
|
||||
// - Iterate over paragraphs
|
||||
// - Segmenter to iterate over runs directly from a given text.
|
||||
// Also:
|
||||
// - Transformer for reordering?
|
||||
// - Transformer (validator, really) for Bidi Rule.
|
||||
|
||||
// This API tries to avoid dealing with embedding levels for now. Under the hood
|
||||
// these will be computed, but the question is to which extent the user should
|
||||
// know they exist. We should at some point allow the user to specify an
|
||||
// embedding hierarchy, though.
|
||||
|
||||
// A Direction indicates the overall flow of text.
|
||||
type Direction int
|
||||
|
||||
const (
|
||||
// LeftToRight indicates the text contains no right-to-left characters and
|
||||
// that either there are some left-to-right characters or the option
|
||||
// DefaultDirection(LeftToRight) was passed.
|
||||
LeftToRight Direction = iota
|
||||
|
||||
// RightToLeft indicates the text contains no left-to-right characters and
|
||||
// that either there are some right-to-left characters or the option
|
||||
// DefaultDirection(RightToLeft) was passed.
|
||||
RightToLeft
|
||||
|
||||
// Mixed indicates text contains both left-to-right and right-to-left
|
||||
// characters.
|
||||
Mixed
|
||||
|
||||
// Neutral means that text contains no left-to-right and right-to-left
|
||||
// characters and that no default direction has been set.
|
||||
Neutral
|
||||
)
|
||||
|
||||
type options struct{}
|
||||
|
||||
// An Option is an option for Bidi processing.
|
||||
type Option func(*options)
|
||||
|
||||
// ICU allows the user to define embedding levels. This may be used, for example,
|
||||
// to use hierarchical structure of markup languages to define embeddings.
|
||||
// The following option may be a way to expose this functionality in this API.
|
||||
// // LevelFunc sets a function that associates nesting levels with the given text.
|
||||
// // The levels function will be called with monotonically increasing values for p.
|
||||
// func LevelFunc(levels func(p int) int) Option {
|
||||
// panic("unimplemented")
|
||||
// }
|
||||
|
||||
// DefaultDirection sets the default direction for a Paragraph. The direction is
|
||||
// overridden if the text contains directional characters.
|
||||
func DefaultDirection(d Direction) Option {
|
||||
panic("unimplemented")
|
||||
}
|
||||
|
||||
// A Paragraph holds a single Paragraph for Bidi processing.
|
||||
type Paragraph struct {
|
||||
// buffers
|
||||
}
|
||||
|
||||
// SetBytes configures p for the given paragraph text. It replaces text
|
||||
// previously set by SetBytes or SetString. If b contains a paragraph separator
|
||||
// it will only process the first paragraph and report the number of bytes
|
||||
// consumed from b including this separator. Error may be non-nil if options are
|
||||
// given.
|
||||
func (p *Paragraph) SetBytes(b []byte, opts ...Option) (n int, err error) {
|
||||
panic("unimplemented")
|
||||
}
|
||||
|
||||
// SetString configures p for the given paragraph text. It replaces text
|
||||
// previously set by SetBytes or SetString. If b contains a paragraph separator
|
||||
// it will only process the first paragraph and report the number of bytes
|
||||
// consumed from b including this separator. Error may be non-nil if options are
|
||||
// given.
|
||||
func (p *Paragraph) SetString(s string, opts ...Option) (n int, err error) {
|
||||
panic("unimplemented")
|
||||
}
|
||||
|
||||
// IsLeftToRight reports whether the principle direction of rendering for this
|
||||
// paragraphs is left-to-right. If this returns false, the principle direction
|
||||
// of rendering is right-to-left.
|
||||
func (p *Paragraph) IsLeftToRight() bool {
|
||||
panic("unimplemented")
|
||||
}
|
||||
|
||||
// Direction returns the direction of the text of this paragraph.
|
||||
//
|
||||
// The direction may be LeftToRight, RightToLeft, Mixed, or Neutral.
|
||||
func (p *Paragraph) Direction() Direction {
|
||||
panic("unimplemented")
|
||||
}
|
||||
|
||||
// RunAt reports the Run at the given position of the input text.
|
||||
//
|
||||
// This method can be used for computing line breaks on paragraphs.
|
||||
func (p *Paragraph) RunAt(pos int) Run {
|
||||
panic("unimplemented")
|
||||
}
|
||||
|
||||
// Order computes the visual ordering of all the runs in a Paragraph.
|
||||
func (p *Paragraph) Order() (Ordering, error) {
|
||||
panic("unimplemented")
|
||||
}
|
||||
|
||||
// Line computes the visual ordering of runs for a single line starting and
|
||||
// ending at the given positions in the original text.
|
||||
func (p *Paragraph) Line(start, end int) (Ordering, error) {
|
||||
panic("unimplemented")
|
||||
}
|
||||
|
||||
// An Ordering holds the computed visual order of runs of a Paragraph. Calling
|
||||
// SetBytes or SetString on the originating Paragraph invalidates an Ordering.
|
||||
// The methods of an Ordering should only be called by one goroutine at a time.
|
||||
type Ordering struct{}
|
||||
|
||||
// Direction reports the directionality of the runs.
|
||||
//
|
||||
// The direction may be LeftToRight, RightToLeft, Mixed, or Neutral.
|
||||
func (o *Ordering) Direction() Direction {
|
||||
panic("unimplemented")
|
||||
}
|
||||
|
||||
// NumRuns returns the number of runs.
|
||||
func (o *Ordering) NumRuns() int {
|
||||
panic("unimplemented")
|
||||
}
|
||||
|
||||
// Run returns the ith run within the ordering.
|
||||
func (o *Ordering) Run(i int) Run {
|
||||
panic("unimplemented")
|
||||
}
|
||||
|
||||
// TODO: perhaps with options.
|
||||
// // Reorder creates a reader that reads the runes in visual order per character.
|
||||
// // Modifiers remain after the runes they modify.
|
||||
// func (l *Runs) Reorder() io.Reader {
|
||||
// panic("unimplemented")
|
||||
// }
|
||||
|
||||
// A Run is a continuous sequence of characters of a single direction.
|
||||
type Run struct {
|
||||
}
|
||||
|
||||
// String returns the text of the run in its original order.
|
||||
func (r *Run) String() string {
|
||||
panic("unimplemented")
|
||||
}
|
||||
|
||||
// Bytes returns the text of the run in its original order.
|
||||
func (r *Run) Bytes() []byte {
|
||||
panic("unimplemented")
|
||||
}
|
||||
|
||||
// TODO: methods for
|
||||
// - Display order
|
||||
// - headers and footers
|
||||
// - bracket replacement.
|
||||
|
||||
// Direction reports the direction of the run.
|
||||
func (r *Run) Direction() Direction {
|
||||
panic("unimplemented")
|
||||
}
|
||||
|
||||
// Position of the Run within the text passed to SetBytes or SetString of the
|
||||
// originating Paragraph value.
|
||||
func (r *Run) Pos() (start, end int) {
|
||||
panic("unimplemented")
|
||||
}
|
||||
|
||||
// AppendReverse reverses the order of characters of in, appends them to out,
|
||||
// and returns the result. Modifiers will still follow the runes they modify.
|
||||
// Brackets are replaced with their counterparts.
|
||||
func AppendReverse(out, in []byte) []byte {
|
||||
panic("unimplemented")
|
||||
}
|
||||
|
||||
// ReverseString reverses the order of characters in s and returns a new string.
|
||||
// Modifiers will still follow the runes they modify. Brackets are replaced with
|
||||
// their counterparts.
|
||||
func ReverseString(s string) string {
|
||||
panic("unimplemented")
|
||||
}
|
|
@ -0,0 +1,335 @@
|
|||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package bidi
|
||||
|
||||
import (
|
||||
"container/list"
|
||||
"fmt"
|
||||
"sort"
|
||||
)
|
||||
|
||||
// This file contains a port of the reference implementation of the
|
||||
// Bidi Parentheses Algorithm:
|
||||
// http://www.unicode.org/Public/PROGRAMS/BidiReferenceJava/BidiPBAReference.java
|
||||
//
|
||||
// The implementation in this file covers definitions BD14-BD16 and rule N0
|
||||
// of UAX#9.
|
||||
//
|
||||
// Some preprocessing is done for each rune before data is passed to this
|
||||
// algorithm:
|
||||
// - opening and closing brackets are identified
|
||||
// - a bracket pair type, like '(' and ')' is assigned a unique identifier that
|
||||
// is identical for the opening and closing bracket. It is left to do these
|
||||
// mappings.
|
||||
// - The BPA algorithm requires that bracket characters that are canonical
|
||||
// equivalents of each other be able to be substituted for each other.
|
||||
// It is the responsibility of the caller to do this canonicalization.
|
||||
//
|
||||
// In implementing BD16, this implementation departs slightly from the "logical"
|
||||
// algorithm defined in UAX#9. In particular, the stack referenced there
|
||||
// supports operations that go beyond a "basic" stack. An equivalent
|
||||
// implementation based on a linked list is used here.
|
||||
|
||||
// Bidi_Paired_Bracket_Type
|
||||
// BD14. An opening paired bracket is a character whose
|
||||
// Bidi_Paired_Bracket_Type property value is Open.
|
||||
//
|
||||
// BD15. A closing paired bracket is a character whose
|
||||
// Bidi_Paired_Bracket_Type property value is Close.
|
||||
type bracketType byte
|
||||
|
||||
const (
|
||||
bpNone bracketType = iota
|
||||
bpOpen
|
||||
bpClose
|
||||
)
|
||||
|
||||
// bracketPair holds a pair of index values for opening and closing bracket
|
||||
// location of a bracket pair.
|
||||
type bracketPair struct {
|
||||
opener int
|
||||
closer int
|
||||
}
|
||||
|
||||
func (b *bracketPair) String() string {
|
||||
return fmt.Sprintf("(%v, %v)", b.opener, b.closer)
|
||||
}
|
||||
|
||||
// bracketPairs is a slice of bracketPairs with a sort.Interface implementation.
|
||||
type bracketPairs []bracketPair
|
||||
|
||||
func (b bracketPairs) Len() int { return len(b) }
|
||||
func (b bracketPairs) Swap(i, j int) { b[i], b[j] = b[j], b[i] }
|
||||
func (b bracketPairs) Less(i, j int) bool { return b[i].opener < b[j].opener }
|
||||
|
||||
// resolvePairedBrackets runs the paired bracket part of the UBA algorithm.
|
||||
//
|
||||
// For each rune, it takes the indexes into the original string, the class the
|
||||
// bracket type (in pairTypes) and the bracket identifier (pairValues). It also
|
||||
// takes the direction type for the start-of-sentence and the embedding level.
|
||||
//
|
||||
// The identifiers for bracket types are the rune of the canonicalized opening
|
||||
// bracket for brackets (open or close) or 0 for runes that are not brackets.
|
||||
func resolvePairedBrackets(s *isolatingRunSequence) {
|
||||
p := bracketPairer{
|
||||
sos: s.sos,
|
||||
openers: list.New(),
|
||||
codesIsolatedRun: s.types,
|
||||
indexes: s.indexes,
|
||||
}
|
||||
dirEmbed := L
|
||||
if s.level&1 != 0 {
|
||||
dirEmbed = R
|
||||
}
|
||||
p.locateBrackets(s.p.pairTypes, s.p.pairValues)
|
||||
p.resolveBrackets(dirEmbed, s.p.initialTypes)
|
||||
}
|
||||
|
||||
type bracketPairer struct {
|
||||
sos Class // direction corresponding to start of sequence
|
||||
|
||||
// The following is a restatement of BD 16 using non-algorithmic language.
|
||||
//
|
||||
// A bracket pair is a pair of characters consisting of an opening
|
||||
// paired bracket and a closing paired bracket such that the
|
||||
// Bidi_Paired_Bracket property value of the former equals the latter,
|
||||
// subject to the following constraints.
|
||||
// - both characters of a pair occur in the same isolating run sequence
|
||||
// - the closing character of a pair follows the opening character
|
||||
// - any bracket character can belong at most to one pair, the earliest possible one
|
||||
// - any bracket character not part of a pair is treated like an ordinary character
|
||||
// - pairs may nest properly, but their spans may not overlap otherwise
|
||||
|
||||
// Bracket characters with canonical decompositions are supposed to be
|
||||
// treated as if they had been normalized, to allow normalized and non-
|
||||
// normalized text to give the same result. In this implementation that step
|
||||
// is pushed out to the caller. The caller has to ensure that the pairValue
|
||||
// slices contain the rune of the opening bracket after normalization for
|
||||
// any opening or closing bracket.
|
||||
|
||||
openers *list.List // list of positions for opening brackets
|
||||
|
||||
// bracket pair positions sorted by location of opening bracket
|
||||
pairPositions bracketPairs
|
||||
|
||||
codesIsolatedRun []Class // directional bidi codes for an isolated run
|
||||
indexes []int // array of index values into the original string
|
||||
|
||||
}
|
||||
|
||||
// matchOpener reports whether characters at given positions form a matching
|
||||
// bracket pair.
|
||||
func (p *bracketPairer) matchOpener(pairValues []rune, opener, closer int) bool {
|
||||
return pairValues[p.indexes[opener]] == pairValues[p.indexes[closer]]
|
||||
}
|
||||
|
||||
const maxPairingDepth = 63
|
||||
|
||||
// locateBrackets locates matching bracket pairs according to BD16.
|
||||
//
|
||||
// This implementation uses a linked list instead of a stack, because, while
|
||||
// elements are added at the front (like a push) they are not generally removed
|
||||
// in atomic 'pop' operations, reducing the benefit of the stack archetype.
|
||||
func (p *bracketPairer) locateBrackets(pairTypes []bracketType, pairValues []rune) {
|
||||
// traverse the run
|
||||
// do that explicitly (not in a for-each) so we can record position
|
||||
for i, index := range p.indexes {
|
||||
|
||||
// look at the bracket type for each character
|
||||
if pairTypes[index] == bpNone || p.codesIsolatedRun[i] != ON {
|
||||
// continue scanning
|
||||
continue
|
||||
}
|
||||
switch pairTypes[index] {
|
||||
case bpOpen:
|
||||
// check if maximum pairing depth reached
|
||||
if p.openers.Len() == maxPairingDepth {
|
||||
p.openers.Init()
|
||||
return
|
||||
}
|
||||
// remember opener location, most recent first
|
||||
p.openers.PushFront(i)
|
||||
|
||||
case bpClose:
|
||||
// see if there is a match
|
||||
count := 0
|
||||
for elem := p.openers.Front(); elem != nil; elem = elem.Next() {
|
||||
count++
|
||||
opener := elem.Value.(int)
|
||||
if p.matchOpener(pairValues, opener, i) {
|
||||
// if the opener matches, add nested pair to the ordered list
|
||||
p.pairPositions = append(p.pairPositions, bracketPair{opener, i})
|
||||
// remove up to and including matched opener
|
||||
for ; count > 0; count-- {
|
||||
p.openers.Remove(p.openers.Front())
|
||||
}
|
||||
break
|
||||
}
|
||||
}
|
||||
sort.Sort(p.pairPositions)
|
||||
// if we get here, the closing bracket matched no openers
|
||||
// and gets ignored
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Bracket pairs within an isolating run sequence are processed as units so
|
||||
// that both the opening and the closing paired bracket in a pair resolve to
|
||||
// the same direction.
|
||||
//
|
||||
// N0. Process bracket pairs in an isolating run sequence sequentially in
|
||||
// the logical order of the text positions of the opening paired brackets
|
||||
// using the logic given below. Within this scope, bidirectional types EN
|
||||
// and AN are treated as R.
|
||||
//
|
||||
// Identify the bracket pairs in the current isolating run sequence
|
||||
// according to BD16. For each bracket-pair element in the list of pairs of
|
||||
// text positions:
|
||||
//
|
||||
// a Inspect the bidirectional types of the characters enclosed within the
|
||||
// bracket pair.
|
||||
//
|
||||
// b If any strong type (either L or R) matching the embedding direction is
|
||||
// found, set the type for both brackets in the pair to match the embedding
|
||||
// direction.
|
||||
//
|
||||
// o [ e ] o -> o e e e o
|
||||
//
|
||||
// o [ o e ] -> o e o e e
|
||||
//
|
||||
// o [ NI e ] -> o e NI e e
|
||||
//
|
||||
// c Otherwise, if a strong type (opposite the embedding direction) is
|
||||
// found, test for adjacent strong types as follows: 1 First, check
|
||||
// backwards before the opening paired bracket until the first strong type
|
||||
// (L, R, or sos) is found. If that first preceding strong type is opposite
|
||||
// the embedding direction, then set the type for both brackets in the pair
|
||||
// to that type. 2 Otherwise, set the type for both brackets in the pair to
|
||||
// the embedding direction.
|
||||
//
|
||||
// o [ o ] e -> o o o o e
|
||||
//
|
||||
// o [ o NI ] o -> o o o NI o o
|
||||
//
|
||||
// e [ o ] o -> e e o e o
|
||||
//
|
||||
// e [ o ] e -> e e o e e
|
||||
//
|
||||
// e ( o [ o ] NI ) e -> e e o o o o NI e e
|
||||
//
|
||||
// d Otherwise, do not set the type for the current bracket pair. Note that
|
||||
// if the enclosed text contains no strong types the paired brackets will
|
||||
// both resolve to the same level when resolved individually using rules N1
|
||||
// and N2.
|
||||
//
|
||||
// e ( NI ) o -> e ( NI ) o
|
||||
|
||||
// getStrongTypeN0 maps character's directional code to strong type as required
|
||||
// by rule N0.
|
||||
//
|
||||
// TODO: have separate type for "strong" directionality.
|
||||
func (p *bracketPairer) getStrongTypeN0(index int) Class {
|
||||
switch p.codesIsolatedRun[index] {
|
||||
// in the scope of N0, number types are treated as R
|
||||
case EN, AN, AL, R:
|
||||
return R
|
||||
case L:
|
||||
return L
|
||||
default:
|
||||
return ON
|
||||
}
|
||||
}
|
||||
|
||||
// classifyPairContent reports the strong types contained inside a Bracket Pair,
|
||||
// assuming the given embedding direction.
|
||||
//
|
||||
// It returns ON if no strong type is found. If a single strong type is found,
|
||||
// it returns this this type. Otherwise it returns the embedding direction.
|
||||
//
|
||||
// TODO: use separate type for "strong" directionality.
|
||||
func (p *bracketPairer) classifyPairContent(loc bracketPair, dirEmbed Class) Class {
|
||||
dirOpposite := ON
|
||||
for i := loc.opener + 1; i < loc.closer; i++ {
|
||||
dir := p.getStrongTypeN0(i)
|
||||
if dir == ON {
|
||||
continue
|
||||
}
|
||||
if dir == dirEmbed {
|
||||
return dir // type matching embedding direction found
|
||||
}
|
||||
dirOpposite = dir
|
||||
}
|
||||
// return ON if no strong type found, or class opposite to dirEmbed
|
||||
return dirOpposite
|
||||
}
|
||||
|
||||
// classBeforePair determines which strong types are present before a Bracket
|
||||
// Pair. Return R or L if strong type found, otherwise ON.
|
||||
func (p *bracketPairer) classBeforePair(loc bracketPair) Class {
|
||||
for i := loc.opener - 1; i >= 0; i-- {
|
||||
if dir := p.getStrongTypeN0(i); dir != ON {
|
||||
return dir
|
||||
}
|
||||
}
|
||||
// no strong types found, return sos
|
||||
return p.sos
|
||||
}
|
||||
|
||||
// assignBracketType implements rule N0 for a single bracket pair.
|
||||
func (p *bracketPairer) assignBracketType(loc bracketPair, dirEmbed Class, initialTypes []Class) {
|
||||
// rule "N0, a", inspect contents of pair
|
||||
dirPair := p.classifyPairContent(loc, dirEmbed)
|
||||
|
||||
// dirPair is now L, R, or N (no strong type found)
|
||||
|
||||
// the following logical tests are performed out of order compared to
|
||||
// the statement of the rules but yield the same results
|
||||
if dirPair == ON {
|
||||
return // case "d" - nothing to do
|
||||
}
|
||||
|
||||
if dirPair != dirEmbed {
|
||||
// case "c": strong type found, opposite - check before (c.1)
|
||||
dirPair = p.classBeforePair(loc)
|
||||
if dirPair == dirEmbed || dirPair == ON {
|
||||
// no strong opposite type found before - use embedding (c.2)
|
||||
dirPair = dirEmbed
|
||||
}
|
||||
}
|
||||
// else: case "b", strong type found matching embedding,
|
||||
// no explicit action needed, as dirPair is already set to embedding
|
||||
// direction
|
||||
|
||||
// set the bracket types to the type found
|
||||
p.setBracketsToType(loc, dirPair, initialTypes)
|
||||
}
|
||||
|
||||
func (p *bracketPairer) setBracketsToType(loc bracketPair, dirPair Class, initialTypes []Class) {
|
||||
p.codesIsolatedRun[loc.opener] = dirPair
|
||||
p.codesIsolatedRun[loc.closer] = dirPair
|
||||
|
||||
for i := loc.opener + 1; i < loc.closer; i++ {
|
||||
index := p.indexes[i]
|
||||
if initialTypes[index] != NSM {
|
||||
break
|
||||
}
|
||||
p.codesIsolatedRun[i] = dirPair
|
||||
}
|
||||
|
||||
for i := loc.closer + 1; i < len(p.indexes); i++ {
|
||||
index := p.indexes[i]
|
||||
if initialTypes[index] != NSM {
|
||||
break
|
||||
}
|
||||
p.codesIsolatedRun[i] = dirPair
|
||||
}
|
||||
}
|
||||
|
||||
// resolveBrackets implements rule N0 for a list of pairs.
|
||||
func (p *bracketPairer) resolveBrackets(dirEmbed Class, initialTypes []Class) {
|
||||
for _, loc := range p.pairPositions {
|
||||
p.assignBracketType(loc, dirEmbed, initialTypes)
|
||||
}
|
||||
}
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,133 @@
|
|||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ignore
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"log"
|
||||
|
||||
"golang.org/x/text/internal/gen"
|
||||
"golang.org/x/text/internal/triegen"
|
||||
"golang.org/x/text/internal/ucd"
|
||||
)
|
||||
|
||||
var outputFile = flag.String("out", "tables.go", "output file")
|
||||
|
||||
func main() {
|
||||
gen.Init()
|
||||
gen.Repackage("gen_trieval.go", "trieval.go", "bidi")
|
||||
gen.Repackage("gen_ranges.go", "ranges_test.go", "bidi")
|
||||
|
||||
genTables()
|
||||
}
|
||||
|
||||
// bidiClass names and codes taken from class "bc" in
|
||||
// http://www.unicode.org/Public/8.0.0/ucd/PropertyValueAliases.txt
|
||||
var bidiClass = map[string]Class{
|
||||
"AL": AL, // ArabicLetter
|
||||
"AN": AN, // ArabicNumber
|
||||
"B": B, // ParagraphSeparator
|
||||
"BN": BN, // BoundaryNeutral
|
||||
"CS": CS, // CommonSeparator
|
||||
"EN": EN, // EuropeanNumber
|
||||
"ES": ES, // EuropeanSeparator
|
||||
"ET": ET, // EuropeanTerminator
|
||||
"L": L, // LeftToRight
|
||||
"NSM": NSM, // NonspacingMark
|
||||
"ON": ON, // OtherNeutral
|
||||
"R": R, // RightToLeft
|
||||
"S": S, // SegmentSeparator
|
||||
"WS": WS, // WhiteSpace
|
||||
|
||||
"FSI": Control,
|
||||
"PDF": Control,
|
||||
"PDI": Control,
|
||||
"LRE": Control,
|
||||
"LRI": Control,
|
||||
"LRO": Control,
|
||||
"RLE": Control,
|
||||
"RLI": Control,
|
||||
"RLO": Control,
|
||||
}
|
||||
|
||||
func genTables() {
|
||||
if numClass > 0x0F {
|
||||
log.Fatalf("Too many Class constants (%#x > 0x0F).", numClass)
|
||||
}
|
||||
w := gen.NewCodeWriter()
|
||||
defer w.WriteGoFile(*outputFile, "bidi")
|
||||
|
||||
gen.WriteUnicodeVersion(w)
|
||||
|
||||
t := triegen.NewTrie("bidi")
|
||||
|
||||
// Build data about bracket mapping. These bits need to be or-ed with
|
||||
// any other bits.
|
||||
orMask := map[rune]uint64{}
|
||||
|
||||
xorMap := map[rune]int{}
|
||||
xorMasks := []rune{0} // First value is no-op.
|
||||
|
||||
ucd.Parse(gen.OpenUCDFile("BidiBrackets.txt"), func(p *ucd.Parser) {
|
||||
r1 := p.Rune(0)
|
||||
r2 := p.Rune(1)
|
||||
xor := r1 ^ r2
|
||||
if _, ok := xorMap[xor]; !ok {
|
||||
xorMap[xor] = len(xorMasks)
|
||||
xorMasks = append(xorMasks, xor)
|
||||
}
|
||||
entry := uint64(xorMap[xor]) << xorMaskShift
|
||||
switch p.String(2) {
|
||||
case "o":
|
||||
entry |= openMask
|
||||
case "c", "n":
|
||||
default:
|
||||
log.Fatalf("Unknown bracket class %q.", p.String(2))
|
||||
}
|
||||
orMask[r1] = entry
|
||||
})
|
||||
|
||||
w.WriteComment(`
|
||||
xorMasks contains masks to be xor-ed with brackets to get the reverse
|
||||
version.`)
|
||||
w.WriteVar("xorMasks", xorMasks)
|
||||
|
||||
done := map[rune]bool{}
|
||||
|
||||
insert := func(r rune, c Class) {
|
||||
if !done[r] {
|
||||
t.Insert(r, orMask[r]|uint64(c))
|
||||
done[r] = true
|
||||
}
|
||||
}
|
||||
|
||||
// Insert the derived BiDi properties.
|
||||
ucd.Parse(gen.OpenUCDFile("extracted/DerivedBidiClass.txt"), func(p *ucd.Parser) {
|
||||
r := p.Rune(0)
|
||||
class, ok := bidiClass[p.String(1)]
|
||||
if !ok {
|
||||
log.Fatalf("%U: Unknown BiDi class %q", r, p.String(1))
|
||||
}
|
||||
insert(r, class)
|
||||
})
|
||||
visitDefaults(insert)
|
||||
|
||||
// TODO: use sparse blocks. This would reduce table size considerably
|
||||
// from the looks of it.
|
||||
|
||||
sz, err := t.Gen(w)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
w.Size += sz
|
||||
}
|
||||
|
||||
// dummy values to make methods in gen_common compile. The real versions
|
||||
// will be generated by this file to tables.go.
|
||||
var (
|
||||
xorMasks []rune
|
||||
)
|
|
@ -0,0 +1,57 @@
|
|||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ignore
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"unicode"
|
||||
|
||||
"golang.org/x/text/internal/gen"
|
||||
"golang.org/x/text/internal/ucd"
|
||||
"golang.org/x/text/unicode/rangetable"
|
||||
)
|
||||
|
||||
// These tables are hand-extracted from:
|
||||
// http://www.unicode.org/Public/8.0.0/ucd/extracted/DerivedBidiClass.txt
|
||||
func visitDefaults(fn func(r rune, c Class)) {
|
||||
// first write default values for ranges listed above.
|
||||
visitRunes(fn, AL, []rune{
|
||||
0x0600, 0x07BF, // Arabic
|
||||
0x08A0, 0x08FF, // Arabic Extended-A
|
||||
0xFB50, 0xFDCF, // Arabic Presentation Forms
|
||||
0xFDF0, 0xFDFF,
|
||||
0xFE70, 0xFEFF,
|
||||
0x0001EE00, 0x0001EEFF, // Arabic Mathematical Alpha Symbols
|
||||
})
|
||||
visitRunes(fn, R, []rune{
|
||||
0x0590, 0x05FF, // Hebrew
|
||||
0x07C0, 0x089F, // Nko et al.
|
||||
0xFB1D, 0xFB4F,
|
||||
0x00010800, 0x00010FFF, // Cypriot Syllabary et. al.
|
||||
0x0001E800, 0x0001EDFF,
|
||||
0x0001EF00, 0x0001EFFF,
|
||||
})
|
||||
visitRunes(fn, ET, []rune{ // European Terminator
|
||||
0x20A0, 0x20Cf, // Currency symbols
|
||||
})
|
||||
rangetable.Visit(unicode.Noncharacter_Code_Point, func(r rune) {
|
||||
fn(r, BN) // Boundary Neutral
|
||||
})
|
||||
ucd.Parse(gen.OpenUCDFile("DerivedCoreProperties.txt"), func(p *ucd.Parser) {
|
||||
if p.String(1) == "Default_Ignorable_Code_Point" {
|
||||
fn(p.Rune(0), BN) // Boundary Neutral
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func visitRunes(fn func(r rune, c Class), c Class, runes []rune) {
|
||||
for i := 0; i < len(runes); i += 2 {
|
||||
lo, hi := runes[i], runes[i+1]
|
||||
for j := lo; j <= hi; j++ {
|
||||
fn(j, c)
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,64 @@
|
|||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ignore
|
||||
|
||||
package main
|
||||
|
||||
// Class is the Unicode BiDi class. Each rune has a single class.
|
||||
type Class uint
|
||||
|
||||
const (
|
||||
L Class = iota // LeftToRight
|
||||
R // RightToLeft
|
||||
EN // EuropeanNumber
|
||||
ES // EuropeanSeparator
|
||||
ET // EuropeanTerminator
|
||||
AN // ArabicNumber
|
||||
CS // CommonSeparator
|
||||
B // ParagraphSeparator
|
||||
S // SegmentSeparator
|
||||
WS // WhiteSpace
|
||||
ON // OtherNeutral
|
||||
BN // BoundaryNeutral
|
||||
NSM // NonspacingMark
|
||||
AL // ArabicLetter
|
||||
Control // Control LRO - PDI
|
||||
|
||||
numClass
|
||||
|
||||
LRO // LeftToRightOverride
|
||||
RLO // RightToLeftOverride
|
||||
LRE // LeftToRightEmbedding
|
||||
RLE // RightToLeftEmbedding
|
||||
PDF // PopDirectionalFormat
|
||||
LRI // LeftToRightIsolate
|
||||
RLI // RightToLeftIsolate
|
||||
FSI // FirstStrongIsolate
|
||||
PDI // PopDirectionalIsolate
|
||||
|
||||
unknownClass = ^Class(0)
|
||||
)
|
||||
|
||||
var controlToClass = map[rune]Class{
|
||||
0x202D: LRO, // LeftToRightOverride,
|
||||
0x202E: RLO, // RightToLeftOverride,
|
||||
0x202A: LRE, // LeftToRightEmbedding,
|
||||
0x202B: RLE, // RightToLeftEmbedding,
|
||||
0x202C: PDF, // PopDirectionalFormat,
|
||||
0x2066: LRI, // LeftToRightIsolate,
|
||||
0x2067: RLI, // RightToLeftIsolate,
|
||||
0x2068: FSI, // FirstStrongIsolate,
|
||||
0x2069: PDI, // PopDirectionalIsolate,
|
||||
}
|
||||
|
||||
// A trie entry has the following bits:
|
||||
// 7..5 XOR mask for brackets
|
||||
// 4 1: Bracket open, 0: Bracket close
|
||||
// 3..0 Class type
|
||||
|
||||
const (
|
||||
openMask = 0x10
|
||||
xorMaskShift = 5
|
||||
)
|
|
@ -0,0 +1,206 @@
|
|||
// Copyright 2016 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package bidi
|
||||
|
||||
import "unicode/utf8"
|
||||
|
||||
// Properties provides access to BiDi properties of runes.
|
||||
type Properties struct {
|
||||
entry uint8
|
||||
last uint8
|
||||
}
|
||||
|
||||
var trie = newBidiTrie(0)
|
||||
|
||||
// TODO: using this for bidirule reduces the running time by about 5%. Consider
|
||||
// if this is worth exposing or if we can find a way to speed up the Class
|
||||
// method.
|
||||
//
|
||||
// // CompactClass is like Class, but maps all of the BiDi control classes
|
||||
// // (LRO, RLO, LRE, RLE, PDF, LRI, RLI, FSI, PDI) to the class Control.
|
||||
// func (p Properties) CompactClass() Class {
|
||||
// return Class(p.entry & 0x0F)
|
||||
// }
|
||||
|
||||
// Class returns the Bidi class for p.
|
||||
func (p Properties) Class() Class {
|
||||
c := Class(p.entry & 0x0F)
|
||||
if c == Control {
|
||||
c = controlByteToClass[p.last&0xF]
|
||||
}
|
||||
return c
|
||||
}
|
||||
|
||||
// IsBracket reports whether the rune is a bracket.
|
||||
func (p Properties) IsBracket() bool { return p.entry&0xF0 != 0 }
|
||||
|
||||
// IsOpeningBracket reports whether the rune is an opening bracket.
|
||||
// IsBracket must return true.
|
||||
func (p Properties) IsOpeningBracket() bool { return p.entry&openMask != 0 }
|
||||
|
||||
// TODO: find a better API and expose.
|
||||
func (p Properties) reverseBracket(r rune) rune {
|
||||
return xorMasks[p.entry>>xorMaskShift] ^ r
|
||||
}
|
||||
|
||||
var controlByteToClass = [16]Class{
|
||||
0xD: LRO, // U+202D LeftToRightOverride,
|
||||
0xE: RLO, // U+202E RightToLeftOverride,
|
||||
0xA: LRE, // U+202A LeftToRightEmbedding,
|
||||
0xB: RLE, // U+202B RightToLeftEmbedding,
|
||||
0xC: PDF, // U+202C PopDirectionalFormat,
|
||||
0x6: LRI, // U+2066 LeftToRightIsolate,
|
||||
0x7: RLI, // U+2067 RightToLeftIsolate,
|
||||
0x8: FSI, // U+2068 FirstStrongIsolate,
|
||||
0x9: PDI, // U+2069 PopDirectionalIsolate,
|
||||
}
|
||||
|
||||
// LookupRune returns properties for r.
|
||||
func LookupRune(r rune) (p Properties, size int) {
|
||||
var buf [4]byte
|
||||
n := utf8.EncodeRune(buf[:], r)
|
||||
return Lookup(buf[:n])
|
||||
}
|
||||
|
||||
// TODO: these lookup methods are based on the generated trie code. The returned
|
||||
// sizes have slightly different semantics from the generated code, in that it
|
||||
// always returns size==1 for an illegal UTF-8 byte (instead of the length
|
||||
// of the maximum invalid subsequence). Most Transformers, like unicode/norm,
|
||||
// leave invalid UTF-8 untouched, in which case it has performance benefits to
|
||||
// do so (without changing the semantics). Bidi requires the semantics used here
|
||||
// for the bidirule implementation to be compatible with the Go semantics.
|
||||
// They ultimately should perhaps be adopted by all trie implementations, for
|
||||
// convenience sake.
|
||||
// This unrolled code also boosts performance of the secure/bidirule package by
|
||||
// about 30%.
|
||||
// So, to remove this code:
|
||||
// - add option to trie generator to define return type.
|
||||
// - always return 1 byte size for ill-formed UTF-8 runes.
|
||||
|
||||
// Lookup returns properties for the first rune in s and the width in bytes of
|
||||
// its encoding. The size will be 0 if s does not hold enough bytes to complete
|
||||
// the encoding.
|
||||
func Lookup(s []byte) (p Properties, sz int) {
|
||||
c0 := s[0]
|
||||
switch {
|
||||
case c0 < 0x80: // is ASCII
|
||||
return Properties{entry: bidiValues[c0]}, 1
|
||||
case c0 < 0xC2:
|
||||
return Properties{}, 1
|
||||
case c0 < 0xE0: // 2-byte UTF-8
|
||||
if len(s) < 2 {
|
||||
return Properties{}, 0
|
||||
}
|
||||
i := bidiIndex[c0]
|
||||
c1 := s[1]
|
||||
if c1 < 0x80 || 0xC0 <= c1 {
|
||||
return Properties{}, 1
|
||||
}
|
||||
return Properties{entry: trie.lookupValue(uint32(i), c1)}, 2
|
||||
case c0 < 0xF0: // 3-byte UTF-8
|
||||
if len(s) < 3 {
|
||||
return Properties{}, 0
|
||||
}
|
||||
i := bidiIndex[c0]
|
||||
c1 := s[1]
|
||||
if c1 < 0x80 || 0xC0 <= c1 {
|
||||
return Properties{}, 1
|
||||
}
|
||||
o := uint32(i)<<6 + uint32(c1)
|
||||
i = bidiIndex[o]
|
||||
c2 := s[2]
|
||||
if c2 < 0x80 || 0xC0 <= c2 {
|
||||
return Properties{}, 1
|
||||
}
|
||||
return Properties{entry: trie.lookupValue(uint32(i), c2), last: c2}, 3
|
||||
case c0 < 0xF8: // 4-byte UTF-8
|
||||
if len(s) < 4 {
|
||||
return Properties{}, 0
|
||||
}
|
||||
i := bidiIndex[c0]
|
||||
c1 := s[1]
|
||||
if c1 < 0x80 || 0xC0 <= c1 {
|
||||
return Properties{}, 1
|
||||
}
|
||||
o := uint32(i)<<6 + uint32(c1)
|
||||
i = bidiIndex[o]
|
||||
c2 := s[2]
|
||||
if c2 < 0x80 || 0xC0 <= c2 {
|
||||
return Properties{}, 1
|
||||
}
|
||||
o = uint32(i)<<6 + uint32(c2)
|
||||
i = bidiIndex[o]
|
||||
c3 := s[3]
|
||||
if c3 < 0x80 || 0xC0 <= c3 {
|
||||
return Properties{}, 1
|
||||
}
|
||||
return Properties{entry: trie.lookupValue(uint32(i), c3)}, 4
|
||||
}
|
||||
// Illegal rune
|
||||
return Properties{}, 1
|
||||
}
|
||||
|
||||
// LookupString returns properties for the first rune in s and the width in
|
||||
// bytes of its encoding. The size will be 0 if s does not hold enough bytes to
|
||||
// complete the encoding.
|
||||
func LookupString(s string) (p Properties, sz int) {
|
||||
c0 := s[0]
|
||||
switch {
|
||||
case c0 < 0x80: // is ASCII
|
||||
return Properties{entry: bidiValues[c0]}, 1
|
||||
case c0 < 0xC2:
|
||||
return Properties{}, 1
|
||||
case c0 < 0xE0: // 2-byte UTF-8
|
||||
if len(s) < 2 {
|
||||
return Properties{}, 0
|
||||
}
|
||||
i := bidiIndex[c0]
|
||||
c1 := s[1]
|
||||
if c1 < 0x80 || 0xC0 <= c1 {
|
||||
return Properties{}, 1
|
||||
}
|
||||
return Properties{entry: trie.lookupValue(uint32(i), c1)}, 2
|
||||
case c0 < 0xF0: // 3-byte UTF-8
|
||||
if len(s) < 3 {
|
||||
return Properties{}, 0
|
||||
}
|
||||
i := bidiIndex[c0]
|
||||
c1 := s[1]
|
||||
if c1 < 0x80 || 0xC0 <= c1 {
|
||||
return Properties{}, 1
|
||||
}
|
||||
o := uint32(i)<<6 + uint32(c1)
|
||||
i = bidiIndex[o]
|
||||
c2 := s[2]
|
||||
if c2 < 0x80 || 0xC0 <= c2 {
|
||||
return Properties{}, 1
|
||||
}
|
||||
return Properties{entry: trie.lookupValue(uint32(i), c2), last: c2}, 3
|
||||
case c0 < 0xF8: // 4-byte UTF-8
|
||||
if len(s) < 4 {
|
||||
return Properties{}, 0
|
||||
}
|
||||
i := bidiIndex[c0]
|
||||
c1 := s[1]
|
||||
if c1 < 0x80 || 0xC0 <= c1 {
|
||||
return Properties{}, 1
|
||||
}
|
||||
o := uint32(i)<<6 + uint32(c1)
|
||||
i = bidiIndex[o]
|
||||
c2 := s[2]
|
||||
if c2 < 0x80 || 0xC0 <= c2 {
|
||||
return Properties{}, 1
|
||||
}
|
||||
o = uint32(i)<<6 + uint32(c2)
|
||||
i = bidiIndex[o]
|
||||
c3 := s[3]
|
||||
if c3 < 0x80 || 0xC0 <= c3 {
|
||||
return Properties{}, 1
|
||||
}
|
||||
return Properties{entry: trie.lookupValue(uint32(i), c3)}, 4
|
||||
}
|
||||
// Illegal rune
|
||||
return Properties{}, 1
|
||||
}
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,60 @@
|
|||
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
|
||||
|
||||
package bidi
|
||||
|
||||
// Class is the Unicode BiDi class. Each rune has a single class.
|
||||
type Class uint
|
||||
|
||||
const (
|
||||
L Class = iota // LeftToRight
|
||||
R // RightToLeft
|
||||
EN // EuropeanNumber
|
||||
ES // EuropeanSeparator
|
||||
ET // EuropeanTerminator
|
||||
AN // ArabicNumber
|
||||
CS // CommonSeparator
|
||||
B // ParagraphSeparator
|
||||
S // SegmentSeparator
|
||||
WS // WhiteSpace
|
||||
ON // OtherNeutral
|
||||
BN // BoundaryNeutral
|
||||
NSM // NonspacingMark
|
||||
AL // ArabicLetter
|
||||
Control // Control LRO - PDI
|
||||
|
||||
numClass
|
||||
|
||||
LRO // LeftToRightOverride
|
||||
RLO // RightToLeftOverride
|
||||
LRE // LeftToRightEmbedding
|
||||
RLE // RightToLeftEmbedding
|
||||
PDF // PopDirectionalFormat
|
||||
LRI // LeftToRightIsolate
|
||||
RLI // RightToLeftIsolate
|
||||
FSI // FirstStrongIsolate
|
||||
PDI // PopDirectionalIsolate
|
||||
|
||||
unknownClass = ^Class(0)
|
||||
)
|
||||
|
||||
var controlToClass = map[rune]Class{
|
||||
0x202D: LRO, // LeftToRightOverride,
|
||||
0x202E: RLO, // RightToLeftOverride,
|
||||
0x202A: LRE, // LeftToRightEmbedding,
|
||||
0x202B: RLE, // RightToLeftEmbedding,
|
||||
0x202C: PDF, // PopDirectionalFormat,
|
||||
0x2066: LRI, // LeftToRightIsolate,
|
||||
0x2067: RLI, // RightToLeftIsolate,
|
||||
0x2068: FSI, // FirstStrongIsolate,
|
||||
0x2069: PDI, // PopDirectionalIsolate,
|
||||
}
|
||||
|
||||
// A trie entry has the following bits:
|
||||
// 7..5 XOR mask for brackets
|
||||
// 4 1: Bracket open, 0: Bracket close
|
||||
// 3..0 Class type
|
||||
|
||||
const (
|
||||
openMask = 0x10
|
||||
xorMaskShift = 5
|
||||
)
|
|
@ -0,0 +1,508 @@
|
|||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package norm
|
||||
|
||||
import "unicode/utf8"
|
||||
|
||||
const (
|
||||
maxNonStarters = 30
|
||||
// The maximum number of characters needed for a buffer is
|
||||
// maxNonStarters + 1 for the starter + 1 for the GCJ
|
||||
maxBufferSize = maxNonStarters + 2
|
||||
maxNFCExpansion = 3 // NFC(0x1D160)
|
||||
maxNFKCExpansion = 18 // NFKC(0xFDFA)
|
||||
|
||||
maxByteBufferSize = utf8.UTFMax * maxBufferSize // 128
|
||||
)
|
||||
|
||||
// ssState is used for reporting the segment state after inserting a rune.
|
||||
// It is returned by streamSafe.next.
|
||||
type ssState int
|
||||
|
||||
const (
|
||||
// Indicates a rune was successfully added to the segment.
|
||||
ssSuccess ssState = iota
|
||||
// Indicates a rune starts a new segment and should not be added.
|
||||
ssStarter
|
||||
// Indicates a rune caused a segment overflow and a CGJ should be inserted.
|
||||
ssOverflow
|
||||
)
|
||||
|
||||
// streamSafe implements the policy of when a CGJ should be inserted.
|
||||
type streamSafe uint8
|
||||
|
||||
// first inserts the first rune of a segment. It is a faster version of next if
|
||||
// it is known p represents the first rune in a segment.
|
||||
func (ss *streamSafe) first(p Properties) {
|
||||
*ss = streamSafe(p.nTrailingNonStarters())
|
||||
}
|
||||
|
||||
// insert returns a ssState value to indicate whether a rune represented by p
|
||||
// can be inserted.
|
||||
func (ss *streamSafe) next(p Properties) ssState {
|
||||
if *ss > maxNonStarters {
|
||||
panic("streamSafe was not reset")
|
||||
}
|
||||
n := p.nLeadingNonStarters()
|
||||
if *ss += streamSafe(n); *ss > maxNonStarters {
|
||||
*ss = 0
|
||||
return ssOverflow
|
||||
}
|
||||
// The Stream-Safe Text Processing prescribes that the counting can stop
|
||||
// as soon as a starter is encountered. However, there are some starters,
|
||||
// like Jamo V and T, that can combine with other runes, leaving their
|
||||
// successive non-starters appended to the previous, possibly causing an
|
||||
// overflow. We will therefore consider any rune with a non-zero nLead to
|
||||
// be a non-starter. Note that it always hold that if nLead > 0 then
|
||||
// nLead == nTrail.
|
||||
if n == 0 {
|
||||
*ss = streamSafe(p.nTrailingNonStarters())
|
||||
return ssStarter
|
||||
}
|
||||
return ssSuccess
|
||||
}
|
||||
|
||||
// backwards is used for checking for overflow and segment starts
|
||||
// when traversing a string backwards. Users do not need to call first
|
||||
// for the first rune. The state of the streamSafe retains the count of
|
||||
// the non-starters loaded.
|
||||
func (ss *streamSafe) backwards(p Properties) ssState {
|
||||
if *ss > maxNonStarters {
|
||||
panic("streamSafe was not reset")
|
||||
}
|
||||
c := *ss + streamSafe(p.nTrailingNonStarters())
|
||||
if c > maxNonStarters {
|
||||
return ssOverflow
|
||||
}
|
||||
*ss = c
|
||||
if p.nLeadingNonStarters() == 0 {
|
||||
return ssStarter
|
||||
}
|
||||
return ssSuccess
|
||||
}
|
||||
|
||||
func (ss streamSafe) isMax() bool {
|
||||
return ss == maxNonStarters
|
||||
}
|
||||
|
||||
// GraphemeJoiner is inserted after maxNonStarters non-starter runes.
|
||||
const GraphemeJoiner = "\u034F"
|
||||
|
||||
// reorderBuffer is used to normalize a single segment. Characters inserted with
|
||||
// insert are decomposed and reordered based on CCC. The compose method can
|
||||
// be used to recombine characters. Note that the byte buffer does not hold
|
||||
// the UTF-8 characters in order. Only the rune array is maintained in sorted
|
||||
// order. flush writes the resulting segment to a byte array.
|
||||
type reorderBuffer struct {
|
||||
rune [maxBufferSize]Properties // Per character info.
|
||||
byte [maxByteBufferSize]byte // UTF-8 buffer. Referenced by runeInfo.pos.
|
||||
nbyte uint8 // Number or bytes.
|
||||
ss streamSafe // For limiting length of non-starter sequence.
|
||||
nrune int // Number of runeInfos.
|
||||
f formInfo
|
||||
|
||||
src input
|
||||
nsrc int
|
||||
tmpBytes input
|
||||
|
||||
out []byte
|
||||
flushF func(*reorderBuffer) bool
|
||||
}
|
||||
|
||||
func (rb *reorderBuffer) init(f Form, src []byte) {
|
||||
rb.f = *formTable[f]
|
||||
rb.src.setBytes(src)
|
||||
rb.nsrc = len(src)
|
||||
rb.ss = 0
|
||||
}
|
||||
|
||||
func (rb *reorderBuffer) initString(f Form, src string) {
|
||||
rb.f = *formTable[f]
|
||||
rb.src.setString(src)
|
||||
rb.nsrc = len(src)
|
||||
rb.ss = 0
|
||||
}
|
||||
|
||||
func (rb *reorderBuffer) setFlusher(out []byte, f func(*reorderBuffer) bool) {
|
||||
rb.out = out
|
||||
rb.flushF = f
|
||||
}
|
||||
|
||||
// reset discards all characters from the buffer.
|
||||
func (rb *reorderBuffer) reset() {
|
||||
rb.nrune = 0
|
||||
rb.nbyte = 0
|
||||
}
|
||||
|
||||
func (rb *reorderBuffer) doFlush() bool {
|
||||
if rb.f.composing {
|
||||
rb.compose()
|
||||
}
|
||||
res := rb.flushF(rb)
|
||||
rb.reset()
|
||||
return res
|
||||
}
|
||||
|
||||
// appendFlush appends the normalized segment to rb.out.
|
||||
func appendFlush(rb *reorderBuffer) bool {
|
||||
for i := 0; i < rb.nrune; i++ {
|
||||
start := rb.rune[i].pos
|
||||
end := start + rb.rune[i].size
|
||||
rb.out = append(rb.out, rb.byte[start:end]...)
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// flush appends the normalized segment to out and resets rb.
|
||||
func (rb *reorderBuffer) flush(out []byte) []byte {
|
||||
for i := 0; i < rb.nrune; i++ {
|
||||
start := rb.rune[i].pos
|
||||
end := start + rb.rune[i].size
|
||||
out = append(out, rb.byte[start:end]...)
|
||||
}
|
||||
rb.reset()
|
||||
return out
|
||||
}
|
||||
|
||||
// flushCopy copies the normalized segment to buf and resets rb.
|
||||
// It returns the number of bytes written to buf.
|
||||
func (rb *reorderBuffer) flushCopy(buf []byte) int {
|
||||
p := 0
|
||||
for i := 0; i < rb.nrune; i++ {
|
||||
runep := rb.rune[i]
|
||||
p += copy(buf[p:], rb.byte[runep.pos:runep.pos+runep.size])
|
||||
}
|
||||
rb.reset()
|
||||
return p
|
||||
}
|
||||
|
||||
// insertOrdered inserts a rune in the buffer, ordered by Canonical Combining Class.
|
||||
// It returns false if the buffer is not large enough to hold the rune.
|
||||
// It is used internally by insert and insertString only.
|
||||
func (rb *reorderBuffer) insertOrdered(info Properties) {
|
||||
n := rb.nrune
|
||||
b := rb.rune[:]
|
||||
cc := info.ccc
|
||||
if cc > 0 {
|
||||
// Find insertion position + move elements to make room.
|
||||
for ; n > 0; n-- {
|
||||
if b[n-1].ccc <= cc {
|
||||
break
|
||||
}
|
||||
b[n] = b[n-1]
|
||||
}
|
||||
}
|
||||
rb.nrune += 1
|
||||
pos := uint8(rb.nbyte)
|
||||
rb.nbyte += utf8.UTFMax
|
||||
info.pos = pos
|
||||
b[n] = info
|
||||
}
|
||||
|
||||
// insertErr is an error code returned by insert. Using this type instead
|
||||
// of error improves performance up to 20% for many of the benchmarks.
|
||||
type insertErr int
|
||||
|
||||
const (
|
||||
iSuccess insertErr = -iota
|
||||
iShortDst
|
||||
iShortSrc
|
||||
)
|
||||
|
||||
// insertFlush inserts the given rune in the buffer ordered by CCC.
|
||||
// If a decomposition with multiple segments are encountered, they leading
|
||||
// ones are flushed.
|
||||
// It returns a non-zero error code if the rune was not inserted.
|
||||
func (rb *reorderBuffer) insertFlush(src input, i int, info Properties) insertErr {
|
||||
if rune := src.hangul(i); rune != 0 {
|
||||
rb.decomposeHangul(rune)
|
||||
return iSuccess
|
||||
}
|
||||
if info.hasDecomposition() {
|
||||
return rb.insertDecomposed(info.Decomposition())
|
||||
}
|
||||
rb.insertSingle(src, i, info)
|
||||
return iSuccess
|
||||
}
|
||||
|
||||
// insertUnsafe inserts the given rune in the buffer ordered by CCC.
|
||||
// It is assumed there is sufficient space to hold the runes. It is the
|
||||
// responsibility of the caller to ensure this. This can be done by checking
|
||||
// the state returned by the streamSafe type.
|
||||
func (rb *reorderBuffer) insertUnsafe(src input, i int, info Properties) {
|
||||
if rune := src.hangul(i); rune != 0 {
|
||||
rb.decomposeHangul(rune)
|
||||
}
|
||||
if info.hasDecomposition() {
|
||||
// TODO: inline.
|
||||
rb.insertDecomposed(info.Decomposition())
|
||||
} else {
|
||||
rb.insertSingle(src, i, info)
|
||||
}
|
||||
}
|
||||
|
||||
// insertDecomposed inserts an entry in to the reorderBuffer for each rune
|
||||
// in dcomp. dcomp must be a sequence of decomposed UTF-8-encoded runes.
|
||||
// It flushes the buffer on each new segment start.
|
||||
func (rb *reorderBuffer) insertDecomposed(dcomp []byte) insertErr {
|
||||
rb.tmpBytes.setBytes(dcomp)
|
||||
// As the streamSafe accounting already handles the counting for modifiers,
|
||||
// we don't have to call next. However, we do need to keep the accounting
|
||||
// intact when flushing the buffer.
|
||||
for i := 0; i < len(dcomp); {
|
||||
info := rb.f.info(rb.tmpBytes, i)
|
||||
if info.BoundaryBefore() && rb.nrune > 0 && !rb.doFlush() {
|
||||
return iShortDst
|
||||
}
|
||||
i += copy(rb.byte[rb.nbyte:], dcomp[i:i+int(info.size)])
|
||||
rb.insertOrdered(info)
|
||||
}
|
||||
return iSuccess
|
||||
}
|
||||
|
||||
// insertSingle inserts an entry in the reorderBuffer for the rune at
|
||||
// position i. info is the runeInfo for the rune at position i.
|
||||
func (rb *reorderBuffer) insertSingle(src input, i int, info Properties) {
|
||||
src.copySlice(rb.byte[rb.nbyte:], i, i+int(info.size))
|
||||
rb.insertOrdered(info)
|
||||
}
|
||||
|
||||
// insertCGJ inserts a Combining Grapheme Joiner (0x034f) into rb.
|
||||
func (rb *reorderBuffer) insertCGJ() {
|
||||
rb.insertSingle(input{str: GraphemeJoiner}, 0, Properties{size: uint8(len(GraphemeJoiner))})
|
||||
}
|
||||
|
||||
// appendRune inserts a rune at the end of the buffer. It is used for Hangul.
|
||||
func (rb *reorderBuffer) appendRune(r rune) {
|
||||
bn := rb.nbyte
|
||||
sz := utf8.EncodeRune(rb.byte[bn:], rune(r))
|
||||
rb.nbyte += utf8.UTFMax
|
||||
rb.rune[rb.nrune] = Properties{pos: bn, size: uint8(sz)}
|
||||
rb.nrune++
|
||||
}
|
||||
|
||||
// assignRune sets a rune at position pos. It is used for Hangul and recomposition.
|
||||
func (rb *reorderBuffer) assignRune(pos int, r rune) {
|
||||
bn := rb.rune[pos].pos
|
||||
sz := utf8.EncodeRune(rb.byte[bn:], rune(r))
|
||||
rb.rune[pos] = Properties{pos: bn, size: uint8(sz)}
|
||||
}
|
||||
|
||||
// runeAt returns the rune at position n. It is used for Hangul and recomposition.
|
||||
func (rb *reorderBuffer) runeAt(n int) rune {
|
||||
inf := rb.rune[n]
|
||||
r, _ := utf8.DecodeRune(rb.byte[inf.pos : inf.pos+inf.size])
|
||||
return r
|
||||
}
|
||||
|
||||
// bytesAt returns the UTF-8 encoding of the rune at position n.
|
||||
// It is used for Hangul and recomposition.
|
||||
func (rb *reorderBuffer) bytesAt(n int) []byte {
|
||||
inf := rb.rune[n]
|
||||
return rb.byte[inf.pos : int(inf.pos)+int(inf.size)]
|
||||
}
|
||||
|
||||
// For Hangul we combine algorithmically, instead of using tables.
|
||||
const (
|
||||
hangulBase = 0xAC00 // UTF-8(hangulBase) -> EA B0 80
|
||||
hangulBase0 = 0xEA
|
||||
hangulBase1 = 0xB0
|
||||
hangulBase2 = 0x80
|
||||
|
||||
hangulEnd = hangulBase + jamoLVTCount // UTF-8(0xD7A4) -> ED 9E A4
|
||||
hangulEnd0 = 0xED
|
||||
hangulEnd1 = 0x9E
|
||||
hangulEnd2 = 0xA4
|
||||
|
||||
jamoLBase = 0x1100 // UTF-8(jamoLBase) -> E1 84 00
|
||||
jamoLBase0 = 0xE1
|
||||
jamoLBase1 = 0x84
|
||||
jamoLEnd = 0x1113
|
||||
jamoVBase = 0x1161
|
||||
jamoVEnd = 0x1176
|
||||
jamoTBase = 0x11A7
|
||||
jamoTEnd = 0x11C3
|
||||
|
||||
jamoTCount = 28
|
||||
jamoVCount = 21
|
||||
jamoVTCount = 21 * 28
|
||||
jamoLVTCount = 19 * 21 * 28
|
||||
)
|
||||
|
||||
const hangulUTF8Size = 3
|
||||
|
||||
func isHangul(b []byte) bool {
|
||||
if len(b) < hangulUTF8Size {
|
||||
return false
|
||||
}
|
||||
b0 := b[0]
|
||||
if b0 < hangulBase0 {
|
||||
return false
|
||||
}
|
||||
b1 := b[1]
|
||||
switch {
|
||||
case b0 == hangulBase0:
|
||||
return b1 >= hangulBase1
|
||||
case b0 < hangulEnd0:
|
||||
return true
|
||||
case b0 > hangulEnd0:
|
||||
return false
|
||||
case b1 < hangulEnd1:
|
||||
return true
|
||||
}
|
||||
return b1 == hangulEnd1 && b[2] < hangulEnd2
|
||||
}
|
||||
|
||||
func isHangulString(b string) bool {
|
||||
if len(b) < hangulUTF8Size {
|
||||
return false
|
||||
}
|
||||
b0 := b[0]
|
||||
if b0 < hangulBase0 {
|
||||
return false
|
||||
}
|
||||
b1 := b[1]
|
||||
switch {
|
||||
case b0 == hangulBase0:
|
||||
return b1 >= hangulBase1
|
||||
case b0 < hangulEnd0:
|
||||
return true
|
||||
case b0 > hangulEnd0:
|
||||
return false
|
||||
case b1 < hangulEnd1:
|
||||
return true
|
||||
}
|
||||
return b1 == hangulEnd1 && b[2] < hangulEnd2
|
||||
}
|
||||
|
||||
// Caller must ensure len(b) >= 2.
|
||||
func isJamoVT(b []byte) bool {
|
||||
// True if (rune & 0xff00) == jamoLBase
|
||||
return b[0] == jamoLBase0 && (b[1]&0xFC) == jamoLBase1
|
||||
}
|
||||
|
||||
func isHangulWithoutJamoT(b []byte) bool {
|
||||
c, _ := utf8.DecodeRune(b)
|
||||
c -= hangulBase
|
||||
return c < jamoLVTCount && c%jamoTCount == 0
|
||||
}
|
||||
|
||||
// decomposeHangul writes the decomposed Hangul to buf and returns the number
|
||||
// of bytes written. len(buf) should be at least 9.
|
||||
func decomposeHangul(buf []byte, r rune) int {
|
||||
const JamoUTF8Len = 3
|
||||
r -= hangulBase
|
||||
x := r % jamoTCount
|
||||
r /= jamoTCount
|
||||
utf8.EncodeRune(buf, jamoLBase+r/jamoVCount)
|
||||
utf8.EncodeRune(buf[JamoUTF8Len:], jamoVBase+r%jamoVCount)
|
||||
if x != 0 {
|
||||
utf8.EncodeRune(buf[2*JamoUTF8Len:], jamoTBase+x)
|
||||
return 3 * JamoUTF8Len
|
||||
}
|
||||
return 2 * JamoUTF8Len
|
||||
}
|
||||
|
||||
// decomposeHangul algorithmically decomposes a Hangul rune into
|
||||
// its Jamo components.
|
||||
// See http://unicode.org/reports/tr15/#Hangul for details on decomposing Hangul.
|
||||
func (rb *reorderBuffer) decomposeHangul(r rune) {
|
||||
r -= hangulBase
|
||||
x := r % jamoTCount
|
||||
r /= jamoTCount
|
||||
rb.appendRune(jamoLBase + r/jamoVCount)
|
||||
rb.appendRune(jamoVBase + r%jamoVCount)
|
||||
if x != 0 {
|
||||
rb.appendRune(jamoTBase + x)
|
||||
}
|
||||
}
|
||||
|
||||
// combineHangul algorithmically combines Jamo character components into Hangul.
|
||||
// See http://unicode.org/reports/tr15/#Hangul for details on combining Hangul.
|
||||
func (rb *reorderBuffer) combineHangul(s, i, k int) {
|
||||
b := rb.rune[:]
|
||||
bn := rb.nrune
|
||||
for ; i < bn; i++ {
|
||||
cccB := b[k-1].ccc
|
||||
cccC := b[i].ccc
|
||||
if cccB == 0 {
|
||||
s = k - 1
|
||||
}
|
||||
if s != k-1 && cccB >= cccC {
|
||||
// b[i] is blocked by greater-equal cccX below it
|
||||
b[k] = b[i]
|
||||
k++
|
||||
} else {
|
||||
l := rb.runeAt(s) // also used to compare to hangulBase
|
||||
v := rb.runeAt(i) // also used to compare to jamoT
|
||||
switch {
|
||||
case jamoLBase <= l && l < jamoLEnd &&
|
||||
jamoVBase <= v && v < jamoVEnd:
|
||||
// 11xx plus 116x to LV
|
||||
rb.assignRune(s, hangulBase+
|
||||
(l-jamoLBase)*jamoVTCount+(v-jamoVBase)*jamoTCount)
|
||||
case hangulBase <= l && l < hangulEnd &&
|
||||
jamoTBase < v && v < jamoTEnd &&
|
||||
((l-hangulBase)%jamoTCount) == 0:
|
||||
// ACxx plus 11Ax to LVT
|
||||
rb.assignRune(s, l+v-jamoTBase)
|
||||
default:
|
||||
b[k] = b[i]
|
||||
k++
|
||||
}
|
||||
}
|
||||
}
|
||||
rb.nrune = k
|
||||
}
|
||||
|
||||
// compose recombines the runes in the buffer.
|
||||
// It should only be used to recompose a single segment, as it will not
|
||||
// handle alternations between Hangul and non-Hangul characters correctly.
|
||||
func (rb *reorderBuffer) compose() {
|
||||
// UAX #15, section X5 , including Corrigendum #5
|
||||
// "In any character sequence beginning with starter S, a character C is
|
||||
// blocked from S if and only if there is some character B between S
|
||||
// and C, and either B is a starter or it has the same or higher
|
||||
// combining class as C."
|
||||
bn := rb.nrune
|
||||
if bn == 0 {
|
||||
return
|
||||
}
|
||||
k := 1
|
||||
b := rb.rune[:]
|
||||
for s, i := 0, 1; i < bn; i++ {
|
||||
if isJamoVT(rb.bytesAt(i)) {
|
||||
// Redo from start in Hangul mode. Necessary to support
|
||||
// U+320E..U+321E in NFKC mode.
|
||||
rb.combineHangul(s, i, k)
|
||||
return
|
||||
}
|
||||
ii := b[i]
|
||||
// We can only use combineForward as a filter if we later
|
||||
// get the info for the combined character. This is more
|
||||
// expensive than using the filter. Using combinesBackward()
|
||||
// is safe.
|
||||
if ii.combinesBackward() {
|
||||
cccB := b[k-1].ccc
|
||||
cccC := ii.ccc
|
||||
blocked := false // b[i] blocked by starter or greater or equal CCC?
|
||||
if cccB == 0 {
|
||||
s = k - 1
|
||||
} else {
|
||||
blocked = s != k-1 && cccB >= cccC
|
||||
}
|
||||
if !blocked {
|
||||
combined := combine(rb.runeAt(s), rb.runeAt(i))
|
||||
if combined != 0 {
|
||||
rb.assignRune(s, combined)
|
||||
continue
|
||||
}
|
||||
}
|
||||
}
|
||||
b[k] = b[i]
|
||||
k++
|
||||
}
|
||||
rb.nrune = k
|
||||
}
|
|
@ -0,0 +1,259 @@
|
|||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package norm
|
||||
|
||||
// This file contains Form-specific logic and wrappers for data in tables.go.
|
||||
|
||||
// Rune info is stored in a separate trie per composing form. A composing form
|
||||
// and its corresponding decomposing form share the same trie. Each trie maps
|
||||
// a rune to a uint16. The values take two forms. For v >= 0x8000:
|
||||
// bits
|
||||
// 15: 1 (inverse of NFD_QC bit of qcInfo)
|
||||
// 13..7: qcInfo (see below). isYesD is always true (no decompostion).
|
||||
// 6..0: ccc (compressed CCC value).
|
||||
// For v < 0x8000, the respective rune has a decomposition and v is an index
|
||||
// into a byte array of UTF-8 decomposition sequences and additional info and
|
||||
// has the form:
|
||||
// <header> <decomp_byte>* [<tccc> [<lccc>]]
|
||||
// The header contains the number of bytes in the decomposition (excluding this
|
||||
// length byte). The two most significant bits of this length byte correspond
|
||||
// to bit 5 and 4 of qcInfo (see below). The byte sequence itself starts at v+1.
|
||||
// The byte sequence is followed by a trailing and leading CCC if the values
|
||||
// for these are not zero. The value of v determines which ccc are appended
|
||||
// to the sequences. For v < firstCCC, there are none, for v >= firstCCC,
|
||||
// the sequence is followed by a trailing ccc, and for v >= firstLeadingCC
|
||||
// there is an additional leading ccc. The value of tccc itself is the
|
||||
// trailing CCC shifted left 2 bits. The two least-significant bits of tccc
|
||||
// are the number of trailing non-starters.
|
||||
|
||||
const (
|
||||
qcInfoMask = 0x3F // to clear all but the relevant bits in a qcInfo
|
||||
headerLenMask = 0x3F // extract the length value from the header byte
|
||||
headerFlagsMask = 0xC0 // extract the qcInfo bits from the header byte
|
||||
)
|
||||
|
||||
// Properties provides access to normalization properties of a rune.
|
||||
type Properties struct {
|
||||
pos uint8 // start position in reorderBuffer; used in composition.go
|
||||
size uint8 // length of UTF-8 encoding of this rune
|
||||
ccc uint8 // leading canonical combining class (ccc if not decomposition)
|
||||
tccc uint8 // trailing canonical combining class (ccc if not decomposition)
|
||||
nLead uint8 // number of leading non-starters.
|
||||
flags qcInfo // quick check flags
|
||||
index uint16
|
||||
}
|
||||
|
||||
// functions dispatchable per form
|
||||
type lookupFunc func(b input, i int) Properties
|
||||
|
||||
// formInfo holds Form-specific functions and tables.
|
||||
type formInfo struct {
|
||||
form Form
|
||||
composing, compatibility bool // form type
|
||||
info lookupFunc
|
||||
nextMain iterFunc
|
||||
}
|
||||
|
||||
var formTable = []*formInfo{{
|
||||
form: NFC,
|
||||
composing: true,
|
||||
compatibility: false,
|
||||
info: lookupInfoNFC,
|
||||
nextMain: nextComposed,
|
||||
}, {
|
||||
form: NFD,
|
||||
composing: false,
|
||||
compatibility: false,
|
||||
info: lookupInfoNFC,
|
||||
nextMain: nextDecomposed,
|
||||
}, {
|
||||
form: NFKC,
|
||||
composing: true,
|
||||
compatibility: true,
|
||||
info: lookupInfoNFKC,
|
||||
nextMain: nextComposed,
|
||||
}, {
|
||||
form: NFKD,
|
||||
composing: false,
|
||||
compatibility: true,
|
||||
info: lookupInfoNFKC,
|
||||
nextMain: nextDecomposed,
|
||||
}}
|
||||
|
||||
// We do not distinguish between boundaries for NFC, NFD, etc. to avoid
|
||||
// unexpected behavior for the user. For example, in NFD, there is a boundary
|
||||
// after 'a'. However, 'a' might combine with modifiers, so from the application's
|
||||
// perspective it is not a good boundary. We will therefore always use the
|
||||
// boundaries for the combining variants.
|
||||
|
||||
// BoundaryBefore returns true if this rune starts a new segment and
|
||||
// cannot combine with any rune on the left.
|
||||
func (p Properties) BoundaryBefore() bool {
|
||||
if p.ccc == 0 && !p.combinesBackward() {
|
||||
return true
|
||||
}
|
||||
// We assume that the CCC of the first character in a decomposition
|
||||
// is always non-zero if different from info.ccc and that we can return
|
||||
// false at this point. This is verified by maketables.
|
||||
return false
|
||||
}
|
||||
|
||||
// BoundaryAfter returns true if runes cannot combine with or otherwise
|
||||
// interact with this or previous runes.
|
||||
func (p Properties) BoundaryAfter() bool {
|
||||
// TODO: loosen these conditions.
|
||||
return p.isInert()
|
||||
}
|
||||
|
||||
// We pack quick check data in 4 bits:
|
||||
// 5: Combines forward (0 == false, 1 == true)
|
||||
// 4..3: NFC_QC Yes(00), No (10), or Maybe (11)
|
||||
// 2: NFD_QC Yes (0) or No (1). No also means there is a decomposition.
|
||||
// 1..0: Number of trailing non-starters.
|
||||
//
|
||||
// When all 4 bits are zero, the character is inert, meaning it is never
|
||||
// influenced by normalization.
|
||||
type qcInfo uint8
|
||||
|
||||
func (p Properties) isYesC() bool { return p.flags&0x10 == 0 }
|
||||
func (p Properties) isYesD() bool { return p.flags&0x4 == 0 }
|
||||
|
||||
func (p Properties) combinesForward() bool { return p.flags&0x20 != 0 }
|
||||
func (p Properties) combinesBackward() bool { return p.flags&0x8 != 0 } // == isMaybe
|
||||
func (p Properties) hasDecomposition() bool { return p.flags&0x4 != 0 } // == isNoD
|
||||
|
||||
func (p Properties) isInert() bool {
|
||||
return p.flags&qcInfoMask == 0 && p.ccc == 0
|
||||
}
|
||||
|
||||
func (p Properties) multiSegment() bool {
|
||||
return p.index >= firstMulti && p.index < endMulti
|
||||
}
|
||||
|
||||
func (p Properties) nLeadingNonStarters() uint8 {
|
||||
return p.nLead
|
||||
}
|
||||
|
||||
func (p Properties) nTrailingNonStarters() uint8 {
|
||||
return uint8(p.flags & 0x03)
|
||||
}
|
||||
|
||||
// Decomposition returns the decomposition for the underlying rune
|
||||
// or nil if there is none.
|
||||
func (p Properties) Decomposition() []byte {
|
||||
// TODO: create the decomposition for Hangul?
|
||||
if p.index == 0 {
|
||||
return nil
|
||||
}
|
||||
i := p.index
|
||||
n := decomps[i] & headerLenMask
|
||||
i++
|
||||
return decomps[i : i+uint16(n)]
|
||||
}
|
||||
|
||||
// Size returns the length of UTF-8 encoding of the rune.
|
||||
func (p Properties) Size() int {
|
||||
return int(p.size)
|
||||
}
|
||||
|
||||
// CCC returns the canonical combining class of the underlying rune.
|
||||
func (p Properties) CCC() uint8 {
|
||||
if p.index >= firstCCCZeroExcept {
|
||||
return 0
|
||||
}
|
||||
return ccc[p.ccc]
|
||||
}
|
||||
|
||||
// LeadCCC returns the CCC of the first rune in the decomposition.
|
||||
// If there is no decomposition, LeadCCC equals CCC.
|
||||
func (p Properties) LeadCCC() uint8 {
|
||||
return ccc[p.ccc]
|
||||
}
|
||||
|
||||
// TrailCCC returns the CCC of the last rune in the decomposition.
|
||||
// If there is no decomposition, TrailCCC equals CCC.
|
||||
func (p Properties) TrailCCC() uint8 {
|
||||
return ccc[p.tccc]
|
||||
}
|
||||
|
||||
// Recomposition
|
||||
// We use 32-bit keys instead of 64-bit for the two codepoint keys.
|
||||
// This clips off the bits of three entries, but we know this will not
|
||||
// result in a collision. In the unlikely event that changes to
|
||||
// UnicodeData.txt introduce collisions, the compiler will catch it.
|
||||
// Note that the recomposition map for NFC and NFKC are identical.
|
||||
|
||||
// combine returns the combined rune or 0 if it doesn't exist.
|
||||
func combine(a, b rune) rune {
|
||||
key := uint32(uint16(a))<<16 + uint32(uint16(b))
|
||||
return recompMap[key]
|
||||
}
|
||||
|
||||
func lookupInfoNFC(b input, i int) Properties {
|
||||
v, sz := b.charinfoNFC(i)
|
||||
return compInfo(v, sz)
|
||||
}
|
||||
|
||||
func lookupInfoNFKC(b input, i int) Properties {
|
||||
v, sz := b.charinfoNFKC(i)
|
||||
return compInfo(v, sz)
|
||||
}
|
||||
|
||||
// Properties returns properties for the first rune in s.
|
||||
func (f Form) Properties(s []byte) Properties {
|
||||
if f == NFC || f == NFD {
|
||||
return compInfo(nfcData.lookup(s))
|
||||
}
|
||||
return compInfo(nfkcData.lookup(s))
|
||||
}
|
||||
|
||||
// PropertiesString returns properties for the first rune in s.
|
||||
func (f Form) PropertiesString(s string) Properties {
|
||||
if f == NFC || f == NFD {
|
||||
return compInfo(nfcData.lookupString(s))
|
||||
}
|
||||
return compInfo(nfkcData.lookupString(s))
|
||||
}
|
||||
|
||||
// compInfo converts the information contained in v and sz
|
||||
// to a Properties. See the comment at the top of the file
|
||||
// for more information on the format.
|
||||
func compInfo(v uint16, sz int) Properties {
|
||||
if v == 0 {
|
||||
return Properties{size: uint8(sz)}
|
||||
} else if v >= 0x8000 {
|
||||
p := Properties{
|
||||
size: uint8(sz),
|
||||
ccc: uint8(v),
|
||||
tccc: uint8(v),
|
||||
flags: qcInfo(v >> 8),
|
||||
}
|
||||
if p.ccc > 0 || p.combinesBackward() {
|
||||
p.nLead = uint8(p.flags & 0x3)
|
||||
}
|
||||
return p
|
||||
}
|
||||
// has decomposition
|
||||
h := decomps[v]
|
||||
f := (qcInfo(h&headerFlagsMask) >> 2) | 0x4
|
||||
p := Properties{size: uint8(sz), flags: f, index: v}
|
||||
if v >= firstCCC {
|
||||
v += uint16(h&headerLenMask) + 1
|
||||
c := decomps[v]
|
||||
p.tccc = c >> 2
|
||||
p.flags |= qcInfo(c & 0x3)
|
||||
if v >= firstLeadingCCC {
|
||||
p.nLead = c & 0x3
|
||||
if v >= firstStarterWithNLead {
|
||||
// We were tricked. Remove the decomposition.
|
||||
p.flags &= 0x03
|
||||
p.index = 0
|
||||
return p
|
||||
}
|
||||
p.ccc = decomps[v+1]
|
||||
}
|
||||
}
|
||||
return p
|
||||
}
|
|
@ -0,0 +1,109 @@
|
|||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package norm
|
||||
|
||||
import "unicode/utf8"
|
||||
|
||||
type input struct {
|
||||
str string
|
||||
bytes []byte
|
||||
}
|
||||
|
||||
func inputBytes(str []byte) input {
|
||||
return input{bytes: str}
|
||||
}
|
||||
|
||||
func inputString(str string) input {
|
||||
return input{str: str}
|
||||
}
|
||||
|
||||
func (in *input) setBytes(str []byte) {
|
||||
in.str = ""
|
||||
in.bytes = str
|
||||
}
|
||||
|
||||
func (in *input) setString(str string) {
|
||||
in.str = str
|
||||
in.bytes = nil
|
||||
}
|
||||
|
||||
func (in *input) _byte(p int) byte {
|
||||
if in.bytes == nil {
|
||||
return in.str[p]
|
||||
}
|
||||
return in.bytes[p]
|
||||
}
|
||||
|
||||
func (in *input) skipASCII(p, max int) int {
|
||||
if in.bytes == nil {
|
||||
for ; p < max && in.str[p] < utf8.RuneSelf; p++ {
|
||||
}
|
||||
} else {
|
||||
for ; p < max && in.bytes[p] < utf8.RuneSelf; p++ {
|
||||
}
|
||||
}
|
||||
return p
|
||||
}
|
||||
|
||||
func (in *input) skipContinuationBytes(p int) int {
|
||||
if in.bytes == nil {
|
||||
for ; p < len(in.str) && !utf8.RuneStart(in.str[p]); p++ {
|
||||
}
|
||||
} else {
|
||||
for ; p < len(in.bytes) && !utf8.RuneStart(in.bytes[p]); p++ {
|
||||
}
|
||||
}
|
||||
return p
|
||||
}
|
||||
|
||||
func (in *input) appendSlice(buf []byte, b, e int) []byte {
|
||||
if in.bytes != nil {
|
||||
return append(buf, in.bytes[b:e]...)
|
||||
}
|
||||
for i := b; i < e; i++ {
|
||||
buf = append(buf, in.str[i])
|
||||
}
|
||||
return buf
|
||||
}
|
||||
|
||||
func (in *input) copySlice(buf []byte, b, e int) int {
|
||||
if in.bytes == nil {
|
||||
return copy(buf, in.str[b:e])
|
||||
}
|
||||
return copy(buf, in.bytes[b:e])
|
||||
}
|
||||
|
||||
func (in *input) charinfoNFC(p int) (uint16, int) {
|
||||
if in.bytes == nil {
|
||||
return nfcData.lookupString(in.str[p:])
|
||||
}
|
||||
return nfcData.lookup(in.bytes[p:])
|
||||
}
|
||||
|
||||
func (in *input) charinfoNFKC(p int) (uint16, int) {
|
||||
if in.bytes == nil {
|
||||
return nfkcData.lookupString(in.str[p:])
|
||||
}
|
||||
return nfkcData.lookup(in.bytes[p:])
|
||||
}
|
||||
|
||||
func (in *input) hangul(p int) (r rune) {
|
||||
var size int
|
||||
if in.bytes == nil {
|
||||
if !isHangulString(in.str[p:]) {
|
||||
return 0
|
||||
}
|
||||
r, size = utf8.DecodeRuneInString(in.str[p:])
|
||||
} else {
|
||||
if !isHangul(in.bytes[p:]) {
|
||||
return 0
|
||||
}
|
||||
r, size = utf8.DecodeRune(in.bytes[p:])
|
||||
}
|
||||
if size != hangulUTF8Size {
|
||||
return 0
|
||||
}
|
||||
return r
|
||||
}
|
|
@ -0,0 +1,457 @@
|
|||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package norm
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
// MaxSegmentSize is the maximum size of a byte buffer needed to consider any
|
||||
// sequence of starter and non-starter runes for the purpose of normalization.
|
||||
const MaxSegmentSize = maxByteBufferSize
|
||||
|
||||
// An Iter iterates over a string or byte slice, while normalizing it
|
||||
// to a given Form.
|
||||
type Iter struct {
|
||||
rb reorderBuffer
|
||||
buf [maxByteBufferSize]byte
|
||||
info Properties // first character saved from previous iteration
|
||||
next iterFunc // implementation of next depends on form
|
||||
asciiF iterFunc
|
||||
|
||||
p int // current position in input source
|
||||
multiSeg []byte // remainder of multi-segment decomposition
|
||||
}
|
||||
|
||||
type iterFunc func(*Iter) []byte
|
||||
|
||||
// Init initializes i to iterate over src after normalizing it to Form f.
|
||||
func (i *Iter) Init(f Form, src []byte) {
|
||||
i.p = 0
|
||||
if len(src) == 0 {
|
||||
i.setDone()
|
||||
i.rb.nsrc = 0
|
||||
return
|
||||
}
|
||||
i.multiSeg = nil
|
||||
i.rb.init(f, src)
|
||||
i.next = i.rb.f.nextMain
|
||||
i.asciiF = nextASCIIBytes
|
||||
i.info = i.rb.f.info(i.rb.src, i.p)
|
||||
i.rb.ss.first(i.info)
|
||||
}
|
||||
|
||||
// InitString initializes i to iterate over src after normalizing it to Form f.
|
||||
func (i *Iter) InitString(f Form, src string) {
|
||||
i.p = 0
|
||||
if len(src) == 0 {
|
||||
i.setDone()
|
||||
i.rb.nsrc = 0
|
||||
return
|
||||
}
|
||||
i.multiSeg = nil
|
||||
i.rb.initString(f, src)
|
||||
i.next = i.rb.f.nextMain
|
||||
i.asciiF = nextASCIIString
|
||||
i.info = i.rb.f.info(i.rb.src, i.p)
|
||||
i.rb.ss.first(i.info)
|
||||
}
|
||||
|
||||
// Seek sets the segment to be returned by the next call to Next to start
|
||||
// at position p. It is the responsibility of the caller to set p to the
|
||||
// start of a segment.
|
||||
func (i *Iter) Seek(offset int64, whence int) (int64, error) {
|
||||
var abs int64
|
||||
switch whence {
|
||||
case 0:
|
||||
abs = offset
|
||||
case 1:
|
||||
abs = int64(i.p) + offset
|
||||
case 2:
|
||||
abs = int64(i.rb.nsrc) + offset
|
||||
default:
|
||||
return 0, fmt.Errorf("norm: invalid whence")
|
||||
}
|
||||
if abs < 0 {
|
||||
return 0, fmt.Errorf("norm: negative position")
|
||||
}
|
||||
if int(abs) >= i.rb.nsrc {
|
||||
i.setDone()
|
||||
return int64(i.p), nil
|
||||
}
|
||||
i.p = int(abs)
|
||||
i.multiSeg = nil
|
||||
i.next = i.rb.f.nextMain
|
||||
i.info = i.rb.f.info(i.rb.src, i.p)
|
||||
i.rb.ss.first(i.info)
|
||||
return abs, nil
|
||||
}
|
||||
|
||||
// returnSlice returns a slice of the underlying input type as a byte slice.
|
||||
// If the underlying is of type []byte, it will simply return a slice.
|
||||
// If the underlying is of type string, it will copy the slice to the buffer
|
||||
// and return that.
|
||||
func (i *Iter) returnSlice(a, b int) []byte {
|
||||
if i.rb.src.bytes == nil {
|
||||
return i.buf[:copy(i.buf[:], i.rb.src.str[a:b])]
|
||||
}
|
||||
return i.rb.src.bytes[a:b]
|
||||
}
|
||||
|
||||
// Pos returns the byte position at which the next call to Next will commence processing.
|
||||
func (i *Iter) Pos() int {
|
||||
return i.p
|
||||
}
|
||||
|
||||
func (i *Iter) setDone() {
|
||||
i.next = nextDone
|
||||
i.p = i.rb.nsrc
|
||||
}
|
||||
|
||||
// Done returns true if there is no more input to process.
|
||||
func (i *Iter) Done() bool {
|
||||
return i.p >= i.rb.nsrc
|
||||
}
|
||||
|
||||
// Next returns f(i.input[i.Pos():n]), where n is a boundary of i.input.
|
||||
// For any input a and b for which f(a) == f(b), subsequent calls
|
||||
// to Next will return the same segments.
|
||||
// Modifying runes are grouped together with the preceding starter, if such a starter exists.
|
||||
// Although not guaranteed, n will typically be the smallest possible n.
|
||||
func (i *Iter) Next() []byte {
|
||||
return i.next(i)
|
||||
}
|
||||
|
||||
func nextASCIIBytes(i *Iter) []byte {
|
||||
p := i.p + 1
|
||||
if p >= i.rb.nsrc {
|
||||
i.setDone()
|
||||
return i.rb.src.bytes[i.p:p]
|
||||
}
|
||||
if i.rb.src.bytes[p] < utf8.RuneSelf {
|
||||
p0 := i.p
|
||||
i.p = p
|
||||
return i.rb.src.bytes[p0:p]
|
||||
}
|
||||
i.info = i.rb.f.info(i.rb.src, i.p)
|
||||
i.next = i.rb.f.nextMain
|
||||
return i.next(i)
|
||||
}
|
||||
|
||||
func nextASCIIString(i *Iter) []byte {
|
||||
p := i.p + 1
|
||||
if p >= i.rb.nsrc {
|
||||
i.buf[0] = i.rb.src.str[i.p]
|
||||
i.setDone()
|
||||
return i.buf[:1]
|
||||
}
|
||||
if i.rb.src.str[p] < utf8.RuneSelf {
|
||||
i.buf[0] = i.rb.src.str[i.p]
|
||||
i.p = p
|
||||
return i.buf[:1]
|
||||
}
|
||||
i.info = i.rb.f.info(i.rb.src, i.p)
|
||||
i.next = i.rb.f.nextMain
|
||||
return i.next(i)
|
||||
}
|
||||
|
||||
func nextHangul(i *Iter) []byte {
|
||||
p := i.p
|
||||
next := p + hangulUTF8Size
|
||||
if next >= i.rb.nsrc {
|
||||
i.setDone()
|
||||
} else if i.rb.src.hangul(next) == 0 {
|
||||
i.rb.ss.next(i.info)
|
||||
i.info = i.rb.f.info(i.rb.src, i.p)
|
||||
i.next = i.rb.f.nextMain
|
||||
return i.next(i)
|
||||
}
|
||||
i.p = next
|
||||
return i.buf[:decomposeHangul(i.buf[:], i.rb.src.hangul(p))]
|
||||
}
|
||||
|
||||
func nextDone(i *Iter) []byte {
|
||||
return nil
|
||||
}
|
||||
|
||||
// nextMulti is used for iterating over multi-segment decompositions
|
||||
// for decomposing normal forms.
|
||||
func nextMulti(i *Iter) []byte {
|
||||
j := 0
|
||||
d := i.multiSeg
|
||||
// skip first rune
|
||||
for j = 1; j < len(d) && !utf8.RuneStart(d[j]); j++ {
|
||||
}
|
||||
for j < len(d) {
|
||||
info := i.rb.f.info(input{bytes: d}, j)
|
||||
if info.BoundaryBefore() {
|
||||
i.multiSeg = d[j:]
|
||||
return d[:j]
|
||||
}
|
||||
j += int(info.size)
|
||||
}
|
||||
// treat last segment as normal decomposition
|
||||
i.next = i.rb.f.nextMain
|
||||
return i.next(i)
|
||||
}
|
||||
|
||||
// nextMultiNorm is used for iterating over multi-segment decompositions
|
||||
// for composing normal forms.
|
||||
func nextMultiNorm(i *Iter) []byte {
|
||||
j := 0
|
||||
d := i.multiSeg
|
||||
for j < len(d) {
|
||||
info := i.rb.f.info(input{bytes: d}, j)
|
||||
if info.BoundaryBefore() {
|
||||
i.rb.compose()
|
||||
seg := i.buf[:i.rb.flushCopy(i.buf[:])]
|
||||
i.rb.insertUnsafe(input{bytes: d}, j, info)
|
||||
i.multiSeg = d[j+int(info.size):]
|
||||
return seg
|
||||
}
|
||||
i.rb.insertUnsafe(input{bytes: d}, j, info)
|
||||
j += int(info.size)
|
||||
}
|
||||
i.multiSeg = nil
|
||||
i.next = nextComposed
|
||||
return doNormComposed(i)
|
||||
}
|
||||
|
||||
// nextDecomposed is the implementation of Next for forms NFD and NFKD.
|
||||
func nextDecomposed(i *Iter) (next []byte) {
|
||||
outp := 0
|
||||
inCopyStart, outCopyStart := i.p, 0
|
||||
for {
|
||||
if sz := int(i.info.size); sz <= 1 {
|
||||
i.rb.ss = 0
|
||||
p := i.p
|
||||
i.p++ // ASCII or illegal byte. Either way, advance by 1.
|
||||
if i.p >= i.rb.nsrc {
|
||||
i.setDone()
|
||||
return i.returnSlice(p, i.p)
|
||||
} else if i.rb.src._byte(i.p) < utf8.RuneSelf {
|
||||
i.next = i.asciiF
|
||||
return i.returnSlice(p, i.p)
|
||||
}
|
||||
outp++
|
||||
} else if d := i.info.Decomposition(); d != nil {
|
||||
// Note: If leading CCC != 0, then len(d) == 2 and last is also non-zero.
|
||||
// Case 1: there is a leftover to copy. In this case the decomposition
|
||||
// must begin with a modifier and should always be appended.
|
||||
// Case 2: no leftover. Simply return d if followed by a ccc == 0 value.
|
||||
p := outp + len(d)
|
||||
if outp > 0 {
|
||||
i.rb.src.copySlice(i.buf[outCopyStart:], inCopyStart, i.p)
|
||||
// TODO: this condition should not be possible, but we leave it
|
||||
// in for defensive purposes.
|
||||
if p > len(i.buf) {
|
||||
return i.buf[:outp]
|
||||
}
|
||||
} else if i.info.multiSegment() {
|
||||
// outp must be 0 as multi-segment decompositions always
|
||||
// start a new segment.
|
||||
if i.multiSeg == nil {
|
||||
i.multiSeg = d
|
||||
i.next = nextMulti
|
||||
return nextMulti(i)
|
||||
}
|
||||
// We are in the last segment. Treat as normal decomposition.
|
||||
d = i.multiSeg
|
||||
i.multiSeg = nil
|
||||
p = len(d)
|
||||
}
|
||||
prevCC := i.info.tccc
|
||||
if i.p += sz; i.p >= i.rb.nsrc {
|
||||
i.setDone()
|
||||
i.info = Properties{} // Force BoundaryBefore to succeed.
|
||||
} else {
|
||||
i.info = i.rb.f.info(i.rb.src, i.p)
|
||||
}
|
||||
switch i.rb.ss.next(i.info) {
|
||||
case ssOverflow:
|
||||
i.next = nextCGJDecompose
|
||||
fallthrough
|
||||
case ssStarter:
|
||||
if outp > 0 {
|
||||
copy(i.buf[outp:], d)
|
||||
return i.buf[:p]
|
||||
}
|
||||
return d
|
||||
}
|
||||
copy(i.buf[outp:], d)
|
||||
outp = p
|
||||
inCopyStart, outCopyStart = i.p, outp
|
||||
if i.info.ccc < prevCC {
|
||||
goto doNorm
|
||||
}
|
||||
continue
|
||||
} else if r := i.rb.src.hangul(i.p); r != 0 {
|
||||
outp = decomposeHangul(i.buf[:], r)
|
||||
i.p += hangulUTF8Size
|
||||
inCopyStart, outCopyStart = i.p, outp
|
||||
if i.p >= i.rb.nsrc {
|
||||
i.setDone()
|
||||
break
|
||||
} else if i.rb.src.hangul(i.p) != 0 {
|
||||
i.next = nextHangul
|
||||
return i.buf[:outp]
|
||||
}
|
||||
} else {
|
||||
p := outp + sz
|
||||
if p > len(i.buf) {
|
||||
break
|
||||
}
|
||||
outp = p
|
||||
i.p += sz
|
||||
}
|
||||
if i.p >= i.rb.nsrc {
|
||||
i.setDone()
|
||||
break
|
||||
}
|
||||
prevCC := i.info.tccc
|
||||
i.info = i.rb.f.info(i.rb.src, i.p)
|
||||
if v := i.rb.ss.next(i.info); v == ssStarter {
|
||||
break
|
||||
} else if v == ssOverflow {
|
||||
i.next = nextCGJDecompose
|
||||
break
|
||||
}
|
||||
if i.info.ccc < prevCC {
|
||||
goto doNorm
|
||||
}
|
||||
}
|
||||
if outCopyStart == 0 {
|
||||
return i.returnSlice(inCopyStart, i.p)
|
||||
} else if inCopyStart < i.p {
|
||||
i.rb.src.copySlice(i.buf[outCopyStart:], inCopyStart, i.p)
|
||||
}
|
||||
return i.buf[:outp]
|
||||
doNorm:
|
||||
// Insert what we have decomposed so far in the reorderBuffer.
|
||||
// As we will only reorder, there will always be enough room.
|
||||
i.rb.src.copySlice(i.buf[outCopyStart:], inCopyStart, i.p)
|
||||
i.rb.insertDecomposed(i.buf[0:outp])
|
||||
return doNormDecomposed(i)
|
||||
}
|
||||
|
||||
func doNormDecomposed(i *Iter) []byte {
|
||||
for {
|
||||
i.rb.insertUnsafe(i.rb.src, i.p, i.info)
|
||||
if i.p += int(i.info.size); i.p >= i.rb.nsrc {
|
||||
i.setDone()
|
||||
break
|
||||
}
|
||||
i.info = i.rb.f.info(i.rb.src, i.p)
|
||||
if i.info.ccc == 0 {
|
||||
break
|
||||
}
|
||||
if s := i.rb.ss.next(i.info); s == ssOverflow {
|
||||
i.next = nextCGJDecompose
|
||||
break
|
||||
}
|
||||
}
|
||||
// new segment or too many combining characters: exit normalization
|
||||
return i.buf[:i.rb.flushCopy(i.buf[:])]
|
||||
}
|
||||
|
||||
func nextCGJDecompose(i *Iter) []byte {
|
||||
i.rb.ss = 0
|
||||
i.rb.insertCGJ()
|
||||
i.next = nextDecomposed
|
||||
i.rb.ss.first(i.info)
|
||||
buf := doNormDecomposed(i)
|
||||
return buf
|
||||
}
|
||||
|
||||
// nextComposed is the implementation of Next for forms NFC and NFKC.
|
||||
func nextComposed(i *Iter) []byte {
|
||||
outp, startp := 0, i.p
|
||||
var prevCC uint8
|
||||
for {
|
||||
if !i.info.isYesC() {
|
||||
goto doNorm
|
||||
}
|
||||
prevCC = i.info.tccc
|
||||
sz := int(i.info.size)
|
||||
if sz == 0 {
|
||||
sz = 1 // illegal rune: copy byte-by-byte
|
||||
}
|
||||
p := outp + sz
|
||||
if p > len(i.buf) {
|
||||
break
|
||||
}
|
||||
outp = p
|
||||
i.p += sz
|
||||
if i.p >= i.rb.nsrc {
|
||||
i.setDone()
|
||||
break
|
||||
} else if i.rb.src._byte(i.p) < utf8.RuneSelf {
|
||||
i.rb.ss = 0
|
||||
i.next = i.asciiF
|
||||
break
|
||||
}
|
||||
i.info = i.rb.f.info(i.rb.src, i.p)
|
||||
if v := i.rb.ss.next(i.info); v == ssStarter {
|
||||
break
|
||||
} else if v == ssOverflow {
|
||||
i.next = nextCGJCompose
|
||||
break
|
||||
}
|
||||
if i.info.ccc < prevCC {
|
||||
goto doNorm
|
||||
}
|
||||
}
|
||||
return i.returnSlice(startp, i.p)
|
||||
doNorm:
|
||||
// reset to start position
|
||||
i.p = startp
|
||||
i.info = i.rb.f.info(i.rb.src, i.p)
|
||||
i.rb.ss.first(i.info)
|
||||
if i.info.multiSegment() {
|
||||
d := i.info.Decomposition()
|
||||
info := i.rb.f.info(input{bytes: d}, 0)
|
||||
i.rb.insertUnsafe(input{bytes: d}, 0, info)
|
||||
i.multiSeg = d[int(info.size):]
|
||||
i.next = nextMultiNorm
|
||||
return nextMultiNorm(i)
|
||||
}
|
||||
i.rb.ss.first(i.info)
|
||||
i.rb.insertUnsafe(i.rb.src, i.p, i.info)
|
||||
return doNormComposed(i)
|
||||
}
|
||||
|
||||
func doNormComposed(i *Iter) []byte {
|
||||
// First rune should already be inserted.
|
||||
for {
|
||||
if i.p += int(i.info.size); i.p >= i.rb.nsrc {
|
||||
i.setDone()
|
||||
break
|
||||
}
|
||||
i.info = i.rb.f.info(i.rb.src, i.p)
|
||||
if s := i.rb.ss.next(i.info); s == ssStarter {
|
||||
break
|
||||
} else if s == ssOverflow {
|
||||
i.next = nextCGJCompose
|
||||
break
|
||||
}
|
||||
i.rb.insertUnsafe(i.rb.src, i.p, i.info)
|
||||
}
|
||||
i.rb.compose()
|
||||
seg := i.buf[:i.rb.flushCopy(i.buf[:])]
|
||||
return seg
|
||||
}
|
||||
|
||||
func nextCGJCompose(i *Iter) []byte {
|
||||
i.rb.ss = 0 // instead of first
|
||||
i.rb.insertCGJ()
|
||||
i.next = nextComposed
|
||||
// Note that we treat any rune with nLeadingNonStarters > 0 as a non-starter,
|
||||
// even if they are not. This is particularly dubious for U+FF9E and UFF9A.
|
||||
// If we ever change that, insert a check here.
|
||||
i.rb.ss.first(i.info)
|
||||
i.rb.insertUnsafe(i.rb.src, i.p, i.info)
|
||||
return doNormComposed(i)
|
||||
}
|
|
@ -0,0 +1,976 @@
|
|||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ignore
|
||||
|
||||
// Normalization table generator.
|
||||
// Data read from the web.
|
||||
// See forminfo.go for a description of the trie values associated with each rune.
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"flag"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/text/internal/gen"
|
||||
"golang.org/x/text/internal/triegen"
|
||||
"golang.org/x/text/internal/ucd"
|
||||
)
|
||||
|
||||
func main() {
|
||||
gen.Init()
|
||||
loadUnicodeData()
|
||||
compactCCC()
|
||||
loadCompositionExclusions()
|
||||
completeCharFields(FCanonical)
|
||||
completeCharFields(FCompatibility)
|
||||
computeNonStarterCounts()
|
||||
verifyComputed()
|
||||
printChars()
|
||||
testDerived()
|
||||
printTestdata()
|
||||
makeTables()
|
||||
}
|
||||
|
||||
var (
|
||||
tablelist = flag.String("tables",
|
||||
"all",
|
||||
"comma-separated list of which tables to generate; "+
|
||||
"can be 'decomp', 'recomp', 'info' and 'all'")
|
||||
test = flag.Bool("test",
|
||||
false,
|
||||
"test existing tables against DerivedNormalizationProps and generate test data for regression testing")
|
||||
verbose = flag.Bool("verbose",
|
||||
false,
|
||||
"write data to stdout as it is parsed")
|
||||
)
|
||||
|
||||
const MaxChar = 0x10FFFF // anything above this shouldn't exist
|
||||
|
||||
// Quick Check properties of runes allow us to quickly
|
||||
// determine whether a rune may occur in a normal form.
|
||||
// For a given normal form, a rune may be guaranteed to occur
|
||||
// verbatim (QC=Yes), may or may not combine with another
|
||||
// rune (QC=Maybe), or may not occur (QC=No).
|
||||
type QCResult int
|
||||
|
||||
const (
|
||||
QCUnknown QCResult = iota
|
||||
QCYes
|
||||
QCNo
|
||||
QCMaybe
|
||||
)
|
||||
|
||||
func (r QCResult) String() string {
|
||||
switch r {
|
||||
case QCYes:
|
||||
return "Yes"
|
||||
case QCNo:
|
||||
return "No"
|
||||
case QCMaybe:
|
||||
return "Maybe"
|
||||
}
|
||||
return "***UNKNOWN***"
|
||||
}
|
||||
|
||||
const (
|
||||
FCanonical = iota // NFC or NFD
|
||||
FCompatibility // NFKC or NFKD
|
||||
FNumberOfFormTypes
|
||||
)
|
||||
|
||||
const (
|
||||
MComposed = iota // NFC or NFKC
|
||||
MDecomposed // NFD or NFKD
|
||||
MNumberOfModes
|
||||
)
|
||||
|
||||
// This contains only the properties we're interested in.
|
||||
type Char struct {
|
||||
name string
|
||||
codePoint rune // if zero, this index is not a valid code point.
|
||||
ccc uint8 // canonical combining class
|
||||
origCCC uint8
|
||||
excludeInComp bool // from CompositionExclusions.txt
|
||||
compatDecomp bool // it has a compatibility expansion
|
||||
|
||||
nTrailingNonStarters uint8
|
||||
nLeadingNonStarters uint8 // must be equal to trailing if non-zero
|
||||
|
||||
forms [FNumberOfFormTypes]FormInfo // For FCanonical and FCompatibility
|
||||
|
||||
state State
|
||||
}
|
||||
|
||||
var chars = make([]Char, MaxChar+1)
|
||||
var cccMap = make(map[uint8]uint8)
|
||||
|
||||
func (c Char) String() string {
|
||||
buf := new(bytes.Buffer)
|
||||
|
||||
fmt.Fprintf(buf, "%U [%s]:\n", c.codePoint, c.name)
|
||||
fmt.Fprintf(buf, " ccc: %v\n", c.ccc)
|
||||
fmt.Fprintf(buf, " excludeInComp: %v\n", c.excludeInComp)
|
||||
fmt.Fprintf(buf, " compatDecomp: %v\n", c.compatDecomp)
|
||||
fmt.Fprintf(buf, " state: %v\n", c.state)
|
||||
fmt.Fprintf(buf, " NFC:\n")
|
||||
fmt.Fprint(buf, c.forms[FCanonical])
|
||||
fmt.Fprintf(buf, " NFKC:\n")
|
||||
fmt.Fprint(buf, c.forms[FCompatibility])
|
||||
|
||||
return buf.String()
|
||||
}
|
||||
|
||||
// In UnicodeData.txt, some ranges are marked like this:
|
||||
// 3400;<CJK Ideograph Extension A, First>;Lo;0;L;;;;;N;;;;;
|
||||
// 4DB5;<CJK Ideograph Extension A, Last>;Lo;0;L;;;;;N;;;;;
|
||||
// parseCharacter keeps a state variable indicating the weirdness.
|
||||
type State int
|
||||
|
||||
const (
|
||||
SNormal State = iota // known to be zero for the type
|
||||
SFirst
|
||||
SLast
|
||||
SMissing
|
||||
)
|
||||
|
||||
var lastChar = rune('\u0000')
|
||||
|
||||
func (c Char) isValid() bool {
|
||||
return c.codePoint != 0 && c.state != SMissing
|
||||
}
|
||||
|
||||
type FormInfo struct {
|
||||
quickCheck [MNumberOfModes]QCResult // index: MComposed or MDecomposed
|
||||
verified [MNumberOfModes]bool // index: MComposed or MDecomposed
|
||||
|
||||
combinesForward bool // May combine with rune on the right
|
||||
combinesBackward bool // May combine with rune on the left
|
||||
isOneWay bool // Never appears in result
|
||||
inDecomp bool // Some decompositions result in this char.
|
||||
decomp Decomposition
|
||||
expandedDecomp Decomposition
|
||||
}
|
||||
|
||||
func (f FormInfo) String() string {
|
||||
buf := bytes.NewBuffer(make([]byte, 0))
|
||||
|
||||
fmt.Fprintf(buf, " quickCheck[C]: %v\n", f.quickCheck[MComposed])
|
||||
fmt.Fprintf(buf, " quickCheck[D]: %v\n", f.quickCheck[MDecomposed])
|
||||
fmt.Fprintf(buf, " cmbForward: %v\n", f.combinesForward)
|
||||
fmt.Fprintf(buf, " cmbBackward: %v\n", f.combinesBackward)
|
||||
fmt.Fprintf(buf, " isOneWay: %v\n", f.isOneWay)
|
||||
fmt.Fprintf(buf, " inDecomp: %v\n", f.inDecomp)
|
||||
fmt.Fprintf(buf, " decomposition: %X\n", f.decomp)
|
||||
fmt.Fprintf(buf, " expandedDecomp: %X\n", f.expandedDecomp)
|
||||
|
||||
return buf.String()
|
||||
}
|
||||
|
||||
type Decomposition []rune
|
||||
|
||||
func parseDecomposition(s string, skipfirst bool) (a []rune, err error) {
|
||||
decomp := strings.Split(s, " ")
|
||||
if len(decomp) > 0 && skipfirst {
|
||||
decomp = decomp[1:]
|
||||
}
|
||||
for _, d := range decomp {
|
||||
point, err := strconv.ParseUint(d, 16, 64)
|
||||
if err != nil {
|
||||
return a, err
|
||||
}
|
||||
a = append(a, rune(point))
|
||||
}
|
||||
return a, nil
|
||||
}
|
||||
|
||||
func loadUnicodeData() {
|
||||
f := gen.OpenUCDFile("UnicodeData.txt")
|
||||
defer f.Close()
|
||||
p := ucd.New(f)
|
||||
for p.Next() {
|
||||
r := p.Rune(ucd.CodePoint)
|
||||
char := &chars[r]
|
||||
|
||||
char.ccc = uint8(p.Uint(ucd.CanonicalCombiningClass))
|
||||
decmap := p.String(ucd.DecompMapping)
|
||||
|
||||
exp, err := parseDecomposition(decmap, false)
|
||||
isCompat := false
|
||||
if err != nil {
|
||||
if len(decmap) > 0 {
|
||||
exp, err = parseDecomposition(decmap, true)
|
||||
if err != nil {
|
||||
log.Fatalf(`%U: bad decomp |%v|: "%s"`, r, decmap, err)
|
||||
}
|
||||
isCompat = true
|
||||
}
|
||||
}
|
||||
|
||||
char.name = p.String(ucd.Name)
|
||||
char.codePoint = r
|
||||
char.forms[FCompatibility].decomp = exp
|
||||
if !isCompat {
|
||||
char.forms[FCanonical].decomp = exp
|
||||
} else {
|
||||
char.compatDecomp = true
|
||||
}
|
||||
if len(decmap) > 0 {
|
||||
char.forms[FCompatibility].decomp = exp
|
||||
}
|
||||
}
|
||||
if err := p.Err(); err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
// compactCCC converts the sparse set of CCC values to a continguous one,
|
||||
// reducing the number of bits needed from 8 to 6.
|
||||
func compactCCC() {
|
||||
m := make(map[uint8]uint8)
|
||||
for i := range chars {
|
||||
c := &chars[i]
|
||||
m[c.ccc] = 0
|
||||
}
|
||||
cccs := []int{}
|
||||
for v, _ := range m {
|
||||
cccs = append(cccs, int(v))
|
||||
}
|
||||
sort.Ints(cccs)
|
||||
for i, c := range cccs {
|
||||
cccMap[uint8(i)] = uint8(c)
|
||||
m[uint8(c)] = uint8(i)
|
||||
}
|
||||
for i := range chars {
|
||||
c := &chars[i]
|
||||
c.origCCC = c.ccc
|
||||
c.ccc = m[c.ccc]
|
||||
}
|
||||
if len(m) >= 1<<6 {
|
||||
log.Fatalf("too many difference CCC values: %d >= 64", len(m))
|
||||
}
|
||||
}
|
||||
|
||||
// CompositionExclusions.txt has form:
|
||||
// 0958 # ...
|
||||
// See http://unicode.org/reports/tr44/ for full explanation
|
||||
func loadCompositionExclusions() {
|
||||
f := gen.OpenUCDFile("CompositionExclusions.txt")
|
||||
defer f.Close()
|
||||
p := ucd.New(f)
|
||||
for p.Next() {
|
||||
c := &chars[p.Rune(0)]
|
||||
if c.excludeInComp {
|
||||
log.Fatalf("%U: Duplicate entry in exclusions.", c.codePoint)
|
||||
}
|
||||
c.excludeInComp = true
|
||||
}
|
||||
if e := p.Err(); e != nil {
|
||||
log.Fatal(e)
|
||||
}
|
||||
}
|
||||
|
||||
// hasCompatDecomp returns true if any of the recursive
|
||||
// decompositions contains a compatibility expansion.
|
||||
// In this case, the character may not occur in NFK*.
|
||||
func hasCompatDecomp(r rune) bool {
|
||||
c := &chars[r]
|
||||
if c.compatDecomp {
|
||||
return true
|
||||
}
|
||||
for _, d := range c.forms[FCompatibility].decomp {
|
||||
if hasCompatDecomp(d) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// Hangul related constants.
|
||||
const (
|
||||
HangulBase = 0xAC00
|
||||
HangulEnd = 0xD7A4 // hangulBase + Jamo combinations (19 * 21 * 28)
|
||||
|
||||
JamoLBase = 0x1100
|
||||
JamoLEnd = 0x1113
|
||||
JamoVBase = 0x1161
|
||||
JamoVEnd = 0x1176
|
||||
JamoTBase = 0x11A8
|
||||
JamoTEnd = 0x11C3
|
||||
|
||||
JamoLVTCount = 19 * 21 * 28
|
||||
JamoTCount = 28
|
||||
)
|
||||
|
||||
func isHangul(r rune) bool {
|
||||
return HangulBase <= r && r < HangulEnd
|
||||
}
|
||||
|
||||
func isHangulWithoutJamoT(r rune) bool {
|
||||
if !isHangul(r) {
|
||||
return false
|
||||
}
|
||||
r -= HangulBase
|
||||
return r < JamoLVTCount && r%JamoTCount == 0
|
||||
}
|
||||
|
||||
func ccc(r rune) uint8 {
|
||||
return chars[r].ccc
|
||||
}
|
||||
|
||||
// Insert a rune in a buffer, ordered by Canonical Combining Class.
|
||||
func insertOrdered(b Decomposition, r rune) Decomposition {
|
||||
n := len(b)
|
||||
b = append(b, 0)
|
||||
cc := ccc(r)
|
||||
if cc > 0 {
|
||||
// Use bubble sort.
|
||||
for ; n > 0; n-- {
|
||||
if ccc(b[n-1]) <= cc {
|
||||
break
|
||||
}
|
||||
b[n] = b[n-1]
|
||||
}
|
||||
}
|
||||
b[n] = r
|
||||
return b
|
||||
}
|
||||
|
||||
// Recursively decompose.
|
||||
func decomposeRecursive(form int, r rune, d Decomposition) Decomposition {
|
||||
dcomp := chars[r].forms[form].decomp
|
||||
if len(dcomp) == 0 {
|
||||
return insertOrdered(d, r)
|
||||
}
|
||||
for _, c := range dcomp {
|
||||
d = decomposeRecursive(form, c, d)
|
||||
}
|
||||
return d
|
||||
}
|
||||
|
||||
func completeCharFields(form int) {
|
||||
// Phase 0: pre-expand decomposition.
|
||||
for i := range chars {
|
||||
f := &chars[i].forms[form]
|
||||
if len(f.decomp) == 0 {
|
||||
continue
|
||||
}
|
||||
exp := make(Decomposition, 0)
|
||||
for _, c := range f.decomp {
|
||||
exp = decomposeRecursive(form, c, exp)
|
||||
}
|
||||
f.expandedDecomp = exp
|
||||
}
|
||||
|
||||
// Phase 1: composition exclusion, mark decomposition.
|
||||
for i := range chars {
|
||||
c := &chars[i]
|
||||
f := &c.forms[form]
|
||||
|
||||
// Marks script-specific exclusions and version restricted.
|
||||
f.isOneWay = c.excludeInComp
|
||||
|
||||
// Singletons
|
||||
f.isOneWay = f.isOneWay || len(f.decomp) == 1
|
||||
|
||||
// Non-starter decompositions
|
||||
if len(f.decomp) > 1 {
|
||||
chk := c.ccc != 0 || chars[f.decomp[0]].ccc != 0
|
||||
f.isOneWay = f.isOneWay || chk
|
||||
}
|
||||
|
||||
// Runes that decompose into more than two runes.
|
||||
f.isOneWay = f.isOneWay || len(f.decomp) > 2
|
||||
|
||||
if form == FCompatibility {
|
||||
f.isOneWay = f.isOneWay || hasCompatDecomp(c.codePoint)
|
||||
}
|
||||
|
||||
for _, r := range f.decomp {
|
||||
chars[r].forms[form].inDecomp = true
|
||||
}
|
||||
}
|
||||
|
||||
// Phase 2: forward and backward combining.
|
||||
for i := range chars {
|
||||
c := &chars[i]
|
||||
f := &c.forms[form]
|
||||
|
||||
if !f.isOneWay && len(f.decomp) == 2 {
|
||||
f0 := &chars[f.decomp[0]].forms[form]
|
||||
f1 := &chars[f.decomp[1]].forms[form]
|
||||
if !f0.isOneWay {
|
||||
f0.combinesForward = true
|
||||
}
|
||||
if !f1.isOneWay {
|
||||
f1.combinesBackward = true
|
||||
}
|
||||
}
|
||||
if isHangulWithoutJamoT(rune(i)) {
|
||||
f.combinesForward = true
|
||||
}
|
||||
}
|
||||
|
||||
// Phase 3: quick check values.
|
||||
for i := range chars {
|
||||
c := &chars[i]
|
||||
f := &c.forms[form]
|
||||
|
||||
switch {
|
||||
case len(f.decomp) > 0:
|
||||
f.quickCheck[MDecomposed] = QCNo
|
||||
case isHangul(rune(i)):
|
||||
f.quickCheck[MDecomposed] = QCNo
|
||||
default:
|
||||
f.quickCheck[MDecomposed] = QCYes
|
||||
}
|
||||
switch {
|
||||
case f.isOneWay:
|
||||
f.quickCheck[MComposed] = QCNo
|
||||
case (i & 0xffff00) == JamoLBase:
|
||||
f.quickCheck[MComposed] = QCYes
|
||||
if JamoLBase <= i && i < JamoLEnd {
|
||||
f.combinesForward = true
|
||||
}
|
||||
if JamoVBase <= i && i < JamoVEnd {
|
||||
f.quickCheck[MComposed] = QCMaybe
|
||||
f.combinesBackward = true
|
||||
f.combinesForward = true
|
||||
}
|
||||
if JamoTBase <= i && i < JamoTEnd {
|
||||
f.quickCheck[MComposed] = QCMaybe
|
||||
f.combinesBackward = true
|
||||
}
|
||||
case !f.combinesBackward:
|
||||
f.quickCheck[MComposed] = QCYes
|
||||
default:
|
||||
f.quickCheck[MComposed] = QCMaybe
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func computeNonStarterCounts() {
|
||||
// Phase 4: leading and trailing non-starter count
|
||||
for i := range chars {
|
||||
c := &chars[i]
|
||||
|
||||
runes := []rune{rune(i)}
|
||||
// We always use FCompatibility so that the CGJ insertion points do not
|
||||
// change for repeated normalizations with different forms.
|
||||
if exp := c.forms[FCompatibility].expandedDecomp; len(exp) > 0 {
|
||||
runes = exp
|
||||
}
|
||||
// We consider runes that combine backwards to be non-starters for the
|
||||
// purpose of Stream-Safe Text Processing.
|
||||
for _, r := range runes {
|
||||
if cr := &chars[r]; cr.ccc == 0 && !cr.forms[FCompatibility].combinesBackward {
|
||||
break
|
||||
}
|
||||
c.nLeadingNonStarters++
|
||||
}
|
||||
for i := len(runes) - 1; i >= 0; i-- {
|
||||
if cr := &chars[runes[i]]; cr.ccc == 0 && !cr.forms[FCompatibility].combinesBackward {
|
||||
break
|
||||
}
|
||||
c.nTrailingNonStarters++
|
||||
}
|
||||
if c.nTrailingNonStarters > 3 {
|
||||
log.Fatalf("%U: Decomposition with more than 3 (%d) trailing modifiers (%U)", i, c.nTrailingNonStarters, runes)
|
||||
}
|
||||
|
||||
if isHangul(rune(i)) {
|
||||
c.nTrailingNonStarters = 2
|
||||
if isHangulWithoutJamoT(rune(i)) {
|
||||
c.nTrailingNonStarters = 1
|
||||
}
|
||||
}
|
||||
|
||||
if l, t := c.nLeadingNonStarters, c.nTrailingNonStarters; l > 0 && l != t {
|
||||
log.Fatalf("%U: number of leading and trailing non-starters should be equal (%d vs %d)", i, l, t)
|
||||
}
|
||||
if t := c.nTrailingNonStarters; t > 3 {
|
||||
log.Fatalf("%U: number of trailing non-starters is %d > 3", t)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func printBytes(w io.Writer, b []byte, name string) {
|
||||
fmt.Fprintf(w, "// %s: %d bytes\n", name, len(b))
|
||||
fmt.Fprintf(w, "var %s = [...]byte {", name)
|
||||
for i, c := range b {
|
||||
switch {
|
||||
case i%64 == 0:
|
||||
fmt.Fprintf(w, "\n// Bytes %x - %x\n", i, i+63)
|
||||
case i%8 == 0:
|
||||
fmt.Fprintf(w, "\n")
|
||||
}
|
||||
fmt.Fprintf(w, "0x%.2X, ", c)
|
||||
}
|
||||
fmt.Fprint(w, "\n}\n\n")
|
||||
}
|
||||
|
||||
// See forminfo.go for format.
|
||||
func makeEntry(f *FormInfo, c *Char) uint16 {
|
||||
e := uint16(0)
|
||||
if r := c.codePoint; HangulBase <= r && r < HangulEnd {
|
||||
e |= 0x40
|
||||
}
|
||||
if f.combinesForward {
|
||||
e |= 0x20
|
||||
}
|
||||
if f.quickCheck[MDecomposed] == QCNo {
|
||||
e |= 0x4
|
||||
}
|
||||
switch f.quickCheck[MComposed] {
|
||||
case QCYes:
|
||||
case QCNo:
|
||||
e |= 0x10
|
||||
case QCMaybe:
|
||||
e |= 0x18
|
||||
default:
|
||||
log.Fatalf("Illegal quickcheck value %v.", f.quickCheck[MComposed])
|
||||
}
|
||||
e |= uint16(c.nTrailingNonStarters)
|
||||
return e
|
||||
}
|
||||
|
||||
// decompSet keeps track of unique decompositions, grouped by whether
|
||||
// the decomposition is followed by a trailing and/or leading CCC.
|
||||
type decompSet [7]map[string]bool
|
||||
|
||||
const (
|
||||
normalDecomp = iota
|
||||
firstMulti
|
||||
firstCCC
|
||||
endMulti
|
||||
firstLeadingCCC
|
||||
firstCCCZeroExcept
|
||||
firstStarterWithNLead
|
||||
lastDecomp
|
||||
)
|
||||
|
||||
var cname = []string{"firstMulti", "firstCCC", "endMulti", "firstLeadingCCC", "firstCCCZeroExcept", "firstStarterWithNLead", "lastDecomp"}
|
||||
|
||||
func makeDecompSet() decompSet {
|
||||
m := decompSet{}
|
||||
for i := range m {
|
||||
m[i] = make(map[string]bool)
|
||||
}
|
||||
return m
|
||||
}
|
||||
func (m *decompSet) insert(key int, s string) {
|
||||
m[key][s] = true
|
||||
}
|
||||
|
||||
func printCharInfoTables(w io.Writer) int {
|
||||
mkstr := func(r rune, f *FormInfo) (int, string) {
|
||||
d := f.expandedDecomp
|
||||
s := string([]rune(d))
|
||||
if max := 1 << 6; len(s) >= max {
|
||||
const msg = "%U: too many bytes in decomposition: %d >= %d"
|
||||
log.Fatalf(msg, r, len(s), max)
|
||||
}
|
||||
head := uint8(len(s))
|
||||
if f.quickCheck[MComposed] != QCYes {
|
||||
head |= 0x40
|
||||
}
|
||||
if f.combinesForward {
|
||||
head |= 0x80
|
||||
}
|
||||
s = string([]byte{head}) + s
|
||||
|
||||
lccc := ccc(d[0])
|
||||
tccc := ccc(d[len(d)-1])
|
||||
cc := ccc(r)
|
||||
if cc != 0 && lccc == 0 && tccc == 0 {
|
||||
log.Fatalf("%U: trailing and leading ccc are 0 for non-zero ccc %d", r, cc)
|
||||
}
|
||||
if tccc < lccc && lccc != 0 {
|
||||
const msg = "%U: lccc (%d) must be <= tcc (%d)"
|
||||
log.Fatalf(msg, r, lccc, tccc)
|
||||
}
|
||||
index := normalDecomp
|
||||
nTrail := chars[r].nTrailingNonStarters
|
||||
nLead := chars[r].nLeadingNonStarters
|
||||
if tccc > 0 || lccc > 0 || nTrail > 0 {
|
||||
tccc <<= 2
|
||||
tccc |= nTrail
|
||||
s += string([]byte{tccc})
|
||||
index = endMulti
|
||||
for _, r := range d[1:] {
|
||||
if ccc(r) == 0 {
|
||||
index = firstCCC
|
||||
}
|
||||
}
|
||||
if lccc > 0 || nLead > 0 {
|
||||
s += string([]byte{lccc})
|
||||
if index == firstCCC {
|
||||
log.Fatalf("%U: multi-segment decomposition not supported for decompositions with leading CCC != 0", r)
|
||||
}
|
||||
index = firstLeadingCCC
|
||||
}
|
||||
if cc != lccc {
|
||||
if cc != 0 {
|
||||
log.Fatalf("%U: for lccc != ccc, expected ccc to be 0; was %d", r, cc)
|
||||
}
|
||||
index = firstCCCZeroExcept
|
||||
}
|
||||
} else if len(d) > 1 {
|
||||
index = firstMulti
|
||||
}
|
||||
return index, s
|
||||
}
|
||||
|
||||
decompSet := makeDecompSet()
|
||||
const nLeadStr = "\x00\x01" // 0-byte length and tccc with nTrail.
|
||||
decompSet.insert(firstStarterWithNLead, nLeadStr)
|
||||
|
||||
// Store the uniqued decompositions in a byte buffer,
|
||||
// preceded by their byte length.
|
||||
for _, c := range chars {
|
||||
for _, f := range c.forms {
|
||||
if len(f.expandedDecomp) == 0 {
|
||||
continue
|
||||
}
|
||||
if f.combinesBackward {
|
||||
log.Fatalf("%U: combinesBackward and decompose", c.codePoint)
|
||||
}
|
||||
index, s := mkstr(c.codePoint, &f)
|
||||
decompSet.insert(index, s)
|
||||
}
|
||||
}
|
||||
|
||||
decompositions := bytes.NewBuffer(make([]byte, 0, 10000))
|
||||
size := 0
|
||||
positionMap := make(map[string]uint16)
|
||||
decompositions.WriteString("\000")
|
||||
fmt.Fprintln(w, "const (")
|
||||
for i, m := range decompSet {
|
||||
sa := []string{}
|
||||
for s := range m {
|
||||
sa = append(sa, s)
|
||||
}
|
||||
sort.Strings(sa)
|
||||
for _, s := range sa {
|
||||
p := decompositions.Len()
|
||||
decompositions.WriteString(s)
|
||||
positionMap[s] = uint16(p)
|
||||
}
|
||||
if cname[i] != "" {
|
||||
fmt.Fprintf(w, "%s = 0x%X\n", cname[i], decompositions.Len())
|
||||
}
|
||||
}
|
||||
fmt.Fprintln(w, "maxDecomp = 0x8000")
|
||||
fmt.Fprintln(w, ")")
|
||||
b := decompositions.Bytes()
|
||||
printBytes(w, b, "decomps")
|
||||
size += len(b)
|
||||
|
||||
varnames := []string{"nfc", "nfkc"}
|
||||
for i := 0; i < FNumberOfFormTypes; i++ {
|
||||
trie := triegen.NewTrie(varnames[i])
|
||||
|
||||
for r, c := range chars {
|
||||
f := c.forms[i]
|
||||
d := f.expandedDecomp
|
||||
if len(d) != 0 {
|
||||
_, key := mkstr(c.codePoint, &f)
|
||||
trie.Insert(rune(r), uint64(positionMap[key]))
|
||||
if c.ccc != ccc(d[0]) {
|
||||
// We assume the lead ccc of a decomposition !=0 in this case.
|
||||
if ccc(d[0]) == 0 {
|
||||
log.Fatalf("Expected leading CCC to be non-zero; ccc is %d", c.ccc)
|
||||
}
|
||||
}
|
||||
} else if c.nLeadingNonStarters > 0 && len(f.expandedDecomp) == 0 && c.ccc == 0 && !f.combinesBackward {
|
||||
// Handle cases where it can't be detected that the nLead should be equal
|
||||
// to nTrail.
|
||||
trie.Insert(c.codePoint, uint64(positionMap[nLeadStr]))
|
||||
} else if v := makeEntry(&f, &c)<<8 | uint16(c.ccc); v != 0 {
|
||||
trie.Insert(c.codePoint, uint64(0x8000|v))
|
||||
}
|
||||
}
|
||||
sz, err := trie.Gen(w, triegen.Compact(&normCompacter{name: varnames[i]}))
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
size += sz
|
||||
}
|
||||
return size
|
||||
}
|
||||
|
||||
func contains(sa []string, s string) bool {
|
||||
for _, a := range sa {
|
||||
if a == s {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func makeTables() {
|
||||
w := &bytes.Buffer{}
|
||||
|
||||
size := 0
|
||||
if *tablelist == "" {
|
||||
return
|
||||
}
|
||||
list := strings.Split(*tablelist, ",")
|
||||
if *tablelist == "all" {
|
||||
list = []string{"recomp", "info"}
|
||||
}
|
||||
|
||||
// Compute maximum decomposition size.
|
||||
max := 0
|
||||
for _, c := range chars {
|
||||
if n := len(string(c.forms[FCompatibility].expandedDecomp)); n > max {
|
||||
max = n
|
||||
}
|
||||
}
|
||||
|
||||
fmt.Fprintln(w, "const (")
|
||||
fmt.Fprintln(w, "\t// Version is the Unicode edition from which the tables are derived.")
|
||||
fmt.Fprintf(w, "\tVersion = %q\n", gen.UnicodeVersion())
|
||||
fmt.Fprintln(w)
|
||||
fmt.Fprintln(w, "\t// MaxTransformChunkSize indicates the maximum number of bytes that Transform")
|
||||
fmt.Fprintln(w, "\t// may need to write atomically for any Form. Making a destination buffer at")
|
||||
fmt.Fprintln(w, "\t// least this size ensures that Transform can always make progress and that")
|
||||
fmt.Fprintln(w, "\t// the user does not need to grow the buffer on an ErrShortDst.")
|
||||
fmt.Fprintf(w, "\tMaxTransformChunkSize = %d+maxNonStarters*4\n", len(string(0x034F))+max)
|
||||
fmt.Fprintln(w, ")\n")
|
||||
|
||||
// Print the CCC remap table.
|
||||
size += len(cccMap)
|
||||
fmt.Fprintf(w, "var ccc = [%d]uint8{", len(cccMap))
|
||||
for i := 0; i < len(cccMap); i++ {
|
||||
if i%8 == 0 {
|
||||
fmt.Fprintln(w)
|
||||
}
|
||||
fmt.Fprintf(w, "%3d, ", cccMap[uint8(i)])
|
||||
}
|
||||
fmt.Fprintln(w, "\n}\n")
|
||||
|
||||
if contains(list, "info") {
|
||||
size += printCharInfoTables(w)
|
||||
}
|
||||
|
||||
if contains(list, "recomp") {
|
||||
// Note that we use 32 bit keys, instead of 64 bit.
|
||||
// This clips the bits of three entries, but we know
|
||||
// this won't cause a collision. The compiler will catch
|
||||
// any changes made to UnicodeData.txt that introduces
|
||||
// a collision.
|
||||
// Note that the recomposition map for NFC and NFKC
|
||||
// are identical.
|
||||
|
||||
// Recomposition map
|
||||
nrentries := 0
|
||||
for _, c := range chars {
|
||||
f := c.forms[FCanonical]
|
||||
if !f.isOneWay && len(f.decomp) > 0 {
|
||||
nrentries++
|
||||
}
|
||||
}
|
||||
sz := nrentries * 8
|
||||
size += sz
|
||||
fmt.Fprintf(w, "// recompMap: %d bytes (entries only)\n", sz)
|
||||
fmt.Fprintln(w, "var recompMap = map[uint32]rune{")
|
||||
for i, c := range chars {
|
||||
f := c.forms[FCanonical]
|
||||
d := f.decomp
|
||||
if !f.isOneWay && len(d) > 0 {
|
||||
key := uint32(uint16(d[0]))<<16 + uint32(uint16(d[1]))
|
||||
fmt.Fprintf(w, "0x%.8X: 0x%.4X,\n", key, i)
|
||||
}
|
||||
}
|
||||
fmt.Fprintf(w, "}\n\n")
|
||||
}
|
||||
|
||||
fmt.Fprintf(w, "// Total size of tables: %dKB (%d bytes)\n", (size+512)/1024, size)
|
||||
gen.WriteGoFile("tables.go", "norm", w.Bytes())
|
||||
}
|
||||
|
||||
func printChars() {
|
||||
if *verbose {
|
||||
for _, c := range chars {
|
||||
if !c.isValid() || c.state == SMissing {
|
||||
continue
|
||||
}
|
||||
fmt.Println(c)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// verifyComputed does various consistency tests.
|
||||
func verifyComputed() {
|
||||
for i, c := range chars {
|
||||
for _, f := range c.forms {
|
||||
isNo := (f.quickCheck[MDecomposed] == QCNo)
|
||||
if (len(f.decomp) > 0) != isNo && !isHangul(rune(i)) {
|
||||
log.Fatalf("%U: NF*D QC must be No if rune decomposes", i)
|
||||
}
|
||||
|
||||
isMaybe := f.quickCheck[MComposed] == QCMaybe
|
||||
if f.combinesBackward != isMaybe {
|
||||
log.Fatalf("%U: NF*C QC must be Maybe if combinesBackward", i)
|
||||
}
|
||||
if len(f.decomp) > 0 && f.combinesForward && isMaybe {
|
||||
log.Fatalf("%U: NF*C QC must be Yes or No if combinesForward and decomposes", i)
|
||||
}
|
||||
|
||||
if len(f.expandedDecomp) != 0 {
|
||||
continue
|
||||
}
|
||||
if a, b := c.nLeadingNonStarters > 0, (c.ccc > 0 || f.combinesBackward); a != b {
|
||||
// We accept these runes to be treated differently (it only affects
|
||||
// segment breaking in iteration, most likely on improper use), but
|
||||
// reconsider if more characters are added.
|
||||
// U+FF9E HALFWIDTH KATAKANA VOICED SOUND MARK;Lm;0;L;<narrow> 3099;;;;N;;;;;
|
||||
// U+FF9F HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK;Lm;0;L;<narrow> 309A;;;;N;;;;;
|
||||
// U+3133 HANGUL LETTER KIYEOK-SIOS;Lo;0;L;<compat> 11AA;;;;N;HANGUL LETTER GIYEOG SIOS;;;;
|
||||
// U+318E HANGUL LETTER ARAEAE;Lo;0;L;<compat> 11A1;;;;N;HANGUL LETTER ALAE AE;;;;
|
||||
// U+FFA3 HALFWIDTH HANGUL LETTER KIYEOK-SIOS;Lo;0;L;<narrow> 3133;;;;N;HALFWIDTH HANGUL LETTER GIYEOG SIOS;;;;
|
||||
// U+FFDC HALFWIDTH HANGUL LETTER I;Lo;0;L;<narrow> 3163;;;;N;;;;;
|
||||
if i != 0xFF9E && i != 0xFF9F && !(0x3133 <= i && i <= 0x318E) && !(0xFFA3 <= i && i <= 0xFFDC) {
|
||||
log.Fatalf("%U: nLead was %v; want %v", i, a, b)
|
||||
}
|
||||
}
|
||||
}
|
||||
nfc := c.forms[FCanonical]
|
||||
nfkc := c.forms[FCompatibility]
|
||||
if nfc.combinesBackward != nfkc.combinesBackward {
|
||||
log.Fatalf("%U: Cannot combine combinesBackward\n", c.codePoint)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Use values in DerivedNormalizationProps.txt to compare against the
|
||||
// values we computed.
|
||||
// DerivedNormalizationProps.txt has form:
|
||||
// 00C0..00C5 ; NFD_QC; N # ...
|
||||
// 0374 ; NFD_QC; N # ...
|
||||
// See http://unicode.org/reports/tr44/ for full explanation
|
||||
func testDerived() {
|
||||
f := gen.OpenUCDFile("DerivedNormalizationProps.txt")
|
||||
defer f.Close()
|
||||
p := ucd.New(f)
|
||||
for p.Next() {
|
||||
r := p.Rune(0)
|
||||
c := &chars[r]
|
||||
|
||||
var ftype, mode int
|
||||
qt := p.String(1)
|
||||
switch qt {
|
||||
case "NFC_QC":
|
||||
ftype, mode = FCanonical, MComposed
|
||||
case "NFD_QC":
|
||||
ftype, mode = FCanonical, MDecomposed
|
||||
case "NFKC_QC":
|
||||
ftype, mode = FCompatibility, MComposed
|
||||
case "NFKD_QC":
|
||||
ftype, mode = FCompatibility, MDecomposed
|
||||
default:
|
||||
continue
|
||||
}
|
||||
var qr QCResult
|
||||
switch p.String(2) {
|
||||
case "Y":
|
||||
qr = QCYes
|
||||
case "N":
|
||||
qr = QCNo
|
||||
case "M":
|
||||
qr = QCMaybe
|
||||
default:
|
||||
log.Fatalf(`Unexpected quick check value "%s"`, p.String(2))
|
||||
}
|
||||
if got := c.forms[ftype].quickCheck[mode]; got != qr {
|
||||
log.Printf("%U: FAILED %s (was %v need %v)\n", r, qt, got, qr)
|
||||
}
|
||||
c.forms[ftype].verified[mode] = true
|
||||
}
|
||||
if err := p.Err(); err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
// Any unspecified value must be QCYes. Verify this.
|
||||
for i, c := range chars {
|
||||
for j, fd := range c.forms {
|
||||
for k, qr := range fd.quickCheck {
|
||||
if !fd.verified[k] && qr != QCYes {
|
||||
m := "%U: FAIL F:%d M:%d (was %v need Yes) %s\n"
|
||||
log.Printf(m, i, j, k, qr, c.name)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var testHeader = `const (
|
||||
Yes = iota
|
||||
No
|
||||
Maybe
|
||||
)
|
||||
|
||||
type formData struct {
|
||||
qc uint8
|
||||
combinesForward bool
|
||||
decomposition string
|
||||
}
|
||||
|
||||
type runeData struct {
|
||||
r rune
|
||||
ccc uint8
|
||||
nLead uint8
|
||||
nTrail uint8
|
||||
f [2]formData // 0: canonical; 1: compatibility
|
||||
}
|
||||
|
||||
func f(qc uint8, cf bool, dec string) [2]formData {
|
||||
return [2]formData{{qc, cf, dec}, {qc, cf, dec}}
|
||||
}
|
||||
|
||||
func g(qc, qck uint8, cf, cfk bool, d, dk string) [2]formData {
|
||||
return [2]formData{{qc, cf, d}, {qck, cfk, dk}}
|
||||
}
|
||||
|
||||
var testData = []runeData{
|
||||
`
|
||||
|
||||
func printTestdata() {
|
||||
type lastInfo struct {
|
||||
ccc uint8
|
||||
nLead uint8
|
||||
nTrail uint8
|
||||
f string
|
||||
}
|
||||
|
||||
last := lastInfo{}
|
||||
w := &bytes.Buffer{}
|
||||
fmt.Fprintf(w, testHeader)
|
||||
for r, c := range chars {
|
||||
f := c.forms[FCanonical]
|
||||
qc, cf, d := f.quickCheck[MComposed], f.combinesForward, string(f.expandedDecomp)
|
||||
f = c.forms[FCompatibility]
|
||||
qck, cfk, dk := f.quickCheck[MComposed], f.combinesForward, string(f.expandedDecomp)
|
||||
s := ""
|
||||
if d == dk && qc == qck && cf == cfk {
|
||||
s = fmt.Sprintf("f(%s, %v, %q)", qc, cf, d)
|
||||
} else {
|
||||
s = fmt.Sprintf("g(%s, %s, %v, %v, %q, %q)", qc, qck, cf, cfk, d, dk)
|
||||
}
|
||||
current := lastInfo{c.ccc, c.nLeadingNonStarters, c.nTrailingNonStarters, s}
|
||||
if last != current {
|
||||
fmt.Fprintf(w, "\t{0x%x, %d, %d, %d, %s},\n", r, c.origCCC, c.nLeadingNonStarters, c.nTrailingNonStarters, s)
|
||||
last = current
|
||||
}
|
||||
}
|
||||
fmt.Fprintln(w, "}")
|
||||
gen.WriteGoFile("data_test.go", "norm", w.Bytes())
|
||||
}
|
|
@ -0,0 +1,609 @@
|
|||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Note: the file data_test.go that is generated should not be checked in.
|
||||
//go:generate go run maketables.go triegen.go
|
||||
//go:generate go test -tags test
|
||||
|
||||
// Package norm contains types and functions for normalizing Unicode strings.
|
||||
package norm // import "golang.org/x/text/unicode/norm"
|
||||
|
||||
import (
|
||||
"unicode/utf8"
|
||||
|
||||
"golang.org/x/text/transform"
|
||||
)
|
||||
|
||||
// A Form denotes a canonical representation of Unicode code points.
|
||||
// The Unicode-defined normalization and equivalence forms are:
|
||||
//
|
||||
// NFC Unicode Normalization Form C
|
||||
// NFD Unicode Normalization Form D
|
||||
// NFKC Unicode Normalization Form KC
|
||||
// NFKD Unicode Normalization Form KD
|
||||
//
|
||||
// For a Form f, this documentation uses the notation f(x) to mean
|
||||
// the bytes or string x converted to the given form.
|
||||
// A position n in x is called a boundary if conversion to the form can
|
||||
// proceed independently on both sides:
|
||||
// f(x) == append(f(x[0:n]), f(x[n:])...)
|
||||
//
|
||||
// References: http://unicode.org/reports/tr15/ and
|
||||
// http://unicode.org/notes/tn5/.
|
||||
type Form int
|
||||
|
||||
const (
|
||||
NFC Form = iota
|
||||
NFD
|
||||
NFKC
|
||||
NFKD
|
||||
)
|
||||
|
||||
// Bytes returns f(b). May return b if f(b) = b.
|
||||
func (f Form) Bytes(b []byte) []byte {
|
||||
src := inputBytes(b)
|
||||
ft := formTable[f]
|
||||
n, ok := ft.quickSpan(src, 0, len(b), true)
|
||||
if ok {
|
||||
return b
|
||||
}
|
||||
out := make([]byte, n, len(b))
|
||||
copy(out, b[0:n])
|
||||
rb := reorderBuffer{f: *ft, src: src, nsrc: len(b), out: out, flushF: appendFlush}
|
||||
return doAppendInner(&rb, n)
|
||||
}
|
||||
|
||||
// String returns f(s).
|
||||
func (f Form) String(s string) string {
|
||||
src := inputString(s)
|
||||
ft := formTable[f]
|
||||
n, ok := ft.quickSpan(src, 0, len(s), true)
|
||||
if ok {
|
||||
return s
|
||||
}
|
||||
out := make([]byte, n, len(s))
|
||||
copy(out, s[0:n])
|
||||
rb := reorderBuffer{f: *ft, src: src, nsrc: len(s), out: out, flushF: appendFlush}
|
||||
return string(doAppendInner(&rb, n))
|
||||
}
|
||||
|
||||
// IsNormal returns true if b == f(b).
|
||||
func (f Form) IsNormal(b []byte) bool {
|
||||
src := inputBytes(b)
|
||||
ft := formTable[f]
|
||||
bp, ok := ft.quickSpan(src, 0, len(b), true)
|
||||
if ok {
|
||||
return true
|
||||
}
|
||||
rb := reorderBuffer{f: *ft, src: src, nsrc: len(b)}
|
||||
rb.setFlusher(nil, cmpNormalBytes)
|
||||
for bp < len(b) {
|
||||
rb.out = b[bp:]
|
||||
if bp = decomposeSegment(&rb, bp, true); bp < 0 {
|
||||
return false
|
||||
}
|
||||
bp, _ = rb.f.quickSpan(rb.src, bp, len(b), true)
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func cmpNormalBytes(rb *reorderBuffer) bool {
|
||||
b := rb.out
|
||||
for i := 0; i < rb.nrune; i++ {
|
||||
info := rb.rune[i]
|
||||
if int(info.size) > len(b) {
|
||||
return false
|
||||
}
|
||||
p := info.pos
|
||||
pe := p + info.size
|
||||
for ; p < pe; p++ {
|
||||
if b[0] != rb.byte[p] {
|
||||
return false
|
||||
}
|
||||
b = b[1:]
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// IsNormalString returns true if s == f(s).
|
||||
func (f Form) IsNormalString(s string) bool {
|
||||
src := inputString(s)
|
||||
ft := formTable[f]
|
||||
bp, ok := ft.quickSpan(src, 0, len(s), true)
|
||||
if ok {
|
||||
return true
|
||||
}
|
||||
rb := reorderBuffer{f: *ft, src: src, nsrc: len(s)}
|
||||
rb.setFlusher(nil, func(rb *reorderBuffer) bool {
|
||||
for i := 0; i < rb.nrune; i++ {
|
||||
info := rb.rune[i]
|
||||
if bp+int(info.size) > len(s) {
|
||||
return false
|
||||
}
|
||||
p := info.pos
|
||||
pe := p + info.size
|
||||
for ; p < pe; p++ {
|
||||
if s[bp] != rb.byte[p] {
|
||||
return false
|
||||
}
|
||||
bp++
|
||||
}
|
||||
}
|
||||
return true
|
||||
})
|
||||
for bp < len(s) {
|
||||
if bp = decomposeSegment(&rb, bp, true); bp < 0 {
|
||||
return false
|
||||
}
|
||||
bp, _ = rb.f.quickSpan(rb.src, bp, len(s), true)
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// patchTail fixes a case where a rune may be incorrectly normalized
|
||||
// if it is followed by illegal continuation bytes. It returns the
|
||||
// patched buffer and whether the decomposition is still in progress.
|
||||
func patchTail(rb *reorderBuffer) bool {
|
||||
info, p := lastRuneStart(&rb.f, rb.out)
|
||||
if p == -1 || info.size == 0 {
|
||||
return true
|
||||
}
|
||||
end := p + int(info.size)
|
||||
extra := len(rb.out) - end
|
||||
if extra > 0 {
|
||||
// Potentially allocating memory. However, this only
|
||||
// happens with ill-formed UTF-8.
|
||||
x := make([]byte, 0)
|
||||
x = append(x, rb.out[len(rb.out)-extra:]...)
|
||||
rb.out = rb.out[:end]
|
||||
decomposeToLastBoundary(rb)
|
||||
rb.doFlush()
|
||||
rb.out = append(rb.out, x...)
|
||||
return false
|
||||
}
|
||||
buf := rb.out[p:]
|
||||
rb.out = rb.out[:p]
|
||||
decomposeToLastBoundary(rb)
|
||||
if s := rb.ss.next(info); s == ssStarter {
|
||||
rb.doFlush()
|
||||
rb.ss.first(info)
|
||||
} else if s == ssOverflow {
|
||||
rb.doFlush()
|
||||
rb.insertCGJ()
|
||||
rb.ss = 0
|
||||
}
|
||||
rb.insertUnsafe(inputBytes(buf), 0, info)
|
||||
return true
|
||||
}
|
||||
|
||||
func appendQuick(rb *reorderBuffer, i int) int {
|
||||
if rb.nsrc == i {
|
||||
return i
|
||||
}
|
||||
end, _ := rb.f.quickSpan(rb.src, i, rb.nsrc, true)
|
||||
rb.out = rb.src.appendSlice(rb.out, i, end)
|
||||
return end
|
||||
}
|
||||
|
||||
// Append returns f(append(out, b...)).
|
||||
// The buffer out must be nil, empty, or equal to f(out).
|
||||
func (f Form) Append(out []byte, src ...byte) []byte {
|
||||
return f.doAppend(out, inputBytes(src), len(src))
|
||||
}
|
||||
|
||||
func (f Form) doAppend(out []byte, src input, n int) []byte {
|
||||
if n == 0 {
|
||||
return out
|
||||
}
|
||||
ft := formTable[f]
|
||||
// Attempt to do a quickSpan first so we can avoid initializing the reorderBuffer.
|
||||
if len(out) == 0 {
|
||||
p, _ := ft.quickSpan(src, 0, n, true)
|
||||
out = src.appendSlice(out, 0, p)
|
||||
if p == n {
|
||||
return out
|
||||
}
|
||||
rb := reorderBuffer{f: *ft, src: src, nsrc: n, out: out, flushF: appendFlush}
|
||||
return doAppendInner(&rb, p)
|
||||
}
|
||||
rb := reorderBuffer{f: *ft, src: src, nsrc: n}
|
||||
return doAppend(&rb, out, 0)
|
||||
}
|
||||
|
||||
func doAppend(rb *reorderBuffer, out []byte, p int) []byte {
|
||||
rb.setFlusher(out, appendFlush)
|
||||
src, n := rb.src, rb.nsrc
|
||||
doMerge := len(out) > 0
|
||||
if q := src.skipContinuationBytes(p); q > p {
|
||||
// Move leading non-starters to destination.
|
||||
rb.out = src.appendSlice(rb.out, p, q)
|
||||
p = q
|
||||
doMerge = patchTail(rb)
|
||||
}
|
||||
fd := &rb.f
|
||||
if doMerge {
|
||||
var info Properties
|
||||
if p < n {
|
||||
info = fd.info(src, p)
|
||||
if !info.BoundaryBefore() || info.nLeadingNonStarters() > 0 {
|
||||
if p == 0 {
|
||||
decomposeToLastBoundary(rb)
|
||||
}
|
||||
p = decomposeSegment(rb, p, true)
|
||||
}
|
||||
}
|
||||
if info.size == 0 {
|
||||
rb.doFlush()
|
||||
// Append incomplete UTF-8 encoding.
|
||||
return src.appendSlice(rb.out, p, n)
|
||||
}
|
||||
if rb.nrune > 0 {
|
||||
return doAppendInner(rb, p)
|
||||
}
|
||||
}
|
||||
p = appendQuick(rb, p)
|
||||
return doAppendInner(rb, p)
|
||||
}
|
||||
|
||||
func doAppendInner(rb *reorderBuffer, p int) []byte {
|
||||
for n := rb.nsrc; p < n; {
|
||||
p = decomposeSegment(rb, p, true)
|
||||
p = appendQuick(rb, p)
|
||||
}
|
||||
return rb.out
|
||||
}
|
||||
|
||||
// AppendString returns f(append(out, []byte(s))).
|
||||
// The buffer out must be nil, empty, or equal to f(out).
|
||||
func (f Form) AppendString(out []byte, src string) []byte {
|
||||
return f.doAppend(out, inputString(src), len(src))
|
||||
}
|
||||
|
||||
// QuickSpan returns a boundary n such that b[0:n] == f(b[0:n]).
|
||||
// It is not guaranteed to return the largest such n.
|
||||
func (f Form) QuickSpan(b []byte) int {
|
||||
n, _ := formTable[f].quickSpan(inputBytes(b), 0, len(b), true)
|
||||
return n
|
||||
}
|
||||
|
||||
// Span implements transform.SpanningTransformer. It returns a boundary n such
|
||||
// that b[0:n] == f(b[0:n]). It is not guaranteed to return the largest such n.
|
||||
func (f Form) Span(b []byte, atEOF bool) (n int, err error) {
|
||||
n, ok := formTable[f].quickSpan(inputBytes(b), 0, len(b), atEOF)
|
||||
if n < len(b) {
|
||||
if !ok {
|
||||
err = transform.ErrEndOfSpan
|
||||
} else {
|
||||
err = transform.ErrShortSrc
|
||||
}
|
||||
}
|
||||
return n, err
|
||||
}
|
||||
|
||||
// SpanString returns a boundary n such that s[0:n] == f(s[0:n]).
|
||||
// It is not guaranteed to return the largest such n.
|
||||
func (f Form) SpanString(s string, atEOF bool) (n int, err error) {
|
||||
n, ok := formTable[f].quickSpan(inputString(s), 0, len(s), atEOF)
|
||||
if n < len(s) {
|
||||
if !ok {
|
||||
err = transform.ErrEndOfSpan
|
||||
} else {
|
||||
err = transform.ErrShortSrc
|
||||
}
|
||||
}
|
||||
return n, err
|
||||
}
|
||||
|
||||
// quickSpan returns a boundary n such that src[0:n] == f(src[0:n]) and
|
||||
// whether any non-normalized parts were found. If atEOF is false, n will
|
||||
// not point past the last segment if this segment might be become
|
||||
// non-normalized by appending other runes.
|
||||
func (f *formInfo) quickSpan(src input, i, end int, atEOF bool) (n int, ok bool) {
|
||||
var lastCC uint8
|
||||
ss := streamSafe(0)
|
||||
lastSegStart := i
|
||||
for n = end; i < n; {
|
||||
if j := src.skipASCII(i, n); i != j {
|
||||
i = j
|
||||
lastSegStart = i - 1
|
||||
lastCC = 0
|
||||
ss = 0
|
||||
continue
|
||||
}
|
||||
info := f.info(src, i)
|
||||
if info.size == 0 {
|
||||
if atEOF {
|
||||
// include incomplete runes
|
||||
return n, true
|
||||
}
|
||||
return lastSegStart, true
|
||||
}
|
||||
// This block needs to be before the next, because it is possible to
|
||||
// have an overflow for runes that are starters (e.g. with U+FF9E).
|
||||
switch ss.next(info) {
|
||||
case ssStarter:
|
||||
lastSegStart = i
|
||||
case ssOverflow:
|
||||
return lastSegStart, false
|
||||
case ssSuccess:
|
||||
if lastCC > info.ccc {
|
||||
return lastSegStart, false
|
||||
}
|
||||
}
|
||||
if f.composing {
|
||||
if !info.isYesC() {
|
||||
break
|
||||
}
|
||||
} else {
|
||||
if !info.isYesD() {
|
||||
break
|
||||
}
|
||||
}
|
||||
lastCC = info.ccc
|
||||
i += int(info.size)
|
||||
}
|
||||
if i == n {
|
||||
if !atEOF {
|
||||
n = lastSegStart
|
||||
}
|
||||
return n, true
|
||||
}
|
||||
return lastSegStart, false
|
||||
}
|
||||
|
||||
// QuickSpanString returns a boundary n such that s[0:n] == f(s[0:n]).
|
||||
// It is not guaranteed to return the largest such n.
|
||||
func (f Form) QuickSpanString(s string) int {
|
||||
n, _ := formTable[f].quickSpan(inputString(s), 0, len(s), true)
|
||||
return n
|
||||
}
|
||||
|
||||
// FirstBoundary returns the position i of the first boundary in b
|
||||
// or -1 if b contains no boundary.
|
||||
func (f Form) FirstBoundary(b []byte) int {
|
||||
return f.firstBoundary(inputBytes(b), len(b))
|
||||
}
|
||||
|
||||
func (f Form) firstBoundary(src input, nsrc int) int {
|
||||
i := src.skipContinuationBytes(0)
|
||||
if i >= nsrc {
|
||||
return -1
|
||||
}
|
||||
fd := formTable[f]
|
||||
ss := streamSafe(0)
|
||||
// We should call ss.first here, but we can't as the first rune is
|
||||
// skipped already. This means FirstBoundary can't really determine
|
||||
// CGJ insertion points correctly. Luckily it doesn't have to.
|
||||
for {
|
||||
info := fd.info(src, i)
|
||||
if info.size == 0 {
|
||||
return -1
|
||||
}
|
||||
if s := ss.next(info); s != ssSuccess {
|
||||
return i
|
||||
}
|
||||
i += int(info.size)
|
||||
if i >= nsrc {
|
||||
if !info.BoundaryAfter() && !ss.isMax() {
|
||||
return -1
|
||||
}
|
||||
return nsrc
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// FirstBoundaryInString returns the position i of the first boundary in s
|
||||
// or -1 if s contains no boundary.
|
||||
func (f Form) FirstBoundaryInString(s string) int {
|
||||
return f.firstBoundary(inputString(s), len(s))
|
||||
}
|
||||
|
||||
// NextBoundary reports the index of the boundary between the first and next
|
||||
// segment in b or -1 if atEOF is false and there are not enough bytes to
|
||||
// determine this boundary.
|
||||
func (f Form) NextBoundary(b []byte, atEOF bool) int {
|
||||
return f.nextBoundary(inputBytes(b), len(b), atEOF)
|
||||
}
|
||||
|
||||
// NextBoundaryInString reports the index of the boundary between the first and
|
||||
// next segment in b or -1 if atEOF is false and there are not enough bytes to
|
||||
// determine this boundary.
|
||||
func (f Form) NextBoundaryInString(s string, atEOF bool) int {
|
||||
return f.nextBoundary(inputString(s), len(s), atEOF)
|
||||
}
|
||||
|
||||
func (f Form) nextBoundary(src input, nsrc int, atEOF bool) int {
|
||||
if nsrc == 0 {
|
||||
if atEOF {
|
||||
return 0
|
||||
}
|
||||
return -1
|
||||
}
|
||||
fd := formTable[f]
|
||||
info := fd.info(src, 0)
|
||||
if info.size == 0 {
|
||||
if atEOF {
|
||||
return 1
|
||||
}
|
||||
return -1
|
||||
}
|
||||
ss := streamSafe(0)
|
||||
ss.first(info)
|
||||
|
||||
for i := int(info.size); i < nsrc; i += int(info.size) {
|
||||
info = fd.info(src, i)
|
||||
if info.size == 0 {
|
||||
if atEOF {
|
||||
return i
|
||||
}
|
||||
return -1
|
||||
}
|
||||
// TODO: Using streamSafe to determine the boundary isn't the same as
|
||||
// using BoundaryBefore. Determine which should be used.
|
||||
if s := ss.next(info); s != ssSuccess {
|
||||
return i
|
||||
}
|
||||
}
|
||||
if !atEOF && !info.BoundaryAfter() && !ss.isMax() {
|
||||
return -1
|
||||
}
|
||||
return nsrc
|
||||
}
|
||||
|
||||
// LastBoundary returns the position i of the last boundary in b
|
||||
// or -1 if b contains no boundary.
|
||||
func (f Form) LastBoundary(b []byte) int {
|
||||
return lastBoundary(formTable[f], b)
|
||||
}
|
||||
|
||||
func lastBoundary(fd *formInfo, b []byte) int {
|
||||
i := len(b)
|
||||
info, p := lastRuneStart(fd, b)
|
||||
if p == -1 {
|
||||
return -1
|
||||
}
|
||||
if info.size == 0 { // ends with incomplete rune
|
||||
if p == 0 { // starts with incomplete rune
|
||||
return -1
|
||||
}
|
||||
i = p
|
||||
info, p = lastRuneStart(fd, b[:i])
|
||||
if p == -1 { // incomplete UTF-8 encoding or non-starter bytes without a starter
|
||||
return i
|
||||
}
|
||||
}
|
||||
if p+int(info.size) != i { // trailing non-starter bytes: illegal UTF-8
|
||||
return i
|
||||
}
|
||||
if info.BoundaryAfter() {
|
||||
return i
|
||||
}
|
||||
ss := streamSafe(0)
|
||||
v := ss.backwards(info)
|
||||
for i = p; i >= 0 && v != ssStarter; i = p {
|
||||
info, p = lastRuneStart(fd, b[:i])
|
||||
if v = ss.backwards(info); v == ssOverflow {
|
||||
break
|
||||
}
|
||||
if p+int(info.size) != i {
|
||||
if p == -1 { // no boundary found
|
||||
return -1
|
||||
}
|
||||
return i // boundary after an illegal UTF-8 encoding
|
||||
}
|
||||
}
|
||||
return i
|
||||
}
|
||||
|
||||
// decomposeSegment scans the first segment in src into rb. It inserts 0x034f
|
||||
// (Grapheme Joiner) when it encounters a sequence of more than 30 non-starters
|
||||
// and returns the number of bytes consumed from src or iShortDst or iShortSrc.
|
||||
func decomposeSegment(rb *reorderBuffer, sp int, atEOF bool) int {
|
||||
// Force one character to be consumed.
|
||||
info := rb.f.info(rb.src, sp)
|
||||
if info.size == 0 {
|
||||
return 0
|
||||
}
|
||||
if s := rb.ss.next(info); s == ssStarter {
|
||||
// TODO: this could be removed if we don't support merging.
|
||||
if rb.nrune > 0 {
|
||||
goto end
|
||||
}
|
||||
} else if s == ssOverflow {
|
||||
rb.insertCGJ()
|
||||
goto end
|
||||
}
|
||||
if err := rb.insertFlush(rb.src, sp, info); err != iSuccess {
|
||||
return int(err)
|
||||
}
|
||||
for {
|
||||
sp += int(info.size)
|
||||
if sp >= rb.nsrc {
|
||||
if !atEOF && !info.BoundaryAfter() {
|
||||
return int(iShortSrc)
|
||||
}
|
||||
break
|
||||
}
|
||||
info = rb.f.info(rb.src, sp)
|
||||
if info.size == 0 {
|
||||
if !atEOF {
|
||||
return int(iShortSrc)
|
||||
}
|
||||
break
|
||||
}
|
||||
if s := rb.ss.next(info); s == ssStarter {
|
||||
break
|
||||
} else if s == ssOverflow {
|
||||
rb.insertCGJ()
|
||||
break
|
||||
}
|
||||
if err := rb.insertFlush(rb.src, sp, info); err != iSuccess {
|
||||
return int(err)
|
||||
}
|
||||
}
|
||||
end:
|
||||
if !rb.doFlush() {
|
||||
return int(iShortDst)
|
||||
}
|
||||
return sp
|
||||
}
|
||||
|
||||
// lastRuneStart returns the runeInfo and position of the last
|
||||
// rune in buf or the zero runeInfo and -1 if no rune was found.
|
||||
func lastRuneStart(fd *formInfo, buf []byte) (Properties, int) {
|
||||
p := len(buf) - 1
|
||||
for ; p >= 0 && !utf8.RuneStart(buf[p]); p-- {
|
||||
}
|
||||
if p < 0 {
|
||||
return Properties{}, -1
|
||||
}
|
||||
return fd.info(inputBytes(buf), p), p
|
||||
}
|
||||
|
||||
// decomposeToLastBoundary finds an open segment at the end of the buffer
|
||||
// and scans it into rb. Returns the buffer minus the last segment.
|
||||
func decomposeToLastBoundary(rb *reorderBuffer) {
|
||||
fd := &rb.f
|
||||
info, i := lastRuneStart(fd, rb.out)
|
||||
if int(info.size) != len(rb.out)-i {
|
||||
// illegal trailing continuation bytes
|
||||
return
|
||||
}
|
||||
if info.BoundaryAfter() {
|
||||
return
|
||||
}
|
||||
var add [maxNonStarters + 1]Properties // stores runeInfo in reverse order
|
||||
padd := 0
|
||||
ss := streamSafe(0)
|
||||
p := len(rb.out)
|
||||
for {
|
||||
add[padd] = info
|
||||
v := ss.backwards(info)
|
||||
if v == ssOverflow {
|
||||
// Note that if we have an overflow, it the string we are appending to
|
||||
// is not correctly normalized. In this case the behavior is undefined.
|
||||
break
|
||||
}
|
||||
padd++
|
||||
p -= int(info.size)
|
||||
if v == ssStarter || p < 0 {
|
||||
break
|
||||
}
|
||||
info, i = lastRuneStart(fd, rb.out[:p])
|
||||
if int(info.size) != p-i {
|
||||
break
|
||||
}
|
||||
}
|
||||
rb.ss = ss
|
||||
// Copy bytes for insertion as we may need to overwrite rb.out.
|
||||
var buf [maxBufferSize * utf8.UTFMax]byte
|
||||
cp := buf[:copy(buf[:], rb.out[p:])]
|
||||
rb.out = rb.out[:p]
|
||||
for padd--; padd >= 0; padd-- {
|
||||
info = add[padd]
|
||||
rb.insertUnsafe(inputBytes(cp), 0, info)
|
||||
cp = cp[info.size:]
|
||||
}
|
||||
}
|
|
@ -0,0 +1,125 @@
|
|||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package norm
|
||||
|
||||
import "io"
|
||||
|
||||
type normWriter struct {
|
||||
rb reorderBuffer
|
||||
w io.Writer
|
||||
buf []byte
|
||||
}
|
||||
|
||||
// Write implements the standard write interface. If the last characters are
|
||||
// not at a normalization boundary, the bytes will be buffered for the next
|
||||
// write. The remaining bytes will be written on close.
|
||||
func (w *normWriter) Write(data []byte) (n int, err error) {
|
||||
// Process data in pieces to keep w.buf size bounded.
|
||||
const chunk = 4000
|
||||
|
||||
for len(data) > 0 {
|
||||
// Normalize into w.buf.
|
||||
m := len(data)
|
||||
if m > chunk {
|
||||
m = chunk
|
||||
}
|
||||
w.rb.src = inputBytes(data[:m])
|
||||
w.rb.nsrc = m
|
||||
w.buf = doAppend(&w.rb, w.buf, 0)
|
||||
data = data[m:]
|
||||
n += m
|
||||
|
||||
// Write out complete prefix, save remainder.
|
||||
// Note that lastBoundary looks back at most 31 runes.
|
||||
i := lastBoundary(&w.rb.f, w.buf)
|
||||
if i == -1 {
|
||||
i = 0
|
||||
}
|
||||
if i > 0 {
|
||||
if _, err = w.w.Write(w.buf[:i]); err != nil {
|
||||
break
|
||||
}
|
||||
bn := copy(w.buf, w.buf[i:])
|
||||
w.buf = w.buf[:bn]
|
||||
}
|
||||
}
|
||||
return n, err
|
||||
}
|
||||
|
||||
// Close forces data that remains in the buffer to be written.
|
||||
func (w *normWriter) Close() error {
|
||||
if len(w.buf) > 0 {
|
||||
_, err := w.w.Write(w.buf)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Writer returns a new writer that implements Write(b)
|
||||
// by writing f(b) to w. The returned writer may use an
|
||||
// an internal buffer to maintain state across Write calls.
|
||||
// Calling its Close method writes any buffered data to w.
|
||||
func (f Form) Writer(w io.Writer) io.WriteCloser {
|
||||
wr := &normWriter{rb: reorderBuffer{}, w: w}
|
||||
wr.rb.init(f, nil)
|
||||
return wr
|
||||
}
|
||||
|
||||
type normReader struct {
|
||||
rb reorderBuffer
|
||||
r io.Reader
|
||||
inbuf []byte
|
||||
outbuf []byte
|
||||
bufStart int
|
||||
lastBoundary int
|
||||
err error
|
||||
}
|
||||
|
||||
// Read implements the standard read interface.
|
||||
func (r *normReader) Read(p []byte) (int, error) {
|
||||
for {
|
||||
if r.lastBoundary-r.bufStart > 0 {
|
||||
n := copy(p, r.outbuf[r.bufStart:r.lastBoundary])
|
||||
r.bufStart += n
|
||||
if r.lastBoundary-r.bufStart > 0 {
|
||||
return n, nil
|
||||
}
|
||||
return n, r.err
|
||||
}
|
||||
if r.err != nil {
|
||||
return 0, r.err
|
||||
}
|
||||
outn := copy(r.outbuf, r.outbuf[r.lastBoundary:])
|
||||
r.outbuf = r.outbuf[0:outn]
|
||||
r.bufStart = 0
|
||||
|
||||
n, err := r.r.Read(r.inbuf)
|
||||
r.rb.src = inputBytes(r.inbuf[0:n])
|
||||
r.rb.nsrc, r.err = n, err
|
||||
if n > 0 {
|
||||
r.outbuf = doAppend(&r.rb, r.outbuf, 0)
|
||||
}
|
||||
if err == io.EOF {
|
||||
r.lastBoundary = len(r.outbuf)
|
||||
} else {
|
||||
r.lastBoundary = lastBoundary(&r.rb.f, r.outbuf)
|
||||
if r.lastBoundary == -1 {
|
||||
r.lastBoundary = 0
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Reader returns a new reader that implements Read
|
||||
// by reading data from r and returning f(data).
|
||||
func (f Form) Reader(r io.Reader) io.Reader {
|
||||
const chunk = 4000
|
||||
buf := make([]byte, chunk)
|
||||
rr := &normReader{rb: reorderBuffer{}, r: r, inbuf: buf}
|
||||
rr.rb.init(f, buf)
|
||||
return rr
|
||||
}
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,88 @@
|
|||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package norm
|
||||
|
||||
import (
|
||||
"unicode/utf8"
|
||||
|
||||
"golang.org/x/text/transform"
|
||||
)
|
||||
|
||||
// Reset implements the Reset method of the transform.Transformer interface.
|
||||
func (Form) Reset() {}
|
||||
|
||||
// Transform implements the Transform method of the transform.Transformer
|
||||
// interface. It may need to write segments of up to MaxSegmentSize at once.
|
||||
// Users should either catch ErrShortDst and allow dst to grow or have dst be at
|
||||
// least of size MaxTransformChunkSize to be guaranteed of progress.
|
||||
func (f Form) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
n := 0
|
||||
// Cap the maximum number of src bytes to check.
|
||||
b := src
|
||||
eof := atEOF
|
||||
if ns := len(dst); ns < len(b) {
|
||||
err = transform.ErrShortDst
|
||||
eof = false
|
||||
b = b[:ns]
|
||||
}
|
||||
i, ok := formTable[f].quickSpan(inputBytes(b), n, len(b), eof)
|
||||
n += copy(dst[n:], b[n:i])
|
||||
if !ok {
|
||||
nDst, nSrc, err = f.transform(dst[n:], src[n:], atEOF)
|
||||
return nDst + n, nSrc + n, err
|
||||
}
|
||||
if n < len(src) && !atEOF {
|
||||
err = transform.ErrShortSrc
|
||||
}
|
||||
return n, n, err
|
||||
}
|
||||
|
||||
func flushTransform(rb *reorderBuffer) bool {
|
||||
// Write out (must fully fit in dst, or else it is an ErrShortDst).
|
||||
if len(rb.out) < rb.nrune*utf8.UTFMax {
|
||||
return false
|
||||
}
|
||||
rb.out = rb.out[rb.flushCopy(rb.out):]
|
||||
return true
|
||||
}
|
||||
|
||||
var errs = []error{nil, transform.ErrShortDst, transform.ErrShortSrc}
|
||||
|
||||
// transform implements the transform.Transformer interface. It is only called
|
||||
// when quickSpan does not pass for a given string.
|
||||
func (f Form) transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
// TODO: get rid of reorderBuffer. See CL 23460044.
|
||||
rb := reorderBuffer{}
|
||||
rb.init(f, src)
|
||||
for {
|
||||
// Load segment into reorder buffer.
|
||||
rb.setFlusher(dst[nDst:], flushTransform)
|
||||
end := decomposeSegment(&rb, nSrc, atEOF)
|
||||
if end < 0 {
|
||||
return nDst, nSrc, errs[-end]
|
||||
}
|
||||
nDst = len(dst) - len(rb.out)
|
||||
nSrc = end
|
||||
|
||||
// Next quickSpan.
|
||||
end = rb.nsrc
|
||||
eof := atEOF
|
||||
if n := nSrc + len(dst) - nDst; n < end {
|
||||
err = transform.ErrShortDst
|
||||
end = n
|
||||
eof = false
|
||||
}
|
||||
end, ok := rb.f.quickSpan(rb.src, nSrc, end, eof)
|
||||
n := copy(dst[nDst:], rb.src.bytes[nSrc:end])
|
||||
nSrc += n
|
||||
nDst += n
|
||||
if ok {
|
||||
if n < rb.nsrc && !atEOF {
|
||||
err = transform.ErrShortSrc
|
||||
}
|
||||
return nDst, nSrc, err
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,54 @@
|
|||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package norm
|
||||
|
||||
type valueRange struct {
|
||||
value uint16 // header: value:stride
|
||||
lo, hi byte // header: lo:n
|
||||
}
|
||||
|
||||
type sparseBlocks struct {
|
||||
values []valueRange
|
||||
offset []uint16
|
||||
}
|
||||
|
||||
var nfcSparse = sparseBlocks{
|
||||
values: nfcSparseValues[:],
|
||||
offset: nfcSparseOffset[:],
|
||||
}
|
||||
|
||||
var nfkcSparse = sparseBlocks{
|
||||
values: nfkcSparseValues[:],
|
||||
offset: nfkcSparseOffset[:],
|
||||
}
|
||||
|
||||
var (
|
||||
nfcData = newNfcTrie(0)
|
||||
nfkcData = newNfkcTrie(0)
|
||||
)
|
||||
|
||||
// lookupValue determines the type of block n and looks up the value for b.
|
||||
// For n < t.cutoff, the block is a simple lookup table. Otherwise, the block
|
||||
// is a list of ranges with an accompanying value. Given a matching range r,
|
||||
// the value for b is by r.value + (b - r.lo) * stride.
|
||||
func (t *sparseBlocks) lookup(n uint32, b byte) uint16 {
|
||||
offset := t.offset[n]
|
||||
header := t.values[offset]
|
||||
lo := offset + 1
|
||||
hi := lo + uint16(header.lo)
|
||||
for lo < hi {
|
||||
m := lo + (hi-lo)/2
|
||||
r := t.values[m]
|
||||
if r.lo <= b && b <= r.hi {
|
||||
return r.value + uint16(b-r.lo)*header.value
|
||||
}
|
||||
if b < r.lo {
|
||||
hi = m
|
||||
} else {
|
||||
lo = m + 1
|
||||
}
|
||||
}
|
||||
return 0
|
||||
}
|
|
@ -0,0 +1,117 @@
|
|||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ignore
|
||||
|
||||
// Trie table generator.
|
||||
// Used by make*tables tools to generate a go file with trie data structures
|
||||
// for mapping UTF-8 to a 16-bit value. All but the last byte in a UTF-8 byte
|
||||
// sequence are used to lookup offsets in the index table to be used for the
|
||||
// next byte. The last byte is used to index into a table with 16-bit values.
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
)
|
||||
|
||||
const maxSparseEntries = 16
|
||||
|
||||
type normCompacter struct {
|
||||
sparseBlocks [][]uint64
|
||||
sparseOffset []uint16
|
||||
sparseCount int
|
||||
name string
|
||||
}
|
||||
|
||||
func mostFrequentStride(a []uint64) int {
|
||||
counts := make(map[int]int)
|
||||
var v int
|
||||
for _, x := range a {
|
||||
if stride := int(x) - v; v != 0 && stride >= 0 {
|
||||
counts[stride]++
|
||||
}
|
||||
v = int(x)
|
||||
}
|
||||
var maxs, maxc int
|
||||
for stride, cnt := range counts {
|
||||
if cnt > maxc || (cnt == maxc && stride < maxs) {
|
||||
maxs, maxc = stride, cnt
|
||||
}
|
||||
}
|
||||
return maxs
|
||||
}
|
||||
|
||||
func countSparseEntries(a []uint64) int {
|
||||
stride := mostFrequentStride(a)
|
||||
var v, count int
|
||||
for _, tv := range a {
|
||||
if int(tv)-v != stride {
|
||||
if tv != 0 {
|
||||
count++
|
||||
}
|
||||
}
|
||||
v = int(tv)
|
||||
}
|
||||
return count
|
||||
}
|
||||
|
||||
func (c *normCompacter) Size(v []uint64) (sz int, ok bool) {
|
||||
if n := countSparseEntries(v); n <= maxSparseEntries {
|
||||
return (n+1)*4 + 2, true
|
||||
}
|
||||
return 0, false
|
||||
}
|
||||
|
||||
func (c *normCompacter) Store(v []uint64) uint32 {
|
||||
h := uint32(len(c.sparseOffset))
|
||||
c.sparseBlocks = append(c.sparseBlocks, v)
|
||||
c.sparseOffset = append(c.sparseOffset, uint16(c.sparseCount))
|
||||
c.sparseCount += countSparseEntries(v) + 1
|
||||
return h
|
||||
}
|
||||
|
||||
func (c *normCompacter) Handler() string {
|
||||
return c.name + "Sparse.lookup"
|
||||
}
|
||||
|
||||
func (c *normCompacter) Print(w io.Writer) (retErr error) {
|
||||
p := func(f string, x ...interface{}) {
|
||||
if _, err := fmt.Fprintf(w, f, x...); retErr == nil && err != nil {
|
||||
retErr = err
|
||||
}
|
||||
}
|
||||
|
||||
ls := len(c.sparseBlocks)
|
||||
p("// %sSparseOffset: %d entries, %d bytes\n", c.name, ls, ls*2)
|
||||
p("var %sSparseOffset = %#v\n\n", c.name, c.sparseOffset)
|
||||
|
||||
ns := c.sparseCount
|
||||
p("// %sSparseValues: %d entries, %d bytes\n", c.name, ns, ns*4)
|
||||
p("var %sSparseValues = [%d]valueRange {", c.name, ns)
|
||||
for i, b := range c.sparseBlocks {
|
||||
p("\n// Block %#x, offset %#x", i, c.sparseOffset[i])
|
||||
var v int
|
||||
stride := mostFrequentStride(b)
|
||||
n := countSparseEntries(b)
|
||||
p("\n{value:%#04x,lo:%#02x},", stride, uint8(n))
|
||||
for i, nv := range b {
|
||||
if int(nv)-v != stride {
|
||||
if v != 0 {
|
||||
p(",hi:%#02x},", 0x80+i-1)
|
||||
}
|
||||
if nv != 0 {
|
||||
p("\n{value:%#04x,lo:%#02x", nv, 0x80+i)
|
||||
}
|
||||
}
|
||||
v = int(nv)
|
||||
}
|
||||
if v != 0 {
|
||||
p(",hi:%#02x},", 0x80+len(b)-1)
|
||||
}
|
||||
}
|
||||
p("\n}\n\n")
|
||||
return
|
||||
}
|
|
@ -93,7 +93,15 @@
|
|||
{"path":"github.com/tonnerre/golang-text","checksumSHA1":"t24KnvC9jRxiANVhpw2pqFpmEu8=","revision":"048ed3d792f7104850acbc8cfc01e5a6070f4c04","revisionTime":"2013-09-25T19:58:46Z"},
|
||||
{"path":"golang.org/x/net/context","checksumSHA1":"9jjO5GjLa0XF/nfWihF02RoH4qc=","revision":"075e191f18186a8ff2becaf64478e30f4545cdad","revisionTime":"2016-08-05T06:12:51Z"},
|
||||
{"path":"golang.org/x/net/context/ctxhttp","checksumSHA1":"WHc3uByvGaMcnSoI21fhzYgbOgg=","revision":"075e191f18186a8ff2becaf64478e30f4545cdad","revisionTime":"2016-08-05T06:12:51Z"},
|
||||
{"path":"golang.org/x/net/http2","checksumSHA1":"aaproqDPgHPV5s7lKzClAdCaDKQ=","revision":"01c190206fbdffa42f334f4b2bf2220f50e64920","revisionTime":"2017-11-02T18:53:09Z"},
|
||||
{"path":"golang.org/x/net/http2/hpack","checksumSHA1":"ezWhc7n/FtqkLDQKeU2JbW+80tE=","revision":"01c190206fbdffa42f334f4b2bf2220f50e64920","revisionTime":"2017-11-02T18:53:09Z"},
|
||||
{"path":"golang.org/x/net/idna","checksumSHA1":"RcrB7tgYS/GMW4QrwVdMOTNqIU8=","revision":"01c190206fbdffa42f334f4b2bf2220f50e64920","revisionTime":"2017-11-02T18:53:09Z"},
|
||||
{"path":"golang.org/x/net/lex/httplex","checksumSHA1":"3xyuaSNmClqG4YWC7g0isQIbUTc=","revision":"01c190206fbdffa42f334f4b2bf2220f50e64920","revisionTime":"2017-11-02T18:53:09Z"},
|
||||
{"path":"golang.org/x/sys/unix","checksumSHA1":"vlicYp+fe4ECQ+5QqpAk36VRA3s=","revision":"cd2c276457edda6df7fb04895d3fd6a6add42926","revisionTime":"2017-07-17T10:05:24Z"},
|
||||
{"path":"golang.org/x/text/secure/bidirule","checksumSHA1":"tltivJ/uj/lqLk05IqGfCv2F/E8=","revision":"88f656faf3f37f690df1a32515b479415e1a6769","revisionTime":"2017-10-26T07:52:28Z"},
|
||||
{"path":"golang.org/x/text/transform","checksumSHA1":"ziMb9+ANGRJSSIuxYdRbA+cDRBQ=","revision":"88f656faf3f37f690df1a32515b479415e1a6769","revisionTime":"2017-10-26T07:52:28Z"},
|
||||
{"path":"golang.org/x/text/unicode/bidi","checksumSHA1":"tk+lpF2CDV7e5RwwRY5ZTCGrd9o=","revision":"88f656faf3f37f690df1a32515b479415e1a6769","revisionTime":"2017-10-26T07:52:28Z"},
|
||||
{"path":"golang.org/x/text/unicode/norm","checksumSHA1":"BwRNKgzIMUxk56OScxyr43BV6IE=","revision":"88f656faf3f37f690df1a32515b479415e1a6769","revisionTime":"2017-10-26T07:52:28Z"},
|
||||
{"path":"golang.org/x/time/rate","checksumSHA1":"vGfePfr0+weQUeTM/71mu+LCFuE=","revision":"8be79e1e0910c292df4e79c241bb7e8f7e725959","revisionTime":"2017-04-24T23:28:54Z"}
|
||||
],
|
||||
"rootPath": "github.com/hashicorp/consul"
|
||||
|
|
Loading…
Reference in New Issue