mirror of https://github.com/k3s-io/k3s
github.com/PuerkitoBio/purell v1.1.0
Used only by github.com/go-openapi/..., all expecting v1.1.0k3s-v1.15.3
parent
7a47bc3d1d
commit
5627034673
2
go.mod
2
go.mod
|
@ -224,7 +224,7 @@ replace (
|
|||
github.com/Microsoft/hcsshim => github.com/Microsoft/hcsshim v0.0.0-20190110205307-69ac8d3f7fc1
|
||||
github.com/NYTimes/gziphandler => github.com/NYTimes/gziphandler v0.0.0-20170623195520-56545f4a5d46
|
||||
github.com/Nvveen/Gotty => github.com/Nvveen/Gotty v0.0.0-20120604004816-cd527374f1e5
|
||||
github.com/PuerkitoBio/purell => github.com/PuerkitoBio/purell v1.0.0
|
||||
github.com/PuerkitoBio/purell => github.com/PuerkitoBio/purell v1.1.0
|
||||
github.com/PuerkitoBio/urlesc => github.com/PuerkitoBio/urlesc v0.0.0-20160726150825-5bd2802263f2
|
||||
github.com/Rican7/retry => github.com/Rican7/retry v0.0.0-20160712041035-272ad122d6e5
|
||||
github.com/alecthomas/template => github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc
|
||||
|
|
4
go.sum
4
go.sum
|
@ -26,8 +26,8 @@ github.com/NYTimes/gziphandler v0.0.0-20170623195520-56545f4a5d46 h1:lsxEuwrXEAo
|
|||
github.com/NYTimes/gziphandler v0.0.0-20170623195520-56545f4a5d46/go.mod h1:3wb06e3pkSAbeQ52E9H9iFoQsEEwGN64994WTCIhntQ=
|
||||
github.com/Nvveen/Gotty v0.0.0-20120604004816-cd527374f1e5 h1:TngWCqHvy9oXAN6lEVMRuU21PR1EtLVZJmdB18Gu3Rw=
|
||||
github.com/Nvveen/Gotty v0.0.0-20120604004816-cd527374f1e5/go.mod h1:lmUJ/7eu/Q8D7ML55dXQrVaamCz2vxCfdQBasLZfHKk=
|
||||
github.com/PuerkitoBio/purell v1.0.0 h1:0GoNN3taZV6QI81IXgCbxMyEaJDXMSIjArYBCYzVVvs=
|
||||
github.com/PuerkitoBio/purell v1.0.0/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0=
|
||||
github.com/PuerkitoBio/purell v1.1.0 h1:rmGxhojJlM0tuKtfdvliR84CFHljx9ag64t2xmVkjK4=
|
||||
github.com/PuerkitoBio/purell v1.1.0/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0=
|
||||
github.com/PuerkitoBio/urlesc v0.0.0-20160726150825-5bd2802263f2 h1:JCHLVE3B+kJde7bIEo5N4J+ZbLhp0J1Fs+ulyRws4gE=
|
||||
github.com/PuerkitoBio/urlesc v0.0.0-20160726150825-5bd2802263f2/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE=
|
||||
github.com/Rican7/retry v0.0.0-20160712041035-272ad122d6e5 h1:lL761TuQsL4xn4Pn0CNk9YrPvrT8kzBMvW4YgCB7HMg=
|
||||
|
|
|
@ -43,7 +43,7 @@ replace (
|
|||
github.com/Azure/go-autorest => github.com/Azure/go-autorest v11.1.0+incompatible
|
||||
github.com/BurntSushi/toml => github.com/BurntSushi/toml v0.3.0
|
||||
github.com/NYTimes/gziphandler => github.com/NYTimes/gziphandler v0.0.0-20170623195520-56545f4a5d46
|
||||
github.com/PuerkitoBio/purell => github.com/PuerkitoBio/purell v1.0.0
|
||||
github.com/PuerkitoBio/purell => github.com/PuerkitoBio/purell v1.1.0
|
||||
github.com/PuerkitoBio/urlesc => github.com/PuerkitoBio/urlesc v0.0.0-20160726150825-5bd2802263f2
|
||||
github.com/alecthomas/template => github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc
|
||||
github.com/alecthomas/units => github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf
|
||||
|
|
|
@ -5,8 +5,8 @@ github.com/BurntSushi/toml v0.3.0 h1:e1/Ivsx3Z0FVTV0NSOv/aVgbUWyQuzj7DDnFblkRvsY
|
|||
github.com/BurntSushi/toml v0.3.0/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
|
||||
github.com/NYTimes/gziphandler v0.0.0-20170623195520-56545f4a5d46 h1:lsxEuwrXEAokXB9qhlbKWPpo3KMLZQ5WB5WLQRW1uq0=
|
||||
github.com/NYTimes/gziphandler v0.0.0-20170623195520-56545f4a5d46/go.mod h1:3wb06e3pkSAbeQ52E9H9iFoQsEEwGN64994WTCIhntQ=
|
||||
github.com/PuerkitoBio/purell v1.0.0 h1:0GoNN3taZV6QI81IXgCbxMyEaJDXMSIjArYBCYzVVvs=
|
||||
github.com/PuerkitoBio/purell v1.0.0/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0=
|
||||
github.com/PuerkitoBio/purell v1.1.0 h1:rmGxhojJlM0tuKtfdvliR84CFHljx9ag64t2xmVkjK4=
|
||||
github.com/PuerkitoBio/purell v1.1.0/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0=
|
||||
github.com/PuerkitoBio/urlesc v0.0.0-20160726150825-5bd2802263f2 h1:JCHLVE3B+kJde7bIEo5N4J+ZbLhp0J1Fs+ulyRws4gE=
|
||||
github.com/PuerkitoBio/urlesc v0.0.0-20160726150825-5bd2802263f2/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE=
|
||||
github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
|
||||
|
|
|
@ -81,7 +81,7 @@ replace (
|
|||
github.com/Azure/go-autorest => github.com/Azure/go-autorest v11.1.0+incompatible
|
||||
github.com/BurntSushi/toml => github.com/BurntSushi/toml v0.3.0
|
||||
github.com/NYTimes/gziphandler => github.com/NYTimes/gziphandler v0.0.0-20170623195520-56545f4a5d46
|
||||
github.com/PuerkitoBio/purell => github.com/PuerkitoBio/purell v1.0.0
|
||||
github.com/PuerkitoBio/purell => github.com/PuerkitoBio/purell v1.1.0
|
||||
github.com/PuerkitoBio/urlesc => github.com/PuerkitoBio/urlesc v0.0.0-20160726150825-5bd2802263f2
|
||||
github.com/alecthomas/template => github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc
|
||||
github.com/alecthomas/units => github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf
|
||||
|
|
|
@ -6,8 +6,8 @@ github.com/BurntSushi/toml v0.3.0 h1:e1/Ivsx3Z0FVTV0NSOv/aVgbUWyQuzj7DDnFblkRvsY
|
|||
github.com/BurntSushi/toml v0.3.0/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
|
||||
github.com/NYTimes/gziphandler v0.0.0-20170623195520-56545f4a5d46 h1:lsxEuwrXEAokXB9qhlbKWPpo3KMLZQ5WB5WLQRW1uq0=
|
||||
github.com/NYTimes/gziphandler v0.0.0-20170623195520-56545f4a5d46/go.mod h1:3wb06e3pkSAbeQ52E9H9iFoQsEEwGN64994WTCIhntQ=
|
||||
github.com/PuerkitoBio/purell v1.0.0 h1:0GoNN3taZV6QI81IXgCbxMyEaJDXMSIjArYBCYzVVvs=
|
||||
github.com/PuerkitoBio/purell v1.0.0/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0=
|
||||
github.com/PuerkitoBio/purell v1.1.0 h1:rmGxhojJlM0tuKtfdvliR84CFHljx9ag64t2xmVkjK4=
|
||||
github.com/PuerkitoBio/purell v1.1.0/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0=
|
||||
github.com/PuerkitoBio/urlesc v0.0.0-20160726150825-5bd2802263f2 h1:JCHLVE3B+kJde7bIEo5N4J+ZbLhp0J1Fs+ulyRws4gE=
|
||||
github.com/PuerkitoBio/urlesc v0.0.0-20160726150825-5bd2802263f2/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE=
|
||||
github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
|
||||
|
|
|
@ -29,7 +29,7 @@ require (
|
|||
replace (
|
||||
cloud.google.com/go => cloud.google.com/go v0.0.0-20160913182117-3b1ae45394a2
|
||||
github.com/Azure/go-autorest => github.com/Azure/go-autorest v11.1.0+incompatible
|
||||
github.com/PuerkitoBio/purell => github.com/PuerkitoBio/purell v1.0.0
|
||||
github.com/PuerkitoBio/purell => github.com/PuerkitoBio/purell v1.1.0
|
||||
github.com/PuerkitoBio/urlesc => github.com/PuerkitoBio/urlesc v0.0.0-20160726150825-5bd2802263f2
|
||||
github.com/davecgh/go-spew => github.com/davecgh/go-spew v0.0.0-20170626231645-782f4967f2dc
|
||||
github.com/dgrijalva/jwt-go => github.com/dgrijalva/jwt-go v0.0.0-20160705203006-01aeca54ebda
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
cloud.google.com/go v0.0.0-20160913182117-3b1ae45394a2/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
|
||||
github.com/Azure/go-autorest v11.1.0+incompatible/go.mod h1:r+4oMnoxhatjLLJ6zxSWATqVooLgysK6ZNox3g/xq24=
|
||||
github.com/PuerkitoBio/purell v1.0.0 h1:0GoNN3taZV6QI81IXgCbxMyEaJDXMSIjArYBCYzVVvs=
|
||||
github.com/PuerkitoBio/purell v1.0.0/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0=
|
||||
github.com/PuerkitoBio/purell v1.1.0 h1:rmGxhojJlM0tuKtfdvliR84CFHljx9ag64t2xmVkjK4=
|
||||
github.com/PuerkitoBio/purell v1.1.0/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0=
|
||||
github.com/PuerkitoBio/urlesc v0.0.0-20160726150825-5bd2802263f2 h1:JCHLVE3B+kJde7bIEo5N4J+ZbLhp0J1Fs+ulyRws4gE=
|
||||
github.com/PuerkitoBio/urlesc v0.0.0-20160726150825-5bd2802263f2/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE=
|
||||
github.com/davecgh/go-spew v0.0.0-20170626231645-782f4967f2dc h1:0A0n6a0Y3vW5ktoWKC+ggkGXRzMJWMvqIYlFmsjwQzY=
|
||||
|
|
|
@ -19,7 +19,7 @@ replace (
|
|||
github.com/Azure/go-autorest => github.com/Azure/go-autorest v11.1.0+incompatible
|
||||
github.com/BurntSushi/toml => github.com/BurntSushi/toml v0.3.0
|
||||
github.com/NYTimes/gziphandler => github.com/NYTimes/gziphandler v0.0.0-20170623195520-56545f4a5d46
|
||||
github.com/PuerkitoBio/purell => github.com/PuerkitoBio/purell v1.0.0
|
||||
github.com/PuerkitoBio/purell => github.com/PuerkitoBio/purell v1.1.0
|
||||
github.com/PuerkitoBio/urlesc => github.com/PuerkitoBio/urlesc v0.0.0-20160726150825-5bd2802263f2
|
||||
github.com/alecthomas/template => github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc
|
||||
github.com/alecthomas/units => github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf
|
||||
|
|
|
@ -3,7 +3,7 @@ github.com/Azure/go-ansiterm v0.0.0-20170929234023-d6e3b3328b78/go.mod h1:LmzpDX
|
|||
github.com/Azure/go-autorest v11.1.0+incompatible/go.mod h1:r+4oMnoxhatjLLJ6zxSWATqVooLgysK6ZNox3g/xq24=
|
||||
github.com/BurntSushi/toml v0.3.0/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
|
||||
github.com/NYTimes/gziphandler v0.0.0-20170623195520-56545f4a5d46/go.mod h1:3wb06e3pkSAbeQ52E9H9iFoQsEEwGN64994WTCIhntQ=
|
||||
github.com/PuerkitoBio/purell v1.0.0/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0=
|
||||
github.com/PuerkitoBio/purell v1.1.0/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0=
|
||||
github.com/PuerkitoBio/urlesc v0.0.0-20160726150825-5bd2802263f2/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE=
|
||||
github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
|
||||
github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
|
||||
|
|
|
@ -16,7 +16,7 @@ replace (
|
|||
github.com/Azure/go-autorest => github.com/Azure/go-autorest v11.1.0+incompatible
|
||||
github.com/BurntSushi/toml => github.com/BurntSushi/toml v0.3.0
|
||||
github.com/NYTimes/gziphandler => github.com/NYTimes/gziphandler v0.0.0-20170623195520-56545f4a5d46
|
||||
github.com/PuerkitoBio/purell => github.com/PuerkitoBio/purell v1.0.0
|
||||
github.com/PuerkitoBio/purell => github.com/PuerkitoBio/purell v1.1.0
|
||||
github.com/PuerkitoBio/urlesc => github.com/PuerkitoBio/urlesc v0.0.0-20160726150825-5bd2802263f2
|
||||
github.com/alecthomas/template => github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc
|
||||
github.com/alecthomas/units => github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf
|
||||
|
|
|
@ -3,7 +3,7 @@ github.com/Azure/go-ansiterm v0.0.0-20170929234023-d6e3b3328b78/go.mod h1:LmzpDX
|
|||
github.com/Azure/go-autorest v11.1.0+incompatible/go.mod h1:r+4oMnoxhatjLLJ6zxSWATqVooLgysK6ZNox3g/xq24=
|
||||
github.com/BurntSushi/toml v0.3.0/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
|
||||
github.com/NYTimes/gziphandler v0.0.0-20170623195520-56545f4a5d46/go.mod h1:3wb06e3pkSAbeQ52E9H9iFoQsEEwGN64994WTCIhntQ=
|
||||
github.com/PuerkitoBio/purell v1.0.0/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0=
|
||||
github.com/PuerkitoBio/purell v1.1.0/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0=
|
||||
github.com/PuerkitoBio/urlesc v0.0.0-20160726150825-5bd2802263f2/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE=
|
||||
github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
|
||||
github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
|
||||
|
|
|
@ -32,7 +32,7 @@ replace (
|
|||
github.com/Azure/go-autorest => github.com/Azure/go-autorest v11.1.0+incompatible
|
||||
github.com/BurntSushi/toml => github.com/BurntSushi/toml v0.3.0
|
||||
github.com/NYTimes/gziphandler => github.com/NYTimes/gziphandler v0.0.0-20170623195520-56545f4a5d46
|
||||
github.com/PuerkitoBio/purell => github.com/PuerkitoBio/purell v1.0.0
|
||||
github.com/PuerkitoBio/purell => github.com/PuerkitoBio/purell v1.1.0
|
||||
github.com/PuerkitoBio/urlesc => github.com/PuerkitoBio/urlesc v0.0.0-20160726150825-5bd2802263f2
|
||||
github.com/alecthomas/template => github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc
|
||||
github.com/alecthomas/units => github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf
|
||||
|
|
|
@ -5,8 +5,8 @@ github.com/BurntSushi/toml v0.3.0 h1:e1/Ivsx3Z0FVTV0NSOv/aVgbUWyQuzj7DDnFblkRvsY
|
|||
github.com/BurntSushi/toml v0.3.0/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
|
||||
github.com/NYTimes/gziphandler v0.0.0-20170623195520-56545f4a5d46 h1:lsxEuwrXEAokXB9qhlbKWPpo3KMLZQ5WB5WLQRW1uq0=
|
||||
github.com/NYTimes/gziphandler v0.0.0-20170623195520-56545f4a5d46/go.mod h1:3wb06e3pkSAbeQ52E9H9iFoQsEEwGN64994WTCIhntQ=
|
||||
github.com/PuerkitoBio/purell v1.0.0 h1:0GoNN3taZV6QI81IXgCbxMyEaJDXMSIjArYBCYzVVvs=
|
||||
github.com/PuerkitoBio/purell v1.0.0/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0=
|
||||
github.com/PuerkitoBio/purell v1.1.0 h1:rmGxhojJlM0tuKtfdvliR84CFHljx9ag64t2xmVkjK4=
|
||||
github.com/PuerkitoBio/purell v1.1.0/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0=
|
||||
github.com/PuerkitoBio/urlesc v0.0.0-20160726150825-5bd2802263f2 h1:JCHLVE3B+kJde7bIEo5N4J+ZbLhp0J1Fs+ulyRws4gE=
|
||||
github.com/PuerkitoBio/urlesc v0.0.0-20160726150825-5bd2802263f2/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE=
|
||||
github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
|
||||
|
|
|
@ -22,7 +22,7 @@ replace (
|
|||
github.com/Azure/go-autorest => github.com/Azure/go-autorest v11.1.0+incompatible
|
||||
github.com/BurntSushi/toml => github.com/BurntSushi/toml v0.3.0
|
||||
github.com/NYTimes/gziphandler => github.com/NYTimes/gziphandler v0.0.0-20170623195520-56545f4a5d46
|
||||
github.com/PuerkitoBio/purell => github.com/PuerkitoBio/purell v1.0.0
|
||||
github.com/PuerkitoBio/purell => github.com/PuerkitoBio/purell v1.1.0
|
||||
github.com/PuerkitoBio/urlesc => github.com/PuerkitoBio/urlesc v0.0.0-20160726150825-5bd2802263f2
|
||||
github.com/alecthomas/template => github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc
|
||||
github.com/alecthomas/units => github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf
|
||||
|
|
|
@ -5,8 +5,8 @@ github.com/BurntSushi/toml v0.3.0 h1:e1/Ivsx3Z0FVTV0NSOv/aVgbUWyQuzj7DDnFblkRvsY
|
|||
github.com/BurntSushi/toml v0.3.0/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
|
||||
github.com/NYTimes/gziphandler v0.0.0-20170623195520-56545f4a5d46 h1:lsxEuwrXEAokXB9qhlbKWPpo3KMLZQ5WB5WLQRW1uq0=
|
||||
github.com/NYTimes/gziphandler v0.0.0-20170623195520-56545f4a5d46/go.mod h1:3wb06e3pkSAbeQ52E9H9iFoQsEEwGN64994WTCIhntQ=
|
||||
github.com/PuerkitoBio/purell v1.0.0 h1:0GoNN3taZV6QI81IXgCbxMyEaJDXMSIjArYBCYzVVvs=
|
||||
github.com/PuerkitoBio/purell v1.0.0/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0=
|
||||
github.com/PuerkitoBio/purell v1.1.0 h1:rmGxhojJlM0tuKtfdvliR84CFHljx9ag64t2xmVkjK4=
|
||||
github.com/PuerkitoBio/purell v1.1.0/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0=
|
||||
github.com/PuerkitoBio/urlesc v0.0.0-20160726150825-5bd2802263f2 h1:JCHLVE3B+kJde7bIEo5N4J+ZbLhp0J1Fs+ulyRws4gE=
|
||||
github.com/PuerkitoBio/urlesc v0.0.0-20160726150825-5bd2802263f2/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE=
|
||||
github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
|
||||
|
|
|
@ -14,7 +14,7 @@ require (
|
|||
replace (
|
||||
cloud.google.com/go => cloud.google.com/go v0.0.0-20160913182117-3b1ae45394a2
|
||||
github.com/Azure/go-autorest => github.com/Azure/go-autorest v11.1.0+incompatible
|
||||
github.com/PuerkitoBio/purell => github.com/PuerkitoBio/purell v1.0.0
|
||||
github.com/PuerkitoBio/purell => github.com/PuerkitoBio/purell v1.1.0
|
||||
github.com/PuerkitoBio/urlesc => github.com/PuerkitoBio/urlesc v0.0.0-20160726150825-5bd2802263f2
|
||||
github.com/davecgh/go-spew => github.com/davecgh/go-spew v0.0.0-20170626231645-782f4967f2dc
|
||||
github.com/dgrijalva/jwt-go => github.com/dgrijalva/jwt-go v0.0.0-20160705203006-01aeca54ebda
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
cloud.google.com/go v0.0.0-20160913182117-3b1ae45394a2/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
|
||||
github.com/Azure/go-autorest v11.1.0+incompatible/go.mod h1:r+4oMnoxhatjLLJ6zxSWATqVooLgysK6ZNox3g/xq24=
|
||||
github.com/PuerkitoBio/purell v1.0.0 h1:0GoNN3taZV6QI81IXgCbxMyEaJDXMSIjArYBCYzVVvs=
|
||||
github.com/PuerkitoBio/purell v1.0.0/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0=
|
||||
github.com/PuerkitoBio/purell v1.1.0 h1:rmGxhojJlM0tuKtfdvliR84CFHljx9ag64t2xmVkjK4=
|
||||
github.com/PuerkitoBio/purell v1.1.0/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0=
|
||||
github.com/PuerkitoBio/urlesc v0.0.0-20160726150825-5bd2802263f2 h1:JCHLVE3B+kJde7bIEo5N4J+ZbLhp0J1Fs+ulyRws4gE=
|
||||
github.com/PuerkitoBio/urlesc v0.0.0-20160726150825-5bd2802263f2/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE=
|
||||
github.com/davecgh/go-spew v0.0.0-20170626231645-782f4967f2dc h1:0A0n6a0Y3vW5ktoWKC+ggkGXRzMJWMvqIYlFmsjwQzY=
|
||||
|
|
|
@ -407,13 +407,12 @@ filegroup(
|
|||
"//vendor/golang.org/x/oauth2:all-srcs",
|
||||
"//vendor/golang.org/x/sys/unix:all-srcs",
|
||||
"//vendor/golang.org/x/sys/windows:all-srcs",
|
||||
"//vendor/golang.org/x/text/cases:all-srcs",
|
||||
"//vendor/golang.org/x/text/encoding:all-srcs",
|
||||
"//vendor/golang.org/x/text/internal:all-srcs",
|
||||
"//vendor/golang.org/x/text/internal/tag:all-srcs",
|
||||
"//vendor/golang.org/x/text/internal/utf8internal:all-srcs",
|
||||
"//vendor/golang.org/x/text/language:all-srcs",
|
||||
"//vendor/golang.org/x/text/runes:all-srcs",
|
||||
"//vendor/golang.org/x/text/secure/bidirule:all-srcs",
|
||||
"//vendor/golang.org/x/text/secure/precis:all-srcs",
|
||||
"//vendor/golang.org/x/text/transform:all-srcs",
|
||||
"//vendor/golang.org/x/text/unicode/bidi:all-srcs",
|
||||
"//vendor/golang.org/x/text/unicode/norm:all-srcs",
|
||||
|
|
|
@ -9,8 +9,8 @@ go_library(
|
|||
deps = [
|
||||
"//vendor/github.com/PuerkitoBio/urlesc:go_default_library",
|
||||
"//vendor/golang.org/x/net/idna:go_default_library",
|
||||
"//vendor/golang.org/x/text/secure/precis:go_default_library",
|
||||
"//vendor/golang.org/x/text/unicode/norm:go_default_library",
|
||||
"//vendor/golang.org/x/text/width:go_default_library",
|
||||
],
|
||||
)
|
||||
|
||||
|
|
|
@ -12,6 +12,7 @@ Based on the [wikipedia paper][wiki] and the [RFC 3986 document][rfc].
|
|||
|
||||
## Changelog
|
||||
|
||||
* **2016-11-14 (v1.1.0)** : IDN: Conform to RFC 5895: Fold character width (thanks to @beeker1121).
|
||||
* **2016-07-27 (v1.0.0)** : Normalize IDN to ASCII (thanks to @zenovich).
|
||||
* **2015-02-08** : Add fix for relative paths issue ([PR #5][pr5]) and add fix for unnecessary encoding of reserved characters ([see issue #7][iss7]).
|
||||
* **v0.2.0** : Add benchmarks, Attempt IDN support.
|
||||
|
@ -172,6 +173,7 @@ And with `FlagsUnsafeGreedy`:
|
|||
@opennota
|
||||
@pchristopher1275
|
||||
@zenovich
|
||||
@beeker1121
|
||||
|
||||
## License
|
||||
|
||||
|
|
|
@ -15,8 +15,8 @@ import (
|
|||
|
||||
"github.com/PuerkitoBio/urlesc"
|
||||
"golang.org/x/net/idna"
|
||||
"golang.org/x/text/secure/precis"
|
||||
"golang.org/x/text/unicode/norm"
|
||||
"golang.org/x/text/width"
|
||||
)
|
||||
|
||||
// A set of normalization flags determines how a URL will
|
||||
|
@ -150,22 +150,26 @@ func MustNormalizeURLString(u string, f NormalizationFlags) string {
|
|||
// NormalizeURLString returns the normalized string, or an error if it can't be parsed into an URL object.
|
||||
// It takes an URL string as input, as well as the normalization flags.
|
||||
func NormalizeURLString(u string, f NormalizationFlags) (string, error) {
|
||||
if parsed, e := url.Parse(u); e != nil {
|
||||
return "", e
|
||||
} else {
|
||||
options := make([]precis.Option, 1, 3)
|
||||
options[0] = precis.IgnoreCase
|
||||
if f&FlagLowercaseHost == FlagLowercaseHost {
|
||||
options = append(options, precis.FoldCase())
|
||||
}
|
||||
options = append(options, precis.Norm(norm.NFC))
|
||||
profile := precis.NewFreeform(options...)
|
||||
if parsed.Host, e = idna.ToASCII(profile.NewTransformer().String(parsed.Host)); e != nil {
|
||||
return "", e
|
||||
}
|
||||
return NormalizeURL(parsed, f), nil
|
||||
parsed, err := url.Parse(u)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
panic("Unreachable code.")
|
||||
|
||||
if f&FlagLowercaseHost == FlagLowercaseHost {
|
||||
parsed.Host = strings.ToLower(parsed.Host)
|
||||
}
|
||||
|
||||
// The idna package doesn't fully conform to RFC 5895
|
||||
// (https://tools.ietf.org/html/rfc5895), so we do it here.
|
||||
// Taken from Go 1.8 cycle source, courtesy of bradfitz.
|
||||
// TODO: Remove when (if?) idna package conforms to RFC 5895.
|
||||
parsed.Host = width.Fold.String(parsed.Host)
|
||||
parsed.Host = norm.NFC.String(parsed.Host)
|
||||
if parsed.Host, err = idna.ToASCII(parsed.Host); err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return NormalizeURL(parsed, f), nil
|
||||
}
|
||||
|
||||
// NormalizeURL returns the normalized string.
|
||||
|
|
|
@ -1,38 +0,0 @@
|
|||
load("@io_bazel_rules_go//go:def.bzl", "go_library")
|
||||
|
||||
go_library(
|
||||
name = "go_default_library",
|
||||
srcs = [
|
||||
"cases.go",
|
||||
"context.go",
|
||||
"fold.go",
|
||||
"info.go",
|
||||
"map.go",
|
||||
"tables.go",
|
||||
"trieval.go",
|
||||
],
|
||||
cgo = True,
|
||||
importmap = "k8s.io/kubernetes/vendor/golang.org/x/text/cases",
|
||||
importpath = "golang.org/x/text/cases",
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [
|
||||
"//vendor/golang.org/x/text/internal:go_default_library",
|
||||
"//vendor/golang.org/x/text/language:go_default_library",
|
||||
"//vendor/golang.org/x/text/transform:go_default_library",
|
||||
"//vendor/golang.org/x/text/unicode/norm:go_default_library",
|
||||
],
|
||||
)
|
||||
|
||||
filegroup(
|
||||
name = "package-srcs",
|
||||
srcs = glob(["**"]),
|
||||
tags = ["automanaged"],
|
||||
visibility = ["//visibility:private"],
|
||||
)
|
||||
|
||||
filegroup(
|
||||
name = "all-srcs",
|
||||
srcs = [":package-srcs"],
|
||||
tags = ["automanaged"],
|
||||
visibility = ["//visibility:public"],
|
||||
)
|
|
@ -1,162 +0,0 @@
|
|||
// Copyright 2014 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:generate go run gen.go gen_trieval.go
|
||||
|
||||
// Package cases provides general and language-specific case mappers.
|
||||
package cases // import "golang.org/x/text/cases"
|
||||
|
||||
import (
|
||||
"golang.org/x/text/language"
|
||||
"golang.org/x/text/transform"
|
||||
)
|
||||
|
||||
// References:
|
||||
// - Unicode Reference Manual Chapter 3.13, 4.2, and 5.18.
|
||||
// - http://www.unicode.org/reports/tr29/
|
||||
// - http://www.unicode.org/Public/6.3.0/ucd/CaseFolding.txt
|
||||
// - http://www.unicode.org/Public/6.3.0/ucd/SpecialCasing.txt
|
||||
// - http://www.unicode.org/Public/6.3.0/ucd/DerivedCoreProperties.txt
|
||||
// - http://www.unicode.org/Public/6.3.0/ucd/auxiliary/WordBreakProperty.txt
|
||||
// - http://www.unicode.org/Public/6.3.0/ucd/auxiliary/WordBreakTest.txt
|
||||
// - http://userguide.icu-project.org/transforms/casemappings
|
||||
|
||||
// TODO:
|
||||
// - Case folding
|
||||
// - Wide and Narrow?
|
||||
// - Segmenter option for title casing.
|
||||
// - ASCII fast paths
|
||||
// - Encode Soft-Dotted property within trie somehow.
|
||||
|
||||
// A Caser transforms given input to a certain case. It implements
|
||||
// transform.Transformer.
|
||||
//
|
||||
// A Caser may be stateful and should therefore not be shared between
|
||||
// goroutines.
|
||||
type Caser struct {
|
||||
t transform.SpanningTransformer
|
||||
}
|
||||
|
||||
// Bytes returns a new byte slice with the result of converting b to the case
|
||||
// form implemented by c.
|
||||
func (c Caser) Bytes(b []byte) []byte {
|
||||
b, _, _ = transform.Bytes(c.t, b)
|
||||
return b
|
||||
}
|
||||
|
||||
// String returns a string with the result of transforming s to the case form
|
||||
// implemented by c.
|
||||
func (c Caser) String(s string) string {
|
||||
s, _, _ = transform.String(c.t, s)
|
||||
return s
|
||||
}
|
||||
|
||||
// Reset resets the Caser to be reused for new input after a previous call to
|
||||
// Transform.
|
||||
func (c Caser) Reset() { c.t.Reset() }
|
||||
|
||||
// Transform implements the transform.Transformer interface and transforms the
|
||||
// given input to the case form implemented by c.
|
||||
func (c Caser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
return c.t.Transform(dst, src, atEOF)
|
||||
}
|
||||
|
||||
// Span implements the transform.SpanningTransformer interface.
|
||||
func (c Caser) Span(src []byte, atEOF bool) (n int, err error) {
|
||||
return c.t.Span(src, atEOF)
|
||||
}
|
||||
|
||||
// Upper returns a Caser for language-specific uppercasing.
|
||||
func Upper(t language.Tag, opts ...Option) Caser {
|
||||
return Caser{makeUpper(t, getOpts(opts...))}
|
||||
}
|
||||
|
||||
// Lower returns a Caser for language-specific lowercasing.
|
||||
func Lower(t language.Tag, opts ...Option) Caser {
|
||||
return Caser{makeLower(t, getOpts(opts...))}
|
||||
}
|
||||
|
||||
// Title returns a Caser for language-specific title casing. It uses an
|
||||
// approximation of the default Unicode Word Break algorithm.
|
||||
func Title(t language.Tag, opts ...Option) Caser {
|
||||
return Caser{makeTitle(t, getOpts(opts...))}
|
||||
}
|
||||
|
||||
// Fold returns a Caser that implements Unicode case folding. The returned Caser
|
||||
// is stateless and safe to use concurrently by multiple goroutines.
|
||||
//
|
||||
// Case folding does not normalize the input and may not preserve a normal form.
|
||||
// Use the collate or search package for more convenient and linguistically
|
||||
// sound comparisons. Use golang.org/x/text/secure/precis for string comparisons
|
||||
// where security aspects are a concern.
|
||||
func Fold(opts ...Option) Caser {
|
||||
return Caser{makeFold(getOpts(opts...))}
|
||||
}
|
||||
|
||||
// An Option is used to modify the behavior of a Caser.
|
||||
type Option func(o options) options
|
||||
|
||||
// TODO: consider these options to take a boolean as well, like FinalSigma.
|
||||
// The advantage of using this approach is that other providers of a lower-case
|
||||
// algorithm could set different defaults by prefixing a user-provided slice
|
||||
// of options with their own. This is handy, for instance, for the precis
|
||||
// package which would override the default to not handle the Greek final sigma.
|
||||
|
||||
var (
|
||||
// NoLower disables the lowercasing of non-leading letters for a title
|
||||
// caser.
|
||||
NoLower Option = noLower
|
||||
|
||||
// Compact omits mappings in case folding for characters that would grow the
|
||||
// input. (Unimplemented.)
|
||||
Compact Option = compact
|
||||
)
|
||||
|
||||
// TODO: option to preserve a normal form, if applicable?
|
||||
|
||||
type options struct {
|
||||
noLower bool
|
||||
simple bool
|
||||
|
||||
// TODO: segmenter, max ignorable, alternative versions, etc.
|
||||
|
||||
ignoreFinalSigma bool
|
||||
}
|
||||
|
||||
func getOpts(o ...Option) (res options) {
|
||||
for _, f := range o {
|
||||
res = f(res)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func noLower(o options) options {
|
||||
o.noLower = true
|
||||
return o
|
||||
}
|
||||
|
||||
func compact(o options) options {
|
||||
o.simple = true
|
||||
return o
|
||||
}
|
||||
|
||||
// HandleFinalSigma specifies whether the special handling of Greek final sigma
|
||||
// should be enabled. Unicode prescribes handling the Greek final sigma for all
|
||||
// locales, but standards like IDNA and PRECIS override this default.
|
||||
func HandleFinalSigma(enable bool) Option {
|
||||
if enable {
|
||||
return handleFinalSigma
|
||||
}
|
||||
return ignoreFinalSigma
|
||||
}
|
||||
|
||||
func ignoreFinalSigma(o options) options {
|
||||
o.ignoreFinalSigma = true
|
||||
return o
|
||||
}
|
||||
|
||||
func handleFinalSigma(o options) options {
|
||||
o.ignoreFinalSigma = false
|
||||
return o
|
||||
}
|
|
@ -1,376 +0,0 @@
|
|||
// Copyright 2014 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package cases
|
||||
|
||||
import "golang.org/x/text/transform"
|
||||
|
||||
// A context is used for iterating over source bytes, fetching case info and
|
||||
// writing to a destination buffer.
|
||||
//
|
||||
// Casing operations may need more than one rune of context to decide how a rune
|
||||
// should be cased. Casing implementations should call checkpoint on context
|
||||
// whenever it is known to be safe to return the runes processed so far.
|
||||
//
|
||||
// It is recommended for implementations to not allow for more than 30 case
|
||||
// ignorables as lookahead (analogous to the limit in norm) and to use state if
|
||||
// unbounded lookahead is needed for cased runes.
|
||||
type context struct {
|
||||
dst, src []byte
|
||||
atEOF bool
|
||||
|
||||
pDst int // pDst points past the last written rune in dst.
|
||||
pSrc int // pSrc points to the start of the currently scanned rune.
|
||||
|
||||
// checkpoints safe to return in Transform, where nDst <= pDst and nSrc <= pSrc.
|
||||
nDst, nSrc int
|
||||
err error
|
||||
|
||||
sz int // size of current rune
|
||||
info info // case information of currently scanned rune
|
||||
|
||||
// State preserved across calls to Transform.
|
||||
isMidWord bool // false if next cased letter needs to be title-cased.
|
||||
}
|
||||
|
||||
func (c *context) Reset() {
|
||||
c.isMidWord = false
|
||||
}
|
||||
|
||||
// ret returns the return values for the Transform method. It checks whether
|
||||
// there were insufficient bytes in src to complete and introduces an error
|
||||
// accordingly, if necessary.
|
||||
func (c *context) ret() (nDst, nSrc int, err error) {
|
||||
if c.err != nil || c.nSrc == len(c.src) {
|
||||
return c.nDst, c.nSrc, c.err
|
||||
}
|
||||
// This point is only reached by mappers if there was no short destination
|
||||
// buffer. This means that the source buffer was exhausted and that c.sz was
|
||||
// set to 0 by next.
|
||||
if c.atEOF && c.pSrc == len(c.src) {
|
||||
return c.pDst, c.pSrc, nil
|
||||
}
|
||||
return c.nDst, c.nSrc, transform.ErrShortSrc
|
||||
}
|
||||
|
||||
// retSpan returns the return values for the Span method. It checks whether
|
||||
// there were insufficient bytes in src to complete and introduces an error
|
||||
// accordingly, if necessary.
|
||||
func (c *context) retSpan() (n int, err error) {
|
||||
_, nSrc, err := c.ret()
|
||||
return nSrc, err
|
||||
}
|
||||
|
||||
// checkpoint sets the return value buffer points for Transform to the current
|
||||
// positions.
|
||||
func (c *context) checkpoint() {
|
||||
if c.err == nil {
|
||||
c.nDst, c.nSrc = c.pDst, c.pSrc+c.sz
|
||||
}
|
||||
}
|
||||
|
||||
// unreadRune causes the last rune read by next to be reread on the next
|
||||
// invocation of next. Only one unreadRune may be called after a call to next.
|
||||
func (c *context) unreadRune() {
|
||||
c.sz = 0
|
||||
}
|
||||
|
||||
func (c *context) next() bool {
|
||||
c.pSrc += c.sz
|
||||
if c.pSrc == len(c.src) || c.err != nil {
|
||||
c.info, c.sz = 0, 0
|
||||
return false
|
||||
}
|
||||
v, sz := trie.lookup(c.src[c.pSrc:])
|
||||
c.info, c.sz = info(v), sz
|
||||
if c.sz == 0 {
|
||||
if c.atEOF {
|
||||
// A zero size means we have an incomplete rune. If we are atEOF,
|
||||
// this means it is an illegal rune, which we will consume one
|
||||
// byte at a time.
|
||||
c.sz = 1
|
||||
} else {
|
||||
c.err = transform.ErrShortSrc
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// writeBytes adds bytes to dst.
|
||||
func (c *context) writeBytes(b []byte) bool {
|
||||
if len(c.dst)-c.pDst < len(b) {
|
||||
c.err = transform.ErrShortDst
|
||||
return false
|
||||
}
|
||||
// This loop is faster than using copy.
|
||||
for _, ch := range b {
|
||||
c.dst[c.pDst] = ch
|
||||
c.pDst++
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// writeString writes the given string to dst.
|
||||
func (c *context) writeString(s string) bool {
|
||||
if len(c.dst)-c.pDst < len(s) {
|
||||
c.err = transform.ErrShortDst
|
||||
return false
|
||||
}
|
||||
// This loop is faster than using copy.
|
||||
for i := 0; i < len(s); i++ {
|
||||
c.dst[c.pDst] = s[i]
|
||||
c.pDst++
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// copy writes the current rune to dst.
|
||||
func (c *context) copy() bool {
|
||||
return c.writeBytes(c.src[c.pSrc : c.pSrc+c.sz])
|
||||
}
|
||||
|
||||
// copyXOR copies the current rune to dst and modifies it by applying the XOR
|
||||
// pattern of the case info. It is the responsibility of the caller to ensure
|
||||
// that this is a rune with a XOR pattern defined.
|
||||
func (c *context) copyXOR() bool {
|
||||
if !c.copy() {
|
||||
return false
|
||||
}
|
||||
if c.info&xorIndexBit == 0 {
|
||||
// Fast path for 6-bit XOR pattern, which covers most cases.
|
||||
c.dst[c.pDst-1] ^= byte(c.info >> xorShift)
|
||||
} else {
|
||||
// Interpret XOR bits as an index.
|
||||
// TODO: test performance for unrolling this loop. Verify that we have
|
||||
// at least two bytes and at most three.
|
||||
idx := c.info >> xorShift
|
||||
for p := c.pDst - 1; ; p-- {
|
||||
c.dst[p] ^= xorData[idx]
|
||||
idx--
|
||||
if xorData[idx] == 0 {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// hasPrefix returns true if src[pSrc:] starts with the given string.
|
||||
func (c *context) hasPrefix(s string) bool {
|
||||
b := c.src[c.pSrc:]
|
||||
if len(b) < len(s) {
|
||||
return false
|
||||
}
|
||||
for i, c := range b[:len(s)] {
|
||||
if c != s[i] {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// caseType returns an info with only the case bits, normalized to either
|
||||
// cLower, cUpper, cTitle or cUncased.
|
||||
func (c *context) caseType() info {
|
||||
cm := c.info & 0x7
|
||||
if cm < 4 {
|
||||
return cm
|
||||
}
|
||||
if cm >= cXORCase {
|
||||
// xor the last bit of the rune with the case type bits.
|
||||
b := c.src[c.pSrc+c.sz-1]
|
||||
return info(b&1) ^ cm&0x3
|
||||
}
|
||||
if cm == cIgnorableCased {
|
||||
return cLower
|
||||
}
|
||||
return cUncased
|
||||
}
|
||||
|
||||
// lower writes the lowercase version of the current rune to dst.
|
||||
func lower(c *context) bool {
|
||||
ct := c.caseType()
|
||||
if c.info&hasMappingMask == 0 || ct == cLower {
|
||||
return c.copy()
|
||||
}
|
||||
if c.info&exceptionBit == 0 {
|
||||
return c.copyXOR()
|
||||
}
|
||||
e := exceptions[c.info>>exceptionShift:]
|
||||
offset := 2 + e[0]&lengthMask // size of header + fold string
|
||||
if nLower := (e[1] >> lengthBits) & lengthMask; nLower != noChange {
|
||||
return c.writeString(e[offset : offset+nLower])
|
||||
}
|
||||
return c.copy()
|
||||
}
|
||||
|
||||
func isLower(c *context) bool {
|
||||
ct := c.caseType()
|
||||
if c.info&hasMappingMask == 0 || ct == cLower {
|
||||
return true
|
||||
}
|
||||
if c.info&exceptionBit == 0 {
|
||||
c.err = transform.ErrEndOfSpan
|
||||
return false
|
||||
}
|
||||
e := exceptions[c.info>>exceptionShift:]
|
||||
if nLower := (e[1] >> lengthBits) & lengthMask; nLower != noChange {
|
||||
c.err = transform.ErrEndOfSpan
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// upper writes the uppercase version of the current rune to dst.
|
||||
func upper(c *context) bool {
|
||||
ct := c.caseType()
|
||||
if c.info&hasMappingMask == 0 || ct == cUpper {
|
||||
return c.copy()
|
||||
}
|
||||
if c.info&exceptionBit == 0 {
|
||||
return c.copyXOR()
|
||||
}
|
||||
e := exceptions[c.info>>exceptionShift:]
|
||||
offset := 2 + e[0]&lengthMask // size of header + fold string
|
||||
// Get length of first special case mapping.
|
||||
n := (e[1] >> lengthBits) & lengthMask
|
||||
if ct == cTitle {
|
||||
// The first special case mapping is for lower. Set n to the second.
|
||||
if n == noChange {
|
||||
n = 0
|
||||
}
|
||||
n, e = e[1]&lengthMask, e[n:]
|
||||
}
|
||||
if n != noChange {
|
||||
return c.writeString(e[offset : offset+n])
|
||||
}
|
||||
return c.copy()
|
||||
}
|
||||
|
||||
// isUpper writes the isUppercase version of the current rune to dst.
|
||||
func isUpper(c *context) bool {
|
||||
ct := c.caseType()
|
||||
if c.info&hasMappingMask == 0 || ct == cUpper {
|
||||
return true
|
||||
}
|
||||
if c.info&exceptionBit == 0 {
|
||||
c.err = transform.ErrEndOfSpan
|
||||
return false
|
||||
}
|
||||
e := exceptions[c.info>>exceptionShift:]
|
||||
// Get length of first special case mapping.
|
||||
n := (e[1] >> lengthBits) & lengthMask
|
||||
if ct == cTitle {
|
||||
n = e[1] & lengthMask
|
||||
}
|
||||
if n != noChange {
|
||||
c.err = transform.ErrEndOfSpan
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// title writes the title case version of the current rune to dst.
|
||||
func title(c *context) bool {
|
||||
ct := c.caseType()
|
||||
if c.info&hasMappingMask == 0 || ct == cTitle {
|
||||
return c.copy()
|
||||
}
|
||||
if c.info&exceptionBit == 0 {
|
||||
if ct == cLower {
|
||||
return c.copyXOR()
|
||||
}
|
||||
return c.copy()
|
||||
}
|
||||
// Get the exception data.
|
||||
e := exceptions[c.info>>exceptionShift:]
|
||||
offset := 2 + e[0]&lengthMask // size of header + fold string
|
||||
|
||||
nFirst := (e[1] >> lengthBits) & lengthMask
|
||||
if nTitle := e[1] & lengthMask; nTitle != noChange {
|
||||
if nFirst != noChange {
|
||||
e = e[nFirst:]
|
||||
}
|
||||
return c.writeString(e[offset : offset+nTitle])
|
||||
}
|
||||
if ct == cLower && nFirst != noChange {
|
||||
// Use the uppercase version instead.
|
||||
return c.writeString(e[offset : offset+nFirst])
|
||||
}
|
||||
// Already in correct case.
|
||||
return c.copy()
|
||||
}
|
||||
|
||||
// isTitle reports whether the current rune is in title case.
|
||||
func isTitle(c *context) bool {
|
||||
ct := c.caseType()
|
||||
if c.info&hasMappingMask == 0 || ct == cTitle {
|
||||
return true
|
||||
}
|
||||
if c.info&exceptionBit == 0 {
|
||||
if ct == cLower {
|
||||
c.err = transform.ErrEndOfSpan
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
// Get the exception data.
|
||||
e := exceptions[c.info>>exceptionShift:]
|
||||
if nTitle := e[1] & lengthMask; nTitle != noChange {
|
||||
c.err = transform.ErrEndOfSpan
|
||||
return false
|
||||
}
|
||||
nFirst := (e[1] >> lengthBits) & lengthMask
|
||||
if ct == cLower && nFirst != noChange {
|
||||
c.err = transform.ErrEndOfSpan
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// foldFull writes the foldFull version of the current rune to dst.
|
||||
func foldFull(c *context) bool {
|
||||
if c.info&hasMappingMask == 0 {
|
||||
return c.copy()
|
||||
}
|
||||
ct := c.caseType()
|
||||
if c.info&exceptionBit == 0 {
|
||||
if ct != cLower || c.info&inverseFoldBit != 0 {
|
||||
return c.copyXOR()
|
||||
}
|
||||
return c.copy()
|
||||
}
|
||||
e := exceptions[c.info>>exceptionShift:]
|
||||
n := e[0] & lengthMask
|
||||
if n == 0 {
|
||||
if ct == cLower {
|
||||
return c.copy()
|
||||
}
|
||||
n = (e[1] >> lengthBits) & lengthMask
|
||||
}
|
||||
return c.writeString(e[2 : 2+n])
|
||||
}
|
||||
|
||||
// isFoldFull reports whether the current run is mapped to foldFull
|
||||
func isFoldFull(c *context) bool {
|
||||
if c.info&hasMappingMask == 0 {
|
||||
return true
|
||||
}
|
||||
ct := c.caseType()
|
||||
if c.info&exceptionBit == 0 {
|
||||
if ct != cLower || c.info&inverseFoldBit != 0 {
|
||||
c.err = transform.ErrEndOfSpan
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
e := exceptions[c.info>>exceptionShift:]
|
||||
n := e[0] & lengthMask
|
||||
if n == 0 && ct == cLower {
|
||||
return true
|
||||
}
|
||||
c.err = transform.ErrEndOfSpan
|
||||
return false
|
||||
}
|
|
@ -1,34 +0,0 @@
|
|||
// Copyright 2016 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package cases
|
||||
|
||||
import "golang.org/x/text/transform"
|
||||
|
||||
type caseFolder struct{ transform.NopResetter }
|
||||
|
||||
// caseFolder implements the Transformer interface for doing case folding.
|
||||
func (t *caseFolder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
c := context{dst: dst, src: src, atEOF: atEOF}
|
||||
for c.next() {
|
||||
foldFull(&c)
|
||||
c.checkpoint()
|
||||
}
|
||||
return c.ret()
|
||||
}
|
||||
|
||||
func (t *caseFolder) Span(src []byte, atEOF bool) (n int, err error) {
|
||||
c := context{src: src, atEOF: atEOF}
|
||||
for c.next() && isFoldFull(&c) {
|
||||
c.checkpoint()
|
||||
}
|
||||
return c.retSpan()
|
||||
}
|
||||
|
||||
func makeFold(o options) transform.SpanningTransformer {
|
||||
// TODO: Special case folding, through option Language, Special/Turkic, or
|
||||
// both.
|
||||
// TODO: Implement Compact options.
|
||||
return &caseFolder{}
|
||||
}
|
|
@ -1,839 +0,0 @@
|
|||
// Copyright 2014 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ignore
|
||||
|
||||
// This program generates the trie for casing operations. The Unicode casing
|
||||
// algorithm requires the lookup of various properties and mappings for each
|
||||
// rune. The table generated by this generator combines several of the most
|
||||
// frequently used of these into a single trie so that they can be accessed
|
||||
// with a single lookup.
|
||||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"log"
|
||||
"reflect"
|
||||
"strconv"
|
||||
"strings"
|
||||
"unicode"
|
||||
|
||||
"golang.org/x/text/internal/gen"
|
||||
"golang.org/x/text/internal/triegen"
|
||||
"golang.org/x/text/internal/ucd"
|
||||
"golang.org/x/text/unicode/norm"
|
||||
)
|
||||
|
||||
func main() {
|
||||
gen.Init()
|
||||
genTables()
|
||||
genTablesTest()
|
||||
gen.Repackage("gen_trieval.go", "trieval.go", "cases")
|
||||
}
|
||||
|
||||
// runeInfo contains all information for a rune that we care about for casing
|
||||
// operations.
|
||||
type runeInfo struct {
|
||||
Rune rune
|
||||
|
||||
entry info // trie value for this rune.
|
||||
|
||||
CaseMode info
|
||||
|
||||
// Simple case mappings.
|
||||
Simple [1 + maxCaseMode][]rune
|
||||
|
||||
// Special casing
|
||||
HasSpecial bool
|
||||
Conditional bool
|
||||
Special [1 + maxCaseMode][]rune
|
||||
|
||||
// Folding
|
||||
FoldSimple rune
|
||||
FoldSpecial rune
|
||||
FoldFull []rune
|
||||
|
||||
// TODO: FC_NFKC, or equivalent data.
|
||||
|
||||
// Properties
|
||||
SoftDotted bool
|
||||
CaseIgnorable bool
|
||||
Cased bool
|
||||
DecomposeGreek bool
|
||||
BreakType string
|
||||
BreakCat breakCategory
|
||||
|
||||
// We care mostly about 0, Above, and IotaSubscript.
|
||||
CCC byte
|
||||
}
|
||||
|
||||
type breakCategory int
|
||||
|
||||
const (
|
||||
breakBreak breakCategory = iota
|
||||
breakLetter
|
||||
breakMid
|
||||
)
|
||||
|
||||
// mapping returns the case mapping for the given case type.
|
||||
func (r *runeInfo) mapping(c info) string {
|
||||
if r.HasSpecial {
|
||||
return string(r.Special[c])
|
||||
}
|
||||
if len(r.Simple[c]) != 0 {
|
||||
return string(r.Simple[c])
|
||||
}
|
||||
return string(r.Rune)
|
||||
}
|
||||
|
||||
func parse(file string, f func(p *ucd.Parser)) {
|
||||
ucd.Parse(gen.OpenUCDFile(file), f)
|
||||
}
|
||||
|
||||
func parseUCD() []runeInfo {
|
||||
chars := make([]runeInfo, unicode.MaxRune)
|
||||
|
||||
get := func(r rune) *runeInfo {
|
||||
c := &chars[r]
|
||||
c.Rune = r
|
||||
return c
|
||||
}
|
||||
|
||||
parse("UnicodeData.txt", func(p *ucd.Parser) {
|
||||
ri := get(p.Rune(0))
|
||||
ri.CCC = byte(p.Int(ucd.CanonicalCombiningClass))
|
||||
ri.Simple[cLower] = p.Runes(ucd.SimpleLowercaseMapping)
|
||||
ri.Simple[cUpper] = p.Runes(ucd.SimpleUppercaseMapping)
|
||||
ri.Simple[cTitle] = p.Runes(ucd.SimpleTitlecaseMapping)
|
||||
if p.String(ucd.GeneralCategory) == "Lt" {
|
||||
ri.CaseMode = cTitle
|
||||
}
|
||||
})
|
||||
|
||||
// <code>; <property>
|
||||
parse("PropList.txt", func(p *ucd.Parser) {
|
||||
if p.String(1) == "Soft_Dotted" {
|
||||
chars[p.Rune(0)].SoftDotted = true
|
||||
}
|
||||
})
|
||||
|
||||
// <code>; <word break type>
|
||||
parse("DerivedCoreProperties.txt", func(p *ucd.Parser) {
|
||||
ri := get(p.Rune(0))
|
||||
switch p.String(1) {
|
||||
case "Case_Ignorable":
|
||||
ri.CaseIgnorable = true
|
||||
case "Cased":
|
||||
ri.Cased = true
|
||||
case "Lowercase":
|
||||
ri.CaseMode = cLower
|
||||
case "Uppercase":
|
||||
ri.CaseMode = cUpper
|
||||
}
|
||||
})
|
||||
|
||||
// <code>; <lower> ; <title> ; <upper> ; (<condition_list> ;)?
|
||||
parse("SpecialCasing.txt", func(p *ucd.Parser) {
|
||||
// We drop all conditional special casing and deal with them manually in
|
||||
// the language-specific case mappers. Rune 0x03A3 is the only one with
|
||||
// a conditional formatting that is not language-specific. However,
|
||||
// dealing with this letter is tricky, especially in a streaming
|
||||
// context, so we deal with it in the Caser for Greek specifically.
|
||||
ri := get(p.Rune(0))
|
||||
if p.String(4) == "" {
|
||||
ri.HasSpecial = true
|
||||
ri.Special[cLower] = p.Runes(1)
|
||||
ri.Special[cTitle] = p.Runes(2)
|
||||
ri.Special[cUpper] = p.Runes(3)
|
||||
} else {
|
||||
ri.Conditional = true
|
||||
}
|
||||
})
|
||||
|
||||
// TODO: Use text breaking according to UAX #29.
|
||||
// <code>; <word break type>
|
||||
parse("auxiliary/WordBreakProperty.txt", func(p *ucd.Parser) {
|
||||
ri := get(p.Rune(0))
|
||||
ri.BreakType = p.String(1)
|
||||
|
||||
// We collapse the word breaking properties onto the categories we need.
|
||||
switch p.String(1) { // TODO: officially we need to canonicalize.
|
||||
case "MidLetter", "MidNumLet", "Single_Quote":
|
||||
ri.BreakCat = breakMid
|
||||
if !ri.CaseIgnorable {
|
||||
// finalSigma relies on the fact that all breakMid runes are
|
||||
// also a Case_Ignorable. Revisit this code when this changes.
|
||||
log.Fatalf("Rune %U, which has a break category mid, is not a case ignorable", ri)
|
||||
}
|
||||
case "ALetter", "Hebrew_Letter", "Numeric", "Extend", "ExtendNumLet", "Format", "ZWJ":
|
||||
ri.BreakCat = breakLetter
|
||||
}
|
||||
})
|
||||
|
||||
// <code>; <type>; <mapping>
|
||||
parse("CaseFolding.txt", func(p *ucd.Parser) {
|
||||
ri := get(p.Rune(0))
|
||||
switch p.String(1) {
|
||||
case "C":
|
||||
ri.FoldSimple = p.Rune(2)
|
||||
ri.FoldFull = p.Runes(2)
|
||||
case "S":
|
||||
ri.FoldSimple = p.Rune(2)
|
||||
case "T":
|
||||
ri.FoldSpecial = p.Rune(2)
|
||||
case "F":
|
||||
ri.FoldFull = p.Runes(2)
|
||||
default:
|
||||
log.Fatalf("%U: unknown type: %s", p.Rune(0), p.String(1))
|
||||
}
|
||||
})
|
||||
|
||||
return chars
|
||||
}
|
||||
|
||||
func genTables() {
|
||||
chars := parseUCD()
|
||||
verifyProperties(chars)
|
||||
|
||||
t := triegen.NewTrie("case")
|
||||
for i := range chars {
|
||||
c := &chars[i]
|
||||
makeEntry(c)
|
||||
t.Insert(rune(i), uint64(c.entry))
|
||||
}
|
||||
|
||||
w := gen.NewCodeWriter()
|
||||
defer w.WriteGoFile("tables.go", "cases")
|
||||
|
||||
gen.WriteUnicodeVersion(w)
|
||||
|
||||
// TODO: write CLDR version after adding a mechanism to detect that the
|
||||
// tables on which the manually created locale-sensitive casing code is
|
||||
// based hasn't changed.
|
||||
|
||||
w.WriteVar("xorData", string(xorData))
|
||||
w.WriteVar("exceptions", string(exceptionData))
|
||||
|
||||
sz, err := t.Gen(w, triegen.Compact(&sparseCompacter{}))
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
w.Size += sz
|
||||
}
|
||||
|
||||
func makeEntry(ri *runeInfo) {
|
||||
if ri.CaseIgnorable {
|
||||
if ri.Cased {
|
||||
ri.entry = cIgnorableCased
|
||||
} else {
|
||||
ri.entry = cIgnorableUncased
|
||||
}
|
||||
} else {
|
||||
ri.entry = ri.CaseMode
|
||||
}
|
||||
|
||||
// TODO: handle soft-dotted.
|
||||
|
||||
ccc := cccOther
|
||||
switch ri.CCC {
|
||||
case 0: // Not_Reordered
|
||||
ccc = cccZero
|
||||
case above: // Above
|
||||
ccc = cccAbove
|
||||
}
|
||||
switch ri.BreakCat {
|
||||
case breakBreak:
|
||||
ccc = cccBreak
|
||||
case breakMid:
|
||||
ri.entry |= isMidBit
|
||||
}
|
||||
|
||||
ri.entry |= ccc
|
||||
|
||||
if ri.CaseMode == cUncased {
|
||||
return
|
||||
}
|
||||
|
||||
// Need to do something special.
|
||||
if ri.CaseMode == cTitle || ri.HasSpecial || ri.mapping(cTitle) != ri.mapping(cUpper) {
|
||||
makeException(ri)
|
||||
return
|
||||
}
|
||||
if f := string(ri.FoldFull); len(f) > 0 && f != ri.mapping(cUpper) && f != ri.mapping(cLower) {
|
||||
makeException(ri)
|
||||
return
|
||||
}
|
||||
|
||||
// Rune is either lowercase or uppercase.
|
||||
|
||||
orig := string(ri.Rune)
|
||||
mapped := ""
|
||||
if ri.CaseMode == cUpper {
|
||||
mapped = ri.mapping(cLower)
|
||||
} else {
|
||||
mapped = ri.mapping(cUpper)
|
||||
}
|
||||
|
||||
if len(orig) != len(mapped) {
|
||||
makeException(ri)
|
||||
return
|
||||
}
|
||||
|
||||
if string(ri.FoldFull) == ri.mapping(cUpper) {
|
||||
ri.entry |= inverseFoldBit
|
||||
}
|
||||
|
||||
n := len(orig)
|
||||
|
||||
// Create per-byte XOR mask.
|
||||
var b []byte
|
||||
for i := 0; i < n; i++ {
|
||||
b = append(b, orig[i]^mapped[i])
|
||||
}
|
||||
|
||||
// Remove leading 0 bytes, but keep at least one byte.
|
||||
for ; len(b) > 1 && b[0] == 0; b = b[1:] {
|
||||
}
|
||||
|
||||
if len(b) == 1 && b[0]&0xc0 == 0 {
|
||||
ri.entry |= info(b[0]) << xorShift
|
||||
return
|
||||
}
|
||||
|
||||
key := string(b)
|
||||
x, ok := xorCache[key]
|
||||
if !ok {
|
||||
xorData = append(xorData, 0) // for detecting start of sequence
|
||||
xorData = append(xorData, b...)
|
||||
|
||||
x = len(xorData) - 1
|
||||
xorCache[key] = x
|
||||
}
|
||||
ri.entry |= info(x<<xorShift) | xorIndexBit
|
||||
}
|
||||
|
||||
var xorCache = map[string]int{}
|
||||
|
||||
// xorData contains byte-wise XOR data for the least significant bytes of a
|
||||
// UTF-8 encoded rune. An index points to the last byte. The sequence starts
|
||||
// with a zero terminator.
|
||||
var xorData = []byte{}
|
||||
|
||||
// See the comments in gen_trieval.go re "the exceptions slice".
|
||||
var exceptionData = []byte{0}
|
||||
|
||||
// makeException encodes case mappings that cannot be expressed in a simple
|
||||
// XOR diff.
|
||||
func makeException(ri *runeInfo) {
|
||||
ccc := ri.entry & cccMask
|
||||
// Set exception bit and retain case type.
|
||||
ri.entry &= 0x0007
|
||||
ri.entry |= exceptionBit
|
||||
|
||||
if len(exceptionData) >= 1<<numExceptionBits {
|
||||
log.Fatalf("%U:exceptionData too large %x > %d bits", ri.Rune, len(exceptionData), numExceptionBits)
|
||||
}
|
||||
|
||||
// Set the offset in the exceptionData array.
|
||||
ri.entry |= info(len(exceptionData) << exceptionShift)
|
||||
|
||||
orig := string(ri.Rune)
|
||||
tc := ri.mapping(cTitle)
|
||||
uc := ri.mapping(cUpper)
|
||||
lc := ri.mapping(cLower)
|
||||
ff := string(ri.FoldFull)
|
||||
|
||||
// addString sets the length of a string and adds it to the expansions array.
|
||||
addString := func(s string, b *byte) {
|
||||
if len(s) == 0 {
|
||||
// Zero-length mappings exist, but only for conditional casing,
|
||||
// which we are representing outside of this table.
|
||||
log.Fatalf("%U: has zero-length mapping.", ri.Rune)
|
||||
}
|
||||
*b <<= 3
|
||||
if s != orig {
|
||||
n := len(s)
|
||||
if n > 7 {
|
||||
log.Fatalf("%U: mapping larger than 7 (%d)", ri.Rune, n)
|
||||
}
|
||||
*b |= byte(n)
|
||||
exceptionData = append(exceptionData, s...)
|
||||
}
|
||||
}
|
||||
|
||||
// byte 0:
|
||||
exceptionData = append(exceptionData, byte(ccc)|byte(len(ff)))
|
||||
|
||||
// byte 1:
|
||||
p := len(exceptionData)
|
||||
exceptionData = append(exceptionData, 0)
|
||||
|
||||
if len(ff) > 7 { // May be zero-length.
|
||||
log.Fatalf("%U: fold string larger than 7 (%d)", ri.Rune, len(ff))
|
||||
}
|
||||
exceptionData = append(exceptionData, ff...)
|
||||
ct := ri.CaseMode
|
||||
if ct != cLower {
|
||||
addString(lc, &exceptionData[p])
|
||||
}
|
||||
if ct != cUpper {
|
||||
addString(uc, &exceptionData[p])
|
||||
}
|
||||
if ct != cTitle {
|
||||
// If title is the same as upper, we set it to the original string so
|
||||
// that it will be marked as not present. This implies title case is
|
||||
// the same as upper case.
|
||||
if tc == uc {
|
||||
tc = orig
|
||||
}
|
||||
addString(tc, &exceptionData[p])
|
||||
}
|
||||
}
|
||||
|
||||
// sparseCompacter is a trie value block Compacter. There are many cases where
|
||||
// successive runes alternate between lower- and upper-case. This Compacter
|
||||
// exploits this by adding a special case type where the case value is obtained
|
||||
// from or-ing it with the least-significant bit of the rune, creating large
|
||||
// ranges of equal case values that compress well.
|
||||
type sparseCompacter struct {
|
||||
sparseBlocks [][]uint16
|
||||
sparseOffsets []uint16
|
||||
sparseCount int
|
||||
}
|
||||
|
||||
// makeSparse returns the number of elements that compact block would contain
|
||||
// as well as the modified values.
|
||||
func makeSparse(vals []uint64) ([]uint16, int) {
|
||||
// Copy the values.
|
||||
values := make([]uint16, len(vals))
|
||||
for i, v := range vals {
|
||||
values[i] = uint16(v)
|
||||
}
|
||||
|
||||
alt := func(i int, v uint16) uint16 {
|
||||
if cm := info(v & fullCasedMask); cm == cUpper || cm == cLower {
|
||||
// Convert cLower or cUpper to cXORCase value, which has the form 11x.
|
||||
xor := v
|
||||
xor &^= 1
|
||||
xor |= uint16(i&1) ^ (v & 1)
|
||||
xor |= 0x4
|
||||
return xor
|
||||
}
|
||||
return v
|
||||
}
|
||||
|
||||
var count int
|
||||
var previous uint16
|
||||
for i, v := range values {
|
||||
if v != 0 {
|
||||
// Try if the unmodified value is equal to the previous.
|
||||
if v == previous {
|
||||
continue
|
||||
}
|
||||
|
||||
// Try if the xor-ed value is equal to the previous value.
|
||||
a := alt(i, v)
|
||||
if a == previous {
|
||||
values[i] = a
|
||||
continue
|
||||
}
|
||||
|
||||
// This is a new value.
|
||||
count++
|
||||
|
||||
// Use the xor-ed value if it will be identical to the next value.
|
||||
if p := i + 1; p < len(values) && alt(p, values[p]) == a {
|
||||
values[i] = a
|
||||
v = a
|
||||
}
|
||||
}
|
||||
previous = v
|
||||
}
|
||||
return values, count
|
||||
}
|
||||
|
||||
func (s *sparseCompacter) Size(v []uint64) (int, bool) {
|
||||
_, n := makeSparse(v)
|
||||
|
||||
// We limit using this method to having 16 entries.
|
||||
if n > 16 {
|
||||
return 0, false
|
||||
}
|
||||
|
||||
return 2 + int(reflect.TypeOf(valueRange{}).Size())*n, true
|
||||
}
|
||||
|
||||
func (s *sparseCompacter) Store(v []uint64) uint32 {
|
||||
h := uint32(len(s.sparseOffsets))
|
||||
values, sz := makeSparse(v)
|
||||
s.sparseBlocks = append(s.sparseBlocks, values)
|
||||
s.sparseOffsets = append(s.sparseOffsets, uint16(s.sparseCount))
|
||||
s.sparseCount += sz
|
||||
return h
|
||||
}
|
||||
|
||||
func (s *sparseCompacter) Handler() string {
|
||||
// The sparse global variable and its lookup method is defined in gen_trieval.go.
|
||||
return "sparse.lookup"
|
||||
}
|
||||
|
||||
func (s *sparseCompacter) Print(w io.Writer) (retErr error) {
|
||||
p := func(format string, args ...interface{}) {
|
||||
_, err := fmt.Fprintf(w, format, args...)
|
||||
if retErr == nil && err != nil {
|
||||
retErr = err
|
||||
}
|
||||
}
|
||||
|
||||
ls := len(s.sparseBlocks)
|
||||
if ls == len(s.sparseOffsets) {
|
||||
s.sparseOffsets = append(s.sparseOffsets, uint16(s.sparseCount))
|
||||
}
|
||||
p("// sparseOffsets: %d entries, %d bytes\n", ls+1, (ls+1)*2)
|
||||
p("var sparseOffsets = %#v\n\n", s.sparseOffsets)
|
||||
|
||||
ns := s.sparseCount
|
||||
p("// sparseValues: %d entries, %d bytes\n", ns, ns*4)
|
||||
p("var sparseValues = [%d]valueRange {", ns)
|
||||
for i, values := range s.sparseBlocks {
|
||||
p("\n// Block %#x, offset %#x", i, s.sparseOffsets[i])
|
||||
var v uint16
|
||||
for i, nv := range values {
|
||||
if nv != v {
|
||||
if v != 0 {
|
||||
p(",hi:%#02x},", 0x80+i-1)
|
||||
}
|
||||
if nv != 0 {
|
||||
p("\n{value:%#04x,lo:%#02x", nv, 0x80+i)
|
||||
}
|
||||
}
|
||||
v = nv
|
||||
}
|
||||
if v != 0 {
|
||||
p(",hi:%#02x},", 0x80+len(values)-1)
|
||||
}
|
||||
}
|
||||
p("\n}\n\n")
|
||||
return
|
||||
}
|
||||
|
||||
// verifyProperties that properties of the runes that are relied upon in the
|
||||
// implementation. Each property is marked with an identifier that is referred
|
||||
// to in the places where it is used.
|
||||
func verifyProperties(chars []runeInfo) {
|
||||
for i, c := range chars {
|
||||
r := rune(i)
|
||||
|
||||
// Rune properties.
|
||||
|
||||
// A.1: modifier never changes on lowercase. [ltLower]
|
||||
if c.CCC > 0 && unicode.ToLower(r) != r {
|
||||
log.Fatalf("%U: non-starter changes when lowercased", r)
|
||||
}
|
||||
|
||||
// A.2: properties of decompositions starting with I or J. [ltLower]
|
||||
d := norm.NFD.PropertiesString(string(r)).Decomposition()
|
||||
if len(d) > 0 {
|
||||
if d[0] == 'I' || d[0] == 'J' {
|
||||
// A.2.1: we expect at least an ASCII character and a modifier.
|
||||
if len(d) < 3 {
|
||||
log.Fatalf("%U: length of decomposition was %d; want >= 3", r, len(d))
|
||||
}
|
||||
|
||||
// All subsequent runes are modifiers and all have the same CCC.
|
||||
runes := []rune(string(d[1:]))
|
||||
ccc := chars[runes[0]].CCC
|
||||
|
||||
for _, mr := range runes[1:] {
|
||||
mc := chars[mr]
|
||||
|
||||
// A.2.2: all modifiers have a CCC of Above or less.
|
||||
if ccc == 0 || ccc > above {
|
||||
log.Fatalf("%U: CCC of successive rune (%U) was %d; want (0,230]", r, mr, ccc)
|
||||
}
|
||||
|
||||
// A.2.3: a sequence of modifiers all have the same CCC.
|
||||
if mc.CCC != ccc {
|
||||
log.Fatalf("%U: CCC of follow-up modifier (%U) was %d; want %d", r, mr, mc.CCC, ccc)
|
||||
}
|
||||
|
||||
// A.2.4: for each trailing r, r in [0x300, 0x311] <=> CCC == Above.
|
||||
if (ccc == above) != (0x300 <= mr && mr <= 0x311) {
|
||||
log.Fatalf("%U: modifier %U in [U+0300, U+0311] != ccc(%U) == 230", r, mr, mr)
|
||||
}
|
||||
|
||||
if i += len(string(mr)); i >= len(d) {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// A.3: no U+0307 in decomposition of Soft-Dotted rune. [ltUpper]
|
||||
if unicode.Is(unicode.Soft_Dotted, r) && strings.Contains(string(d), "\u0307") {
|
||||
log.Fatalf("%U: decomposition of soft-dotted rune may not contain U+0307", r)
|
||||
}
|
||||
|
||||
// A.4: only rune U+0345 may be of CCC Iota_Subscript. [elUpper]
|
||||
if c.CCC == iotaSubscript && r != 0x0345 {
|
||||
log.Fatalf("%U: only rune U+0345 may have CCC Iota_Subscript", r)
|
||||
}
|
||||
|
||||
// A.5: soft-dotted runes do not have exceptions.
|
||||
if c.SoftDotted && c.entry&exceptionBit != 0 {
|
||||
log.Fatalf("%U: soft-dotted has exception", r)
|
||||
}
|
||||
|
||||
// A.6: Greek decomposition. [elUpper]
|
||||
if unicode.Is(unicode.Greek, r) {
|
||||
if b := norm.NFD.PropertiesString(string(r)).Decomposition(); b != nil {
|
||||
runes := []rune(string(b))
|
||||
// A.6.1: If a Greek rune decomposes and the first rune of the
|
||||
// decomposition is greater than U+00FF, the rune is always
|
||||
// great and not a modifier.
|
||||
if f := runes[0]; unicode.IsMark(f) || f > 0xFF && !unicode.Is(unicode.Greek, f) {
|
||||
log.Fatalf("%U: expected first rune of Greek decomposition to be letter, found %U", r, f)
|
||||
}
|
||||
// A.6.2: Any follow-up rune in a Greek decomposition is a
|
||||
// modifier of which the first should be gobbled in
|
||||
// decomposition.
|
||||
for _, m := range runes[1:] {
|
||||
switch m {
|
||||
case 0x0313, 0x0314, 0x0301, 0x0300, 0x0306, 0x0342, 0x0308, 0x0304, 0x345:
|
||||
default:
|
||||
log.Fatalf("%U: modifier %U is outside of expected Greek modifier set", r, m)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Breaking properties.
|
||||
|
||||
// B.1: all runes with CCC > 0 are of break type Extend.
|
||||
if c.CCC > 0 && c.BreakType != "Extend" {
|
||||
log.Fatalf("%U: CCC == %d, but got break type %s; want Extend", r, c.CCC, c.BreakType)
|
||||
}
|
||||
|
||||
// B.2: all cased runes with c.CCC == 0 are of break type ALetter.
|
||||
if c.CCC == 0 && c.Cased && c.BreakType != "ALetter" {
|
||||
log.Fatalf("%U: cased, but got break type %s; want ALetter", r, c.BreakType)
|
||||
}
|
||||
|
||||
// B.3: letter category.
|
||||
if c.CCC == 0 && c.BreakCat != breakBreak && !c.CaseIgnorable {
|
||||
if c.BreakCat != breakLetter {
|
||||
log.Fatalf("%U: check for letter break type gave %d; want %d", r, c.BreakCat, breakLetter)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func genTablesTest() {
|
||||
w := &bytes.Buffer{}
|
||||
|
||||
fmt.Fprintln(w, "var (")
|
||||
printProperties(w, "DerivedCoreProperties.txt", "Case_Ignorable", verifyIgnore)
|
||||
|
||||
// We discard the output as we know we have perfect functions. We run them
|
||||
// just to verify the properties are correct.
|
||||
n := printProperties(ioutil.Discard, "DerivedCoreProperties.txt", "Cased", verifyCased)
|
||||
n += printProperties(ioutil.Discard, "DerivedCoreProperties.txt", "Lowercase", verifyLower)
|
||||
n += printProperties(ioutil.Discard, "DerivedCoreProperties.txt", "Uppercase", verifyUpper)
|
||||
if n > 0 {
|
||||
log.Fatalf("One of the discarded properties does not have a perfect filter.")
|
||||
}
|
||||
|
||||
// <code>; <lower> ; <title> ; <upper> ; (<condition_list> ;)?
|
||||
fmt.Fprintln(w, "\tspecial = map[rune]struct{ toLower, toTitle, toUpper string }{")
|
||||
parse("SpecialCasing.txt", func(p *ucd.Parser) {
|
||||
// Skip conditional entries.
|
||||
if p.String(4) != "" {
|
||||
return
|
||||
}
|
||||
r := p.Rune(0)
|
||||
fmt.Fprintf(w, "\t\t0x%04x: {%q, %q, %q},\n",
|
||||
r, string(p.Runes(1)), string(p.Runes(2)), string(p.Runes(3)))
|
||||
})
|
||||
fmt.Fprint(w, "\t}\n\n")
|
||||
|
||||
// <code>; <type>; <runes>
|
||||
table := map[rune]struct{ simple, full, special string }{}
|
||||
parse("CaseFolding.txt", func(p *ucd.Parser) {
|
||||
r := p.Rune(0)
|
||||
t := p.String(1)
|
||||
v := string(p.Runes(2))
|
||||
if t != "T" && v == string(unicode.ToLower(r)) {
|
||||
return
|
||||
}
|
||||
x := table[r]
|
||||
switch t {
|
||||
case "C":
|
||||
x.full = v
|
||||
x.simple = v
|
||||
case "S":
|
||||
x.simple = v
|
||||
case "F":
|
||||
x.full = v
|
||||
case "T":
|
||||
x.special = v
|
||||
}
|
||||
table[r] = x
|
||||
})
|
||||
fmt.Fprintln(w, "\tfoldMap = map[rune]struct{ simple, full, special string }{")
|
||||
for r := rune(0); r < 0x10FFFF; r++ {
|
||||
x, ok := table[r]
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
fmt.Fprintf(w, "\t\t0x%04x: {%q, %q, %q},\n", r, x.simple, x.full, x.special)
|
||||
}
|
||||
fmt.Fprint(w, "\t}\n\n")
|
||||
|
||||
// Break property
|
||||
notBreak := map[rune]bool{}
|
||||
parse("auxiliary/WordBreakProperty.txt", func(p *ucd.Parser) {
|
||||
switch p.String(1) {
|
||||
case "Extend", "Format", "MidLetter", "MidNumLet", "Single_Quote",
|
||||
"ALetter", "Hebrew_Letter", "Numeric", "ExtendNumLet", "ZWJ":
|
||||
notBreak[p.Rune(0)] = true
|
||||
}
|
||||
})
|
||||
|
||||
fmt.Fprintln(w, "\tbreakProp = []struct{ lo, hi rune }{")
|
||||
inBreak := false
|
||||
for r := rune(0); r <= lastRuneForTesting; r++ {
|
||||
if isBreak := !notBreak[r]; isBreak != inBreak {
|
||||
if isBreak {
|
||||
fmt.Fprintf(w, "\t\t{0x%x, ", r)
|
||||
} else {
|
||||
fmt.Fprintf(w, "0x%x},\n", r-1)
|
||||
}
|
||||
inBreak = isBreak
|
||||
}
|
||||
}
|
||||
if inBreak {
|
||||
fmt.Fprintf(w, "0x%x},\n", lastRuneForTesting)
|
||||
}
|
||||
fmt.Fprint(w, "\t}\n\n")
|
||||
|
||||
// Word break test
|
||||
// Filter out all samples that do not contain cased characters.
|
||||
cased := map[rune]bool{}
|
||||
parse("DerivedCoreProperties.txt", func(p *ucd.Parser) {
|
||||
if p.String(1) == "Cased" {
|
||||
cased[p.Rune(0)] = true
|
||||
}
|
||||
})
|
||||
|
||||
fmt.Fprintln(w, "\tbreakTest = []string{")
|
||||
parse("auxiliary/WordBreakTest.txt", func(p *ucd.Parser) {
|
||||
c := strings.Split(p.String(0), " ")
|
||||
|
||||
const sep = '|'
|
||||
numCased := 0
|
||||
test := ""
|
||||
for ; len(c) >= 2; c = c[2:] {
|
||||
if c[0] == "÷" && test != "" {
|
||||
test += string(sep)
|
||||
}
|
||||
i, err := strconv.ParseUint(c[1], 16, 32)
|
||||
r := rune(i)
|
||||
if err != nil {
|
||||
log.Fatalf("Invalid rune %q.", c[1])
|
||||
}
|
||||
if r == sep {
|
||||
log.Fatalf("Separator %q not allowed in test data. Pick another one.", sep)
|
||||
}
|
||||
if cased[r] {
|
||||
numCased++
|
||||
}
|
||||
test += string(r)
|
||||
}
|
||||
if numCased > 1 {
|
||||
fmt.Fprintf(w, "\t\t%q,\n", test)
|
||||
}
|
||||
})
|
||||
fmt.Fprintln(w, "\t}")
|
||||
|
||||
fmt.Fprintln(w, ")")
|
||||
|
||||
gen.WriteGoFile("tables_test.go", "cases", w.Bytes())
|
||||
}
|
||||
|
||||
// These functions are just used for verification that their definition have not
|
||||
// changed in the Unicode Standard.
|
||||
|
||||
func verifyCased(r rune) bool {
|
||||
return verifyLower(r) || verifyUpper(r) || unicode.IsTitle(r)
|
||||
}
|
||||
|
||||
func verifyLower(r rune) bool {
|
||||
return unicode.IsLower(r) || unicode.Is(unicode.Other_Lowercase, r)
|
||||
}
|
||||
|
||||
func verifyUpper(r rune) bool {
|
||||
return unicode.IsUpper(r) || unicode.Is(unicode.Other_Uppercase, r)
|
||||
}
|
||||
|
||||
// verifyIgnore is an approximation of the Case_Ignorable property using the
|
||||
// core unicode package. It is used to reduce the size of the test data.
|
||||
func verifyIgnore(r rune) bool {
|
||||
props := []*unicode.RangeTable{
|
||||
unicode.Mn,
|
||||
unicode.Me,
|
||||
unicode.Cf,
|
||||
unicode.Lm,
|
||||
unicode.Sk,
|
||||
}
|
||||
for _, p := range props {
|
||||
if unicode.Is(p, r) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// printProperties prints tables of rune properties from the given UCD file.
|
||||
// A filter func f can be given to exclude certain values. A rune r will have
|
||||
// the indicated property if it is in the generated table or if f(r).
|
||||
func printProperties(w io.Writer, file, property string, f func(r rune) bool) int {
|
||||
verify := map[rune]bool{}
|
||||
n := 0
|
||||
varNameParts := strings.Split(property, "_")
|
||||
varNameParts[0] = strings.ToLower(varNameParts[0])
|
||||
fmt.Fprintf(w, "\t%s = map[rune]bool{\n", strings.Join(varNameParts, ""))
|
||||
parse(file, func(p *ucd.Parser) {
|
||||
if p.String(1) == property {
|
||||
r := p.Rune(0)
|
||||
verify[r] = true
|
||||
if !f(r) {
|
||||
n++
|
||||
fmt.Fprintf(w, "\t\t0x%.4x: true,\n", r)
|
||||
}
|
||||
}
|
||||
})
|
||||
fmt.Fprint(w, "\t}\n\n")
|
||||
|
||||
// Verify that f is correct, that is, it represents a subset of the property.
|
||||
for r := rune(0); r <= lastRuneForTesting; r++ {
|
||||
if !verify[r] && f(r) {
|
||||
log.Fatalf("Incorrect filter func for property %q.", property)
|
||||
}
|
||||
}
|
||||
return n
|
||||
}
|
||||
|
||||
// The newCaseTrie, sparseValues and sparseOffsets definitions below are
|
||||
// placeholders referred to by gen_trieval.go. The real definitions are
|
||||
// generated by this program and written to tables.go.
|
||||
|
||||
func newCaseTrie(int) int { return 0 }
|
||||
|
||||
var (
|
||||
sparseValues [0]valueRange
|
||||
sparseOffsets [0]uint16
|
||||
)
|
|
@ -1,219 +0,0 @@
|
|||
// Copyright 2014 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ignore
|
||||
|
||||
package main
|
||||
|
||||
// This file contains definitions for interpreting the trie value of the case
|
||||
// trie generated by "go run gen*.go". It is shared by both the generator
|
||||
// program and the resultant package. Sharing is achieved by the generator
|
||||
// copying gen_trieval.go to trieval.go and changing what's above this comment.
|
||||
|
||||
// info holds case information for a single rune. It is the value returned
|
||||
// by a trie lookup. Most mapping information can be stored in a single 16-bit
|
||||
// value. If not, for example when a rune is mapped to multiple runes, the value
|
||||
// stores some basic case data and an index into an array with additional data.
|
||||
//
|
||||
// The per-rune values have the following format:
|
||||
//
|
||||
// if (exception) {
|
||||
// 15..5 unsigned exception index
|
||||
// 4 unused
|
||||
// } else {
|
||||
// 15..8 XOR pattern or index to XOR pattern for case mapping
|
||||
// Only 13..8 are used for XOR patterns.
|
||||
// 7 inverseFold (fold to upper, not to lower)
|
||||
// 6 index: interpret the XOR pattern as an index
|
||||
// or isMid if case mode is cIgnorableUncased.
|
||||
// 5..4 CCC: zero (normal or break), above or other
|
||||
// }
|
||||
// 3 exception: interpret this value as an exception index
|
||||
// (TODO: is this bit necessary? Probably implied from case mode.)
|
||||
// 2..0 case mode
|
||||
//
|
||||
// For the non-exceptional cases, a rune must be either uncased, lowercase or
|
||||
// uppercase. If the rune is cased, the XOR pattern maps either a lowercase
|
||||
// rune to uppercase or an uppercase rune to lowercase (applied to the 10
|
||||
// least-significant bits of the rune).
|
||||
//
|
||||
// See the definitions below for a more detailed description of the various
|
||||
// bits.
|
||||
type info uint16
|
||||
|
||||
const (
|
||||
casedMask = 0x0003
|
||||
fullCasedMask = 0x0007
|
||||
ignorableMask = 0x0006
|
||||
ignorableValue = 0x0004
|
||||
|
||||
inverseFoldBit = 1 << 7
|
||||
isMidBit = 1 << 6
|
||||
|
||||
exceptionBit = 1 << 3
|
||||
exceptionShift = 5
|
||||
numExceptionBits = 11
|
||||
|
||||
xorIndexBit = 1 << 6
|
||||
xorShift = 8
|
||||
|
||||
// There is no mapping if all xor bits and the exception bit are zero.
|
||||
hasMappingMask = 0xff80 | exceptionBit
|
||||
)
|
||||
|
||||
// The case mode bits encodes the case type of a rune. This includes uncased,
|
||||
// title, upper and lower case and case ignorable. (For a definition of these
|
||||
// terms see Chapter 3 of The Unicode Standard Core Specification.) In some rare
|
||||
// cases, a rune can be both cased and case-ignorable. This is encoded by
|
||||
// cIgnorableCased. A rune of this type is always lower case. Some runes are
|
||||
// cased while not having a mapping.
|
||||
//
|
||||
// A common pattern for scripts in the Unicode standard is for upper and lower
|
||||
// case runes to alternate for increasing rune values (e.g. the accented Latin
|
||||
// ranges starting from U+0100 and U+1E00 among others and some Cyrillic
|
||||
// characters). We use this property by defining a cXORCase mode, where the case
|
||||
// mode (always upper or lower case) is derived from the rune value. As the XOR
|
||||
// pattern for case mappings is often identical for successive runes, using
|
||||
// cXORCase can result in large series of identical trie values. This, in turn,
|
||||
// allows us to better compress the trie blocks.
|
||||
const (
|
||||
cUncased info = iota // 000
|
||||
cTitle // 001
|
||||
cLower // 010
|
||||
cUpper // 011
|
||||
cIgnorableUncased // 100
|
||||
cIgnorableCased // 101 // lower case if mappings exist
|
||||
cXORCase // 11x // case is cLower | ((rune&1) ^ x)
|
||||
|
||||
maxCaseMode = cUpper
|
||||
)
|
||||
|
||||
func (c info) isCased() bool {
|
||||
return c&casedMask != 0
|
||||
}
|
||||
|
||||
func (c info) isCaseIgnorable() bool {
|
||||
return c&ignorableMask == ignorableValue
|
||||
}
|
||||
|
||||
func (c info) isNotCasedAndNotCaseIgnorable() bool {
|
||||
return c&fullCasedMask == 0
|
||||
}
|
||||
|
||||
func (c info) isCaseIgnorableAndNotCased() bool {
|
||||
return c&fullCasedMask == cIgnorableUncased
|
||||
}
|
||||
|
||||
func (c info) isMid() bool {
|
||||
return c&(fullCasedMask|isMidBit) == isMidBit|cIgnorableUncased
|
||||
}
|
||||
|
||||
// The case mapping implementation will need to know about various Canonical
|
||||
// Combining Class (CCC) values. We encode two of these in the trie value:
|
||||
// cccZero (0) and cccAbove (230). If the value is cccOther, it means that
|
||||
// CCC(r) > 0, but not 230. A value of cccBreak means that CCC(r) == 0 and that
|
||||
// the rune also has the break category Break (see below).
|
||||
const (
|
||||
cccBreak info = iota << 4
|
||||
cccZero
|
||||
cccAbove
|
||||
cccOther
|
||||
|
||||
cccMask = cccBreak | cccZero | cccAbove | cccOther
|
||||
)
|
||||
|
||||
const (
|
||||
starter = 0
|
||||
above = 230
|
||||
iotaSubscript = 240
|
||||
)
|
||||
|
||||
// The exceptions slice holds data that does not fit in a normal info entry.
|
||||
// The entry is pointed to by the exception index in an entry. It has the
|
||||
// following format:
|
||||
//
|
||||
// Header
|
||||
// byte 0:
|
||||
// 7..6 unused
|
||||
// 5..4 CCC type (same bits as entry)
|
||||
// 3 unused
|
||||
// 2..0 length of fold
|
||||
//
|
||||
// byte 1:
|
||||
// 7..6 unused
|
||||
// 5..3 length of 1st mapping of case type
|
||||
// 2..0 length of 2nd mapping of case type
|
||||
//
|
||||
// case 1st 2nd
|
||||
// lower -> upper, title
|
||||
// upper -> lower, title
|
||||
// title -> lower, upper
|
||||
//
|
||||
// Lengths with the value 0x7 indicate no value and implies no change.
|
||||
// A length of 0 indicates a mapping to zero-length string.
|
||||
//
|
||||
// Body bytes:
|
||||
// case folding bytes
|
||||
// lowercase mapping bytes
|
||||
// uppercase mapping bytes
|
||||
// titlecase mapping bytes
|
||||
// closure mapping bytes (for NFKC_Casefold). (TODO)
|
||||
//
|
||||
// Fallbacks:
|
||||
// missing fold -> lower
|
||||
// missing title -> upper
|
||||
// all missing -> original rune
|
||||
//
|
||||
// exceptions starts with a dummy byte to enforce that there is no zero index
|
||||
// value.
|
||||
const (
|
||||
lengthMask = 0x07
|
||||
lengthBits = 3
|
||||
noChange = 0
|
||||
)
|
||||
|
||||
// References to generated trie.
|
||||
|
||||
var trie = newCaseTrie(0)
|
||||
|
||||
var sparse = sparseBlocks{
|
||||
values: sparseValues[:],
|
||||
offsets: sparseOffsets[:],
|
||||
}
|
||||
|
||||
// Sparse block lookup code.
|
||||
|
||||
// valueRange is an entry in a sparse block.
|
||||
type valueRange struct {
|
||||
value uint16
|
||||
lo, hi byte
|
||||
}
|
||||
|
||||
type sparseBlocks struct {
|
||||
values []valueRange
|
||||
offsets []uint16
|
||||
}
|
||||
|
||||
// lookup returns the value from values block n for byte b using binary search.
|
||||
func (s *sparseBlocks) lookup(n uint32, b byte) uint16 {
|
||||
lo := s.offsets[n]
|
||||
hi := s.offsets[n+1]
|
||||
for lo < hi {
|
||||
m := lo + (hi-lo)/2
|
||||
r := s.values[m]
|
||||
if r.lo <= b && b <= r.hi {
|
||||
return r.value
|
||||
}
|
||||
if b < r.lo {
|
||||
hi = m
|
||||
} else {
|
||||
lo = m + 1
|
||||
}
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
// lastRuneForTesting is the last rune used for testing. Everything after this
|
||||
// is boring.
|
||||
const lastRuneForTesting = rune(0x1FFFF)
|
|
@ -1,61 +0,0 @@
|
|||
// Copyright 2016 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build icu
|
||||
|
||||
package cases
|
||||
|
||||
// Ideally these functions would be defined in a test file, but go test doesn't
|
||||
// allow CGO in tests. The build tag should ensure either way that these
|
||||
// functions will not end up in the package.
|
||||
|
||||
// TODO: Ensure that the correct ICU version is set.
|
||||
|
||||
/*
|
||||
#cgo LDFLAGS: -licui18n.57 -licuuc.57
|
||||
#include <stdlib.h>
|
||||
#include <unicode/ustring.h>
|
||||
#include <unicode/utypes.h>
|
||||
#include <unicode/localpointer.h>
|
||||
#include <unicode/ucasemap.h>
|
||||
*/
|
||||
import "C"
|
||||
|
||||
import "unsafe"
|
||||
|
||||
func doICU(tag, caser, input string) string {
|
||||
err := C.UErrorCode(0)
|
||||
loc := C.CString(tag)
|
||||
cm := C.ucasemap_open(loc, C.uint32_t(0), &err)
|
||||
|
||||
buf := make([]byte, len(input)*4)
|
||||
dst := (*C.char)(unsafe.Pointer(&buf[0]))
|
||||
src := C.CString(input)
|
||||
|
||||
cn := C.int32_t(0)
|
||||
|
||||
switch caser {
|
||||
case "fold":
|
||||
cn = C.ucasemap_utf8FoldCase(cm,
|
||||
dst, C.int32_t(len(buf)),
|
||||
src, C.int32_t(len(input)),
|
||||
&err)
|
||||
case "lower":
|
||||
cn = C.ucasemap_utf8ToLower(cm,
|
||||
dst, C.int32_t(len(buf)),
|
||||
src, C.int32_t(len(input)),
|
||||
&err)
|
||||
case "upper":
|
||||
cn = C.ucasemap_utf8ToUpper(cm,
|
||||
dst, C.int32_t(len(buf)),
|
||||
src, C.int32_t(len(input)),
|
||||
&err)
|
||||
case "title":
|
||||
cn = C.ucasemap_utf8ToTitle(cm,
|
||||
dst, C.int32_t(len(buf)),
|
||||
src, C.int32_t(len(input)),
|
||||
&err)
|
||||
}
|
||||
return string(buf[:cn])
|
||||
}
|
|
@ -1,82 +0,0 @@
|
|||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package cases
|
||||
|
||||
func (c info) cccVal() info {
|
||||
if c&exceptionBit != 0 {
|
||||
return info(exceptions[c>>exceptionShift]) & cccMask
|
||||
}
|
||||
return c & cccMask
|
||||
}
|
||||
|
||||
func (c info) cccType() info {
|
||||
ccc := c.cccVal()
|
||||
if ccc <= cccZero {
|
||||
return cccZero
|
||||
}
|
||||
return ccc
|
||||
}
|
||||
|
||||
// TODO: Implement full Unicode breaking algorithm:
|
||||
// 1) Implement breaking in separate package.
|
||||
// 2) Use the breaker here.
|
||||
// 3) Compare table size and performance of using the more generic breaker.
|
||||
//
|
||||
// Note that we can extend the current algorithm to be much more accurate. This
|
||||
// only makes sense, though, if the performance and/or space penalty of using
|
||||
// the generic breaker is big. Extra data will only be needed for non-cased
|
||||
// runes, which means there are sufficient bits left in the caseType.
|
||||
// ICU prohibits breaking in such cases as well.
|
||||
|
||||
// For the purpose of title casing we use an approximation of the Unicode Word
|
||||
// Breaking algorithm defined in Annex #29:
|
||||
// http://www.unicode.org/reports/tr29/#Default_Grapheme_Cluster_Table.
|
||||
//
|
||||
// For our approximation, we group the Word Break types into the following
|
||||
// categories, with associated rules:
|
||||
//
|
||||
// 1) Letter:
|
||||
// ALetter, Hebrew_Letter, Numeric, ExtendNumLet, Extend, Format_FE, ZWJ.
|
||||
// Rule: Never break between consecutive runes of this category.
|
||||
//
|
||||
// 2) Mid:
|
||||
// MidLetter, MidNumLet, Single_Quote.
|
||||
// (Cf. case-ignorable: MidLetter, MidNumLet, Single_Quote or cat is Mn,
|
||||
// Me, Cf, Lm or Sk).
|
||||
// Rule: Don't break between Letter and Mid, but break between two Mids.
|
||||
//
|
||||
// 3) Break:
|
||||
// Any other category: NewLine, MidNum, CR, LF, Double_Quote, Katakana, and
|
||||
// Other.
|
||||
// These categories should always result in a break between two cased letters.
|
||||
// Rule: Always break.
|
||||
//
|
||||
// Note 1: the Katakana and MidNum categories can, in esoteric cases, result in
|
||||
// preventing a break between two cased letters. For now we will ignore this
|
||||
// (e.g. [ALetter] [ExtendNumLet] [Katakana] [ExtendNumLet] [ALetter] and
|
||||
// [ALetter] [Numeric] [MidNum] [Numeric] [ALetter].)
|
||||
//
|
||||
// Note 2: the rule for Mid is very approximate, but works in most cases. To
|
||||
// improve, we could store the categories in the trie value and use a FA to
|
||||
// manage breaks. See TODO comment above.
|
||||
//
|
||||
// Note 3: according to the spec, it is possible for the Extend category to
|
||||
// introduce breaks between other categories grouped in Letter. However, this
|
||||
// is undesirable for our purposes. ICU prevents breaks in such cases as well.
|
||||
|
||||
// isBreak returns whether this rune should introduce a break.
|
||||
func (c info) isBreak() bool {
|
||||
return c.cccVal() == cccBreak
|
||||
}
|
||||
|
||||
// isLetter returns whether the rune is of break type ALetter, Hebrew_Letter,
|
||||
// Numeric, ExtendNumLet, or Extend.
|
||||
func (c info) isLetter() bool {
|
||||
ccc := c.cccVal()
|
||||
if ccc == cccZero {
|
||||
return !c.isCaseIgnorable()
|
||||
}
|
||||
return ccc != cccBreak
|
||||
}
|
|
@ -1,816 +0,0 @@
|
|||
// Copyright 2014 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package cases
|
||||
|
||||
// This file contains the definitions of case mappings for all supported
|
||||
// languages. The rules for the language-specific tailorings were taken and
|
||||
// modified from the CLDR transform definitions in common/transforms.
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"unicode"
|
||||
"unicode/utf8"
|
||||
|
||||
"golang.org/x/text/internal"
|
||||
"golang.org/x/text/language"
|
||||
"golang.org/x/text/transform"
|
||||
"golang.org/x/text/unicode/norm"
|
||||
)
|
||||
|
||||
// A mapFunc takes a context set to the current rune and writes the mapped
|
||||
// version to the same context. It may advance the context to the next rune. It
|
||||
// returns whether a checkpoint is possible: whether the pDst bytes written to
|
||||
// dst so far won't need changing as we see more source bytes.
|
||||
type mapFunc func(*context) bool
|
||||
|
||||
// A spanFunc takes a context set to the current rune and returns whether this
|
||||
// rune would be altered when written to the output. It may advance the context
|
||||
// to the next rune. It returns whether a checkpoint is possible.
|
||||
type spanFunc func(*context) bool
|
||||
|
||||
// maxIgnorable defines the maximum number of ignorables to consider for
|
||||
// lookahead operations.
|
||||
const maxIgnorable = 30
|
||||
|
||||
// supported lists the language tags for which we have tailorings.
|
||||
const supported = "und af az el lt nl tr"
|
||||
|
||||
func init() {
|
||||
tags := []language.Tag{}
|
||||
for _, s := range strings.Split(supported, " ") {
|
||||
tags = append(tags, language.MustParse(s))
|
||||
}
|
||||
matcher = internal.NewInheritanceMatcher(tags)
|
||||
Supported = language.NewCoverage(tags)
|
||||
}
|
||||
|
||||
var (
|
||||
matcher *internal.InheritanceMatcher
|
||||
|
||||
Supported language.Coverage
|
||||
|
||||
// We keep the following lists separate, instead of having a single per-
|
||||
// language struct, to give the compiler a chance to remove unused code.
|
||||
|
||||
// Some uppercase mappers are stateless, so we can precompute the
|
||||
// Transformers and save a bit on runtime allocations.
|
||||
upperFunc = []struct {
|
||||
upper mapFunc
|
||||
span spanFunc
|
||||
}{
|
||||
{nil, nil}, // und
|
||||
{nil, nil}, // af
|
||||
{aztrUpper(upper), isUpper}, // az
|
||||
{elUpper, noSpan}, // el
|
||||
{ltUpper(upper), noSpan}, // lt
|
||||
{nil, nil}, // nl
|
||||
{aztrUpper(upper), isUpper}, // tr
|
||||
}
|
||||
|
||||
undUpper transform.SpanningTransformer = &undUpperCaser{}
|
||||
undLower transform.SpanningTransformer = &undLowerCaser{}
|
||||
undLowerIgnoreSigma transform.SpanningTransformer = &undLowerIgnoreSigmaCaser{}
|
||||
|
||||
lowerFunc = []mapFunc{
|
||||
nil, // und
|
||||
nil, // af
|
||||
aztrLower, // az
|
||||
nil, // el
|
||||
ltLower, // lt
|
||||
nil, // nl
|
||||
aztrLower, // tr
|
||||
}
|
||||
|
||||
titleInfos = []struct {
|
||||
title mapFunc
|
||||
lower mapFunc
|
||||
titleSpan spanFunc
|
||||
rewrite func(*context)
|
||||
}{
|
||||
{title, lower, isTitle, nil}, // und
|
||||
{title, lower, isTitle, afnlRewrite}, // af
|
||||
{aztrUpper(title), aztrLower, isTitle, nil}, // az
|
||||
{title, lower, isTitle, nil}, // el
|
||||
{ltUpper(title), ltLower, noSpan, nil}, // lt
|
||||
{nlTitle, lower, nlTitleSpan, afnlRewrite}, // nl
|
||||
{aztrUpper(title), aztrLower, isTitle, nil}, // tr
|
||||
}
|
||||
)
|
||||
|
||||
func makeUpper(t language.Tag, o options) transform.SpanningTransformer {
|
||||
_, i, _ := matcher.Match(t)
|
||||
f := upperFunc[i].upper
|
||||
if f == nil {
|
||||
return undUpper
|
||||
}
|
||||
return &simpleCaser{f: f, span: upperFunc[i].span}
|
||||
}
|
||||
|
||||
func makeLower(t language.Tag, o options) transform.SpanningTransformer {
|
||||
_, i, _ := matcher.Match(t)
|
||||
f := lowerFunc[i]
|
||||
if f == nil {
|
||||
if o.ignoreFinalSigma {
|
||||
return undLowerIgnoreSigma
|
||||
}
|
||||
return undLower
|
||||
}
|
||||
if o.ignoreFinalSigma {
|
||||
return &simpleCaser{f: f, span: isLower}
|
||||
}
|
||||
return &lowerCaser{
|
||||
first: f,
|
||||
midWord: finalSigma(f),
|
||||
}
|
||||
}
|
||||
|
||||
func makeTitle(t language.Tag, o options) transform.SpanningTransformer {
|
||||
_, i, _ := matcher.Match(t)
|
||||
x := &titleInfos[i]
|
||||
lower := x.lower
|
||||
if o.noLower {
|
||||
lower = (*context).copy
|
||||
} else if !o.ignoreFinalSigma {
|
||||
lower = finalSigma(lower)
|
||||
}
|
||||
return &titleCaser{
|
||||
title: x.title,
|
||||
lower: lower,
|
||||
titleSpan: x.titleSpan,
|
||||
rewrite: x.rewrite,
|
||||
}
|
||||
}
|
||||
|
||||
func noSpan(c *context) bool {
|
||||
c.err = transform.ErrEndOfSpan
|
||||
return false
|
||||
}
|
||||
|
||||
// TODO: consider a similar special case for the fast majority lower case. This
|
||||
// is a bit more involved so will require some more precise benchmarking to
|
||||
// justify it.
|
||||
|
||||
type undUpperCaser struct{ transform.NopResetter }
|
||||
|
||||
// undUpperCaser implements the Transformer interface for doing an upper case
|
||||
// mapping for the root locale (und). It eliminates the need for an allocation
|
||||
// as it prevents escaping by not using function pointers.
|
||||
func (t undUpperCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
c := context{dst: dst, src: src, atEOF: atEOF}
|
||||
for c.next() {
|
||||
upper(&c)
|
||||
c.checkpoint()
|
||||
}
|
||||
return c.ret()
|
||||
}
|
||||
|
||||
func (t undUpperCaser) Span(src []byte, atEOF bool) (n int, err error) {
|
||||
c := context{src: src, atEOF: atEOF}
|
||||
for c.next() && isUpper(&c) {
|
||||
c.checkpoint()
|
||||
}
|
||||
return c.retSpan()
|
||||
}
|
||||
|
||||
// undLowerIgnoreSigmaCaser implements the Transformer interface for doing
|
||||
// a lower case mapping for the root locale (und) ignoring final sigma
|
||||
// handling. This casing algorithm is used in some performance-critical packages
|
||||
// like secure/precis and x/net/http/idna, which warrants its special-casing.
|
||||
type undLowerIgnoreSigmaCaser struct{ transform.NopResetter }
|
||||
|
||||
func (t undLowerIgnoreSigmaCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
c := context{dst: dst, src: src, atEOF: atEOF}
|
||||
for c.next() && lower(&c) {
|
||||
c.checkpoint()
|
||||
}
|
||||
return c.ret()
|
||||
|
||||
}
|
||||
|
||||
// Span implements a generic lower-casing. This is possible as isLower works
|
||||
// for all lowercasing variants. All lowercase variants only vary in how they
|
||||
// transform a non-lowercase letter. They will never change an already lowercase
|
||||
// letter. In addition, there is no state.
|
||||
func (t undLowerIgnoreSigmaCaser) Span(src []byte, atEOF bool) (n int, err error) {
|
||||
c := context{src: src, atEOF: atEOF}
|
||||
for c.next() && isLower(&c) {
|
||||
c.checkpoint()
|
||||
}
|
||||
return c.retSpan()
|
||||
}
|
||||
|
||||
type simpleCaser struct {
|
||||
context
|
||||
f mapFunc
|
||||
span spanFunc
|
||||
}
|
||||
|
||||
// simpleCaser implements the Transformer interface for doing a case operation
|
||||
// on a rune-by-rune basis.
|
||||
func (t *simpleCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
c := context{dst: dst, src: src, atEOF: atEOF}
|
||||
for c.next() && t.f(&c) {
|
||||
c.checkpoint()
|
||||
}
|
||||
return c.ret()
|
||||
}
|
||||
|
||||
func (t *simpleCaser) Span(src []byte, atEOF bool) (n int, err error) {
|
||||
c := context{src: src, atEOF: atEOF}
|
||||
for c.next() && t.span(&c) {
|
||||
c.checkpoint()
|
||||
}
|
||||
return c.retSpan()
|
||||
}
|
||||
|
||||
// undLowerCaser implements the Transformer interface for doing a lower case
|
||||
// mapping for the root locale (und) ignoring final sigma handling. This casing
|
||||
// algorithm is used in some performance-critical packages like secure/precis
|
||||
// and x/net/http/idna, which warrants its special-casing.
|
||||
type undLowerCaser struct{ transform.NopResetter }
|
||||
|
||||
func (t undLowerCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
c := context{dst: dst, src: src, atEOF: atEOF}
|
||||
|
||||
for isInterWord := true; c.next(); {
|
||||
if isInterWord {
|
||||
if c.info.isCased() {
|
||||
if !lower(&c) {
|
||||
break
|
||||
}
|
||||
isInterWord = false
|
||||
} else if !c.copy() {
|
||||
break
|
||||
}
|
||||
} else {
|
||||
if c.info.isNotCasedAndNotCaseIgnorable() {
|
||||
if !c.copy() {
|
||||
break
|
||||
}
|
||||
isInterWord = true
|
||||
} else if !c.hasPrefix("Σ") {
|
||||
if !lower(&c) {
|
||||
break
|
||||
}
|
||||
} else if !finalSigmaBody(&c) {
|
||||
break
|
||||
}
|
||||
}
|
||||
c.checkpoint()
|
||||
}
|
||||
return c.ret()
|
||||
}
|
||||
|
||||
func (t undLowerCaser) Span(src []byte, atEOF bool) (n int, err error) {
|
||||
c := context{src: src, atEOF: atEOF}
|
||||
for c.next() && isLower(&c) {
|
||||
c.checkpoint()
|
||||
}
|
||||
return c.retSpan()
|
||||
}
|
||||
|
||||
// lowerCaser implements the Transformer interface. The default Unicode lower
|
||||
// casing requires different treatment for the first and subsequent characters
|
||||
// of a word, most notably to handle the Greek final Sigma.
|
||||
type lowerCaser struct {
|
||||
undLowerIgnoreSigmaCaser
|
||||
|
||||
context
|
||||
|
||||
first, midWord mapFunc
|
||||
}
|
||||
|
||||
func (t *lowerCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
t.context = context{dst: dst, src: src, atEOF: atEOF}
|
||||
c := &t.context
|
||||
|
||||
for isInterWord := true; c.next(); {
|
||||
if isInterWord {
|
||||
if c.info.isCased() {
|
||||
if !t.first(c) {
|
||||
break
|
||||
}
|
||||
isInterWord = false
|
||||
} else if !c.copy() {
|
||||
break
|
||||
}
|
||||
} else {
|
||||
if c.info.isNotCasedAndNotCaseIgnorable() {
|
||||
if !c.copy() {
|
||||
break
|
||||
}
|
||||
isInterWord = true
|
||||
} else if !t.midWord(c) {
|
||||
break
|
||||
}
|
||||
}
|
||||
c.checkpoint()
|
||||
}
|
||||
return c.ret()
|
||||
}
|
||||
|
||||
// titleCaser implements the Transformer interface. Title casing algorithms
|
||||
// distinguish between the first letter of a word and subsequent letters of the
|
||||
// same word. It uses state to avoid requiring a potentially infinite lookahead.
|
||||
type titleCaser struct {
|
||||
context
|
||||
|
||||
// rune mappings used by the actual casing algorithms.
|
||||
title mapFunc
|
||||
lower mapFunc
|
||||
titleSpan spanFunc
|
||||
|
||||
rewrite func(*context)
|
||||
}
|
||||
|
||||
// Transform implements the standard Unicode title case algorithm as defined in
|
||||
// Chapter 3 of The Unicode Standard:
|
||||
// toTitlecase(X): Find the word boundaries in X according to Unicode Standard
|
||||
// Annex #29, "Unicode Text Segmentation." For each word boundary, find the
|
||||
// first cased character F following the word boundary. If F exists, map F to
|
||||
// Titlecase_Mapping(F); then map all characters C between F and the following
|
||||
// word boundary to Lowercase_Mapping(C).
|
||||
func (t *titleCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
t.context = context{dst: dst, src: src, atEOF: atEOF, isMidWord: t.isMidWord}
|
||||
c := &t.context
|
||||
|
||||
if !c.next() {
|
||||
return c.ret()
|
||||
}
|
||||
|
||||
for {
|
||||
p := c.info
|
||||
if t.rewrite != nil {
|
||||
t.rewrite(c)
|
||||
}
|
||||
|
||||
wasMid := p.isMid()
|
||||
// Break out of this loop on failure to ensure we do not modify the
|
||||
// state incorrectly.
|
||||
if p.isCased() {
|
||||
if !c.isMidWord {
|
||||
if !t.title(c) {
|
||||
break
|
||||
}
|
||||
c.isMidWord = true
|
||||
} else if !t.lower(c) {
|
||||
break
|
||||
}
|
||||
} else if !c.copy() {
|
||||
break
|
||||
} else if p.isBreak() {
|
||||
c.isMidWord = false
|
||||
}
|
||||
|
||||
// As we save the state of the transformer, it is safe to call
|
||||
// checkpoint after any successful write.
|
||||
if !(c.isMidWord && wasMid) {
|
||||
c.checkpoint()
|
||||
}
|
||||
|
||||
if !c.next() {
|
||||
break
|
||||
}
|
||||
if wasMid && c.info.isMid() {
|
||||
c.isMidWord = false
|
||||
}
|
||||
}
|
||||
return c.ret()
|
||||
}
|
||||
|
||||
func (t *titleCaser) Span(src []byte, atEOF bool) (n int, err error) {
|
||||
t.context = context{src: src, atEOF: atEOF, isMidWord: t.isMidWord}
|
||||
c := &t.context
|
||||
|
||||
if !c.next() {
|
||||
return c.retSpan()
|
||||
}
|
||||
|
||||
for {
|
||||
p := c.info
|
||||
if t.rewrite != nil {
|
||||
t.rewrite(c)
|
||||
}
|
||||
|
||||
wasMid := p.isMid()
|
||||
// Break out of this loop on failure to ensure we do not modify the
|
||||
// state incorrectly.
|
||||
if p.isCased() {
|
||||
if !c.isMidWord {
|
||||
if !t.titleSpan(c) {
|
||||
break
|
||||
}
|
||||
c.isMidWord = true
|
||||
} else if !isLower(c) {
|
||||
break
|
||||
}
|
||||
} else if p.isBreak() {
|
||||
c.isMidWord = false
|
||||
}
|
||||
// As we save the state of the transformer, it is safe to call
|
||||
// checkpoint after any successful write.
|
||||
if !(c.isMidWord && wasMid) {
|
||||
c.checkpoint()
|
||||
}
|
||||
|
||||
if !c.next() {
|
||||
break
|
||||
}
|
||||
if wasMid && c.info.isMid() {
|
||||
c.isMidWord = false
|
||||
}
|
||||
}
|
||||
return c.retSpan()
|
||||
}
|
||||
|
||||
// finalSigma adds Greek final Sigma handing to another casing function. It
|
||||
// determines whether a lowercased sigma should be σ or ς, by looking ahead for
|
||||
// case-ignorables and a cased letters.
|
||||
func finalSigma(f mapFunc) mapFunc {
|
||||
return func(c *context) bool {
|
||||
if !c.hasPrefix("Σ") {
|
||||
return f(c)
|
||||
}
|
||||
return finalSigmaBody(c)
|
||||
}
|
||||
}
|
||||
|
||||
func finalSigmaBody(c *context) bool {
|
||||
// Current rune must be ∑.
|
||||
|
||||
// ::NFD();
|
||||
// # 03A3; 03C2; 03A3; 03A3; Final_Sigma; # GREEK CAPITAL LETTER SIGMA
|
||||
// Σ } [:case-ignorable:]* [:cased:] → σ;
|
||||
// [:cased:] [:case-ignorable:]* { Σ → ς;
|
||||
// ::Any-Lower;
|
||||
// ::NFC();
|
||||
|
||||
p := c.pDst
|
||||
c.writeString("ς")
|
||||
|
||||
// TODO: we should do this here, but right now this will never have an
|
||||
// effect as this is called when the prefix is Sigma, whereas Dutch and
|
||||
// Afrikaans only test for an apostrophe.
|
||||
//
|
||||
// if t.rewrite != nil {
|
||||
// t.rewrite(c)
|
||||
// }
|
||||
|
||||
// We need to do one more iteration after maxIgnorable, as a cased
|
||||
// letter is not an ignorable and may modify the result.
|
||||
wasMid := false
|
||||
for i := 0; i < maxIgnorable+1; i++ {
|
||||
if !c.next() {
|
||||
return false
|
||||
}
|
||||
if !c.info.isCaseIgnorable() {
|
||||
// All Midword runes are also case ignorable, so we are
|
||||
// guaranteed to have a letter or word break here. As we are
|
||||
// unreading the run, there is no need to unset c.isMidWord;
|
||||
// the title caser will handle this.
|
||||
if c.info.isCased() {
|
||||
// p+1 is guaranteed to be in bounds: if writing ς was
|
||||
// successful, p+1 will contain the second byte of ς. If not,
|
||||
// this function will have returned after c.next returned false.
|
||||
c.dst[p+1]++ // ς → σ
|
||||
}
|
||||
c.unreadRune()
|
||||
return true
|
||||
}
|
||||
// A case ignorable may also introduce a word break, so we may need
|
||||
// to continue searching even after detecting a break.
|
||||
isMid := c.info.isMid()
|
||||
if (wasMid && isMid) || c.info.isBreak() {
|
||||
c.isMidWord = false
|
||||
}
|
||||
wasMid = isMid
|
||||
c.copy()
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// finalSigmaSpan would be the same as isLower.
|
||||
|
||||
// elUpper implements Greek upper casing, which entails removing a predefined
|
||||
// set of non-blocked modifiers. Note that these accents should not be removed
|
||||
// for title casing!
|
||||
// Example: "Οδός" -> "ΟΔΟΣ".
|
||||
func elUpper(c *context) bool {
|
||||
// From CLDR:
|
||||
// [:Greek:] [^[:ccc=Not_Reordered:][:ccc=Above:]]*? { [\u0313\u0314\u0301\u0300\u0306\u0342\u0308\u0304] → ;
|
||||
// [:Greek:] [^[:ccc=Not_Reordered:][:ccc=Iota_Subscript:]]*? { \u0345 → ;
|
||||
|
||||
r, _ := utf8.DecodeRune(c.src[c.pSrc:])
|
||||
oldPDst := c.pDst
|
||||
if !upper(c) {
|
||||
return false
|
||||
}
|
||||
if !unicode.Is(unicode.Greek, r) {
|
||||
return true
|
||||
}
|
||||
i := 0
|
||||
// Take the properties of the uppercased rune that is already written to the
|
||||
// destination. This saves us the trouble of having to uppercase the
|
||||
// decomposed rune again.
|
||||
if b := norm.NFD.Properties(c.dst[oldPDst:]).Decomposition(); b != nil {
|
||||
// Restore the destination position and process the decomposed rune.
|
||||
r, sz := utf8.DecodeRune(b)
|
||||
if r <= 0xFF { // See A.6.1
|
||||
return true
|
||||
}
|
||||
c.pDst = oldPDst
|
||||
// Insert the first rune and ignore the modifiers. See A.6.2.
|
||||
c.writeBytes(b[:sz])
|
||||
i = len(b[sz:]) / 2 // Greek modifiers are always of length 2.
|
||||
}
|
||||
|
||||
for ; i < maxIgnorable && c.next(); i++ {
|
||||
switch r, _ := utf8.DecodeRune(c.src[c.pSrc:]); r {
|
||||
// Above and Iota Subscript
|
||||
case 0x0300, // U+0300 COMBINING GRAVE ACCENT
|
||||
0x0301, // U+0301 COMBINING ACUTE ACCENT
|
||||
0x0304, // U+0304 COMBINING MACRON
|
||||
0x0306, // U+0306 COMBINING BREVE
|
||||
0x0308, // U+0308 COMBINING DIAERESIS
|
||||
0x0313, // U+0313 COMBINING COMMA ABOVE
|
||||
0x0314, // U+0314 COMBINING REVERSED COMMA ABOVE
|
||||
0x0342, // U+0342 COMBINING GREEK PERISPOMENI
|
||||
0x0345: // U+0345 COMBINING GREEK YPOGEGRAMMENI
|
||||
// No-op. Gobble the modifier.
|
||||
|
||||
default:
|
||||
switch v, _ := trie.lookup(c.src[c.pSrc:]); info(v).cccType() {
|
||||
case cccZero:
|
||||
c.unreadRune()
|
||||
return true
|
||||
|
||||
// We don't need to test for IotaSubscript as the only rune that
|
||||
// qualifies (U+0345) was already excluded in the switch statement
|
||||
// above. See A.4.
|
||||
|
||||
case cccAbove:
|
||||
return c.copy()
|
||||
default:
|
||||
// Some other modifier. We're still allowed to gobble Greek
|
||||
// modifiers after this.
|
||||
c.copy()
|
||||
}
|
||||
}
|
||||
}
|
||||
return i == maxIgnorable
|
||||
}
|
||||
|
||||
// TODO: implement elUpperSpan (low-priority: complex and infrequent).
|
||||
|
||||
func ltLower(c *context) bool {
|
||||
// From CLDR:
|
||||
// # Introduce an explicit dot above when lowercasing capital I's and J's
|
||||
// # whenever there are more accents above.
|
||||
// # (of the accents used in Lithuanian: grave, acute, tilde above, and ogonek)
|
||||
// # 0049; 0069 0307; 0049; 0049; lt More_Above; # LATIN CAPITAL LETTER I
|
||||
// # 004A; 006A 0307; 004A; 004A; lt More_Above; # LATIN CAPITAL LETTER J
|
||||
// # 012E; 012F 0307; 012E; 012E; lt More_Above; # LATIN CAPITAL LETTER I WITH OGONEK
|
||||
// # 00CC; 0069 0307 0300; 00CC; 00CC; lt; # LATIN CAPITAL LETTER I WITH GRAVE
|
||||
// # 00CD; 0069 0307 0301; 00CD; 00CD; lt; # LATIN CAPITAL LETTER I WITH ACUTE
|
||||
// # 0128; 0069 0307 0303; 0128; 0128; lt; # LATIN CAPITAL LETTER I WITH TILDE
|
||||
// ::NFD();
|
||||
// I } [^[:ccc=Not_Reordered:][:ccc=Above:]]* [:ccc=Above:] → i \u0307;
|
||||
// J } [^[:ccc=Not_Reordered:][:ccc=Above:]]* [:ccc=Above:] → j \u0307;
|
||||
// I \u0328 (Į) } [^[:ccc=Not_Reordered:][:ccc=Above:]]* [:ccc=Above:] → i \u0328 \u0307;
|
||||
// I \u0300 (Ì) → i \u0307 \u0300;
|
||||
// I \u0301 (Í) → i \u0307 \u0301;
|
||||
// I \u0303 (Ĩ) → i \u0307 \u0303;
|
||||
// ::Any-Lower();
|
||||
// ::NFC();
|
||||
|
||||
i := 0
|
||||
if r := c.src[c.pSrc]; r < utf8.RuneSelf {
|
||||
lower(c)
|
||||
if r != 'I' && r != 'J' {
|
||||
return true
|
||||
}
|
||||
} else {
|
||||
p := norm.NFD.Properties(c.src[c.pSrc:])
|
||||
if d := p.Decomposition(); len(d) >= 3 && (d[0] == 'I' || d[0] == 'J') {
|
||||
// UTF-8 optimization: the decomposition will only have an above
|
||||
// modifier if the last rune of the decomposition is in [U+300-U+311].
|
||||
// In all other cases, a decomposition starting with I is always
|
||||
// an I followed by modifiers that are not cased themselves. See A.2.
|
||||
if d[1] == 0xCC && d[2] <= 0x91 { // A.2.4.
|
||||
if !c.writeBytes(d[:1]) {
|
||||
return false
|
||||
}
|
||||
c.dst[c.pDst-1] += 'a' - 'A' // lower
|
||||
|
||||
// Assumption: modifier never changes on lowercase. See A.1.
|
||||
// Assumption: all modifiers added have CCC = Above. See A.2.3.
|
||||
return c.writeString("\u0307") && c.writeBytes(d[1:])
|
||||
}
|
||||
// In all other cases the additional modifiers will have a CCC
|
||||
// that is less than 230 (Above). We will insert the U+0307, if
|
||||
// needed, after these modifiers so that a string in FCD form
|
||||
// will remain so. See A.2.2.
|
||||
lower(c)
|
||||
i = 1
|
||||
} else {
|
||||
return lower(c)
|
||||
}
|
||||
}
|
||||
|
||||
for ; i < maxIgnorable && c.next(); i++ {
|
||||
switch c.info.cccType() {
|
||||
case cccZero:
|
||||
c.unreadRune()
|
||||
return true
|
||||
case cccAbove:
|
||||
return c.writeString("\u0307") && c.copy() // See A.1.
|
||||
default:
|
||||
c.copy() // See A.1.
|
||||
}
|
||||
}
|
||||
return i == maxIgnorable
|
||||
}
|
||||
|
||||
// ltLowerSpan would be the same as isLower.
|
||||
|
||||
func ltUpper(f mapFunc) mapFunc {
|
||||
return func(c *context) bool {
|
||||
// Unicode:
|
||||
// 0307; 0307; ; ; lt After_Soft_Dotted; # COMBINING DOT ABOVE
|
||||
//
|
||||
// From CLDR:
|
||||
// # Remove \u0307 following soft-dotteds (i, j, and the like), with possible
|
||||
// # intervening non-230 marks.
|
||||
// ::NFD();
|
||||
// [:Soft_Dotted:] [^[:ccc=Not_Reordered:][:ccc=Above:]]* { \u0307 → ;
|
||||
// ::Any-Upper();
|
||||
// ::NFC();
|
||||
|
||||
// TODO: See A.5. A soft-dotted rune never has an exception. This would
|
||||
// allow us to overload the exception bit and encode this property in
|
||||
// info. Need to measure performance impact of this.
|
||||
r, _ := utf8.DecodeRune(c.src[c.pSrc:])
|
||||
oldPDst := c.pDst
|
||||
if !f(c) {
|
||||
return false
|
||||
}
|
||||
if !unicode.Is(unicode.Soft_Dotted, r) {
|
||||
return true
|
||||
}
|
||||
|
||||
// We don't need to do an NFD normalization, as a soft-dotted rune never
|
||||
// contains U+0307. See A.3.
|
||||
|
||||
i := 0
|
||||
for ; i < maxIgnorable && c.next(); i++ {
|
||||
switch c.info.cccType() {
|
||||
case cccZero:
|
||||
c.unreadRune()
|
||||
return true
|
||||
case cccAbove:
|
||||
if c.hasPrefix("\u0307") {
|
||||
// We don't do a full NFC, but rather combine runes for
|
||||
// some of the common cases. (Returning NFC or
|
||||
// preserving normal form is neither a requirement nor
|
||||
// a possibility anyway).
|
||||
if !c.next() {
|
||||
return false
|
||||
}
|
||||
if c.dst[oldPDst] == 'I' && c.pDst == oldPDst+1 && c.src[c.pSrc] == 0xcc {
|
||||
s := ""
|
||||
switch c.src[c.pSrc+1] {
|
||||
case 0x80: // U+0300 COMBINING GRAVE ACCENT
|
||||
s = "\u00cc" // U+00CC LATIN CAPITAL LETTER I WITH GRAVE
|
||||
case 0x81: // U+0301 COMBINING ACUTE ACCENT
|
||||
s = "\u00cd" // U+00CD LATIN CAPITAL LETTER I WITH ACUTE
|
||||
case 0x83: // U+0303 COMBINING TILDE
|
||||
s = "\u0128" // U+0128 LATIN CAPITAL LETTER I WITH TILDE
|
||||
case 0x88: // U+0308 COMBINING DIAERESIS
|
||||
s = "\u00cf" // U+00CF LATIN CAPITAL LETTER I WITH DIAERESIS
|
||||
default:
|
||||
}
|
||||
if s != "" {
|
||||
c.pDst = oldPDst
|
||||
return c.writeString(s)
|
||||
}
|
||||
}
|
||||
}
|
||||
return c.copy()
|
||||
default:
|
||||
c.copy()
|
||||
}
|
||||
}
|
||||
return i == maxIgnorable
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: implement ltUpperSpan (low priority: complex and infrequent).
|
||||
|
||||
func aztrUpper(f mapFunc) mapFunc {
|
||||
return func(c *context) bool {
|
||||
// i→İ;
|
||||
if c.src[c.pSrc] == 'i' {
|
||||
return c.writeString("İ")
|
||||
}
|
||||
return f(c)
|
||||
}
|
||||
}
|
||||
|
||||
func aztrLower(c *context) (done bool) {
|
||||
// From CLDR:
|
||||
// # I and i-dotless; I-dot and i are case pairs in Turkish and Azeri
|
||||
// # 0130; 0069; 0130; 0130; tr; # LATIN CAPITAL LETTER I WITH DOT ABOVE
|
||||
// İ→i;
|
||||
// # When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into i.
|
||||
// # This matches the behavior of the canonically equivalent I-dot_above
|
||||
// # 0307; ; 0307; 0307; tr After_I; # COMBINING DOT ABOVE
|
||||
// # When lowercasing, unless an I is before a dot_above, it turns into a dotless i.
|
||||
// # 0049; 0131; 0049; 0049; tr Not_Before_Dot; # LATIN CAPITAL LETTER I
|
||||
// I([^[:ccc=Not_Reordered:][:ccc=Above:]]*)\u0307 → i$1 ;
|
||||
// I→ı ;
|
||||
// ::Any-Lower();
|
||||
if c.hasPrefix("\u0130") { // İ
|
||||
return c.writeString("i")
|
||||
}
|
||||
if c.src[c.pSrc] != 'I' {
|
||||
return lower(c)
|
||||
}
|
||||
|
||||
// We ignore the lower-case I for now, but insert it later when we know
|
||||
// which form we need.
|
||||
start := c.pSrc + c.sz
|
||||
|
||||
i := 0
|
||||
Loop:
|
||||
// We check for up to n ignorables before \u0307. As \u0307 is an
|
||||
// ignorable as well, n is maxIgnorable-1.
|
||||
for ; i < maxIgnorable && c.next(); i++ {
|
||||
switch c.info.cccType() {
|
||||
case cccAbove:
|
||||
if c.hasPrefix("\u0307") {
|
||||
return c.writeString("i") && c.writeBytes(c.src[start:c.pSrc]) // ignore U+0307
|
||||
}
|
||||
done = true
|
||||
break Loop
|
||||
case cccZero:
|
||||
c.unreadRune()
|
||||
done = true
|
||||
break Loop
|
||||
default:
|
||||
// We'll write this rune after we know which starter to use.
|
||||
}
|
||||
}
|
||||
if i == maxIgnorable {
|
||||
done = true
|
||||
}
|
||||
return c.writeString("ı") && c.writeBytes(c.src[start:c.pSrc+c.sz]) && done
|
||||
}
|
||||
|
||||
// aztrLowerSpan would be the same as isLower.
|
||||
|
||||
func nlTitle(c *context) bool {
|
||||
// From CLDR:
|
||||
// # Special titlecasing for Dutch initial "ij".
|
||||
// ::Any-Title();
|
||||
// # Fix up Ij at the beginning of a "word" (per Any-Title, notUAX #29)
|
||||
// [:^WB=ALetter:] [:WB=Extend:]* [[:WB=MidLetter:][:WB=MidNumLet:]]? { Ij } → IJ ;
|
||||
if c.src[c.pSrc] != 'I' && c.src[c.pSrc] != 'i' {
|
||||
return title(c)
|
||||
}
|
||||
|
||||
if !c.writeString("I") || !c.next() {
|
||||
return false
|
||||
}
|
||||
if c.src[c.pSrc] == 'j' || c.src[c.pSrc] == 'J' {
|
||||
return c.writeString("J")
|
||||
}
|
||||
c.unreadRune()
|
||||
return true
|
||||
}
|
||||
|
||||
func nlTitleSpan(c *context) bool {
|
||||
// From CLDR:
|
||||
// # Special titlecasing for Dutch initial "ij".
|
||||
// ::Any-Title();
|
||||
// # Fix up Ij at the beginning of a "word" (per Any-Title, notUAX #29)
|
||||
// [:^WB=ALetter:] [:WB=Extend:]* [[:WB=MidLetter:][:WB=MidNumLet:]]? { Ij } → IJ ;
|
||||
if c.src[c.pSrc] != 'I' {
|
||||
return isTitle(c)
|
||||
}
|
||||
if !c.next() || c.src[c.pSrc] == 'j' {
|
||||
return false
|
||||
}
|
||||
if c.src[c.pSrc] != 'J' {
|
||||
c.unreadRune()
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// Not part of CLDR, but see http://unicode.org/cldr/trac/ticket/7078.
|
||||
func afnlRewrite(c *context) {
|
||||
if c.hasPrefix("'") || c.hasPrefix("’") {
|
||||
c.isMidWord = true
|
||||
}
|
||||
}
|
File diff suppressed because it is too large
Load Diff
|
@ -1,215 +0,0 @@
|
|||
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
|
||||
|
||||
package cases
|
||||
|
||||
// This file contains definitions for interpreting the trie value of the case
|
||||
// trie generated by "go run gen*.go". It is shared by both the generator
|
||||
// program and the resultant package. Sharing is achieved by the generator
|
||||
// copying gen_trieval.go to trieval.go and changing what's above this comment.
|
||||
|
||||
// info holds case information for a single rune. It is the value returned
|
||||
// by a trie lookup. Most mapping information can be stored in a single 16-bit
|
||||
// value. If not, for example when a rune is mapped to multiple runes, the value
|
||||
// stores some basic case data and an index into an array with additional data.
|
||||
//
|
||||
// The per-rune values have the following format:
|
||||
//
|
||||
// if (exception) {
|
||||
// 15..5 unsigned exception index
|
||||
// 4 unused
|
||||
// } else {
|
||||
// 15..8 XOR pattern or index to XOR pattern for case mapping
|
||||
// Only 13..8 are used for XOR patterns.
|
||||
// 7 inverseFold (fold to upper, not to lower)
|
||||
// 6 index: interpret the XOR pattern as an index
|
||||
// or isMid if case mode is cIgnorableUncased.
|
||||
// 5..4 CCC: zero (normal or break), above or other
|
||||
// }
|
||||
// 3 exception: interpret this value as an exception index
|
||||
// (TODO: is this bit necessary? Probably implied from case mode.)
|
||||
// 2..0 case mode
|
||||
//
|
||||
// For the non-exceptional cases, a rune must be either uncased, lowercase or
|
||||
// uppercase. If the rune is cased, the XOR pattern maps either a lowercase
|
||||
// rune to uppercase or an uppercase rune to lowercase (applied to the 10
|
||||
// least-significant bits of the rune).
|
||||
//
|
||||
// See the definitions below for a more detailed description of the various
|
||||
// bits.
|
||||
type info uint16
|
||||
|
||||
const (
|
||||
casedMask = 0x0003
|
||||
fullCasedMask = 0x0007
|
||||
ignorableMask = 0x0006
|
||||
ignorableValue = 0x0004
|
||||
|
||||
inverseFoldBit = 1 << 7
|
||||
isMidBit = 1 << 6
|
||||
|
||||
exceptionBit = 1 << 3
|
||||
exceptionShift = 5
|
||||
numExceptionBits = 11
|
||||
|
||||
xorIndexBit = 1 << 6
|
||||
xorShift = 8
|
||||
|
||||
// There is no mapping if all xor bits and the exception bit are zero.
|
||||
hasMappingMask = 0xff80 | exceptionBit
|
||||
)
|
||||
|
||||
// The case mode bits encodes the case type of a rune. This includes uncased,
|
||||
// title, upper and lower case and case ignorable. (For a definition of these
|
||||
// terms see Chapter 3 of The Unicode Standard Core Specification.) In some rare
|
||||
// cases, a rune can be both cased and case-ignorable. This is encoded by
|
||||
// cIgnorableCased. A rune of this type is always lower case. Some runes are
|
||||
// cased while not having a mapping.
|
||||
//
|
||||
// A common pattern for scripts in the Unicode standard is for upper and lower
|
||||
// case runes to alternate for increasing rune values (e.g. the accented Latin
|
||||
// ranges starting from U+0100 and U+1E00 among others and some Cyrillic
|
||||
// characters). We use this property by defining a cXORCase mode, where the case
|
||||
// mode (always upper or lower case) is derived from the rune value. As the XOR
|
||||
// pattern for case mappings is often identical for successive runes, using
|
||||
// cXORCase can result in large series of identical trie values. This, in turn,
|
||||
// allows us to better compress the trie blocks.
|
||||
const (
|
||||
cUncased info = iota // 000
|
||||
cTitle // 001
|
||||
cLower // 010
|
||||
cUpper // 011
|
||||
cIgnorableUncased // 100
|
||||
cIgnorableCased // 101 // lower case if mappings exist
|
||||
cXORCase // 11x // case is cLower | ((rune&1) ^ x)
|
||||
|
||||
maxCaseMode = cUpper
|
||||
)
|
||||
|
||||
func (c info) isCased() bool {
|
||||
return c&casedMask != 0
|
||||
}
|
||||
|
||||
func (c info) isCaseIgnorable() bool {
|
||||
return c&ignorableMask == ignorableValue
|
||||
}
|
||||
|
||||
func (c info) isNotCasedAndNotCaseIgnorable() bool {
|
||||
return c&fullCasedMask == 0
|
||||
}
|
||||
|
||||
func (c info) isCaseIgnorableAndNotCased() bool {
|
||||
return c&fullCasedMask == cIgnorableUncased
|
||||
}
|
||||
|
||||
func (c info) isMid() bool {
|
||||
return c&(fullCasedMask|isMidBit) == isMidBit|cIgnorableUncased
|
||||
}
|
||||
|
||||
// The case mapping implementation will need to know about various Canonical
|
||||
// Combining Class (CCC) values. We encode two of these in the trie value:
|
||||
// cccZero (0) and cccAbove (230). If the value is cccOther, it means that
|
||||
// CCC(r) > 0, but not 230. A value of cccBreak means that CCC(r) == 0 and that
|
||||
// the rune also has the break category Break (see below).
|
||||
const (
|
||||
cccBreak info = iota << 4
|
||||
cccZero
|
||||
cccAbove
|
||||
cccOther
|
||||
|
||||
cccMask = cccBreak | cccZero | cccAbove | cccOther
|
||||
)
|
||||
|
||||
const (
|
||||
starter = 0
|
||||
above = 230
|
||||
iotaSubscript = 240
|
||||
)
|
||||
|
||||
// The exceptions slice holds data that does not fit in a normal info entry.
|
||||
// The entry is pointed to by the exception index in an entry. It has the
|
||||
// following format:
|
||||
//
|
||||
// Header
|
||||
// byte 0:
|
||||
// 7..6 unused
|
||||
// 5..4 CCC type (same bits as entry)
|
||||
// 3 unused
|
||||
// 2..0 length of fold
|
||||
//
|
||||
// byte 1:
|
||||
// 7..6 unused
|
||||
// 5..3 length of 1st mapping of case type
|
||||
// 2..0 length of 2nd mapping of case type
|
||||
//
|
||||
// case 1st 2nd
|
||||
// lower -> upper, title
|
||||
// upper -> lower, title
|
||||
// title -> lower, upper
|
||||
//
|
||||
// Lengths with the value 0x7 indicate no value and implies no change.
|
||||
// A length of 0 indicates a mapping to zero-length string.
|
||||
//
|
||||
// Body bytes:
|
||||
// case folding bytes
|
||||
// lowercase mapping bytes
|
||||
// uppercase mapping bytes
|
||||
// titlecase mapping bytes
|
||||
// closure mapping bytes (for NFKC_Casefold). (TODO)
|
||||
//
|
||||
// Fallbacks:
|
||||
// missing fold -> lower
|
||||
// missing title -> upper
|
||||
// all missing -> original rune
|
||||
//
|
||||
// exceptions starts with a dummy byte to enforce that there is no zero index
|
||||
// value.
|
||||
const (
|
||||
lengthMask = 0x07
|
||||
lengthBits = 3
|
||||
noChange = 0
|
||||
)
|
||||
|
||||
// References to generated trie.
|
||||
|
||||
var trie = newCaseTrie(0)
|
||||
|
||||
var sparse = sparseBlocks{
|
||||
values: sparseValues[:],
|
||||
offsets: sparseOffsets[:],
|
||||
}
|
||||
|
||||
// Sparse block lookup code.
|
||||
|
||||
// valueRange is an entry in a sparse block.
|
||||
type valueRange struct {
|
||||
value uint16
|
||||
lo, hi byte
|
||||
}
|
||||
|
||||
type sparseBlocks struct {
|
||||
values []valueRange
|
||||
offsets []uint16
|
||||
}
|
||||
|
||||
// lookup returns the value from values block n for byte b using binary search.
|
||||
func (s *sparseBlocks) lookup(n uint32, b byte) uint16 {
|
||||
lo := s.offsets[n]
|
||||
hi := s.offsets[n+1]
|
||||
for lo < hi {
|
||||
m := lo + (hi-lo)/2
|
||||
r := s.values[m]
|
||||
if r.lo <= b && b <= r.hi {
|
||||
return r.value
|
||||
}
|
||||
if b < r.lo {
|
||||
hi = m
|
||||
} else {
|
||||
lo = m + 1
|
||||
}
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
// lastRuneForTesting is the last rune used for testing. Everything after this
|
||||
// is boring.
|
||||
const lastRuneForTesting = rune(0x1FFFF)
|
|
@ -1,32 +0,0 @@
|
|||
load("@io_bazel_rules_go//go:def.bzl", "go_library")
|
||||
|
||||
go_library(
|
||||
name = "go_default_library",
|
||||
srcs = [
|
||||
"internal.go",
|
||||
"match.go",
|
||||
"tables.go",
|
||||
],
|
||||
importmap = "k8s.io/kubernetes/vendor/golang.org/x/text/internal",
|
||||
importpath = "golang.org/x/text/internal",
|
||||
visibility = ["//visibility:public"],
|
||||
deps = ["//vendor/golang.org/x/text/language:go_default_library"],
|
||||
)
|
||||
|
||||
filegroup(
|
||||
name = "package-srcs",
|
||||
srcs = glob(["**"]),
|
||||
tags = ["automanaged"],
|
||||
visibility = ["//visibility:private"],
|
||||
)
|
||||
|
||||
filegroup(
|
||||
name = "all-srcs",
|
||||
srcs = [
|
||||
":package-srcs",
|
||||
"//vendor/golang.org/x/text/internal/tag:all-srcs",
|
||||
"//vendor/golang.org/x/text/internal/utf8internal:all-srcs",
|
||||
],
|
||||
tags = ["automanaged"],
|
||||
visibility = ["//visibility:public"],
|
||||
)
|
|
@ -1,52 +0,0 @@
|
|||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ignore
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"log"
|
||||
|
||||
"golang.org/x/text/internal/gen"
|
||||
"golang.org/x/text/language"
|
||||
"golang.org/x/text/unicode/cldr"
|
||||
)
|
||||
|
||||
func main() {
|
||||
r := gen.OpenCLDRCoreZip()
|
||||
defer r.Close()
|
||||
|
||||
d := &cldr.Decoder{}
|
||||
data, err := d.DecodeZip(r)
|
||||
if err != nil {
|
||||
log.Fatalf("DecodeZip: %v", err)
|
||||
}
|
||||
|
||||
w := gen.NewCodeWriter()
|
||||
defer w.WriteGoFile("tables.go", "internal")
|
||||
|
||||
// Create parents table.
|
||||
parents := make([]uint16, language.NumCompactTags)
|
||||
for _, loc := range data.Locales() {
|
||||
tag := language.MustParse(loc)
|
||||
index, ok := language.CompactIndex(tag)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
parentIndex := 0 // und
|
||||
for p := tag.Parent(); p != language.Und; p = p.Parent() {
|
||||
if x, ok := language.CompactIndex(p); ok {
|
||||
parentIndex = x
|
||||
break
|
||||
}
|
||||
}
|
||||
parents[index] = uint16(parentIndex)
|
||||
}
|
||||
|
||||
w.WriteComment(`
|
||||
Parent maps a compact index of a tag to the compact index of the parent of
|
||||
this tag.`)
|
||||
w.WriteVar("Parent", parents)
|
||||
}
|
|
@ -1,51 +0,0 @@
|
|||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:generate go run gen.go
|
||||
|
||||
// Package internal contains non-exported functionality that are used by
|
||||
// packages in the text repository.
|
||||
package internal // import "golang.org/x/text/internal"
|
||||
|
||||
import (
|
||||
"sort"
|
||||
|
||||
"golang.org/x/text/language"
|
||||
)
|
||||
|
||||
// SortTags sorts tags in place.
|
||||
func SortTags(tags []language.Tag) {
|
||||
sort.Sort(sorter(tags))
|
||||
}
|
||||
|
||||
type sorter []language.Tag
|
||||
|
||||
func (s sorter) Len() int {
|
||||
return len(s)
|
||||
}
|
||||
|
||||
func (s sorter) Swap(i, j int) {
|
||||
s[i], s[j] = s[j], s[i]
|
||||
}
|
||||
|
||||
func (s sorter) Less(i, j int) bool {
|
||||
return s[i].String() < s[j].String()
|
||||
}
|
||||
|
||||
// UniqueTags sorts and filters duplicate tags in place and returns a slice with
|
||||
// only unique tags.
|
||||
func UniqueTags(tags []language.Tag) []language.Tag {
|
||||
if len(tags) <= 1 {
|
||||
return tags
|
||||
}
|
||||
SortTags(tags)
|
||||
k := 0
|
||||
for i := 1; i < len(tags); i++ {
|
||||
if tags[k].String() < tags[i].String() {
|
||||
k++
|
||||
tags[k] = tags[i]
|
||||
}
|
||||
}
|
||||
return tags[:k+1]
|
||||
}
|
|
@ -1,67 +0,0 @@
|
|||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package internal
|
||||
|
||||
// This file contains matchers that implement CLDR inheritance.
|
||||
//
|
||||
// See http://unicode.org/reports/tr35/#Locale_Inheritance.
|
||||
//
|
||||
// Some of the inheritance described in this document is already handled by
|
||||
// the cldr package.
|
||||
|
||||
import (
|
||||
"golang.org/x/text/language"
|
||||
)
|
||||
|
||||
// TODO: consider if (some of the) matching algorithm needs to be public after
|
||||
// getting some feel about what is generic and what is specific.
|
||||
|
||||
// NewInheritanceMatcher returns a matcher that matches based on the inheritance
|
||||
// chain.
|
||||
//
|
||||
// The matcher uses canonicalization and the parent relationship to find a
|
||||
// match. The resulting match will always be either Und or a language with the
|
||||
// same language and script as the requested language. It will not match
|
||||
// languages for which there is understood to be mutual or one-directional
|
||||
// intelligibility.
|
||||
//
|
||||
// A Match will indicate an Exact match if the language matches after
|
||||
// canonicalization and High if the matched tag is a parent.
|
||||
func NewInheritanceMatcher(t []language.Tag) *InheritanceMatcher {
|
||||
tags := &InheritanceMatcher{make(map[language.Tag]int)}
|
||||
for i, tag := range t {
|
||||
ct, err := language.All.Canonicalize(tag)
|
||||
if err != nil {
|
||||
ct = tag
|
||||
}
|
||||
tags.index[ct] = i
|
||||
}
|
||||
return tags
|
||||
}
|
||||
|
||||
type InheritanceMatcher struct {
|
||||
index map[language.Tag]int
|
||||
}
|
||||
|
||||
func (m InheritanceMatcher) Match(want ...language.Tag) (language.Tag, int, language.Confidence) {
|
||||
for _, t := range want {
|
||||
ct, err := language.All.Canonicalize(t)
|
||||
if err != nil {
|
||||
ct = t
|
||||
}
|
||||
conf := language.Exact
|
||||
for {
|
||||
if index, ok := m.index[ct]; ok {
|
||||
return ct, index, conf
|
||||
}
|
||||
if ct == language.Und {
|
||||
break
|
||||
}
|
||||
ct = ct.Parent()
|
||||
conf = language.High
|
||||
}
|
||||
}
|
||||
return language.Und, 0, language.No
|
||||
}
|
|
@ -1,117 +0,0 @@
|
|||
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
|
||||
|
||||
package internal
|
||||
|
||||
// Parent maps a compact index of a tag to the compact index of the parent of
|
||||
// this tag.
|
||||
var Parent = []uint16{ // 754 elements
|
||||
// Entry 0 - 3F
|
||||
0x0000, 0x0053, 0x00e5, 0x0000, 0x0003, 0x0003, 0x0000, 0x0006,
|
||||
0x0000, 0x0008, 0x0000, 0x000a, 0x0000, 0x000c, 0x000c, 0x000c,
|
||||
0x000c, 0x000c, 0x000c, 0x000c, 0x000c, 0x000c, 0x000c, 0x000c,
|
||||
0x000c, 0x000c, 0x000c, 0x000c, 0x000c, 0x000c, 0x000c, 0x000c,
|
||||
0x000c, 0x000c, 0x000c, 0x000c, 0x000c, 0x000c, 0x000c, 0x000c,
|
||||
0x000c, 0x0000, 0x0000, 0x002a, 0x0000, 0x002c, 0x0000, 0x002e,
|
||||
0x0000, 0x0000, 0x0031, 0x0030, 0x0030, 0x0000, 0x0035, 0x0000,
|
||||
0x0037, 0x0000, 0x0039, 0x0000, 0x003b, 0x0000, 0x003d, 0x0000,
|
||||
// Entry 40 - 7F
|
||||
0x0000, 0x0040, 0x0000, 0x0042, 0x0042, 0x0000, 0x0045, 0x0045,
|
||||
0x0000, 0x0048, 0x0000, 0x004a, 0x0000, 0x0000, 0x004d, 0x004c,
|
||||
0x004c, 0x0000, 0x0051, 0x0051, 0x0051, 0x0051, 0x0000, 0x0056,
|
||||
0x0000, 0x0058, 0x0000, 0x005a, 0x0000, 0x005c, 0x005c, 0x0000,
|
||||
0x005f, 0x0000, 0x0061, 0x0000, 0x0063, 0x0000, 0x0065, 0x0065,
|
||||
0x0000, 0x0068, 0x0000, 0x006a, 0x006a, 0x006a, 0x006a, 0x006a,
|
||||
0x006a, 0x006a, 0x0000, 0x0072, 0x0000, 0x0074, 0x0000, 0x0076,
|
||||
0x0000, 0x0000, 0x0079, 0x0000, 0x007b, 0x0000, 0x007d, 0x0000,
|
||||
// Entry 80 - BF
|
||||
0x007f, 0x007f, 0x0000, 0x0082, 0x0082, 0x0000, 0x0085, 0x0086,
|
||||
0x0086, 0x0086, 0x0085, 0x0087, 0x0086, 0x0086, 0x0086, 0x0085,
|
||||
0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0087, 0x0086,
|
||||
0x0086, 0x0086, 0x0086, 0x0087, 0x0086, 0x0087, 0x0086, 0x0086,
|
||||
0x0087, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086,
|
||||
0x0086, 0x0086, 0x0085, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086,
|
||||
0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086,
|
||||
0x0086, 0x0086, 0x0086, 0x0086, 0x0085, 0x0086, 0x0085, 0x0086,
|
||||
// Entry C0 - FF
|
||||
0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0087,
|
||||
0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0085,
|
||||
0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0087, 0x0086, 0x0086,
|
||||
0x0087, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086,
|
||||
0x0086, 0x0086, 0x0086, 0x0086, 0x0085, 0x0085, 0x0086, 0x0086,
|
||||
0x0085, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0000, 0x00ee,
|
||||
0x0000, 0x00f0, 0x00f1, 0x00f1, 0x00f1, 0x00f1, 0x00f1, 0x00f1,
|
||||
0x00f1, 0x00f1, 0x00f1, 0x00f0, 0x00f1, 0x00f0, 0x00f0, 0x00f1,
|
||||
// Entry 100 - 13F
|
||||
0x00f1, 0x00f0, 0x00f1, 0x00f1, 0x00f1, 0x00f1, 0x00f0, 0x00f1,
|
||||
0x00f1, 0x00f1, 0x00f1, 0x00f1, 0x00f1, 0x0000, 0x010d, 0x0000,
|
||||
0x010f, 0x0000, 0x0111, 0x0000, 0x0113, 0x0113, 0x0000, 0x0116,
|
||||
0x0116, 0x0116, 0x0116, 0x0000, 0x011b, 0x0000, 0x011d, 0x0000,
|
||||
0x011f, 0x011f, 0x0000, 0x0122, 0x0122, 0x0122, 0x0122, 0x0122,
|
||||
0x0122, 0x0122, 0x0122, 0x0122, 0x0122, 0x0122, 0x0122, 0x0122,
|
||||
0x0122, 0x0122, 0x0122, 0x0122, 0x0122, 0x0122, 0x0122, 0x0122,
|
||||
0x0122, 0x0122, 0x0122, 0x0122, 0x0122, 0x0122, 0x0122, 0x0122,
|
||||
// Entry 140 - 17F
|
||||
0x0122, 0x0122, 0x0122, 0x0122, 0x0122, 0x0122, 0x0122, 0x0122,
|
||||
0x0122, 0x0122, 0x0122, 0x0122, 0x0122, 0x0122, 0x0122, 0x0122,
|
||||
0x0122, 0x0000, 0x0151, 0x0000, 0x0153, 0x0000, 0x0155, 0x0000,
|
||||
0x0157, 0x0000, 0x0159, 0x0000, 0x015b, 0x015b, 0x015b, 0x0000,
|
||||
0x015f, 0x0000, 0x0000, 0x0162, 0x0000, 0x0164, 0x0000, 0x0166,
|
||||
0x0166, 0x0166, 0x0000, 0x016a, 0x0000, 0x016c, 0x0000, 0x016e,
|
||||
0x0000, 0x0170, 0x0170, 0x0000, 0x0173, 0x0000, 0x0175, 0x0000,
|
||||
0x0177, 0x0000, 0x0179, 0x0000, 0x017b, 0x0000, 0x017d, 0x0000,
|
||||
// Entry 180 - 1BF
|
||||
0x017f, 0x0000, 0x0181, 0x0181, 0x0181, 0x0181, 0x0000, 0x0000,
|
||||
0x0187, 0x0000, 0x0000, 0x018a, 0x0000, 0x018c, 0x0000, 0x0000,
|
||||
0x018f, 0x0000, 0x0191, 0x0000, 0x0000, 0x0194, 0x0000, 0x0000,
|
||||
0x0197, 0x0000, 0x0199, 0x0000, 0x019b, 0x0000, 0x019d, 0x0000,
|
||||
0x019f, 0x0000, 0x01a1, 0x0000, 0x01a3, 0x0000, 0x01a5, 0x0000,
|
||||
0x01a7, 0x0000, 0x01a9, 0x0000, 0x01ab, 0x01ab, 0x0000, 0x01ae,
|
||||
0x0000, 0x01b0, 0x0000, 0x01b2, 0x0000, 0x01b4, 0x0000, 0x01b6,
|
||||
0x0000, 0x0000, 0x01b9, 0x0000, 0x01bb, 0x0000, 0x01bd, 0x0000,
|
||||
// Entry 1C0 - 1FF
|
||||
0x01bf, 0x0000, 0x01c1, 0x0000, 0x01c3, 0x0000, 0x01c5, 0x01c5,
|
||||
0x01c5, 0x01c5, 0x0000, 0x01ca, 0x0000, 0x01cc, 0x01cc, 0x0000,
|
||||
0x01cf, 0x0000, 0x01d1, 0x0000, 0x01d3, 0x0000, 0x01d5, 0x0000,
|
||||
0x01d7, 0x0000, 0x01d9, 0x01d9, 0x0000, 0x01dc, 0x0000, 0x01de,
|
||||
0x0000, 0x01e0, 0x0000, 0x01e2, 0x0000, 0x01e4, 0x0000, 0x01e6,
|
||||
0x0000, 0x01e8, 0x0000, 0x01ea, 0x0000, 0x01ec, 0x0000, 0x01ee,
|
||||
0x01ee, 0x01ee, 0x0000, 0x01f2, 0x0000, 0x01f4, 0x0000, 0x01f6,
|
||||
0x0000, 0x01f8, 0x0000, 0x0000, 0x01fb, 0x0000, 0x01fd, 0x01fd,
|
||||
// Entry 200 - 23F
|
||||
0x0000, 0x0200, 0x0000, 0x0202, 0x0202, 0x0000, 0x0205, 0x0205,
|
||||
0x0000, 0x0208, 0x0208, 0x0208, 0x0208, 0x0208, 0x0208, 0x0208,
|
||||
0x0000, 0x0210, 0x0000, 0x0212, 0x0000, 0x0214, 0x0000, 0x0000,
|
||||
0x0000, 0x0000, 0x0000, 0x021a, 0x0000, 0x0000, 0x021d, 0x0000,
|
||||
0x021f, 0x021f, 0x0000, 0x0222, 0x0000, 0x0224, 0x0224, 0x0000,
|
||||
0x0000, 0x0228, 0x0227, 0x0227, 0x0000, 0x0000, 0x022d, 0x0000,
|
||||
0x022f, 0x0000, 0x0231, 0x0000, 0x023d, 0x0233, 0x023d, 0x023d,
|
||||
0x023d, 0x023d, 0x023d, 0x023d, 0x023d, 0x0233, 0x023d, 0x023d,
|
||||
// Entry 240 - 27F
|
||||
0x0000, 0x0240, 0x0240, 0x0240, 0x0000, 0x0244, 0x0000, 0x0246,
|
||||
0x0000, 0x0248, 0x0248, 0x0000, 0x024b, 0x0000, 0x024d, 0x024d,
|
||||
0x024d, 0x024d, 0x024d, 0x024d, 0x0000, 0x0254, 0x0000, 0x0256,
|
||||
0x0000, 0x0258, 0x0000, 0x025a, 0x0000, 0x025c, 0x0000, 0x0000,
|
||||
0x025f, 0x025f, 0x025f, 0x0000, 0x0263, 0x0000, 0x0265, 0x0000,
|
||||
0x0267, 0x0000, 0x0000, 0x026a, 0x0269, 0x0269, 0x0000, 0x026e,
|
||||
0x0000, 0x0270, 0x0000, 0x0272, 0x0000, 0x0000, 0x0000, 0x0000,
|
||||
0x0277, 0x0000, 0x0000, 0x027a, 0x0000, 0x027c, 0x027c, 0x027c,
|
||||
// Entry 280 - 2BF
|
||||
0x027c, 0x0000, 0x0281, 0x0281, 0x0281, 0x0000, 0x0285, 0x0285,
|
||||
0x0285, 0x0285, 0x0285, 0x0000, 0x028b, 0x028b, 0x028b, 0x028b,
|
||||
0x0000, 0x0000, 0x0000, 0x0000, 0x0293, 0x0293, 0x0293, 0x0000,
|
||||
0x0297, 0x0297, 0x0297, 0x0297, 0x0000, 0x0000, 0x029d, 0x029d,
|
||||
0x029d, 0x029d, 0x0000, 0x02a2, 0x0000, 0x02a4, 0x02a4, 0x0000,
|
||||
0x02a7, 0x0000, 0x02a9, 0x02a9, 0x0000, 0x0000, 0x02ad, 0x0000,
|
||||
0x0000, 0x02b0, 0x0000, 0x02b2, 0x02b2, 0x0000, 0x0000, 0x02b6,
|
||||
0x0000, 0x02b8, 0x0000, 0x02ba, 0x0000, 0x02bc, 0x0000, 0x02be,
|
||||
// Entry 2C0 - 2FF
|
||||
0x02be, 0x0000, 0x0000, 0x02c2, 0x0000, 0x02c4, 0x02c1, 0x02c1,
|
||||
0x0000, 0x0000, 0x02c9, 0x02c8, 0x02c8, 0x0000, 0x0000, 0x02ce,
|
||||
0x0000, 0x02d0, 0x0000, 0x02d2, 0x0000, 0x0000, 0x02d5, 0x0000,
|
||||
0x0000, 0x0000, 0x02d9, 0x0000, 0x02db, 0x0000, 0x02dd, 0x0000,
|
||||
0x02df, 0x02df, 0x0000, 0x02e2, 0x0000, 0x02e4, 0x0000, 0x02e6,
|
||||
0x02e6, 0x02e6, 0x02e6, 0x02e6, 0x0000, 0x02ec, 0x02ed, 0x02ec,
|
||||
0x0000, 0x02f0,
|
||||
} // Size: 1532 bytes
|
||||
|
||||
// Total table size 1532 bytes (1KiB); checksum: 90718A2
|
|
@ -1,43 +0,0 @@
|
|||
load("@io_bazel_rules_go//go:def.bzl", "go_library")
|
||||
|
||||
go_library(
|
||||
name = "go_default_library",
|
||||
srcs = [
|
||||
"class.go",
|
||||
"context.go",
|
||||
"doc.go",
|
||||
"nickname.go",
|
||||
"options.go",
|
||||
"profile.go",
|
||||
"profiles.go",
|
||||
"tables.go",
|
||||
"transformer.go",
|
||||
"trieval.go",
|
||||
],
|
||||
importmap = "k8s.io/kubernetes/vendor/golang.org/x/text/secure/precis",
|
||||
importpath = "golang.org/x/text/secure/precis",
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [
|
||||
"//vendor/golang.org/x/text/cases:go_default_library",
|
||||
"//vendor/golang.org/x/text/language:go_default_library",
|
||||
"//vendor/golang.org/x/text/runes:go_default_library",
|
||||
"//vendor/golang.org/x/text/secure/bidirule:go_default_library",
|
||||
"//vendor/golang.org/x/text/transform:go_default_library",
|
||||
"//vendor/golang.org/x/text/unicode/norm:go_default_library",
|
||||
"//vendor/golang.org/x/text/width:go_default_library",
|
||||
],
|
||||
)
|
||||
|
||||
filegroup(
|
||||
name = "package-srcs",
|
||||
srcs = glob(["**"]),
|
||||
tags = ["automanaged"],
|
||||
visibility = ["//visibility:private"],
|
||||
)
|
||||
|
||||
filegroup(
|
||||
name = "all-srcs",
|
||||
srcs = [":package-srcs"],
|
||||
tags = ["automanaged"],
|
||||
visibility = ["//visibility:public"],
|
||||
)
|
|
@ -1,36 +0,0 @@
|
|||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package precis
|
||||
|
||||
import (
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
// TODO: Add contextual character rules from Appendix A of RFC5892.
|
||||
|
||||
// A class is a set of characters that match certain derived properties. The
|
||||
// PRECIS framework defines two classes: The Freeform class and the Identifier
|
||||
// class. The freeform class should be used for profiles where expressiveness is
|
||||
// prioritized over safety such as nicknames or passwords. The identifier class
|
||||
// should be used for profiles where safety is the first priority such as
|
||||
// addressable network labels and usernames.
|
||||
type class struct {
|
||||
validFrom property
|
||||
}
|
||||
|
||||
// Contains satisfies the runes.Set interface and returns whether the given rune
|
||||
// is a member of the class.
|
||||
func (c class) Contains(r rune) bool {
|
||||
b := make([]byte, 4)
|
||||
n := utf8.EncodeRune(b, r)
|
||||
|
||||
trieval, _ := dpTrie.lookup(b[:n])
|
||||
return c.validFrom <= property(trieval)
|
||||
}
|
||||
|
||||
var (
|
||||
identifier = &class{validFrom: pValid}
|
||||
freeform = &class{validFrom: idDisOrFreePVal}
|
||||
)
|
|
@ -1,139 +0,0 @@
|
|||
// Copyright 2016 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package precis
|
||||
|
||||
import "errors"
|
||||
|
||||
// This file contains tables and code related to context rules.
|
||||
|
||||
type catBitmap uint16
|
||||
|
||||
const (
|
||||
// These bits, once set depending on the current value, are never unset.
|
||||
bJapanese catBitmap = 1 << iota
|
||||
bArabicIndicDigit
|
||||
bExtendedArabicIndicDigit
|
||||
|
||||
// These bits are set on each iteration depending on the current value.
|
||||
bJoinStart
|
||||
bJoinMid
|
||||
bJoinEnd
|
||||
bVirama
|
||||
bLatinSmallL
|
||||
bGreek
|
||||
bHebrew
|
||||
|
||||
// These bits indicated which of the permanent bits need to be set at the
|
||||
// end of the checks.
|
||||
bMustHaveJapn
|
||||
|
||||
permanent = bJapanese | bArabicIndicDigit | bExtendedArabicIndicDigit | bMustHaveJapn
|
||||
)
|
||||
|
||||
const finalShift = 10
|
||||
|
||||
var errContext = errors.New("precis: contextual rule violated")
|
||||
|
||||
func init() {
|
||||
// Programmatically set these required bits as, manually setting them seems
|
||||
// too error prone.
|
||||
for i, ct := range categoryTransitions {
|
||||
categoryTransitions[i].keep |= permanent
|
||||
categoryTransitions[i].accept |= ct.term
|
||||
}
|
||||
}
|
||||
|
||||
var categoryTransitions = []struct {
|
||||
keep catBitmap // mask selecting which bits to keep from the previous state
|
||||
set catBitmap // mask for which bits to set for this transition
|
||||
|
||||
// These bitmaps are used for rules that require lookahead.
|
||||
// term&accept == term must be true, which is enforced programmatically.
|
||||
term catBitmap // bits accepted as termination condition
|
||||
accept catBitmap // bits that pass, but not sufficient as termination
|
||||
|
||||
// The rule function cannot take a *context as an argument, as it would
|
||||
// cause the context to escape, adding significant overhead.
|
||||
rule func(beforeBits catBitmap) (doLookahead bool, err error)
|
||||
}{
|
||||
joiningL: {set: bJoinStart},
|
||||
joiningD: {set: bJoinStart | bJoinEnd},
|
||||
joiningT: {keep: bJoinStart, set: bJoinMid},
|
||||
joiningR: {set: bJoinEnd},
|
||||
viramaModifier: {set: bVirama},
|
||||
viramaJoinT: {set: bVirama | bJoinMid},
|
||||
latinSmallL: {set: bLatinSmallL},
|
||||
greek: {set: bGreek},
|
||||
greekJoinT: {set: bGreek | bJoinMid},
|
||||
hebrew: {set: bHebrew},
|
||||
hebrewJoinT: {set: bHebrew | bJoinMid},
|
||||
japanese: {set: bJapanese},
|
||||
katakanaMiddleDot: {set: bMustHaveJapn},
|
||||
|
||||
zeroWidthNonJoiner: {
|
||||
term: bJoinEnd,
|
||||
accept: bJoinMid,
|
||||
rule: func(before catBitmap) (doLookAhead bool, err error) {
|
||||
if before&bVirama != 0 {
|
||||
return false, nil
|
||||
}
|
||||
if before&bJoinStart == 0 {
|
||||
return false, errContext
|
||||
}
|
||||
return true, nil
|
||||
},
|
||||
},
|
||||
zeroWidthJoiner: {
|
||||
rule: func(before catBitmap) (doLookAhead bool, err error) {
|
||||
if before&bVirama == 0 {
|
||||
err = errContext
|
||||
}
|
||||
return false, err
|
||||
},
|
||||
},
|
||||
middleDot: {
|
||||
term: bLatinSmallL,
|
||||
rule: func(before catBitmap) (doLookAhead bool, err error) {
|
||||
if before&bLatinSmallL == 0 {
|
||||
return false, errContext
|
||||
}
|
||||
return true, nil
|
||||
},
|
||||
},
|
||||
greekLowerNumeralSign: {
|
||||
set: bGreek,
|
||||
term: bGreek,
|
||||
rule: func(before catBitmap) (doLookAhead bool, err error) {
|
||||
return true, nil
|
||||
},
|
||||
},
|
||||
hebrewPreceding: {
|
||||
set: bHebrew,
|
||||
rule: func(before catBitmap) (doLookAhead bool, err error) {
|
||||
if before&bHebrew == 0 {
|
||||
err = errContext
|
||||
}
|
||||
return false, err
|
||||
},
|
||||
},
|
||||
arabicIndicDigit: {
|
||||
set: bArabicIndicDigit,
|
||||
rule: func(before catBitmap) (doLookAhead bool, err error) {
|
||||
if before&bExtendedArabicIndicDigit != 0 {
|
||||
err = errContext
|
||||
}
|
||||
return false, err
|
||||
},
|
||||
},
|
||||
extendedArabicIndicDigit: {
|
||||
set: bExtendedArabicIndicDigit,
|
||||
rule: func(before catBitmap) (doLookAhead bool, err error) {
|
||||
if before&bArabicIndicDigit != 0 {
|
||||
err = errContext
|
||||
}
|
||||
return false, err
|
||||
},
|
||||
},
|
||||
}
|
|
@ -1,14 +0,0 @@
|
|||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package precis contains types and functions for the preparation,
|
||||
// enforcement, and comparison of internationalized strings ("PRECIS") as
|
||||
// defined in RFC 7564. It also contains several pre-defined profiles for
|
||||
// passwords, nicknames, and usernames as defined in RFC 7613 and RFC 7700.
|
||||
//
|
||||
// BE ADVISED: This package is under construction and the API may change in
|
||||
// backwards incompatible ways and without notice.
|
||||
package precis // import "golang.org/x/text/secure/precis"
|
||||
|
||||
//go:generate go run gen.go gen_trieval.go
|
|
@ -1,310 +0,0 @@
|
|||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Unicode table generator.
|
||||
// Data read from the web.
|
||||
|
||||
// +build ignore
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"log"
|
||||
"unicode"
|
||||
"unicode/utf8"
|
||||
|
||||
"golang.org/x/text/internal/gen"
|
||||
"golang.org/x/text/internal/triegen"
|
||||
"golang.org/x/text/internal/ucd"
|
||||
"golang.org/x/text/unicode/norm"
|
||||
"golang.org/x/text/unicode/rangetable"
|
||||
)
|
||||
|
||||
var outputFile = flag.String("output", "tables.go", "output file for generated tables; default tables.go")
|
||||
|
||||
var assigned, disallowedRunes *unicode.RangeTable
|
||||
|
||||
var runeCategory = map[rune]category{}
|
||||
|
||||
var overrides = map[category]category{
|
||||
viramaModifier: viramaJoinT,
|
||||
greek: greekJoinT,
|
||||
hebrew: hebrewJoinT,
|
||||
}
|
||||
|
||||
func setCategory(r rune, cat category) {
|
||||
if c, ok := runeCategory[r]; ok {
|
||||
if override, ok := overrides[c]; cat == joiningT && ok {
|
||||
cat = override
|
||||
} else {
|
||||
log.Fatalf("%U: multiple categories for rune (%v and %v)", r, c, cat)
|
||||
}
|
||||
}
|
||||
runeCategory[r] = cat
|
||||
}
|
||||
|
||||
func init() {
|
||||
if numCategories > 1<<propShift {
|
||||
log.Fatalf("Number of categories is %d; may at most be %d", numCategories, 1<<propShift)
|
||||
}
|
||||
}
|
||||
|
||||
func main() {
|
||||
gen.Init()
|
||||
|
||||
// Load data
|
||||
runes := []rune{}
|
||||
// PrecisIgnorableProperties: https://tools.ietf.org/html/rfc7564#section-9.13
|
||||
ucd.Parse(gen.OpenUCDFile("DerivedCoreProperties.txt"), func(p *ucd.Parser) {
|
||||
if p.String(1) == "Default_Ignorable_Code_Point" {
|
||||
runes = append(runes, p.Rune(0))
|
||||
}
|
||||
})
|
||||
ucd.Parse(gen.OpenUCDFile("PropList.txt"), func(p *ucd.Parser) {
|
||||
switch p.String(1) {
|
||||
case "Noncharacter_Code_Point":
|
||||
runes = append(runes, p.Rune(0))
|
||||
}
|
||||
})
|
||||
// OldHangulJamo: https://tools.ietf.org/html/rfc5892#section-2.9
|
||||
ucd.Parse(gen.OpenUCDFile("HangulSyllableType.txt"), func(p *ucd.Parser) {
|
||||
switch p.String(1) {
|
||||
case "L", "V", "T":
|
||||
runes = append(runes, p.Rune(0))
|
||||
}
|
||||
})
|
||||
|
||||
disallowedRunes = rangetable.New(runes...)
|
||||
assigned = rangetable.Assigned(unicode.Version)
|
||||
|
||||
// Load category data.
|
||||
runeCategory['l'] = latinSmallL
|
||||
ucd.Parse(gen.OpenUCDFile("UnicodeData.txt"), func(p *ucd.Parser) {
|
||||
const cccVirama = 9
|
||||
if p.Int(ucd.CanonicalCombiningClass) == cccVirama {
|
||||
setCategory(p.Rune(0), viramaModifier)
|
||||
}
|
||||
})
|
||||
ucd.Parse(gen.OpenUCDFile("Scripts.txt"), func(p *ucd.Parser) {
|
||||
switch p.String(1) {
|
||||
case "Greek":
|
||||
setCategory(p.Rune(0), greek)
|
||||
case "Hebrew":
|
||||
setCategory(p.Rune(0), hebrew)
|
||||
case "Hiragana", "Katakana", "Han":
|
||||
setCategory(p.Rune(0), japanese)
|
||||
}
|
||||
})
|
||||
|
||||
// Set the rule categories associated with exceptions. This overrides any
|
||||
// previously set categories. The original categories are manually
|
||||
// reintroduced in the categoryTransitions table.
|
||||
for r, e := range exceptions {
|
||||
if e.cat != 0 {
|
||||
runeCategory[r] = e.cat
|
||||
}
|
||||
}
|
||||
cat := map[string]category{
|
||||
"L": joiningL,
|
||||
"D": joiningD,
|
||||
"T": joiningT,
|
||||
|
||||
"R": joiningR,
|
||||
}
|
||||
ucd.Parse(gen.OpenUCDFile("extracted/DerivedJoiningType.txt"), func(p *ucd.Parser) {
|
||||
switch v := p.String(1); v {
|
||||
case "L", "D", "T", "R":
|
||||
setCategory(p.Rune(0), cat[v])
|
||||
}
|
||||
})
|
||||
|
||||
writeTables()
|
||||
gen.Repackage("gen_trieval.go", "trieval.go", "precis")
|
||||
}
|
||||
|
||||
type exception struct {
|
||||
prop property
|
||||
cat category
|
||||
}
|
||||
|
||||
func init() {
|
||||
// Programmatically add the Arabic and Indic digits to the exceptions map.
|
||||
// See comment in the exceptions map below why these are marked disallowed.
|
||||
for i := rune(0); i <= 9; i++ {
|
||||
exceptions[0x0660+i] = exception{
|
||||
prop: disallowed,
|
||||
cat: arabicIndicDigit,
|
||||
}
|
||||
exceptions[0x06F0+i] = exception{
|
||||
prop: disallowed,
|
||||
cat: extendedArabicIndicDigit,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// The Exceptions class as defined in RFC 5892
|
||||
// https://tools.ietf.org/html/rfc5892#section-2.6
|
||||
var exceptions = map[rune]exception{
|
||||
0x00DF: {prop: pValid},
|
||||
0x03C2: {prop: pValid},
|
||||
0x06FD: {prop: pValid},
|
||||
0x06FE: {prop: pValid},
|
||||
0x0F0B: {prop: pValid},
|
||||
0x3007: {prop: pValid},
|
||||
|
||||
// ContextO|J rules are marked as disallowed, taking a "guilty until proven
|
||||
// innocent" approach. The main reason for this is that the check for
|
||||
// whether a context rule should be applied can be moved to the logic for
|
||||
// handing disallowed runes, taken it off the common path. The exception to
|
||||
// this rule is for katakanaMiddleDot, as the rule logic is handled without
|
||||
// using a rule function.
|
||||
|
||||
// ContextJ (Join control)
|
||||
0x200C: {prop: disallowed, cat: zeroWidthNonJoiner},
|
||||
0x200D: {prop: disallowed, cat: zeroWidthJoiner},
|
||||
|
||||
// ContextO
|
||||
0x00B7: {prop: disallowed, cat: middleDot},
|
||||
0x0375: {prop: disallowed, cat: greekLowerNumeralSign},
|
||||
0x05F3: {prop: disallowed, cat: hebrewPreceding}, // punctuation Geresh
|
||||
0x05F4: {prop: disallowed, cat: hebrewPreceding}, // punctuation Gershayim
|
||||
0x30FB: {prop: pValid, cat: katakanaMiddleDot},
|
||||
|
||||
// These are officially ContextO, but the implementation does not require
|
||||
// special treatment of these, so we simply mark them as valid.
|
||||
0x0660: {prop: pValid},
|
||||
0x0661: {prop: pValid},
|
||||
0x0662: {prop: pValid},
|
||||
0x0663: {prop: pValid},
|
||||
0x0664: {prop: pValid},
|
||||
0x0665: {prop: pValid},
|
||||
0x0666: {prop: pValid},
|
||||
0x0667: {prop: pValid},
|
||||
0x0668: {prop: pValid},
|
||||
0x0669: {prop: pValid},
|
||||
0x06F0: {prop: pValid},
|
||||
0x06F1: {prop: pValid},
|
||||
0x06F2: {prop: pValid},
|
||||
0x06F3: {prop: pValid},
|
||||
0x06F4: {prop: pValid},
|
||||
0x06F5: {prop: pValid},
|
||||
0x06F6: {prop: pValid},
|
||||
0x06F7: {prop: pValid},
|
||||
0x06F8: {prop: pValid},
|
||||
0x06F9: {prop: pValid},
|
||||
|
||||
0x0640: {prop: disallowed},
|
||||
0x07FA: {prop: disallowed},
|
||||
0x302E: {prop: disallowed},
|
||||
0x302F: {prop: disallowed},
|
||||
0x3031: {prop: disallowed},
|
||||
0x3032: {prop: disallowed},
|
||||
0x3033: {prop: disallowed},
|
||||
0x3034: {prop: disallowed},
|
||||
0x3035: {prop: disallowed},
|
||||
0x303B: {prop: disallowed},
|
||||
}
|
||||
|
||||
// LetterDigits: https://tools.ietf.org/html/rfc5892#section-2.1
|
||||
// r in {Ll, Lu, Lo, Nd, Lm, Mn, Mc}.
|
||||
func isLetterDigits(r rune) bool {
|
||||
return unicode.In(r,
|
||||
unicode.Ll, unicode.Lu, unicode.Lm, unicode.Lo, // Letters
|
||||
unicode.Mn, unicode.Mc, // Modifiers
|
||||
unicode.Nd, // Digits
|
||||
)
|
||||
}
|
||||
|
||||
func isIdDisAndFreePVal(r rune) bool {
|
||||
return unicode.In(r,
|
||||
// OtherLetterDigits: https://tools.ietf.org/html/rfc7564#section-9.18
|
||||
// r in in {Lt, Nl, No, Me}
|
||||
unicode.Lt, unicode.Nl, unicode.No, // Other letters / numbers
|
||||
unicode.Me, // Modifiers
|
||||
|
||||
// Spaces: https://tools.ietf.org/html/rfc7564#section-9.14
|
||||
// r in in {Zs}
|
||||
unicode.Zs,
|
||||
|
||||
// Symbols: https://tools.ietf.org/html/rfc7564#section-9.15
|
||||
// r in {Sm, Sc, Sk, So}
|
||||
unicode.Sm, unicode.Sc, unicode.Sk, unicode.So,
|
||||
|
||||
// Punctuation: https://tools.ietf.org/html/rfc7564#section-9.16
|
||||
// r in {Pc, Pd, Ps, Pe, Pi, Pf, Po}
|
||||
unicode.Pc, unicode.Pd, unicode.Ps, unicode.Pe,
|
||||
unicode.Pi, unicode.Pf, unicode.Po,
|
||||
)
|
||||
}
|
||||
|
||||
// HasCompat: https://tools.ietf.org/html/rfc7564#section-9.17
|
||||
func hasCompat(r rune) bool {
|
||||
return !norm.NFKC.IsNormalString(string(r))
|
||||
}
|
||||
|
||||
// From https://tools.ietf.org/html/rfc5892:
|
||||
//
|
||||
// If .cp. .in. Exceptions Then Exceptions(cp);
|
||||
// Else If .cp. .in. BackwardCompatible Then BackwardCompatible(cp);
|
||||
// Else If .cp. .in. Unassigned Then UNASSIGNED;
|
||||
// Else If .cp. .in. ASCII7 Then PVALID;
|
||||
// Else If .cp. .in. JoinControl Then CONTEXTJ;
|
||||
// Else If .cp. .in. OldHangulJamo Then DISALLOWED;
|
||||
// Else If .cp. .in. PrecisIgnorableProperties Then DISALLOWED;
|
||||
// Else If .cp. .in. Controls Then DISALLOWED;
|
||||
// Else If .cp. .in. HasCompat Then ID_DIS or FREE_PVAL;
|
||||
// Else If .cp. .in. LetterDigits Then PVALID;
|
||||
// Else If .cp. .in. OtherLetterDigits Then ID_DIS or FREE_PVAL;
|
||||
// Else If .cp. .in. Spaces Then ID_DIS or FREE_PVAL;
|
||||
// Else If .cp. .in. Symbols Then ID_DIS or FREE_PVAL;
|
||||
// Else If .cp. .in. Punctuation Then ID_DIS or FREE_PVAL;
|
||||
// Else DISALLOWED;
|
||||
|
||||
func writeTables() {
|
||||
propTrie := triegen.NewTrie("derivedProperties")
|
||||
w := gen.NewCodeWriter()
|
||||
defer w.WriteGoFile(*outputFile, "precis")
|
||||
gen.WriteUnicodeVersion(w)
|
||||
|
||||
// Iterate over all the runes...
|
||||
for i := rune(0); i < unicode.MaxRune; i++ {
|
||||
r := rune(i)
|
||||
|
||||
if !utf8.ValidRune(r) {
|
||||
continue
|
||||
}
|
||||
|
||||
e, ok := exceptions[i]
|
||||
p := e.prop
|
||||
switch {
|
||||
case ok:
|
||||
case !unicode.In(r, assigned):
|
||||
p = unassigned
|
||||
case r >= 0x0021 && r <= 0x007e: // Is ASCII 7
|
||||
p = pValid
|
||||
case unicode.In(r, disallowedRunes, unicode.Cc):
|
||||
p = disallowed
|
||||
case hasCompat(r):
|
||||
p = idDisOrFreePVal
|
||||
case isLetterDigits(r):
|
||||
p = pValid
|
||||
case isIdDisAndFreePVal(r):
|
||||
p = idDisOrFreePVal
|
||||
default:
|
||||
p = disallowed
|
||||
}
|
||||
cat := runeCategory[r]
|
||||
// Don't set category for runes that are disallowed.
|
||||
if p == disallowed {
|
||||
cat = exceptions[r].cat
|
||||
}
|
||||
propTrie.Insert(r, uint64(p)|uint64(cat))
|
||||
}
|
||||
sz, err := propTrie.Gen(w)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
w.Size += sz
|
||||
}
|
|
@ -1,68 +0,0 @@
|
|||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ignore
|
||||
|
||||
package main
|
||||
|
||||
// entry is the entry of a trie table
|
||||
// 7..6 property (unassigned, disallowed, maybe, valid)
|
||||
// 5..0 category
|
||||
type entry uint8
|
||||
|
||||
const (
|
||||
propShift = 6
|
||||
propMask = 0xc0
|
||||
catMask = 0x3f
|
||||
)
|
||||
|
||||
func (e entry) property() property { return property(e & propMask) }
|
||||
func (e entry) category() category { return category(e & catMask) }
|
||||
|
||||
type property uint8
|
||||
|
||||
// The order of these constants matter. A Profile may consider runes to be
|
||||
// allowed either from pValid or idDisOrFreePVal.
|
||||
const (
|
||||
unassigned property = iota << propShift
|
||||
disallowed
|
||||
idDisOrFreePVal // disallowed for Identifier, pValid for FreeForm
|
||||
pValid
|
||||
)
|
||||
|
||||
// compute permutations of all properties and specialCategories.
|
||||
type category uint8
|
||||
|
||||
const (
|
||||
other category = iota
|
||||
|
||||
// Special rune types
|
||||
joiningL
|
||||
joiningD
|
||||
joiningT
|
||||
joiningR
|
||||
viramaModifier
|
||||
viramaJoinT // Virama + JoiningT
|
||||
latinSmallL // U+006c
|
||||
greek
|
||||
greekJoinT // Greek + JoiningT
|
||||
hebrew
|
||||
hebrewJoinT // Hebrew + JoiningT
|
||||
japanese // hirigana, katakana, han
|
||||
|
||||
// Special rune types associated with contextual rules defined in
|
||||
// https://tools.ietf.org/html/rfc5892#appendix-A.
|
||||
// ContextO
|
||||
zeroWidthNonJoiner // rule 1
|
||||
zeroWidthJoiner // rule 2
|
||||
// ContextJ
|
||||
middleDot // rule 3
|
||||
greekLowerNumeralSign // rule 4
|
||||
hebrewPreceding // rule 5 and 6
|
||||
katakanaMiddleDot // rule 7
|
||||
arabicIndicDigit // rule 8
|
||||
extendedArabicIndicDigit // rule 9
|
||||
|
||||
numCategories
|
||||
)
|
|
@ -1,70 +0,0 @@
|
|||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package precis
|
||||
|
||||
import (
|
||||
"unicode"
|
||||
"unicode/utf8"
|
||||
|
||||
"golang.org/x/text/transform"
|
||||
)
|
||||
|
||||
type nickAdditionalMapping struct {
|
||||
// TODO: This transformer needs to be stateless somehow…
|
||||
notStart bool
|
||||
prevSpace bool
|
||||
}
|
||||
|
||||
func (t *nickAdditionalMapping) Reset() {
|
||||
t.prevSpace = false
|
||||
t.notStart = false
|
||||
}
|
||||
|
||||
func (t *nickAdditionalMapping) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
// RFC 7700 §2.1. Rules
|
||||
//
|
||||
// 2. Additional Mapping Rule: The additional mapping rule consists of
|
||||
// the following sub-rules.
|
||||
//
|
||||
// 1. Any instances of non-ASCII space MUST be mapped to ASCII
|
||||
// space (U+0020); a non-ASCII space is any Unicode code point
|
||||
// having a general category of "Zs", naturally with the
|
||||
// exception of U+0020.
|
||||
//
|
||||
// 2. Any instances of the ASCII space character at the beginning
|
||||
// or end of a nickname MUST be removed (e.g., "stpeter " is
|
||||
// mapped to "stpeter").
|
||||
//
|
||||
// 3. Interior sequences of more than one ASCII space character
|
||||
// MUST be mapped to a single ASCII space character (e.g.,
|
||||
// "St Peter" is mapped to "St Peter").
|
||||
|
||||
for nSrc < len(src) {
|
||||
r, size := utf8.DecodeRune(src[nSrc:])
|
||||
if size == 0 { // Incomplete UTF-8 encoding
|
||||
if !atEOF {
|
||||
return nDst, nSrc, transform.ErrShortSrc
|
||||
}
|
||||
size = 1
|
||||
}
|
||||
if unicode.Is(unicode.Zs, r) {
|
||||
t.prevSpace = true
|
||||
} else {
|
||||
if t.prevSpace && t.notStart {
|
||||
dst[nDst] = ' '
|
||||
nDst += 1
|
||||
}
|
||||
if size != copy(dst[nDst:], src[nSrc:nSrc+size]) {
|
||||
nDst += size
|
||||
return nDst, nSrc, transform.ErrShortDst
|
||||
}
|
||||
nDst += size
|
||||
t.prevSpace = false
|
||||
t.notStart = true
|
||||
}
|
||||
nSrc += size
|
||||
}
|
||||
return nDst, nSrc, nil
|
||||
}
|
|
@ -1,153 +0,0 @@
|
|||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package precis
|
||||
|
||||
import (
|
||||
"golang.org/x/text/cases"
|
||||
"golang.org/x/text/language"
|
||||
"golang.org/x/text/runes"
|
||||
"golang.org/x/text/transform"
|
||||
"golang.org/x/text/unicode/norm"
|
||||
)
|
||||
|
||||
// An Option is used to define the behavior and rules of a Profile.
|
||||
type Option func(*options)
|
||||
|
||||
type options struct {
|
||||
// Preparation options
|
||||
foldWidth bool
|
||||
|
||||
// Enforcement options
|
||||
asciiLower bool
|
||||
cases transform.SpanningTransformer
|
||||
disallow runes.Set
|
||||
norm transform.SpanningTransformer
|
||||
additional []func() transform.SpanningTransformer
|
||||
width transform.SpanningTransformer
|
||||
disallowEmpty bool
|
||||
bidiRule bool
|
||||
|
||||
// Comparison options
|
||||
ignorecase bool
|
||||
}
|
||||
|
||||
func getOpts(o ...Option) (res options) {
|
||||
for _, f := range o {
|
||||
f(&res)
|
||||
}
|
||||
// Using a SpanningTransformer, instead of norm.Form prevents an allocation
|
||||
// down the road.
|
||||
if res.norm == nil {
|
||||
res.norm = norm.NFC
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
var (
|
||||
// The IgnoreCase option causes the profile to perform a case insensitive
|
||||
// comparison during the PRECIS comparison step.
|
||||
IgnoreCase Option = ignoreCase
|
||||
|
||||
// The FoldWidth option causes the profile to map non-canonical wide and
|
||||
// narrow variants to their decomposition mapping. This is useful for
|
||||
// profiles that are based on the identifier class which would otherwise
|
||||
// disallow such characters.
|
||||
FoldWidth Option = foldWidth
|
||||
|
||||
// The DisallowEmpty option causes the enforcement step to return an error if
|
||||
// the resulting string would be empty.
|
||||
DisallowEmpty Option = disallowEmpty
|
||||
|
||||
// The BidiRule option causes the Bidi Rule defined in RFC 5893 to be
|
||||
// applied.
|
||||
BidiRule Option = bidiRule
|
||||
)
|
||||
|
||||
var (
|
||||
ignoreCase = func(o *options) {
|
||||
o.ignorecase = true
|
||||
}
|
||||
foldWidth = func(o *options) {
|
||||
o.foldWidth = true
|
||||
}
|
||||
disallowEmpty = func(o *options) {
|
||||
o.disallowEmpty = true
|
||||
}
|
||||
bidiRule = func(o *options) {
|
||||
o.bidiRule = true
|
||||
}
|
||||
)
|
||||
|
||||
// TODO: move this logic to package transform
|
||||
|
||||
type spanWrap struct{ transform.Transformer }
|
||||
|
||||
func (s spanWrap) Span(src []byte, atEOF bool) (n int, err error) {
|
||||
return 0, transform.ErrEndOfSpan
|
||||
}
|
||||
|
||||
// TODO: allow different types? For instance:
|
||||
// func() transform.Transformer
|
||||
// func() transform.SpanningTransformer
|
||||
// func([]byte) bool // validation only
|
||||
//
|
||||
// Also, would be great if we could detect if a transformer is reentrant.
|
||||
|
||||
// The AdditionalMapping option defines the additional mapping rule for the
|
||||
// Profile by applying Transformer's in sequence.
|
||||
func AdditionalMapping(t ...func() transform.Transformer) Option {
|
||||
return func(o *options) {
|
||||
for _, f := range t {
|
||||
sf := func() transform.SpanningTransformer {
|
||||
return f().(transform.SpanningTransformer)
|
||||
}
|
||||
if _, ok := f().(transform.SpanningTransformer); !ok {
|
||||
sf = func() transform.SpanningTransformer {
|
||||
return spanWrap{f()}
|
||||
}
|
||||
}
|
||||
o.additional = append(o.additional, sf)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// The Norm option defines a Profile's normalization rule. Defaults to NFC.
|
||||
func Norm(f norm.Form) Option {
|
||||
return func(o *options) {
|
||||
o.norm = f
|
||||
}
|
||||
}
|
||||
|
||||
// The FoldCase option defines a Profile's case mapping rule. Options can be
|
||||
// provided to determine the type of case folding used.
|
||||
func FoldCase(opts ...cases.Option) Option {
|
||||
return func(o *options) {
|
||||
o.asciiLower = true
|
||||
o.cases = cases.Fold(opts...)
|
||||
}
|
||||
}
|
||||
|
||||
// The LowerCase option defines a Profile's case mapping rule. Options can be
|
||||
// provided to determine the type of case folding used.
|
||||
func LowerCase(opts ...cases.Option) Option {
|
||||
return func(o *options) {
|
||||
o.asciiLower = true
|
||||
if len(opts) == 0 {
|
||||
o.cases = cases.Lower(language.Und, cases.HandleFinalSigma(false))
|
||||
return
|
||||
}
|
||||
|
||||
opts = append([]cases.Option{cases.HandleFinalSigma(false)}, opts...)
|
||||
o.cases = cases.Lower(language.Und, opts...)
|
||||
}
|
||||
}
|
||||
|
||||
// The Disallow option further restricts a Profile's allowed characters beyond
|
||||
// what is disallowed by the underlying string class.
|
||||
func Disallow(set runes.Set) Option {
|
||||
return func(o *options) {
|
||||
o.disallow = set
|
||||
}
|
||||
}
|
|
@ -1,378 +0,0 @@
|
|||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package precis
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"errors"
|
||||
"unicode/utf8"
|
||||
|
||||
"golang.org/x/text/cases"
|
||||
"golang.org/x/text/language"
|
||||
"golang.org/x/text/runes"
|
||||
"golang.org/x/text/secure/bidirule"
|
||||
"golang.org/x/text/transform"
|
||||
"golang.org/x/text/width"
|
||||
)
|
||||
|
||||
var (
|
||||
errDisallowedRune = errors.New("precis: disallowed rune encountered")
|
||||
)
|
||||
|
||||
var dpTrie = newDerivedPropertiesTrie(0)
|
||||
|
||||
// A Profile represents a set of rules for normalizing and validating strings in
|
||||
// the PRECIS framework.
|
||||
type Profile struct {
|
||||
options
|
||||
class *class
|
||||
}
|
||||
|
||||
// NewIdentifier creates a new PRECIS profile based on the Identifier string
|
||||
// class. Profiles created from this class are suitable for use where safety is
|
||||
// prioritized over expressiveness like network identifiers, user accounts, chat
|
||||
// rooms, and file names.
|
||||
func NewIdentifier(opts ...Option) *Profile {
|
||||
return &Profile{
|
||||
options: getOpts(opts...),
|
||||
class: identifier,
|
||||
}
|
||||
}
|
||||
|
||||
// NewFreeform creates a new PRECIS profile based on the Freeform string class.
|
||||
// Profiles created from this class are suitable for use where expressiveness is
|
||||
// prioritized over safety like passwords, and display-elements such as
|
||||
// nicknames in a chat room.
|
||||
func NewFreeform(opts ...Option) *Profile {
|
||||
return &Profile{
|
||||
options: getOpts(opts...),
|
||||
class: freeform,
|
||||
}
|
||||
}
|
||||
|
||||
// NewTransformer creates a new transform.Transformer that performs the PRECIS
|
||||
// preparation and enforcement steps on the given UTF-8 encoded bytes.
|
||||
func (p *Profile) NewTransformer() *Transformer {
|
||||
var ts []transform.Transformer
|
||||
|
||||
// These transforms are applied in the order defined in
|
||||
// https://tools.ietf.org/html/rfc7564#section-7
|
||||
|
||||
if p.options.foldWidth {
|
||||
ts = append(ts, width.Fold)
|
||||
}
|
||||
|
||||
for _, f := range p.options.additional {
|
||||
ts = append(ts, f())
|
||||
}
|
||||
|
||||
if p.options.cases != nil {
|
||||
ts = append(ts, p.options.cases)
|
||||
}
|
||||
|
||||
ts = append(ts, p.options.norm)
|
||||
|
||||
if p.options.bidiRule {
|
||||
ts = append(ts, bidirule.New())
|
||||
}
|
||||
|
||||
ts = append(ts, &checker{p: p, allowed: p.Allowed()})
|
||||
|
||||
// TODO: Add the disallow empty rule with a dummy transformer?
|
||||
|
||||
return &Transformer{transform.Chain(ts...)}
|
||||
}
|
||||
|
||||
var errEmptyString = errors.New("precis: transformation resulted in empty string")
|
||||
|
||||
type buffers struct {
|
||||
src []byte
|
||||
buf [2][]byte
|
||||
next int
|
||||
}
|
||||
|
||||
func (b *buffers) apply(t transform.SpanningTransformer) (err error) {
|
||||
n, err := t.Span(b.src, true)
|
||||
if err != transform.ErrEndOfSpan {
|
||||
return err
|
||||
}
|
||||
x := b.next & 1
|
||||
if b.buf[x] == nil {
|
||||
b.buf[x] = make([]byte, 0, 8+len(b.src)+len(b.src)>>2)
|
||||
}
|
||||
span := append(b.buf[x][:0], b.src[:n]...)
|
||||
b.src, _, err = transform.Append(t, span, b.src[n:])
|
||||
b.buf[x] = b.src
|
||||
b.next++
|
||||
return err
|
||||
}
|
||||
|
||||
// Pre-allocate transformers when possible. In some cases this avoids allocation.
|
||||
var (
|
||||
foldWidthT transform.SpanningTransformer = width.Fold
|
||||
lowerCaseT transform.SpanningTransformer = cases.Lower(language.Und, cases.HandleFinalSigma(false))
|
||||
)
|
||||
|
||||
// TODO: make this a method on profile.
|
||||
|
||||
func (b *buffers) enforce(p *Profile, src []byte, comparing bool) (str []byte, err error) {
|
||||
b.src = src
|
||||
|
||||
ascii := true
|
||||
for _, c := range src {
|
||||
if c >= utf8.RuneSelf {
|
||||
ascii = false
|
||||
break
|
||||
}
|
||||
}
|
||||
// ASCII fast path.
|
||||
if ascii {
|
||||
for _, f := range p.options.additional {
|
||||
if err = b.apply(f()); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
switch {
|
||||
case p.options.asciiLower || (comparing && p.options.ignorecase):
|
||||
for i, c := range b.src {
|
||||
if 'A' <= c && c <= 'Z' {
|
||||
b.src[i] = c ^ 1<<5
|
||||
}
|
||||
}
|
||||
case p.options.cases != nil:
|
||||
b.apply(p.options.cases)
|
||||
}
|
||||
c := checker{p: p}
|
||||
if _, err := c.span(b.src, true); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if p.disallow != nil {
|
||||
for _, c := range b.src {
|
||||
if p.disallow.Contains(rune(c)) {
|
||||
return nil, errDisallowedRune
|
||||
}
|
||||
}
|
||||
}
|
||||
if p.options.disallowEmpty && len(b.src) == 0 {
|
||||
return nil, errEmptyString
|
||||
}
|
||||
return b.src, nil
|
||||
}
|
||||
|
||||
// These transforms are applied in the order defined in
|
||||
// https://tools.ietf.org/html/rfc7564#section-7
|
||||
|
||||
// TODO: allow different width transforms options.
|
||||
if p.options.foldWidth || (p.options.ignorecase && comparing) {
|
||||
b.apply(foldWidthT)
|
||||
}
|
||||
for _, f := range p.options.additional {
|
||||
if err = b.apply(f()); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
if p.options.cases != nil {
|
||||
b.apply(p.options.cases)
|
||||
}
|
||||
if comparing && p.options.ignorecase {
|
||||
b.apply(lowerCaseT)
|
||||
}
|
||||
b.apply(p.norm)
|
||||
if p.options.bidiRule && !bidirule.Valid(b.src) {
|
||||
return nil, bidirule.ErrInvalid
|
||||
}
|
||||
c := checker{p: p}
|
||||
if _, err := c.span(b.src, true); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if p.disallow != nil {
|
||||
for i := 0; i < len(b.src); {
|
||||
r, size := utf8.DecodeRune(b.src[i:])
|
||||
if p.disallow.Contains(r) {
|
||||
return nil, errDisallowedRune
|
||||
}
|
||||
i += size
|
||||
}
|
||||
}
|
||||
if p.options.disallowEmpty && len(b.src) == 0 {
|
||||
return nil, errEmptyString
|
||||
}
|
||||
return b.src, nil
|
||||
}
|
||||
|
||||
// Append appends the result of applying p to src writing the result to dst.
|
||||
// It returns an error if the input string is invalid.
|
||||
func (p *Profile) Append(dst, src []byte) ([]byte, error) {
|
||||
var buf buffers
|
||||
b, err := buf.enforce(p, src, false)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return append(dst, b...), nil
|
||||
}
|
||||
|
||||
func processBytes(p *Profile, b []byte, key bool) ([]byte, error) {
|
||||
var buf buffers
|
||||
b, err := buf.enforce(p, b, key)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if buf.next == 0 {
|
||||
c := make([]byte, len(b))
|
||||
copy(c, b)
|
||||
return c, nil
|
||||
}
|
||||
return b, nil
|
||||
}
|
||||
|
||||
// Bytes returns a new byte slice with the result of applying the profile to b.
|
||||
func (p *Profile) Bytes(b []byte) ([]byte, error) {
|
||||
return processBytes(p, b, false)
|
||||
}
|
||||
|
||||
// AppendCompareKey appends the result of applying p to src (including any
|
||||
// optional rules to make strings comparable or useful in a map key such as
|
||||
// applying lowercasing) writing the result to dst. It returns an error if the
|
||||
// input string is invalid.
|
||||
func (p *Profile) AppendCompareKey(dst, src []byte) ([]byte, error) {
|
||||
var buf buffers
|
||||
b, err := buf.enforce(p, src, true)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return append(dst, b...), nil
|
||||
}
|
||||
|
||||
func processString(p *Profile, s string, key bool) (string, error) {
|
||||
var buf buffers
|
||||
b, err := buf.enforce(p, []byte(s), key)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return string(b), nil
|
||||
}
|
||||
|
||||
// String returns a string with the result of applying the profile to s.
|
||||
func (p *Profile) String(s string) (string, error) {
|
||||
return processString(p, s, false)
|
||||
}
|
||||
|
||||
// CompareKey returns a string that can be used for comparison, hashing, or
|
||||
// collation.
|
||||
func (p *Profile) CompareKey(s string) (string, error) {
|
||||
return processString(p, s, true)
|
||||
}
|
||||
|
||||
// Compare enforces both strings, and then compares them for bit-string identity
|
||||
// (byte-for-byte equality). If either string cannot be enforced, the comparison
|
||||
// is false.
|
||||
func (p *Profile) Compare(a, b string) bool {
|
||||
var buf buffers
|
||||
|
||||
akey, err := buf.enforce(p, []byte(a), true)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
|
||||
buf = buffers{}
|
||||
bkey, err := buf.enforce(p, []byte(b), true)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
|
||||
return bytes.Compare(akey, bkey) == 0
|
||||
}
|
||||
|
||||
// Allowed returns a runes.Set containing every rune that is a member of the
|
||||
// underlying profile's string class and not disallowed by any profile specific
|
||||
// rules.
|
||||
func (p *Profile) Allowed() runes.Set {
|
||||
if p.options.disallow != nil {
|
||||
return runes.Predicate(func(r rune) bool {
|
||||
return p.class.Contains(r) && !p.options.disallow.Contains(r)
|
||||
})
|
||||
}
|
||||
return p.class
|
||||
}
|
||||
|
||||
type checker struct {
|
||||
p *Profile
|
||||
allowed runes.Set
|
||||
|
||||
beforeBits catBitmap
|
||||
termBits catBitmap
|
||||
acceptBits catBitmap
|
||||
}
|
||||
|
||||
func (c *checker) Reset() {
|
||||
c.beforeBits = 0
|
||||
c.termBits = 0
|
||||
c.acceptBits = 0
|
||||
}
|
||||
|
||||
func (c *checker) span(src []byte, atEOF bool) (n int, err error) {
|
||||
for n < len(src) {
|
||||
e, sz := dpTrie.lookup(src[n:])
|
||||
d := categoryTransitions[category(e&catMask)]
|
||||
if sz == 0 {
|
||||
if !atEOF {
|
||||
return n, transform.ErrShortSrc
|
||||
}
|
||||
return n, errDisallowedRune
|
||||
}
|
||||
doLookAhead := false
|
||||
if property(e) < c.p.class.validFrom {
|
||||
if d.rule == nil {
|
||||
return n, errDisallowedRune
|
||||
}
|
||||
doLookAhead, err = d.rule(c.beforeBits)
|
||||
if err != nil {
|
||||
return n, err
|
||||
}
|
||||
}
|
||||
c.beforeBits &= d.keep
|
||||
c.beforeBits |= d.set
|
||||
if c.termBits != 0 {
|
||||
// We are currently in an unterminated lookahead.
|
||||
if c.beforeBits&c.termBits != 0 {
|
||||
c.termBits = 0
|
||||
c.acceptBits = 0
|
||||
} else if c.beforeBits&c.acceptBits == 0 {
|
||||
// Invalid continuation of the unterminated lookahead sequence.
|
||||
return n, errContext
|
||||
}
|
||||
}
|
||||
if doLookAhead {
|
||||
if c.termBits != 0 {
|
||||
// A previous lookahead run has not been terminated yet.
|
||||
return n, errContext
|
||||
}
|
||||
c.termBits = d.term
|
||||
c.acceptBits = d.accept
|
||||
}
|
||||
n += sz
|
||||
}
|
||||
if m := c.beforeBits >> finalShift; c.beforeBits&m != m || c.termBits != 0 {
|
||||
err = errContext
|
||||
}
|
||||
return n, err
|
||||
}
|
||||
|
||||
// TODO: we may get rid of this transform if transform.Chain understands
|
||||
// something like a Spanner interface.
|
||||
func (c checker) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
short := false
|
||||
if len(dst) < len(src) {
|
||||
src = src[:len(dst)]
|
||||
atEOF = false
|
||||
short = true
|
||||
}
|
||||
nSrc, err = c.span(src, atEOF)
|
||||
nDst = copy(dst, src[:nSrc])
|
||||
if short && (err == transform.ErrShortSrc || err == nil) {
|
||||
err = transform.ErrShortDst
|
||||
}
|
||||
return nDst, nSrc, err
|
||||
}
|
|
@ -1,78 +0,0 @@
|
|||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package precis
|
||||
|
||||
import (
|
||||
"unicode"
|
||||
|
||||
"golang.org/x/text/runes"
|
||||
"golang.org/x/text/transform"
|
||||
"golang.org/x/text/unicode/norm"
|
||||
)
|
||||
|
||||
var (
|
||||
// Implements the Nickname profile specified in RFC 7700.
|
||||
// The nickname profile is not idempotent and may need to be applied multiple
|
||||
// times before being used for comparisons.
|
||||
Nickname *Profile = nickname
|
||||
|
||||
// Implements the UsernameCaseMapped profile specified in RFC 7613.
|
||||
UsernameCaseMapped *Profile = usernameCaseMap
|
||||
|
||||
// Implements the UsernameCasePreserved profile specified in RFC 7613.
|
||||
UsernameCasePreserved *Profile = usernameNoCaseMap
|
||||
|
||||
// Implements the OpaqueString profile defined in RFC 7613 for passwords and other secure labels.
|
||||
OpaqueString *Profile = opaquestring
|
||||
)
|
||||
|
||||
var (
|
||||
nickname = &Profile{
|
||||
options: getOpts(
|
||||
AdditionalMapping(func() transform.Transformer {
|
||||
return &nickAdditionalMapping{}
|
||||
}),
|
||||
IgnoreCase,
|
||||
Norm(norm.NFKC),
|
||||
DisallowEmpty,
|
||||
),
|
||||
class: freeform,
|
||||
}
|
||||
usernameCaseMap = &Profile{
|
||||
options: getOpts(
|
||||
FoldWidth,
|
||||
LowerCase(),
|
||||
Norm(norm.NFC),
|
||||
BidiRule,
|
||||
),
|
||||
class: identifier,
|
||||
}
|
||||
usernameNoCaseMap = &Profile{
|
||||
options: getOpts(
|
||||
FoldWidth,
|
||||
Norm(norm.NFC),
|
||||
BidiRule,
|
||||
),
|
||||
class: identifier,
|
||||
}
|
||||
opaquestring = &Profile{
|
||||
options: getOpts(
|
||||
AdditionalMapping(func() transform.Transformer {
|
||||
return mapSpaces
|
||||
}),
|
||||
Norm(norm.NFC),
|
||||
DisallowEmpty,
|
||||
),
|
||||
class: freeform,
|
||||
}
|
||||
)
|
||||
|
||||
// mapSpaces is a shared value of a runes.Map transformer.
|
||||
var mapSpaces transform.Transformer = runes.Map(func(r rune) rune {
|
||||
if unicode.Is(unicode.Zs, r) {
|
||||
return ' '
|
||||
}
|
||||
return r
|
||||
})
|
File diff suppressed because it is too large
Load Diff
|
@ -1,32 +0,0 @@
|
|||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package precis
|
||||
|
||||
import "golang.org/x/text/transform"
|
||||
|
||||
// Transformer implements the transform.Transformer interface.
|
||||
type Transformer struct {
|
||||
t transform.Transformer
|
||||
}
|
||||
|
||||
// Reset implements the transform.Transformer interface.
|
||||
func (t Transformer) Reset() { t.t.Reset() }
|
||||
|
||||
// Transform implements the transform.Transformer interface.
|
||||
func (t Transformer) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
return t.t.Transform(dst, src, atEOF)
|
||||
}
|
||||
|
||||
// Bytes returns a new byte slice with the result of applying t to b.
|
||||
func (t Transformer) Bytes(b []byte) []byte {
|
||||
b, _, _ = transform.Bytes(t, b)
|
||||
return b
|
||||
}
|
||||
|
||||
// String returns a string with the result of applying t to s.
|
||||
func (t Transformer) String(s string) string {
|
||||
s, _, _ = transform.String(t, s)
|
||||
return s
|
||||
}
|
|
@ -1,64 +0,0 @@
|
|||
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
|
||||
|
||||
package precis
|
||||
|
||||
// entry is the entry of a trie table
|
||||
// 7..6 property (unassigned, disallowed, maybe, valid)
|
||||
// 5..0 category
|
||||
type entry uint8
|
||||
|
||||
const (
|
||||
propShift = 6
|
||||
propMask = 0xc0
|
||||
catMask = 0x3f
|
||||
)
|
||||
|
||||
func (e entry) property() property { return property(e & propMask) }
|
||||
func (e entry) category() category { return category(e & catMask) }
|
||||
|
||||
type property uint8
|
||||
|
||||
// The order of these constants matter. A Profile may consider runes to be
|
||||
// allowed either from pValid or idDisOrFreePVal.
|
||||
const (
|
||||
unassigned property = iota << propShift
|
||||
disallowed
|
||||
idDisOrFreePVal // disallowed for Identifier, pValid for FreeForm
|
||||
pValid
|
||||
)
|
||||
|
||||
// compute permutations of all properties and specialCategories.
|
||||
type category uint8
|
||||
|
||||
const (
|
||||
other category = iota
|
||||
|
||||
// Special rune types
|
||||
joiningL
|
||||
joiningD
|
||||
joiningT
|
||||
joiningR
|
||||
viramaModifier
|
||||
viramaJoinT // Virama + JoiningT
|
||||
latinSmallL // U+006c
|
||||
greek
|
||||
greekJoinT // Greek + JoiningT
|
||||
hebrew
|
||||
hebrewJoinT // Hebrew + JoiningT
|
||||
japanese // hirigana, katakana, han
|
||||
|
||||
// Special rune types associated with contextual rules defined in
|
||||
// https://tools.ietf.org/html/rfc5892#appendix-A.
|
||||
// ContextO
|
||||
zeroWidthNonJoiner // rule 1
|
||||
zeroWidthJoiner // rule 2
|
||||
// ContextJ
|
||||
middleDot // rule 3
|
||||
greekLowerNumeralSign // rule 4
|
||||
hebrewPreceding // rule 5 and 6
|
||||
katakanaMiddleDot // rule 7
|
||||
arabicIndicDigit // rule 8
|
||||
extendedArabicIndicDigit // rule 9
|
||||
|
||||
numCategories
|
||||
)
|
|
@ -60,7 +60,7 @@ github.com/Microsoft/hcsshim/internal/wclayer
|
|||
github.com/NYTimes/gziphandler
|
||||
# github.com/Nvveen/Gotty v0.0.0-20120604004816-cd527374f1e5 => github.com/Nvveen/Gotty v0.0.0-20120604004816-cd527374f1e5
|
||||
github.com/Nvveen/Gotty
|
||||
# github.com/PuerkitoBio/purell v1.1.0 => github.com/PuerkitoBio/purell v1.0.0
|
||||
# github.com/PuerkitoBio/purell v1.1.0 => github.com/PuerkitoBio/purell v1.1.0
|
||||
github.com/PuerkitoBio/purell
|
||||
# github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 => github.com/PuerkitoBio/urlesc v0.0.0-20160726150825-5bd2802263f2
|
||||
github.com/PuerkitoBio/urlesc
|
||||
|
@ -957,7 +957,6 @@ golang.org/x/sys/windows
|
|||
golang.org/x/sys/windows/registry
|
||||
golang.org/x/sys/windows/svc
|
||||
# golang.org/x/text v0.3.0 => golang.org/x/text v0.0.0-20170810154203-b19bf474d317
|
||||
golang.org/x/text/cases
|
||||
golang.org/x/text/encoding
|
||||
golang.org/x/text/encoding/charmap
|
||||
golang.org/x/text/encoding/htmlindex
|
||||
|
@ -968,13 +967,11 @@ golang.org/x/text/encoding/korean
|
|||
golang.org/x/text/encoding/simplifiedchinese
|
||||
golang.org/x/text/encoding/traditionalchinese
|
||||
golang.org/x/text/encoding/unicode
|
||||
golang.org/x/text/internal
|
||||
golang.org/x/text/internal/tag
|
||||
golang.org/x/text/internal/utf8internal
|
||||
golang.org/x/text/language
|
||||
golang.org/x/text/runes
|
||||
golang.org/x/text/secure/bidirule
|
||||
golang.org/x/text/secure/precis
|
||||
golang.org/x/text/transform
|
||||
golang.org/x/text/unicode/bidi
|
||||
golang.org/x/text/unicode/norm
|
||||
|
|
Loading…
Reference in New Issue