github.com/PuerkitoBio/purell v1.1.0

Used only by github.com/go-openapi/..., all expecting v1.1.0
k3s-v1.15.3
Jordan Liggitt 2019-04-05 10:04:29 -04:00
parent 7a47bc3d1d
commit 5627034673
52 changed files with 51 additions and 10594 deletions

2
go.mod
View File

@ -224,7 +224,7 @@ replace (
github.com/Microsoft/hcsshim => github.com/Microsoft/hcsshim v0.0.0-20190110205307-69ac8d3f7fc1
github.com/NYTimes/gziphandler => github.com/NYTimes/gziphandler v0.0.0-20170623195520-56545f4a5d46
github.com/Nvveen/Gotty => github.com/Nvveen/Gotty v0.0.0-20120604004816-cd527374f1e5
github.com/PuerkitoBio/purell => github.com/PuerkitoBio/purell v1.0.0
github.com/PuerkitoBio/purell => github.com/PuerkitoBio/purell v1.1.0
github.com/PuerkitoBio/urlesc => github.com/PuerkitoBio/urlesc v0.0.0-20160726150825-5bd2802263f2
github.com/Rican7/retry => github.com/Rican7/retry v0.0.0-20160712041035-272ad122d6e5
github.com/alecthomas/template => github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc

4
go.sum
View File

@ -26,8 +26,8 @@ github.com/NYTimes/gziphandler v0.0.0-20170623195520-56545f4a5d46 h1:lsxEuwrXEAo
github.com/NYTimes/gziphandler v0.0.0-20170623195520-56545f4a5d46/go.mod h1:3wb06e3pkSAbeQ52E9H9iFoQsEEwGN64994WTCIhntQ=
github.com/Nvveen/Gotty v0.0.0-20120604004816-cd527374f1e5 h1:TngWCqHvy9oXAN6lEVMRuU21PR1EtLVZJmdB18Gu3Rw=
github.com/Nvveen/Gotty v0.0.0-20120604004816-cd527374f1e5/go.mod h1:lmUJ/7eu/Q8D7ML55dXQrVaamCz2vxCfdQBasLZfHKk=
github.com/PuerkitoBio/purell v1.0.0 h1:0GoNN3taZV6QI81IXgCbxMyEaJDXMSIjArYBCYzVVvs=
github.com/PuerkitoBio/purell v1.0.0/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0=
github.com/PuerkitoBio/purell v1.1.0 h1:rmGxhojJlM0tuKtfdvliR84CFHljx9ag64t2xmVkjK4=
github.com/PuerkitoBio/purell v1.1.0/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0=
github.com/PuerkitoBio/urlesc v0.0.0-20160726150825-5bd2802263f2 h1:JCHLVE3B+kJde7bIEo5N4J+ZbLhp0J1Fs+ulyRws4gE=
github.com/PuerkitoBio/urlesc v0.0.0-20160726150825-5bd2802263f2/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE=
github.com/Rican7/retry v0.0.0-20160712041035-272ad122d6e5 h1:lL761TuQsL4xn4Pn0CNk9YrPvrT8kzBMvW4YgCB7HMg=

View File

@ -43,7 +43,7 @@ replace (
github.com/Azure/go-autorest => github.com/Azure/go-autorest v11.1.0+incompatible
github.com/BurntSushi/toml => github.com/BurntSushi/toml v0.3.0
github.com/NYTimes/gziphandler => github.com/NYTimes/gziphandler v0.0.0-20170623195520-56545f4a5d46
github.com/PuerkitoBio/purell => github.com/PuerkitoBio/purell v1.0.0
github.com/PuerkitoBio/purell => github.com/PuerkitoBio/purell v1.1.0
github.com/PuerkitoBio/urlesc => github.com/PuerkitoBio/urlesc v0.0.0-20160726150825-5bd2802263f2
github.com/alecthomas/template => github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc
github.com/alecthomas/units => github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf

View File

@ -5,8 +5,8 @@ github.com/BurntSushi/toml v0.3.0 h1:e1/Ivsx3Z0FVTV0NSOv/aVgbUWyQuzj7DDnFblkRvsY
github.com/BurntSushi/toml v0.3.0/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/NYTimes/gziphandler v0.0.0-20170623195520-56545f4a5d46 h1:lsxEuwrXEAokXB9qhlbKWPpo3KMLZQ5WB5WLQRW1uq0=
github.com/NYTimes/gziphandler v0.0.0-20170623195520-56545f4a5d46/go.mod h1:3wb06e3pkSAbeQ52E9H9iFoQsEEwGN64994WTCIhntQ=
github.com/PuerkitoBio/purell v1.0.0 h1:0GoNN3taZV6QI81IXgCbxMyEaJDXMSIjArYBCYzVVvs=
github.com/PuerkitoBio/purell v1.0.0/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0=
github.com/PuerkitoBio/purell v1.1.0 h1:rmGxhojJlM0tuKtfdvliR84CFHljx9ag64t2xmVkjK4=
github.com/PuerkitoBio/purell v1.1.0/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0=
github.com/PuerkitoBio/urlesc v0.0.0-20160726150825-5bd2802263f2 h1:JCHLVE3B+kJde7bIEo5N4J+ZbLhp0J1Fs+ulyRws4gE=
github.com/PuerkitoBio/urlesc v0.0.0-20160726150825-5bd2802263f2/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE=
github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=

View File

@ -81,7 +81,7 @@ replace (
github.com/Azure/go-autorest => github.com/Azure/go-autorest v11.1.0+incompatible
github.com/BurntSushi/toml => github.com/BurntSushi/toml v0.3.0
github.com/NYTimes/gziphandler => github.com/NYTimes/gziphandler v0.0.0-20170623195520-56545f4a5d46
github.com/PuerkitoBio/purell => github.com/PuerkitoBio/purell v1.0.0
github.com/PuerkitoBio/purell => github.com/PuerkitoBio/purell v1.1.0
github.com/PuerkitoBio/urlesc => github.com/PuerkitoBio/urlesc v0.0.0-20160726150825-5bd2802263f2
github.com/alecthomas/template => github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc
github.com/alecthomas/units => github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf

View File

@ -6,8 +6,8 @@ github.com/BurntSushi/toml v0.3.0 h1:e1/Ivsx3Z0FVTV0NSOv/aVgbUWyQuzj7DDnFblkRvsY
github.com/BurntSushi/toml v0.3.0/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/NYTimes/gziphandler v0.0.0-20170623195520-56545f4a5d46 h1:lsxEuwrXEAokXB9qhlbKWPpo3KMLZQ5WB5WLQRW1uq0=
github.com/NYTimes/gziphandler v0.0.0-20170623195520-56545f4a5d46/go.mod h1:3wb06e3pkSAbeQ52E9H9iFoQsEEwGN64994WTCIhntQ=
github.com/PuerkitoBio/purell v1.0.0 h1:0GoNN3taZV6QI81IXgCbxMyEaJDXMSIjArYBCYzVVvs=
github.com/PuerkitoBio/purell v1.0.0/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0=
github.com/PuerkitoBio/purell v1.1.0 h1:rmGxhojJlM0tuKtfdvliR84CFHljx9ag64t2xmVkjK4=
github.com/PuerkitoBio/purell v1.1.0/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0=
github.com/PuerkitoBio/urlesc v0.0.0-20160726150825-5bd2802263f2 h1:JCHLVE3B+kJde7bIEo5N4J+ZbLhp0J1Fs+ulyRws4gE=
github.com/PuerkitoBio/urlesc v0.0.0-20160726150825-5bd2802263f2/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE=
github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=

View File

@ -29,7 +29,7 @@ require (
replace (
cloud.google.com/go => cloud.google.com/go v0.0.0-20160913182117-3b1ae45394a2
github.com/Azure/go-autorest => github.com/Azure/go-autorest v11.1.0+incompatible
github.com/PuerkitoBio/purell => github.com/PuerkitoBio/purell v1.0.0
github.com/PuerkitoBio/purell => github.com/PuerkitoBio/purell v1.1.0
github.com/PuerkitoBio/urlesc => github.com/PuerkitoBio/urlesc v0.0.0-20160726150825-5bd2802263f2
github.com/davecgh/go-spew => github.com/davecgh/go-spew v0.0.0-20170626231645-782f4967f2dc
github.com/dgrijalva/jwt-go => github.com/dgrijalva/jwt-go v0.0.0-20160705203006-01aeca54ebda

View File

@ -1,7 +1,7 @@
cloud.google.com/go v0.0.0-20160913182117-3b1ae45394a2/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
github.com/Azure/go-autorest v11.1.0+incompatible/go.mod h1:r+4oMnoxhatjLLJ6zxSWATqVooLgysK6ZNox3g/xq24=
github.com/PuerkitoBio/purell v1.0.0 h1:0GoNN3taZV6QI81IXgCbxMyEaJDXMSIjArYBCYzVVvs=
github.com/PuerkitoBio/purell v1.0.0/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0=
github.com/PuerkitoBio/purell v1.1.0 h1:rmGxhojJlM0tuKtfdvliR84CFHljx9ag64t2xmVkjK4=
github.com/PuerkitoBio/purell v1.1.0/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0=
github.com/PuerkitoBio/urlesc v0.0.0-20160726150825-5bd2802263f2 h1:JCHLVE3B+kJde7bIEo5N4J+ZbLhp0J1Fs+ulyRws4gE=
github.com/PuerkitoBio/urlesc v0.0.0-20160726150825-5bd2802263f2/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE=
github.com/davecgh/go-spew v0.0.0-20170626231645-782f4967f2dc h1:0A0n6a0Y3vW5ktoWKC+ggkGXRzMJWMvqIYlFmsjwQzY=

View File

@ -19,7 +19,7 @@ replace (
github.com/Azure/go-autorest => github.com/Azure/go-autorest v11.1.0+incompatible
github.com/BurntSushi/toml => github.com/BurntSushi/toml v0.3.0
github.com/NYTimes/gziphandler => github.com/NYTimes/gziphandler v0.0.0-20170623195520-56545f4a5d46
github.com/PuerkitoBio/purell => github.com/PuerkitoBio/purell v1.0.0
github.com/PuerkitoBio/purell => github.com/PuerkitoBio/purell v1.1.0
github.com/PuerkitoBio/urlesc => github.com/PuerkitoBio/urlesc v0.0.0-20160726150825-5bd2802263f2
github.com/alecthomas/template => github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc
github.com/alecthomas/units => github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf

View File

@ -3,7 +3,7 @@ github.com/Azure/go-ansiterm v0.0.0-20170929234023-d6e3b3328b78/go.mod h1:LmzpDX
github.com/Azure/go-autorest v11.1.0+incompatible/go.mod h1:r+4oMnoxhatjLLJ6zxSWATqVooLgysK6ZNox3g/xq24=
github.com/BurntSushi/toml v0.3.0/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/NYTimes/gziphandler v0.0.0-20170623195520-56545f4a5d46/go.mod h1:3wb06e3pkSAbeQ52E9H9iFoQsEEwGN64994WTCIhntQ=
github.com/PuerkitoBio/purell v1.0.0/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0=
github.com/PuerkitoBio/purell v1.1.0/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0=
github.com/PuerkitoBio/urlesc v0.0.0-20160726150825-5bd2802263f2/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE=
github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=

View File

@ -16,7 +16,7 @@ replace (
github.com/Azure/go-autorest => github.com/Azure/go-autorest v11.1.0+incompatible
github.com/BurntSushi/toml => github.com/BurntSushi/toml v0.3.0
github.com/NYTimes/gziphandler => github.com/NYTimes/gziphandler v0.0.0-20170623195520-56545f4a5d46
github.com/PuerkitoBio/purell => github.com/PuerkitoBio/purell v1.0.0
github.com/PuerkitoBio/purell => github.com/PuerkitoBio/purell v1.1.0
github.com/PuerkitoBio/urlesc => github.com/PuerkitoBio/urlesc v0.0.0-20160726150825-5bd2802263f2
github.com/alecthomas/template => github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc
github.com/alecthomas/units => github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf

View File

@ -3,7 +3,7 @@ github.com/Azure/go-ansiterm v0.0.0-20170929234023-d6e3b3328b78/go.mod h1:LmzpDX
github.com/Azure/go-autorest v11.1.0+incompatible/go.mod h1:r+4oMnoxhatjLLJ6zxSWATqVooLgysK6ZNox3g/xq24=
github.com/BurntSushi/toml v0.3.0/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/NYTimes/gziphandler v0.0.0-20170623195520-56545f4a5d46/go.mod h1:3wb06e3pkSAbeQ52E9H9iFoQsEEwGN64994WTCIhntQ=
github.com/PuerkitoBio/purell v1.0.0/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0=
github.com/PuerkitoBio/purell v1.1.0/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0=
github.com/PuerkitoBio/urlesc v0.0.0-20160726150825-5bd2802263f2/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE=
github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=

View File

@ -32,7 +32,7 @@ replace (
github.com/Azure/go-autorest => github.com/Azure/go-autorest v11.1.0+incompatible
github.com/BurntSushi/toml => github.com/BurntSushi/toml v0.3.0
github.com/NYTimes/gziphandler => github.com/NYTimes/gziphandler v0.0.0-20170623195520-56545f4a5d46
github.com/PuerkitoBio/purell => github.com/PuerkitoBio/purell v1.0.0
github.com/PuerkitoBio/purell => github.com/PuerkitoBio/purell v1.1.0
github.com/PuerkitoBio/urlesc => github.com/PuerkitoBio/urlesc v0.0.0-20160726150825-5bd2802263f2
github.com/alecthomas/template => github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc
github.com/alecthomas/units => github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf

View File

@ -5,8 +5,8 @@ github.com/BurntSushi/toml v0.3.0 h1:e1/Ivsx3Z0FVTV0NSOv/aVgbUWyQuzj7DDnFblkRvsY
github.com/BurntSushi/toml v0.3.0/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/NYTimes/gziphandler v0.0.0-20170623195520-56545f4a5d46 h1:lsxEuwrXEAokXB9qhlbKWPpo3KMLZQ5WB5WLQRW1uq0=
github.com/NYTimes/gziphandler v0.0.0-20170623195520-56545f4a5d46/go.mod h1:3wb06e3pkSAbeQ52E9H9iFoQsEEwGN64994WTCIhntQ=
github.com/PuerkitoBio/purell v1.0.0 h1:0GoNN3taZV6QI81IXgCbxMyEaJDXMSIjArYBCYzVVvs=
github.com/PuerkitoBio/purell v1.0.0/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0=
github.com/PuerkitoBio/purell v1.1.0 h1:rmGxhojJlM0tuKtfdvliR84CFHljx9ag64t2xmVkjK4=
github.com/PuerkitoBio/purell v1.1.0/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0=
github.com/PuerkitoBio/urlesc v0.0.0-20160726150825-5bd2802263f2 h1:JCHLVE3B+kJde7bIEo5N4J+ZbLhp0J1Fs+ulyRws4gE=
github.com/PuerkitoBio/urlesc v0.0.0-20160726150825-5bd2802263f2/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE=
github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=

View File

@ -22,7 +22,7 @@ replace (
github.com/Azure/go-autorest => github.com/Azure/go-autorest v11.1.0+incompatible
github.com/BurntSushi/toml => github.com/BurntSushi/toml v0.3.0
github.com/NYTimes/gziphandler => github.com/NYTimes/gziphandler v0.0.0-20170623195520-56545f4a5d46
github.com/PuerkitoBio/purell => github.com/PuerkitoBio/purell v1.0.0
github.com/PuerkitoBio/purell => github.com/PuerkitoBio/purell v1.1.0
github.com/PuerkitoBio/urlesc => github.com/PuerkitoBio/urlesc v0.0.0-20160726150825-5bd2802263f2
github.com/alecthomas/template => github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc
github.com/alecthomas/units => github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf

View File

@ -5,8 +5,8 @@ github.com/BurntSushi/toml v0.3.0 h1:e1/Ivsx3Z0FVTV0NSOv/aVgbUWyQuzj7DDnFblkRvsY
github.com/BurntSushi/toml v0.3.0/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/NYTimes/gziphandler v0.0.0-20170623195520-56545f4a5d46 h1:lsxEuwrXEAokXB9qhlbKWPpo3KMLZQ5WB5WLQRW1uq0=
github.com/NYTimes/gziphandler v0.0.0-20170623195520-56545f4a5d46/go.mod h1:3wb06e3pkSAbeQ52E9H9iFoQsEEwGN64994WTCIhntQ=
github.com/PuerkitoBio/purell v1.0.0 h1:0GoNN3taZV6QI81IXgCbxMyEaJDXMSIjArYBCYzVVvs=
github.com/PuerkitoBio/purell v1.0.0/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0=
github.com/PuerkitoBio/purell v1.1.0 h1:rmGxhojJlM0tuKtfdvliR84CFHljx9ag64t2xmVkjK4=
github.com/PuerkitoBio/purell v1.1.0/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0=
github.com/PuerkitoBio/urlesc v0.0.0-20160726150825-5bd2802263f2 h1:JCHLVE3B+kJde7bIEo5N4J+ZbLhp0J1Fs+ulyRws4gE=
github.com/PuerkitoBio/urlesc v0.0.0-20160726150825-5bd2802263f2/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE=
github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=

View File

@ -14,7 +14,7 @@ require (
replace (
cloud.google.com/go => cloud.google.com/go v0.0.0-20160913182117-3b1ae45394a2
github.com/Azure/go-autorest => github.com/Azure/go-autorest v11.1.0+incompatible
github.com/PuerkitoBio/purell => github.com/PuerkitoBio/purell v1.0.0
github.com/PuerkitoBio/purell => github.com/PuerkitoBio/purell v1.1.0
github.com/PuerkitoBio/urlesc => github.com/PuerkitoBio/urlesc v0.0.0-20160726150825-5bd2802263f2
github.com/davecgh/go-spew => github.com/davecgh/go-spew v0.0.0-20170626231645-782f4967f2dc
github.com/dgrijalva/jwt-go => github.com/dgrijalva/jwt-go v0.0.0-20160705203006-01aeca54ebda

View File

@ -1,7 +1,7 @@
cloud.google.com/go v0.0.0-20160913182117-3b1ae45394a2/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
github.com/Azure/go-autorest v11.1.0+incompatible/go.mod h1:r+4oMnoxhatjLLJ6zxSWATqVooLgysK6ZNox3g/xq24=
github.com/PuerkitoBio/purell v1.0.0 h1:0GoNN3taZV6QI81IXgCbxMyEaJDXMSIjArYBCYzVVvs=
github.com/PuerkitoBio/purell v1.0.0/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0=
github.com/PuerkitoBio/purell v1.1.0 h1:rmGxhojJlM0tuKtfdvliR84CFHljx9ag64t2xmVkjK4=
github.com/PuerkitoBio/purell v1.1.0/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0=
github.com/PuerkitoBio/urlesc v0.0.0-20160726150825-5bd2802263f2 h1:JCHLVE3B+kJde7bIEo5N4J+ZbLhp0J1Fs+ulyRws4gE=
github.com/PuerkitoBio/urlesc v0.0.0-20160726150825-5bd2802263f2/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE=
github.com/davecgh/go-spew v0.0.0-20170626231645-782f4967f2dc h1:0A0n6a0Y3vW5ktoWKC+ggkGXRzMJWMvqIYlFmsjwQzY=

5
vendor/BUILD vendored
View File

@ -407,13 +407,12 @@ filegroup(
"//vendor/golang.org/x/oauth2:all-srcs",
"//vendor/golang.org/x/sys/unix:all-srcs",
"//vendor/golang.org/x/sys/windows:all-srcs",
"//vendor/golang.org/x/text/cases:all-srcs",
"//vendor/golang.org/x/text/encoding:all-srcs",
"//vendor/golang.org/x/text/internal:all-srcs",
"//vendor/golang.org/x/text/internal/tag:all-srcs",
"//vendor/golang.org/x/text/internal/utf8internal:all-srcs",
"//vendor/golang.org/x/text/language:all-srcs",
"//vendor/golang.org/x/text/runes:all-srcs",
"//vendor/golang.org/x/text/secure/bidirule:all-srcs",
"//vendor/golang.org/x/text/secure/precis:all-srcs",
"//vendor/golang.org/x/text/transform:all-srcs",
"//vendor/golang.org/x/text/unicode/bidi:all-srcs",
"//vendor/golang.org/x/text/unicode/norm:all-srcs",

View File

@ -9,8 +9,8 @@ go_library(
deps = [
"//vendor/github.com/PuerkitoBio/urlesc:go_default_library",
"//vendor/golang.org/x/net/idna:go_default_library",
"//vendor/golang.org/x/text/secure/precis:go_default_library",
"//vendor/golang.org/x/text/unicode/norm:go_default_library",
"//vendor/golang.org/x/text/width:go_default_library",
],
)

View File

@ -12,6 +12,7 @@ Based on the [wikipedia paper][wiki] and the [RFC 3986 document][rfc].
## Changelog
* **2016-11-14 (v1.1.0)** : IDN: Conform to RFC 5895: Fold character width (thanks to @beeker1121).
* **2016-07-27 (v1.0.0)** : Normalize IDN to ASCII (thanks to @zenovich).
* **2015-02-08** : Add fix for relative paths issue ([PR #5][pr5]) and add fix for unnecessary encoding of reserved characters ([see issue #7][iss7]).
* **v0.2.0** : Add benchmarks, Attempt IDN support.
@ -172,6 +173,7 @@ And with `FlagsUnsafeGreedy`:
@opennota
@pchristopher1275
@zenovich
@beeker1121
## License

View File

@ -15,8 +15,8 @@ import (
"github.com/PuerkitoBio/urlesc"
"golang.org/x/net/idna"
"golang.org/x/text/secure/precis"
"golang.org/x/text/unicode/norm"
"golang.org/x/text/width"
)
// A set of normalization flags determines how a URL will
@ -150,22 +150,26 @@ func MustNormalizeURLString(u string, f NormalizationFlags) string {
// NormalizeURLString returns the normalized string, or an error if it can't be parsed into a URL object.
// It takes a URL string as input, as well as the normalization flags.
func NormalizeURLString(u string, f NormalizationFlags) (string, error) {
if parsed, e := url.Parse(u); e != nil {
return "", e
} else {
options := make([]precis.Option, 1, 3)
options[0] = precis.IgnoreCase
if f&FlagLowercaseHost == FlagLowercaseHost {
options = append(options, precis.FoldCase())
}
options = append(options, precis.Norm(norm.NFC))
profile := precis.NewFreeform(options...)
if parsed.Host, e = idna.ToASCII(profile.NewTransformer().String(parsed.Host)); e != nil {
return "", e
}
return NormalizeURL(parsed, f), nil
parsed, err := url.Parse(u)
if err != nil {
return "", err
}
panic("Unreachable code.")
if f&FlagLowercaseHost == FlagLowercaseHost {
parsed.Host = strings.ToLower(parsed.Host)
}
// The idna package doesn't fully conform to RFC 5895
// (https://tools.ietf.org/html/rfc5895), so we do it here.
// Taken from Go 1.8 cycle source, courtesy of bradfitz.
// TODO: Remove when (if?) idna package conforms to RFC 5895.
parsed.Host = width.Fold.String(parsed.Host)
parsed.Host = norm.NFC.String(parsed.Host)
if parsed.Host, err = idna.ToASCII(parsed.Host); err != nil {
return "", err
}
return NormalizeURL(parsed, f), nil
}
// NormalizeURL returns the normalized string.

38
vendor/golang.org/x/text/cases/BUILD generated vendored
View File

@ -1,38 +0,0 @@
load("@io_bazel_rules_go//go:def.bzl", "go_library")
go_library(
name = "go_default_library",
srcs = [
"cases.go",
"context.go",
"fold.go",
"info.go",
"map.go",
"tables.go",
"trieval.go",
],
cgo = True,
importmap = "k8s.io/kubernetes/vendor/golang.org/x/text/cases",
importpath = "golang.org/x/text/cases",
visibility = ["//visibility:public"],
deps = [
"//vendor/golang.org/x/text/internal:go_default_library",
"//vendor/golang.org/x/text/language:go_default_library",
"//vendor/golang.org/x/text/transform:go_default_library",
"//vendor/golang.org/x/text/unicode/norm:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)

View File

@ -1,162 +0,0 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:generate go run gen.go gen_trieval.go
// Package cases provides general and language-specific case mappers.
package cases // import "golang.org/x/text/cases"
import (
"golang.org/x/text/language"
"golang.org/x/text/transform"
)
// References:
// - Unicode Reference Manual Chapter 3.13, 4.2, and 5.18.
// - http://www.unicode.org/reports/tr29/
// - http://www.unicode.org/Public/6.3.0/ucd/CaseFolding.txt
// - http://www.unicode.org/Public/6.3.0/ucd/SpecialCasing.txt
// - http://www.unicode.org/Public/6.3.0/ucd/DerivedCoreProperties.txt
// - http://www.unicode.org/Public/6.3.0/ucd/auxiliary/WordBreakProperty.txt
// - http://www.unicode.org/Public/6.3.0/ucd/auxiliary/WordBreakTest.txt
// - http://userguide.icu-project.org/transforms/casemappings
// TODO:
// - Case folding
// - Wide and Narrow?
// - Segmenter option for title casing.
// - ASCII fast paths
// - Encode Soft-Dotted property within trie somehow.
// A Caser transforms given input to a certain case. It implements
// transform.Transformer.
//
// A Caser may be stateful and should therefore not be shared between
// goroutines.
type Caser struct {
// t is the underlying transformer; every Caser method delegates to it.
t transform.SpanningTransformer
}
// Bytes converts b to the case form implemented by c and returns the result
// as a new byte slice. The byte counts and error reported by the underlying
// transform are discarded.
func (c Caser) Bytes(b []byte) []byte {
	converted, _, _ := transform.Bytes(c.t, b)
	return converted
}
// String converts s to the case form implemented by c and returns the
// resulting string. The byte counts and error reported by the underlying
// transform are discarded.
func (c Caser) String(s string) string {
	converted, _, _ := transform.String(c.t, s)
	return converted
}
// Reset prepares the Caser for new input after a previous call to Transform
// by resetting the wrapped transformer.
func (c Caser) Reset() {
	c.t.Reset()
}
// Transform implements the transform.Transformer interface. It converts src
// into dst using the case form implemented by c by delegating to the wrapped
// transformer, and returns that transformer's results unchanged.
func (c Caser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
	nDst, nSrc, err = c.t.Transform(dst, src, atEOF)
	return nDst, nSrc, err
}
// Span implements the transform.SpanningTransformer interface by delegating
// to the wrapped transformer.
func (c Caser) Span(src []byte, atEOF bool) (n int, err error) {
	n, err = c.t.Span(src, atEOF)
	return n, err
}
// Upper returns a Caser for language-specific uppercasing. The given options
// are folded into a single options value before the transformer is built.
func Upper(t language.Tag, opts ...Option) Caser {
	o := getOpts(opts...)
	return Caser{t: makeUpper(t, o)}
}
// Lower returns a Caser for language-specific lowercasing. The given options
// are folded into a single options value before the transformer is built.
func Lower(t language.Tag, opts ...Option) Caser {
	o := getOpts(opts...)
	return Caser{t: makeLower(t, o)}
}
// Title returns a Caser for language-specific title casing. It uses an
// approximation of the default Unicode Word Break algorithm.
func Title(t language.Tag, opts ...Option) Caser {
	o := getOpts(opts...)
	return Caser{t: makeTitle(t, o)}
}
// Fold returns a Caser that implements Unicode case folding. The returned
// Caser is stateless and safe to use concurrently by multiple goroutines.
//
// Case folding does not normalize the input and may not preserve a normal
// form. Use the collate or search package for more convenient and
// linguistically sound comparisons. Use golang.org/x/text/secure/precis for
// string comparisons where security aspects are a concern.
func Fold(opts ...Option) Caser {
	o := getOpts(opts...)
	return Caser{t: makeFold(o)}
}
// An Option is used to modify the behavior of a Caser.
//
// It is a function that receives the options accumulated so far and returns
// the updated value; getOpts applies Options in the order they are given.
type Option func(o options) options
// TODO: consider these options to take a boolean as well, like FinalSigma.
// The advantage of using this approach is that other providers of a lower-case
// algorithm could set different defaults by prefixing a user-provided slice
// of options with their own. This is handy, for instance, for the precis
// package which would override the default to not handle the Greek final sigma.
var (
// NoLower disables the lowercasing of non-leading letters for a title
// caser. It sets the noLower field of the options.
NoLower Option = noLower
// Compact omits mappings in case folding for characters that would grow the
// input. (Unimplemented.) It sets the simple field of the options.
Compact Option = compact
)
// TODO: option to preserve a normal form, if applicable?
// options collects the settings produced by applying a list of Option
// functions; the zero value is the default configuration.
type options struct {
// noLower is set by the NoLower option.
noLower bool
// simple is set by the Compact option.
simple bool
// TODO: segmenter, max ignorable, alternative versions, etc.
// ignoreFinalSigma is set by HandleFinalSigma(false) and cleared by
// HandleFinalSigma(true).
ignoreFinalSigma bool
}
// getOpts folds the given Options, in order, into a single options value,
// starting from the zero value.
func getOpts(o ...Option) (res options) {
	for i := range o {
		res = o[i](res)
	}
	return res
}
// noLower returns a copy of o with the noLower flag set. It backs the
// exported NoLower option.
func noLower(o options) options {
	updated := o
	updated.noLower = true
	return updated
}
// compact returns a copy of o with the simple flag set. It backs the
// exported Compact option.
func compact(o options) options {
	updated := o
	updated.simple = true
	return updated
}
// HandleFinalSigma specifies whether the special handling of Greek final
// sigma should be enabled. Unicode prescribes handling the Greek final sigma
// for all locales, but standards like IDNA and PRECIS override this default.
func HandleFinalSigma(enable bool) Option {
	if !enable {
		return ignoreFinalSigma
	}
	return handleFinalSigma
}
// ignoreFinalSigma returns a copy of o with the ignoreFinalSigma flag set.
func ignoreFinalSigma(o options) options {
	updated := o
	updated.ignoreFinalSigma = true
	return updated
}
// handleFinalSigma returns a copy of o with the ignoreFinalSigma flag cleared.
func handleFinalSigma(o options) options {
	updated := o
	updated.ignoreFinalSigma = false
	return updated
}

View File

@ -1,376 +0,0 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package cases
import "golang.org/x/text/transform"
// A context is used for iterating over source bytes, fetching case info and
// writing to a destination buffer.
//
// Casing operations may need more than one rune of context to decide how a rune
// should be cased. Casing implementations should call checkpoint on context
// whenever it is known to be safe to return the runes processed so far.
//
// It is recommended for implementations to not allow for more than 30 case
// ignorables as lookahead (analogous to the limit in norm) and to use state if
// unbounded lookahead is needed for cased runes.
type context struct {
// dst is the output buffer written by writeBytes/writeString; src is the
// input scanned by next.
dst, src []byte
// atEOF reports whether src holds the final bytes of the input.
atEOF bool
pDst int // pDst points past the last written rune in dst.
pSrc int // pSrc points to the start of the currently scanned rune.
// checkpoints safe to return in Transform, where nDst <= pDst and nSrc <= pSrc.
nDst, nSrc int
// err is the first error encountered. Once it is set, next returns false
// and checkpoint no longer advances nDst/nSrc.
err error
sz int // size of current rune
info info // case information of currently scanned rune
// State preserved across calls to Transform.
isMidWord bool // false if next cased letter needs to be title-cased.
}
// Reset clears the state that is preserved across calls to Transform, so the
// next cased letter is treated as the start of a word.
func (c *context) Reset() {
c.isMidWord = false
}
// ret computes the return values for the Transform method. If an error was
// recorded, or the last checkpoint already covers all of src, the checkpoint
// positions and the recorded error are returned. Otherwise the source buffer
// was exhausted without a short destination buffer (next set c.sz to 0): at
// EOF the current positions are returned with a nil error, and in every
// other case transform.ErrShortSrc is reported at the last checkpoint.
func (c *context) ret() (nDst, nSrc int, err error) {
	switch {
	case c.err != nil || c.nSrc == len(c.src):
		return c.nDst, c.nSrc, c.err
	case c.atEOF && c.pSrc == len(c.src):
		return c.pDst, c.pSrc, nil
	default:
		return c.nDst, c.nSrc, transform.ErrShortSrc
	}
}
// retSpan computes the return values for the Span method: the source position
// and error that ret would report, without the destination position.
func (c *context) retSpan() (n int, err error) {
	_, n, err = c.ret()
	return n, err
}
// checkpoint records the current positions as the values Transform may
// safely return: everything written so far, and the source up to and
// including the current rune. It does nothing once an error is recorded.
func (c *context) checkpoint() {
	if c.err != nil {
		return
	}
	c.nDst = c.pDst
	c.nSrc = c.pSrc + c.sz
}
// unreadRune causes the last rune read by next to be reread on the next
// invocation of next. Only one unreadRune may be called after a call to next.
func (c *context) unreadRune() {
// next advances pSrc by sz, so a zero size leaves pSrc on the same rune.
c.sz = 0
}
// next advances past the current rune and loads the case info and size of
// the following one. It returns false, with info and sz zeroed, when src is
// exhausted or an error was already recorded. It also returns false, after
// recording transform.ErrShortSrc, when the next rune is incomplete and more
// input may follow.
func (c *context) next() bool {
	c.pSrc += c.sz
	if c.pSrc == len(c.src) || c.err != nil {
		c.info, c.sz = 0, 0
		return false
	}
	v, sz := trie.lookup(c.src[c.pSrc:])
	c.info, c.sz = info(v), sz
	if c.sz != 0 {
		return true
	}
	// A zero size means we have an incomplete rune.
	if !c.atEOF {
		c.err = transform.ErrShortSrc
		return false
	}
	// At EOF an incomplete rune is an illegal rune, which is consumed one
	// byte at a time.
	c.sz = 1
	return true
}
// writeBytes appends b to dst at pDst and advances pDst. If dst has too
// little room left, nothing is written, c.err is set to
// transform.ErrShortDst and false is returned.
func (c *context) writeBytes(b []byte) bool {
	if avail := len(c.dst) - c.pDst; avail < len(b) {
		c.err = transform.ErrShortDst
		return false
	}
	// This loop is faster than using copy.
	for i := range b {
		c.dst[c.pDst] = b[i]
		c.pDst++
	}
	return true
}
// writeString appends s to dst at pDst and advances pDst. If dst has too
// little room left, nothing is written, c.err is set to
// transform.ErrShortDst and false is returned.
func (c *context) writeString(s string) bool {
	if len(s) > len(c.dst)-c.pDst {
		c.err = transform.ErrShortDst
		return false
	}
	// This loop is faster than using copy.
	pos := c.pDst
	for i := 0; i < len(s); i++ {
		c.dst[pos] = s[i]
		pos++
	}
	c.pDst = pos
	return true
}
// copy writes the bytes of the current rune, unchanged, to dst. It returns
// false if dst is too short (see writeBytes).
func (c *context) copy() bool {
	start := c.pSrc
	end := start + c.sz
	return c.writeBytes(c.src[start:end])
}
// copyXOR copies the current rune to dst and modifies it by applying the XOR
// pattern of the case info. It is the responsibility of the caller to ensure
// that this is a rune with a XOR pattern defined.
//
// It returns false, leaving dst unmodified, if the copy itself fails.
func (c *context) copyXOR() bool {
if !c.copy() {
return false
}
if c.info&xorIndexBit == 0 {
// Fast path for 6-bit XOR pattern, which covers most cases.
// Only the last byte of the copied rune is modified.
c.dst[c.pDst-1] ^= byte(c.info >> xorShift)
} else {
// Interpret XOR bits as an index.
// TODO: test performance for unrolling this loop. Verify that we have
// at least two bytes and at most three.
idx := c.info >> xorShift
// Walk backwards from the last written byte, consuming xorData entries
// in decreasing index order until a zero entry terminates the pattern.
for p := c.pDst - 1; ; p-- {
c.dst[p] ^= xorData[idx]
idx--
if xorData[idx] == 0 {
break
}
}
}
return true
}
// hasPrefix reports whether the unread source, src[pSrc:], begins with s.
func (c *context) hasPrefix(s string) bool {
	rest := c.src[c.pSrc:]
	if len(rest) < len(s) {
		return false
	}
	for i := 0; i < len(s); i++ {
		if rest[i] != s[i] {
			return false
		}
	}
	return true
}
// caseType returns an info with only the case bits, normalized to either
// cLower, cUpper, cTitle or cUncased.
func (c *context) caseType() info {
// The case mode lives in the low three bits of info.
cm := c.info & 0x7
// Values below 4 are returned as they are.
if cm < 4 {
return cm
}
if cm >= cXORCase {
// xor the last bit of the rune with the case type bits.
// Note: & binds tighter than ^, so this is info(b&1) ^ (cm & 0x3).
b := c.src[c.pSrc+c.sz-1]
return info(b&1) ^ cm&0x3
}
if cm == cIgnorableCased {
return cLower
}
return cUncased
}
// lower writes the lowercase version of the current rune to dst.
// It returns false if the write failed; c.err then holds the reason.
func lower(c *context) bool {
ct := c.caseType()
// No mapping bits set, or the rune is already lowercase: copy it as is.
if c.info&hasMappingMask == 0 || ct == cLower {
return c.copy()
}
// Non-exceptional mapping: apply the XOR pattern stored in the case info.
if c.info&exceptionBit == 0 {
return c.copyXOR()
}
// Exceptional mapping: the upper bits of info index the exceptions table.
e := exceptions[c.info>>exceptionShift:]
offset := 2 + e[0]&lengthMask // size of header + fold string
if nLower := (e[1] >> lengthBits) & lengthMask; nLower != noChange {
return c.writeString(e[offset : offset+nLower])
}
return c.copy()
}
// isLower reports whether the current rune is already in lower case. When it
// is not, c.err is set to transform.ErrEndOfSpan and false is returned.
func isLower(c *context) bool {
ct := c.caseType()
if c.info&hasMappingMask == 0 || ct == cLower {
return true
}
// Non-exceptional mapping on a rune that is not lowercase: it would change.
if c.info&exceptionBit == 0 {
c.err = transform.ErrEndOfSpan
return false
}
e := exceptions[c.info>>exceptionShift:]
// The exception entry records a lowercase mapping unless it is noChange.
if nLower := (e[1] >> lengthBits) & lengthMask; nLower != noChange {
c.err = transform.ErrEndOfSpan
return false
}
return true
}
// upper writes the uppercase version of the current rune to dst.
// It returns false if the write failed; c.err then holds the reason.
func upper(c *context) bool {
ct := c.caseType()
// No mapping bits set, or the rune is already uppercase: copy it as is.
if c.info&hasMappingMask == 0 || ct == cUpper {
return c.copy()
}
// Non-exceptional mapping: apply the XOR pattern stored in the case info.
if c.info&exceptionBit == 0 {
return c.copyXOR()
}
// Exceptional mapping: the upper bits of info index the exceptions table.
e := exceptions[c.info>>exceptionShift:]
offset := 2 + e[0]&lengthMask // size of header + fold string
// Get length of first special case mapping.
n := (e[1] >> lengthBits) & lengthMask
if ct == cTitle {
// The first special case mapping is for lower. Set n to the second.
if n == noChange {
n = 0
}
// Skip past the first mapping (n bytes) and take the second length.
n, e = e[1]&lengthMask, e[n:]
}
if n != noChange {
return c.writeString(e[offset : offset+n])
}
return c.copy()
}
// isUpper reports whether the current rune is already in upper case. It does
// not write to dst. When the rune is not upper case, c.err is set to
// transform.ErrEndOfSpan and false is returned.
func isUpper(c *context) bool {
ct := c.caseType()
if c.info&hasMappingMask == 0 || ct == cUpper {
return true
}
// Non-exceptional mapping on a rune that is not uppercase: it would change.
if c.info&exceptionBit == 0 {
c.err = transform.ErrEndOfSpan
return false
}
e := exceptions[c.info>>exceptionShift:]
// Get length of first special case mapping.
n := (e[1] >> lengthBits) & lengthMask
if ct == cTitle {
// For title-case runes the second mapping length applies instead.
n = e[1] & lengthMask
}
if n != noChange {
c.err = transform.ErrEndOfSpan
return false
}
return true
}
// title emits the title-case form of the current rune and returns the result
// of the underlying write (c.copy, c.copyXOR or c.writeString).
func title(c *context) bool {
	kind := c.caseType()
	if kind == cTitle || c.info&hasMappingMask == 0 {
		return c.copy()
	}
	if c.info&exceptionBit == 0 {
		// Regular rune: only lowercase runes need to be flipped.
		if kind != cLower {
			return c.copy()
		}
		return c.copyXOR()
	}

	// Exception data: header bytes, fold string, then the mappings.
	exc := exceptions[c.info>>exceptionShift:]
	start := 2 + exc[0]&lengthMask
	header := exc[1]
	firstLen := (header >> lengthBits) & lengthMask
	titleLen := header & lengthMask

	switch {
	case titleLen != noChange:
		// The title mapping is stored after the first mapping, if any.
		if firstLen != noChange {
			exc = exc[firstLen:]
		}
		return c.writeString(exc[start : start+titleLen])
	case kind == cLower && firstLen != noChange:
		// No dedicated title mapping: fall back to the uppercase one.
		return c.writeString(exc[start : start+firstLen])
	}
	// Already in the correct case.
	return c.copy()
}
// isTitle reports whether title-casing would leave the current rune as is.
// When it would not, c.err is set to transform.ErrEndOfSpan and false is
// returned.
func isTitle(c *context) bool {
	kind := c.caseType()
	if kind == cTitle || c.info&hasMappingMask == 0 {
		return true
	}

	var changes bool
	if c.info&exceptionBit == 0 {
		// Regular rune: only lowercase runes change.
		changes = kind == cLower
	} else {
		// Exception: changes if there is a title mapping, or if a lowercase
		// rune has a first (uppercase) mapping to fall back to.
		header := exceptions[c.info>>exceptionShift:][1]
		titleLen := header & lengthMask
		firstLen := (header >> lengthBits) & lengthMask
		changes = titleLen != noChange || (kind == cLower && firstLen != noChange)
	}
	if changes {
		c.err = transform.ErrEndOfSpan
	}
	return !changes
}
// foldFull emits the full case folding of the current rune and returns the
// result of the underlying write (c.copy, c.copyXOR or c.writeString).
func foldFull(c *context) bool {
	if c.info&hasMappingMask == 0 {
		return c.copy()
	}
	kind := c.caseType()
	if c.info&exceptionBit == 0 {
		// A lowercase rune stays as is unless it is marked as folding to
		// its uppercase form; every other cased rune is flipped.
		if kind == cLower && c.info&inverseFoldBit == 0 {
			return c.copy()
		}
		return c.copyXOR()
	}

	exc := exceptions[c.info>>exceptionShift:]
	n := exc[0] & lengthMask
	if n == 0 {
		// No explicit fold string: fall back to the first mapping, which a
		// lowercase rune does not need.
		if kind == cLower {
			return c.copy()
		}
		n = (exc[1] >> lengthBits) & lengthMask
	}
	return c.writeString(exc[2 : 2+n])
}
// isFoldFull reports whether the current rune is already in its full
// case-folded form. When it is not, c.err is set to transform.ErrEndOfSpan
// and false is returned.
func isFoldFull(c *context) bool {
	if c.info&hasMappingMask == 0 {
		return true
	}
	kind := c.caseType()

	var folded bool
	if c.info&exceptionBit == 0 {
		folded = kind == cLower && c.info&inverseFoldBit == 0
	} else {
		// An exception is folded only if it is lowercase and has no
		// explicit fold string.
		exc := exceptions[c.info>>exceptionShift:]
		n := exc[0] & lengthMask
		folded = n == 0 && kind == cLower
	}
	if !folded {
		c.err = transform.ErrEndOfSpan
	}
	return folded
}

View File

@ -1,34 +0,0 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package cases
import "golang.org/x/text/transform"
// caseFolder is a transformer that applies full case folding to its input.
// It embeds transform.NopResetter.
type caseFolder struct{ transform.NopResetter }

// Transform applies foldFull to every rune the context yields from src,
// checkpointing after each one, and returns the context's result.
func (t *caseFolder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
	c := &context{dst: dst, src: src, atEOF: atEOF}
	for c.next() {
		// The boolean result of foldFull is intentionally not inspected
		// here; the loop is driven by c.next alone.
		foldFull(c)
		c.checkpoint()
	}
	return c.ret()
}

// Span advances over src for as long as isFoldFull accepts the current rune,
// checkpointing after each accepted rune, and returns the context's span
// result.
func (t *caseFolder) Span(src []byte, atEOF bool) (n int, err error) {
	c := &context{src: src, atEOF: atEOF}
	for c.next() && isFoldFull(c) {
		c.checkpoint()
	}
	return c.retSpan()
}
// makeFold returns a new caseFolder. The options argument is currently
// unused.
func makeFold(o options) transform.SpanningTransformer {
	// TODO: Special case folding, through option Language, Special/Turkic, or
	// both.
	// TODO: Implement Compact options.
	return new(caseFolder)
}

839
vendor/golang.org/x/text/cases/gen.go generated vendored
View File

@ -1,839 +0,0 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
// This program generates the trie for casing operations. The Unicode casing
// algorithm requires the lookup of various properties and mappings for each
// rune. The table generated by this generator combines several of the most
// frequently used of these into a single trie so that they can be accessed
// with a single lookup.
package main
import (
"bytes"
"fmt"
"io"
"io/ioutil"
"log"
"reflect"
"strconv"
"strings"
"unicode"
"golang.org/x/text/internal/gen"
"golang.org/x/text/internal/triegen"
"golang.org/x/text/internal/ucd"
"golang.org/x/text/unicode/norm"
)
// main runs the generator: it writes the case tables (genTables) and the test
// tables (genTablesTest), then repackages gen_trieval.go as trieval.go for
// package cases.
func main() {
gen.Init()
genTables()
genTablesTest()
gen.Repackage("gen_trieval.go", "trieval.go", "cases")
}
// runeInfo contains all information for a rune that we care about for casing
// operations.
type runeInfo struct {
Rune rune
entry info // trie value for this rune.
// CaseMode is cTitle for general category Lt, or cLower/cUpper for runes
// with the Lowercase/Uppercase derived property; otherwise it stays at the
// zero value (cUncased).
CaseMode info
// Simple case mappings.
// Indexed by case mode (cLower, cUpper, cTitle); read from UnicodeData.txt.
Simple [1 + maxCaseMode][]rune
// Special casing
// HasSpecial is set for unconditional SpecialCasing.txt entries, whose
// mappings are stored in Special. Conditional is set for entries that
// carry a condition; their mappings are not recorded here.
HasSpecial bool
Conditional bool
Special [1 + maxCaseMode][]rune
// Folding
// From CaseFolding.txt: FoldSimple holds C and S mappings, FoldSpecial holds
// T mappings and FoldFull holds C and F mappings.
FoldSimple rune
FoldSpecial rune
FoldFull []rune
// TODO: FC_NFKC, or equivalent data.
// Properties
SoftDotted bool
CaseIgnorable bool
Cased bool
DecomposeGreek bool
// BreakType is the raw word break property value; BreakCat is the collapsed
// category derived from it.
BreakType string
BreakCat breakCategory
// We care mostly about 0, Above, and IotaSubscript.
CCC byte
}
// breakCategory is the collapsed word break category of a rune.
type breakCategory int
const (
// breakBreak is the zero value and therefore the category of every rune
// whose word break property is not mapped to one of the other two.
breakBreak breakCategory = iota
// breakLetter is assigned to ALetter, Hebrew_Letter, Numeric, Extend,
// ExtendNumLet, Format and ZWJ.
breakLetter
// breakMid is assigned to MidLetter, MidNumLet and Single_Quote.
breakMid
)
// mapping returns the mapping of r for case type c as a string. An
// unconditional special casing takes precedence over a simple mapping; if
// neither applies the rune maps to itself.
func (r *runeInfo) mapping(c info) string {
	switch {
	case r.HasSpecial:
		return string(r.Special[c])
	case len(r.Simple[c]) != 0:
		return string(r.Simple[c])
	default:
		return string(r.Rune)
	}
}
// parse opens the named UCD file and hands it to ucd.Parse together with the
// callback f.
func parse(file string, f func(p *ucd.Parser)) {
	r := gen.OpenUCDFile(file)
	ucd.Parse(r, f)
}
// parseUCD reads the Unicode Character Database files that matter for casing
// and returns a slice of runeInfo indexed by rune value. It terminates the
// program via log.Fatalf if the data violates an assumption the case mappers
// rely on.
func parseUCD() []runeInfo {
	chars := make([]runeInfo, unicode.MaxRune)

	// get returns the entry for r, recording r in it as a side effect.
	get := func(r rune) *runeInfo {
		c := &chars[r]
		c.Rune = r
		return c
	}

	parse("UnicodeData.txt", func(p *ucd.Parser) {
		ri := get(p.Rune(0))
		ri.CCC = byte(p.Int(ucd.CanonicalCombiningClass))
		ri.Simple[cLower] = p.Runes(ucd.SimpleLowercaseMapping)
		ri.Simple[cUpper] = p.Runes(ucd.SimpleUppercaseMapping)
		ri.Simple[cTitle] = p.Runes(ucd.SimpleTitlecaseMapping)
		if p.String(ucd.GeneralCategory) == "Lt" {
			ri.CaseMode = cTitle
		}
	})

	// <code>; <property>
	parse("PropList.txt", func(p *ucd.Parser) {
		if p.String(1) == "Soft_Dotted" {
			chars[p.Rune(0)].SoftDotted = true
		}
	})

	// <code>; <property>
	parse("DerivedCoreProperties.txt", func(p *ucd.Parser) {
		ri := get(p.Rune(0))
		switch p.String(1) {
		case "Case_Ignorable":
			ri.CaseIgnorable = true
		case "Cased":
			ri.Cased = true
		case "Lowercase":
			ri.CaseMode = cLower
		case "Uppercase":
			ri.CaseMode = cUpper
		}
	})

	// <code>; <lower> ; <title> ; <upper> ; (<condition_list> ;)?
	parse("SpecialCasing.txt", func(p *ucd.Parser) {
		// We drop all conditional special casing and deal with them manually in
		// the language-specific case mappers. Rune 0x03A3 is the only one with
		// a conditional formatting that is not language-specific. However,
		// dealing with this letter is tricky, especially in a streaming
		// context, so we deal with it in the Caser for Greek specifically.
		ri := get(p.Rune(0))
		if p.String(4) == "" {
			ri.HasSpecial = true
			ri.Special[cLower] = p.Runes(1)
			ri.Special[cTitle] = p.Runes(2)
			ri.Special[cUpper] = p.Runes(3)
		} else {
			ri.Conditional = true
		}
	})

	// TODO: Use text breaking according to UAX #29.
	// <code>; <word break type>
	parse("auxiliary/WordBreakProperty.txt", func(p *ucd.Parser) {
		ri := get(p.Rune(0))
		ri.BreakType = p.String(1)

		// We collapse the word breaking properties onto the categories we need.
		switch p.String(1) { // TODO: officially we need to canonicalize.
		case "MidLetter", "MidNumLet", "Single_Quote":
			ri.BreakCat = breakMid
			if !ri.CaseIgnorable {
				// finalSigma relies on the fact that all breakMid runes are
				// also a Case_Ignorable. Revisit this code when this changes.
				// Pass the rune itself: %U cannot format a *runeInfo.
				log.Fatalf("Rune %U, which has a break category mid, is not a case ignorable", ri.Rune)
			}
		case "ALetter", "Hebrew_Letter", "Numeric", "Extend", "ExtendNumLet", "Format", "ZWJ":
			ri.BreakCat = breakLetter
		}
	})

	// <code>; <type>; <mapping>
	parse("CaseFolding.txt", func(p *ucd.Parser) {
		ri := get(p.Rune(0))
		switch p.String(1) {
		case "C":
			// Common mappings serve as both the simple and the full fold.
			ri.FoldSimple = p.Rune(2)
			ri.FoldFull = p.Runes(2)
		case "S":
			ri.FoldSimple = p.Rune(2)
		case "T":
			ri.FoldSpecial = p.Rune(2)
		case "F":
			ri.FoldFull = p.Runes(2)
		default:
			log.Fatalf("%U: unknown type: %s", p.Rune(0), p.String(1))
		}
	})

	return chars
}
// genTables parses the UCD, computes the trie value of every rune and writes
// the resulting trie, together with the xorData and exceptions strings, to
// tables.go in package cases.
func genTables() {
chars := parseUCD()
// NOTE(review): the properties are verified before makeEntry has filled in
// the entry field of any rune, so checks that read entry see it as zero
// here — confirm this is intended.
verifyProperties(chars)
t := triegen.NewTrie("case")
for i := range chars {
c := &chars[i]
// makeEntry also appends to the package-level xorData and exceptionData.
makeEntry(c)
t.Insert(rune(i), uint64(c.entry))
}
w := gen.NewCodeWriter()
defer w.WriteGoFile("tables.go", "cases")
gen.WriteUnicodeVersion(w)
// TODO: write CLDR version after adding a mechanism to detect that the
// tables on which the manually created locale-sensitive casing code is
// based hasn't changed.
w.WriteVar("xorData", string(xorData))
w.WriteVar("exceptions", string(exceptionData))
sz, err := t.Gen(w, triegen.Compact(&sparseCompacter{}))
if err != nil {
log.Fatal(err)
}
w.Size += sz
}
// makeEntry computes the trie value ri.entry for a rune. The value holds the
// case mode and CCC bits and, for cased runes, either an XOR pattern (inline
// or as an index into xorData) or a reference into exceptionData created by
// makeException.
func makeEntry(ri *runeInfo) {
// Case-ignorable runes get one of the two ignorable case modes; all others
// use the case mode determined while parsing.
if ri.CaseIgnorable {
if ri.Cased {
ri.entry = cIgnorableCased
} else {
ri.entry = cIgnorableUncased
}
} else {
ri.entry = ri.CaseMode
}
// TODO: handle soft-dotted.
ccc := cccOther
switch ri.CCC {
case 0: // Not_Reordered
ccc = cccZero
case above: // Above
ccc = cccAbove
}
// The break category overrides the CCC bits for breaking runes and sets
// the mid bit for mid runes.
switch ri.BreakCat {
case breakBreak:
ccc = cccBreak
case breakMid:
ri.entry |= isMidBit
}
ri.entry |= ccc
// Uncased runes carry no mapping information.
if ri.CaseMode == cUncased {
return
}
// Need to do something special.
if ri.CaseMode == cTitle || ri.HasSpecial || ri.mapping(cTitle) != ri.mapping(cUpper) {
makeException(ri)
return
}
// A full fold that is neither the upper nor the lower mapping cannot be
// expressed by the XOR scheme.
if f := string(ri.FoldFull); len(f) > 0 && f != ri.mapping(cUpper) && f != ri.mapping(cLower) {
makeException(ri)
return
}
// Rune is either lowercase or uppercase.
orig := string(ri.Rune)
mapped := ""
if ri.CaseMode == cUpper {
mapped = ri.mapping(cLower)
} else {
mapped = ri.mapping(cUpper)
}
// A byte-wise XOR requires both encodings to have the same length.
if len(orig) != len(mapped) {
makeException(ri)
return
}
if string(ri.FoldFull) == ri.mapping(cUpper) {
ri.entry |= inverseFoldBit
}
n := len(orig)
// Create per-byte XOR mask.
var b []byte
for i := 0; i < n; i++ {
b = append(b, orig[i]^mapped[i])
}
// Remove leading 0 bytes, but keep at least one byte.
for ; len(b) > 1 && b[0] == 0; b = b[1:] {
}
// A single byte with its two top bits clear is stored inline in the entry.
if len(b) == 1 && b[0]&0xc0 == 0 {
ri.entry |= info(b[0]) << xorShift
return
}
// Otherwise store the pattern once in xorData and reference the index of
// its last byte.
key := string(b)
x, ok := xorCache[key]
if !ok {
xorData = append(xorData, 0) // for detecting start of sequence
xorData = append(xorData, b...)
x = len(xorData) - 1
xorCache[key] = x
}
ri.entry |= info(x<<xorShift) | xorIndexBit
}
// xorCache maps an XOR byte pattern to the index of its last byte in xorData,
// so that each distinct pattern is stored only once.
var xorCache = map[string]int{}
// xorData contains byte-wise XOR data for the least significant bytes of a
// UTF-8 encoded rune. An index points to the last byte. The sequence starts
// with a zero terminator.
var xorData = []byte{}
// See the comments in gen_trieval.go re "the exceptions slice".
// It starts with a single zero byte, so no entry is ever stored at index 0.
var exceptionData = []byte{0}
// makeException encodes case mappings that cannot be expressed in a simple
// XOR diff. It appends a two-byte header, the fold string and the mapping
// strings to exceptionData and stores the offset of that record in ri.entry.
func makeException(ri *runeInfo) {
// Save the CCC bits: they move from the entry into byte 0 of the record.
ccc := ri.entry & cccMask
// Set exception bit and retain case type.
ri.entry &= 0x0007
ri.entry |= exceptionBit
if len(exceptionData) >= 1<<numExceptionBits {
log.Fatalf("%U:exceptionData too large %x > %d bits", ri.Rune, len(exceptionData), numExceptionBits)
}
// Set the offset in the exceptionData array.
ri.entry |= info(len(exceptionData) << exceptionShift)
orig := string(ri.Rune)
tc := ri.mapping(cTitle)
uc := ri.mapping(cUpper)
lc := ri.mapping(cLower)
ff := string(ri.FoldFull)
// addString sets the length of a string and adds it to the expansions array.
// It shifts the header byte *b left by three bits first. A string equal to
// the original rune is not stored and leaves its length bits at zero.
addString := func(s string, b *byte) {
if len(s) == 0 {
// Zero-length mappings exist, but only for conditional casing,
// which we are representing outside of this table.
log.Fatalf("%U: has zero-length mapping.", ri.Rune)
}
*b <<= 3
if s != orig {
n := len(s)
if n > 7 {
log.Fatalf("%U: mapping larger than 7 (%d)", ri.Rune, n)
}
*b |= byte(n)
exceptionData = append(exceptionData, s...)
}
}
// byte 0:
exceptionData = append(exceptionData, byte(ccc)|byte(len(ff)))
// byte 1:
// p remembers where byte 1 lives so that addString can fill it in later.
p := len(exceptionData)
exceptionData = append(exceptionData, 0)
if len(ff) > 7 { // May be zero-length.
log.Fatalf("%U: fold string larger than 7 (%d)", ri.Rune, len(ff))
}
exceptionData = append(exceptionData, ff...)
// The mapping for the rune's own case is skipped, so exactly two of the
// three strings below are added, in the order lower, upper, title.
ct := ri.CaseMode
if ct != cLower {
addString(lc, &exceptionData[p])
}
if ct != cUpper {
addString(uc, &exceptionData[p])
}
if ct != cTitle {
// If title is the same as upper, we set it to the original string so
// that it will be marked as not present. This implies title case is
// the same as upper case.
if tc == uc {
tc = orig
}
addString(tc, &exceptionData[p])
}
}
// sparseCompacter is a trie value block Compacter. There are many cases where
// successive runes alternate between lower- and upper-case. This Compacter
// exploits this by adding a special case type where the case value is obtained
// from xor-ing it with the least-significant bit of the rune, creating large
// ranges of equal case values that compress well.
type sparseCompacter struct {
// sparseBlocks holds the modified values of every stored block.
sparseBlocks [][]uint16
// sparseOffsets holds, per stored block, the value of sparseCount at the
// time the block was stored.
sparseOffsets []uint16
// sparseCount is the running total of entries over all stored blocks.
sparseCount int
}
// makeSparse returns the number of elements that compact block would contain
// as well as the modified values.
// The values are the 16-bit truncations of vals, with some lower/upper case
// values rewritten to the cXORCase form where that extends a run of equal
// values. The count is the number of runs of equal non-zero values.
func makeSparse(vals []uint64) ([]uint16, int) {
// Copy the values.
values := make([]uint16, len(vals))
for i, v := range vals {
values[i] = uint16(v)
}
// alt returns the cXORCase form of v at position i if v is plain lower or
// upper case, and v itself otherwise.
alt := func(i int, v uint16) uint16 {
if cm := info(v & fullCasedMask); cm == cUpper || cm == cLower {
// Convert cLower or cUpper to cXORCase value, which has the form 11x.
xor := v
xor &^= 1
xor |= uint16(i&1) ^ (v & 1)
xor |= 0x4
return xor
}
return v
}
var count int
var previous uint16
for i, v := range values {
if v != 0 {
// Try if the unmodified value is equal to the previous.
if v == previous {
continue
}
// Try if the xor-ed value is equal to the previous value.
a := alt(i, v)
if a == previous {
values[i] = a
continue
}
// This is a new value.
count++
// Use the xor-ed value if it will be identical to the next value.
if p := i + 1; p < len(values) && alt(p, values[p]) == a {
values[i] = a
v = a
}
}
// Zero values are not counted but do end the current run.
previous = v
}
return values, count
}
// Size reports the size of the sparse representation of block v: two bytes
// plus one valueRange per entry. The boolean result is false, with a size of
// zero, when the block needs more than 16 entries.
func (s *sparseCompacter) Size(v []uint64) (int, bool) {
	// We limit using this method to having 16 entries.
	const maxEntries = 16

	_, count := makeSparse(v)
	if count <= maxEntries {
		entrySize := int(reflect.TypeOf(valueRange{}).Size())
		return 2 + entrySize*count, true
	}
	return 0, false
}
// Store records the sparse form of block v and returns the index under which
// it was stored, which is the number of blocks stored before it.
func (s *sparseCompacter) Store(v []uint64) uint32 {
	values, n := makeSparse(v)

	index := uint32(len(s.sparseOffsets))
	s.sparseOffsets = append(s.sparseOffsets, uint16(s.sparseCount))
	s.sparseBlocks = append(s.sparseBlocks, values)
	s.sparseCount += n
	return index
}
// Handler returns the string "sparse.lookup".
func (s *sparseCompacter) Handler() string {
	// The sparse global variable and its lookup method is defined in gen_trieval.go.
	const lookupMethod = "sparse.lookup"
	return lookupMethod
}
// Print writes the Go source of the sparseOffsets and sparseValues tables to
// w. It returns the first write error encountered, if any.
func (s *sparseCompacter) Print(w io.Writer) (retErr error) {
// p formats to w and remembers only the first error.
p := func(format string, args ...interface{}) {
_, err := fmt.Fprintf(w, format, args...)
if retErr == nil && err != nil {
retErr = err
}
}
ls := len(s.sparseBlocks)
// Append the final total once, so that every block has an end offset.
if ls == len(s.sparseOffsets) {
s.sparseOffsets = append(s.sparseOffsets, uint16(s.sparseCount))
}
p("// sparseOffsets: %d entries, %d bytes\n", ls+1, (ls+1)*2)
p("var sparseOffsets = %#v\n\n", s.sparseOffsets)
ns := s.sparseCount
p("// sparseValues: %d entries, %d bytes\n", ns, ns*4)
p("var sparseValues = [%d]valueRange {", ns)
for i, values := range s.sparseBlocks {
p("\n// Block %#x, offset %#x", i, s.sparseOffsets[i])
// v is the value of the run currently being printed; zero means no run
// is open.
var v uint16
// Note that this i shadows the block index of the outer loop.
for i, nv := range values {
if nv != v {
// Close the open run, then open a new one for a non-zero value.
// Positions are printed as byte values starting at 0x80.
if v != 0 {
p(",hi:%#02x},", 0x80+i-1)
}
if nv != 0 {
p("\n{value:%#04x,lo:%#02x", nv, 0x80+i)
}
}
v = nv
}
// Close a run that extends to the end of the block.
if v != 0 {
p(",hi:%#02x},", 0x80+len(values)-1)
}
}
p("\n}\n\n")
return
}
// verifyProperties checks the properties of the runes that are relied upon in
// the implementation. Each property is marked with an identifier that is
// referred to in the places where it is used. The program is terminated via
// log.Fatalf on the first property that does not hold.
func verifyProperties(chars []runeInfo) {
for i, c := range chars {
r := rune(i)
// Rune properties.
// A.1: modifier never changes on lowercase. [ltLower]
if c.CCC > 0 && unicode.ToLower(r) != r {
log.Fatalf("%U: non-starter changes when lowercased", r)
}
// A.2: properties of decompositions starting with I or J. [ltLower]
d := norm.NFD.PropertiesString(string(r)).Decomposition()
if len(d) > 0 {
if d[0] == 'I' || d[0] == 'J' {
// A.2.1: we expect at least an ASCII character and a modifier.
if len(d) < 3 {
log.Fatalf("%U: length of decomposition was %d; want >= 3", r, len(d))
}
// All subsequent runes are modifiers and all have the same CCC.
// ccc is the CCC of the first modifier; the loop below visits the
// modifiers after it.
runes := []rune(string(d[1:]))
ccc := chars[runes[0]].CCC
for _, mr := range runes[1:] {
mc := chars[mr]
// A.2.2: all modifiers have a CCC of Above or less.
if ccc == 0 || ccc > above {
log.Fatalf("%U: CCC of successive rune (%U) was %d; want (0,230]", r, mr, ccc)
}
// A.2.3: a sequence of modifiers all have the same CCC.
if mc.CCC != ccc {
log.Fatalf("%U: CCC of follow-up modifier (%U) was %d; want %d", r, mr, mc.CCC, ccc)
}
// A.2.4: for each trailing r, r in [0x300, 0x311] <=> CCC == Above.
if (ccc == above) != (0x300 <= mr && mr <= 0x311) {
log.Fatalf("%U: modifier %U in [U+0300, U+0311] != ccc(%U) == 230", r, mr, mr)
}
// NOTE(review): i is the rune index of the outer loop, not an offset
// into d, so this comparison against len(d) looks unintended and
// may end the loop early — confirm.
if i += len(string(mr)); i >= len(d) {
break
}
}
}
}
// A.3: no U+0307 in decomposition of Soft-Dotted rune. [ltUpper]
if unicode.Is(unicode.Soft_Dotted, r) && strings.Contains(string(d), "\u0307") {
log.Fatalf("%U: decomposition of soft-dotted rune may not contain U+0307", r)
}
// A.4: only rune U+0345 may be of CCC Iota_Subscript. [elUpper]
if c.CCC == iotaSubscript && r != 0x0345 {
log.Fatalf("%U: only rune U+0345 may have CCC Iota_Subscript", r)
}
// A.5: soft-dotted runes do not have exceptions.
// NOTE(review): this reads c.entry, which is only meaningful if makeEntry
// has already run for the rune — confirm against the caller.
if c.SoftDotted && c.entry&exceptionBit != 0 {
log.Fatalf("%U: soft-dotted has exception", r)
}
// A.6: Greek decomposition. [elUpper]
if unicode.Is(unicode.Greek, r) {
if b := norm.NFD.PropertiesString(string(r)).Decomposition(); b != nil {
runes := []rune(string(b))
// A.6.1: If a Greek rune decomposes and the first rune of the
// decomposition is greater than U+00FF, the rune is always
// great and not a modifier.
if f := runes[0]; unicode.IsMark(f) || f > 0xFF && !unicode.Is(unicode.Greek, f) {
log.Fatalf("%U: expected first rune of Greek decomposition to be letter, found %U", r, f)
}
// A.6.2: Any follow-up rune in a Greek decomposition is a
// modifier of which the first should be gobbled in
// decomposition.
for _, m := range runes[1:] {
switch m {
case 0x0313, 0x0314, 0x0301, 0x0300, 0x0306, 0x0342, 0x0308, 0x0304, 0x345:
default:
log.Fatalf("%U: modifier %U is outside of expected Greek modifier set", r, m)
}
}
}
}
// Breaking properties.
// B.1: all runes with CCC > 0 are of break type Extend.
if c.CCC > 0 && c.BreakType != "Extend" {
log.Fatalf("%U: CCC == %d, but got break type %s; want Extend", r, c.CCC, c.BreakType)
}
// B.2: all cased runes with c.CCC == 0 are of break type ALetter.
if c.CCC == 0 && c.Cased && c.BreakType != "ALetter" {
log.Fatalf("%U: cased, but got break type %s; want ALetter", r, c.BreakType)
}
// B.3: letter category.
if c.CCC == 0 && c.BreakCat != breakBreak && !c.CaseIgnorable {
if c.BreakCat != breakLetter {
log.Fatalf("%U: check for letter break type gave %d; want %d", r, c.BreakCat, breakLetter)
}
}
}
}
// genTablesTest writes tables_test.go for package cases. The file declares a
// single var block with the caseIgnorable, special, foldMap, breakProp and
// breakTest test tables, all derived from the UCD files.
func genTablesTest() {
w := &bytes.Buffer{}
fmt.Fprintln(w, "var (")
printProperties(w, "DerivedCoreProperties.txt", "Case_Ignorable", verifyIgnore)
// We discard the output as we know we have perfect functions. We run them
// just to verify the properties are correct.
n := printProperties(ioutil.Discard, "DerivedCoreProperties.txt", "Cased", verifyCased)
n += printProperties(ioutil.Discard, "DerivedCoreProperties.txt", "Lowercase", verifyLower)
n += printProperties(ioutil.Discard, "DerivedCoreProperties.txt", "Uppercase", verifyUpper)
if n > 0 {
log.Fatalf("One of the discarded properties does not have a perfect filter.")
}
// <code>; <lower> ; <title> ; <upper> ; (<condition_list> ;)?
fmt.Fprintln(w, "\tspecial = map[rune]struct{ toLower, toTitle, toUpper string }{")
parse("SpecialCasing.txt", func(p *ucd.Parser) {
// Skip conditional entries.
if p.String(4) != "" {
return
}
r := p.Rune(0)
fmt.Fprintf(w, "\t\t0x%04x: {%q, %q, %q},\n",
r, string(p.Runes(1)), string(p.Runes(2)), string(p.Runes(3)))
})
fmt.Fprint(w, "\t}\n\n")
// <code>; <type>; <runes>
table := map[rune]struct{ simple, full, special string }{}
parse("CaseFolding.txt", func(p *ucd.Parser) {
r := p.Rune(0)
t := p.String(1)
v := string(p.Runes(2))
// Folds that equal the simple lowercase of r are omitted, except for
// entries of type T.
if t != "T" && v == string(unicode.ToLower(r)) {
return
}
x := table[r]
switch t {
case "C":
x.full = v
x.simple = v
case "S":
x.simple = v
case "F":
x.full = v
case "T":
x.special = v
}
table[r] = x
})
fmt.Fprintln(w, "\tfoldMap = map[rune]struct{ simple, full, special string }{")
// Iterate in rune order so that the output is deterministic. The bound is
// exclusive, so U+10FFFF itself is never looked up.
for r := rune(0); r < 0x10FFFF; r++ {
x, ok := table[r]
if !ok {
continue
}
fmt.Fprintf(w, "\t\t0x%04x: {%q, %q, %q},\n", r, x.simple, x.full, x.special)
}
fmt.Fprint(w, "\t}\n\n")
// Break property
notBreak := map[rune]bool{}
parse("auxiliary/WordBreakProperty.txt", func(p *ucd.Parser) {
switch p.String(1) {
case "Extend", "Format", "MidLetter", "MidNumLet", "Single_Quote",
"ALetter", "Hebrew_Letter", "Numeric", "ExtendNumLet", "ZWJ":
notBreak[p.Rune(0)] = true
}
})
fmt.Fprintln(w, "\tbreakProp = []struct{ lo, hi rune }{")
// Emit one {lo, hi} pair for every maximal range of breaking runes up to
// lastRuneForTesting.
inBreak := false
for r := rune(0); r <= lastRuneForTesting; r++ {
if isBreak := !notBreak[r]; isBreak != inBreak {
if isBreak {
fmt.Fprintf(w, "\t\t{0x%x, ", r)
} else {
fmt.Fprintf(w, "0x%x},\n", r-1)
}
inBreak = isBreak
}
}
// Close a range that is still open at the last rune.
if inBreak {
fmt.Fprintf(w, "0x%x},\n", lastRuneForTesting)
}
fmt.Fprint(w, "\t}\n\n")
// Word break test
// Filter out all samples that do not contain cased characters.
cased := map[rune]bool{}
parse("DerivedCoreProperties.txt", func(p *ucd.Parser) {
if p.String(1) == "Cased" {
cased[p.Rune(0)] = true
}
})
fmt.Fprintln(w, "\tbreakTest = []string{")
parse("auxiliary/WordBreakTest.txt", func(p *ucd.Parser) {
// The field is consumed as (marker, hex rune) pairs; a "÷" marker becomes
// the separator in the generated sample.
c := strings.Split(p.String(0), " ")
const sep = '|'
numCased := 0
test := ""
for ; len(c) >= 2; c = c[2:] {
if c[0] == "÷" && test != "" {
test += string(sep)
}
i, err := strconv.ParseUint(c[1], 16, 32)
r := rune(i)
if err != nil {
log.Fatalf("Invalid rune %q.", c[1])
}
if r == sep {
log.Fatalf("Separator %q not allowed in test data. Pick another one.", sep)
}
if cased[r] {
numCased++
}
test += string(r)
}
// Only samples with more than one cased rune are written.
if numCased > 1 {
fmt.Fprintf(w, "\t\t%q,\n", test)
}
})
fmt.Fprintln(w, "\t}")
fmt.Fprintln(w, ")")
gen.WriteGoFile("tables_test.go", "cases", w.Bytes())
}
// The three functions below restate the Cased, Lowercase and Uppercase
// properties in terms of the core unicode package. They exist only to verify
// that these definitions have not changed in the Unicode Standard.

// verifyCased reports whether r is lowercase, uppercase or title case.
func verifyCased(r rune) bool {
	switch {
	case verifyLower(r), verifyUpper(r):
		return true
	default:
		return unicode.IsTitle(r)
	}
}

// verifyLower reports whether r is a lowercase letter or has the
// Other_Lowercase property.
func verifyLower(r rune) bool {
	if unicode.IsLower(r) {
		return true
	}
	return unicode.Is(unicode.Other_Lowercase, r)
}

// verifyUpper reports whether r is an uppercase letter or has the
// Other_Uppercase property.
func verifyUpper(r rune) bool {
	if unicode.IsUpper(r) {
		return true
	}
	return unicode.Is(unicode.Other_Uppercase, r)
}
// verifyIgnore approximates the Case_Ignorable property with the core unicode
// package: it reports whether r is in one of the general categories Mn, Me,
// Cf, Lm or Sk. It is used to reduce the size of the test data.
func verifyIgnore(r rune) bool {
	switch {
	case unicode.Is(unicode.Mn, r),
		unicode.Is(unicode.Me, r),
		unicode.Is(unicode.Cf, r),
		unicode.Is(unicode.Lm, r),
		unicode.Is(unicode.Sk, r):
		return true
	}
	return false
}
// printProperties writes to w a map[rune]bool literal listing the runes in
// the given UCD file that have the named property but are not recognized by
// the filter f, and returns the number of runes listed. A rune r thus has the
// property if it is in the generated table or if f(r).
//
// The variable name is derived from the property name by lowercasing its
// first underscore-separated part and dropping the underscores. The program
// is terminated if f accepts a rune, up to lastRuneForTesting, that does not
// have the property.
func printProperties(w io.Writer, file, property string, f func(r rune) bool) int {
	nameParts := strings.Split(property, "_")
	nameParts[0] = strings.ToLower(nameParts[0])
	fmt.Fprintf(w, "\t%s = map[rune]bool{\n", strings.Join(nameParts, ""))

	hasProperty := map[rune]bool{}
	listed := 0
	parse(file, func(p *ucd.Parser) {
		if p.String(1) != property {
			return
		}
		r := p.Rune(0)
		hasProperty[r] = true
		if f(r) {
			return
		}
		listed++
		fmt.Fprintf(w, "\t\t0x%.4x: true,\n", r)
	})
	fmt.Fprint(w, "\t}\n\n")

	// Verify that f is correct, that is, it represents a subset of the property.
	for r := rune(0); r <= lastRuneForTesting; r++ {
		if hasProperty[r] {
			continue
		}
		if f(r) {
			log.Fatalf("Incorrect filter func for property %q.", property)
		}
	}
	return listed
}
// The newCaseTrie, sparseValues and sparseOffsets definitions below are
// placeholders referred to by gen_trieval.go. The real definitions are
// generated by this program and written to tables.go.
// The placeholder newCaseTrie ignores its argument and returns 0.
func newCaseTrie(int) int { return 0 }
var (
// Zero-length arrays: they only need to exist for gen_trieval.go to compile
// as part of the generator.
sparseValues [0]valueRange
sparseOffsets [0]uint16
)

View File

@ -1,219 +0,0 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
package main
// This file contains definitions for interpreting the trie value of the case
// trie generated by "go run gen*.go". It is shared by both the generator
// program and the resultant package. Sharing is achieved by the generator
// copying gen_trieval.go to trieval.go and changing what's above this comment.
// info holds case information for a single rune. It is the value returned
// by a trie lookup. Most mapping information can be stored in a single 16-bit
// value. If not, for example when a rune is mapped to multiple runes, the value
// stores some basic case data and an index into an array with additional data.
//
// The per-rune values have the following format:
//
// if (exception) {
// 15..5 unsigned exception index
// 4 unused
// } else {
// 15..8 XOR pattern or index to XOR pattern for case mapping
// Only 13..8 are used for XOR patterns.
// 7 inverseFold (fold to upper, not to lower)
// 6 index: interpret the XOR pattern as an index
// or isMid if case mode is cIgnorableUncased.
// 5..4 CCC: zero (normal or break), above or other
// }
// 3 exception: interpret this value as an exception index
// (TODO: is this bit necessary? Probably implied from case mode.)
// 2..0 case mode
//
// For the non-exceptional cases, a rune must be either uncased, lowercase or
// uppercase. If the rune is cased, the XOR pattern maps either a lowercase
// rune to uppercase or an uppercase rune to lowercase (applied to the 10
// least-significant bits of the rune).
//
// See the definitions below for a more detailed description of the various
// bits.
type info uint16
const (
// casedMask selects the two low case mode bits; see isCased.
casedMask = 0x0003
// fullCasedMask selects all three case mode bits.
fullCasedMask = 0x0007
// A value is case-ignorable if masking it with ignorableMask yields
// ignorableValue; see isCaseIgnorable.
ignorableMask = 0x0006
ignorableValue = 0x0004
inverseFoldBit = 1 << 7
// isMidBit shares bit 6 with xorIndexBit.
isMidBit = 1 << 6
exceptionBit = 1 << 3
// The exception index occupies the numExceptionBits bits starting at bit
// exceptionShift.
exceptionShift = 5
numExceptionBits = 11
xorIndexBit = 1 << 6
// The XOR pattern, or the index to it, starts at bit xorShift.
xorShift = 8
// There is no mapping if all xor bits and the exception bit are zero.
hasMappingMask = 0xff80 | exceptionBit
)
// The case mode bits encodes the case type of a rune. This includes uncased,
// title, upper and lower case and case ignorable. (For a definition of these
// terms see Chapter 3 of The Unicode Standard Core Specification.) In some rare
// cases, a rune can be both cased and case-ignorable. This is encoded by
// cIgnorableCased. A rune of this type is always lower case. Some runes are
// cased while not having a mapping.
//
// A common pattern for scripts in the Unicode standard is for upper and lower
// case runes to alternate for increasing rune values (e.g. the accented Latin
// ranges starting from U+0100 and U+1E00 among others and some Cyrillic
// characters). We use this property by defining a cXORCase mode, where the case
// mode (always upper or lower case) is derived from the rune value. As the XOR
// pattern for case mappings is often identical for successive runes, using
// cXORCase can result in large series of identical trie values. This, in turn,
// allows us to better compress the trie blocks.
const (
cUncased info = iota // 000
cTitle // 001
cLower // 010
cUpper // 011
cIgnorableUncased // 100
cIgnorableCased // 101 // lower case if mappings exist
cXORCase // 11x // case is cLower | ((rune&1) ^ x)
// maxCaseMode is the largest of the three plain case modes (title, lower,
// upper).
maxCaseMode = cUpper
)
// isCased reports whether either of the two low case mode bits of c is set.
func (c info) isCased() bool {
	caseBits := c & casedMask
	return caseBits != 0
}
// isCaseIgnorable reports whether the case mode of c is one of the two
// case-ignorable modes (cIgnorableUncased or cIgnorableCased).
func (c info) isCaseIgnorable() bool {
	ignorableBits := c & ignorableMask
	return ignorableBits == ignorableValue
}
// isNotCasedAndNotCaseIgnorable reports whether all three case mode bits of
// c are zero, that is, whether its case mode is cUncased.
func (c info) isNotCasedAndNotCaseIgnorable() bool {
	mode := c & fullCasedMask
	return mode == 0
}
// isCaseIgnorableAndNotCased reports whether the case mode of c is
// cIgnorableUncased.
func (c info) isCaseIgnorableAndNotCased() bool {
	mode := c & fullCasedMask
	return mode == cIgnorableUncased
}
// isMid reports whether c has the case mode cIgnorableUncased and has the
// mid bit set.
func (c info) isMid() bool {
	const (
		mask = fullCasedMask | isMidBit
		want = isMidBit | cIgnorableUncased
	)
	return c&mask == want
}
// The case mapping implementation will need to know about various Canonical
// Combining Class (CCC) values. We encode two of these in the trie value:
// cccZero (0) and cccAbove (230). If the value is cccOther, it means that
// CCC(r) > 0, but not 230. A value of cccBreak means that CCC(r) == 0 and that
// the rune also has the break category Break (see below).
const (
// The CCC type is stored in bits 5..4, so the values are 0x00, 0x10, 0x20
// and 0x30.
cccBreak info = iota << 4
cccZero
cccAbove
cccOther
// cccMask selects the two CCC type bits.
cccMask = cccBreak | cccZero | cccAbove | cccOther
)
// Canonical Combining Class values that are compared against directly.
const (
starter = 0
above = 230
iotaSubscript = 240
)
// The exceptions slice holds data that does not fit in a normal info entry.
// The entry is pointed to by the exception index in an entry. It has the
// following format:
//
// Header
// byte 0:
// 7..6 unused
// 5..4 CCC type (same bits as entry)
// 3 unused
// 2..0 length of fold
//
// byte 1:
// 7..6 unused
// 5..3 length of 1st mapping of case type
// 2..0 length of 2nd mapping of case type
//
// case 1st 2nd
// lower -> upper, title
// upper -> lower, title
// title -> lower, upper
//
// A length of 0 (noChange) indicates that the mapping is absent, which implies
// no change. Mappings to a zero-length string cannot be represented.
//
// Body bytes:
// case folding bytes
// lowercase mapping bytes
// uppercase mapping bytes
// titlecase mapping bytes
// closure mapping bytes (for NFKC_Casefold). (TODO)
//
// Fallbacks:
// missing fold -> lower
// missing title -> upper
// all missing -> original rune
//
// exceptions starts with a dummy byte to enforce that there is no zero index
// value.
const (
// lengthMask selects one three-bit length field of an exception header byte.
lengthMask = 0x07
// lengthBits is the width of a length field, used to shift down to the
// length of the 1st mapping in byte 1.
lengthBits = 3
// noChange is the length value that marks a mapping as absent.
noChange = 0
)
// References to generated trie.
var trie = newCaseTrie(0)
// sparse wraps the sparseValues and sparseOffsets tables for lookup.
var sparse = sparseBlocks{
values: sparseValues[:],
offsets: sparseOffsets[:],
}
// Sparse block lookup code.

// valueRange is one entry of a sparse block: value applies to every byte in
// the inclusive range [lo, hi].
type valueRange struct {
	value  uint16
	lo, hi byte
}

// sparseBlocks is a collection of sparse blocks. The entries of block n are
// values[offsets[n]:offsets[n+1]].
type sparseBlocks struct {
	values  []valueRange
	offsets []uint16
}

// lookup binary-searches block n for the entry whose range contains b and
// returns that entry's value, or 0 if no entry contains b.
func (s *sparseBlocks) lookup(n uint32, b byte) uint16 {
	first, limit := s.offsets[n], s.offsets[n+1]
	for first < limit {
		mid := first + (limit-first)/2
		switch entry := s.values[mid]; {
		case b < entry.lo:
			limit = mid
		case b > entry.hi:
			first = mid + 1
		default:
			return entry.value
		}
	}
	return 0
}
// lastRuneForTesting is the last rune used for testing. Everything after this
// is boring.
// Its value is U+1FFFF.
const lastRuneForTesting = rune(0x1FFFF)

View File

@ -1,61 +0,0 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build icu
package cases
// Ideally these functions would be defined in a test file, but go test doesn't
// allow CGO in tests. The build tag should ensure either way that these
// functions will not end up in the package.
// TODO: Ensure that the correct ICU version is set.
/*
#cgo LDFLAGS: -licui18n.57 -licuuc.57
#include <stdlib.h>
#include <unicode/ustring.h>
#include <unicode/utypes.h>
#include <unicode/localpointer.h>
#include <unicode/ucasemap.h>
*/
import "C"
import "unsafe"
// doICU case-maps input with ICU for the locale named by tag and returns the
// result. caser selects the operation and must be one of "fold", "lower",
// "upper" or "title"; any other value yields an empty result.
//
// The ICU error code is not inspected.
func doICU(tag, caser, input string) string {
	// Taking &buf[0] of an empty buffer would panic, and there is nothing to
	// map in an empty string.
	if input == "" {
		return ""
	}

	err := C.UErrorCode(0)

	// C.CString allocates with malloc; release the copies when done.
	loc := C.CString(tag)
	defer C.free(unsafe.Pointer(loc))

	cm := C.ucasemap_open(loc, C.uint32_t(0), &err)
	defer C.ucasemap_close(cm)

	// Reserve four output bytes per input byte.
	buf := make([]byte, len(input)*4)
	dst := (*C.char)(unsafe.Pointer(&buf[0]))

	src := C.CString(input)
	defer C.free(unsafe.Pointer(src))

	cn := C.int32_t(0)

	switch caser {
	case "fold":
		cn = C.ucasemap_utf8FoldCase(cm,
			dst, C.int32_t(len(buf)),
			src, C.int32_t(len(input)),
			&err)
	case "lower":
		cn = C.ucasemap_utf8ToLower(cm,
			dst, C.int32_t(len(buf)),
			src, C.int32_t(len(input)),
			&err)
	case "upper":
		cn = C.ucasemap_utf8ToUpper(cm,
			dst, C.int32_t(len(buf)),
			src, C.int32_t(len(input)),
			&err)
	case "title":
		cn = C.ucasemap_utf8ToTitle(cm,
			dst, C.int32_t(len(buf)),
			src, C.int32_t(len(input)),
			&err)
	}
	return string(buf[:cn])
}

View File

@ -1,82 +0,0 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package cases
// cccVal returns the CCC bits of c. When c is an exception entry the bits are
// read from the exceptions table at c's exception index instead of from c
// itself.
func (c info) cccVal() info {
	raw := c
	if c&exceptionBit != 0 {
		raw = info(exceptions[c>>exceptionShift])
	}
	return raw & cccMask
}
// cccType returns the CCC bits of c with every value at or below cccZero
// (that is, cccBreak and cccZero) reported as cccZero.
func (c info) cccType() info {
	if v := c.cccVal(); v > cccZero {
		return v
	}
	return cccZero
}
// TODO: Implement full Unicode breaking algorithm:
// 1) Implement breaking in separate package.
// 2) Use the breaker here.
// 3) Compare table size and performance of using the more generic breaker.
//
// Note that we can extend the current algorithm to be much more accurate. This
// only makes sense, though, if the performance and/or space penalty of using
// the generic breaker is big. Extra data will only be needed for non-cased
// runes, which means there are sufficient bits left in the caseType.
// ICU prohibits breaking in such cases as well.
// For the purpose of title casing we use an approximation of the Unicode Word
// Breaking algorithm defined in Annex #29:
// http://www.unicode.org/reports/tr29/#Default_Grapheme_Cluster_Table.
//
// For our approximation, we group the Word Break types into the following
// categories, with associated rules:
//
// 1) Letter:
// ALetter, Hebrew_Letter, Numeric, ExtendNumLet, Extend, Format_FE, ZWJ.
// Rule: Never break between consecutive runes of this category.
//
// 2) Mid:
// MidLetter, MidNumLet, Single_Quote.
// (Cf. case-ignorable: MidLetter, MidNumLet, Single_Quote or cat is Mn,
// Me, Cf, Lm or Sk).
// Rule: Don't break between Letter and Mid, but break between two Mids.
//
// 3) Break:
// Any other category: NewLine, MidNum, CR, LF, Double_Quote, Katakana, and
// Other.
// These categories should always result in a break between two cased letters.
// Rule: Always break.
//
// Note 1: the Katakana and MidNum categories can, in esoteric cases, result in
// preventing a break between two cased letters. For now we will ignore this
// (e.g. [ALetter] [ExtendNumLet] [Katakana] [ExtendNumLet] [ALetter] and
// [ALetter] [Numeric] [MidNum] [Numeric] [ALetter].)
//
// Note 2: the rule for Mid is very approximate, but works in most cases. To
// improve, we could store the categories in the trie value and use a FA to
// manage breaks. See TODO comment above.
//
// Note 3: according to the spec, it is possible for the Extend category to
// introduce breaks between other categories grouped in Letter. However, this
// is undesirable for our purposes. ICU prevents breaks in such cases as well.
// isBreak reports whether this rune should introduce a break, i.e. whether
// its CCC bits equal cccBreak.
func (c info) isBreak() bool {
	ccc := c.cccVal()
	return ccc == cccBreak
}
// isLetter reports whether the rune is of break type ALetter, Hebrew_Letter,
// Numeric, ExtendNumLet, or Extend. A rune with cccZero counts as a letter
// unless it is case-ignorable; a rune with cccBreak never does; any other CCC
// value always does.
func (c info) isLetter() bool {
	switch c.cccVal() {
	case cccZero:
		return !c.isCaseIgnorable()
	case cccBreak:
		return false
	default:
		return true
	}
}

816
vendor/golang.org/x/text/cases/map.go generated vendored
View File

@ -1,816 +0,0 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package cases
// This file contains the definitions of case mappings for all supported
// languages. The rules for the language-specific tailorings were taken and
// modified from the CLDR transform definitions in common/transforms.
import (
"strings"
"unicode"
"unicode/utf8"
"golang.org/x/text/internal"
"golang.org/x/text/language"
"golang.org/x/text/transform"
"golang.org/x/text/unicode/norm"
)
// A mapFunc takes a context set to the current rune and writes the mapped
// version to the same context. It may advance the context to the next rune. It
// returns whether a checkpoint is possible: whether the pDst bytes written to
// dst so far won't need changing as we see more source bytes.
type mapFunc func(*context) bool
// A spanFunc takes a context set to the current rune and returns whether this
// rune would be altered when written to the output. It may advance the context
// to the next rune. It returns whether a checkpoint is possible.
type spanFunc func(*context) bool
// maxIgnorable defines the maximum number of ignorables to consider for
// lookahead operations.
const maxIgnorable = 30
// supported lists the language tags for which we have tailorings.
// The tags are space-separated and parsed in this order by init. The entries
// of upperFunc, lowerFunc and titleInfos are listed in the same order, and the
// index returned by matcher.Match is used to index those tables, so the order
// here presumably has to stay in sync with them.
const supported = "und af az el lt nl tr"
func init() {
tags := []language.Tag{}
for _, s := range strings.Split(supported, " ") {
tags = append(tags, language.MustParse(s))
}
matcher = internal.NewInheritanceMatcher(tags)
Supported = language.NewCoverage(tags)
}
var (
// matcher maps a language tag to an index into the per-language tables
// below. It is set in init from the tags listed in supported.
matcher *internal.InheritanceMatcher
// Supported is the coverage of the tags listed in supported; set in init.
Supported language.Coverage
// We keep the following lists separate, instead of having a single per-
// language struct, to give the compiler a chance to remove unused code.
// Some uppercase mappers are stateless, so we can precompute the
// Transformers and save a bit on runtime allocations.
// upperFunc holds, per language, the upper-casing mapFunc and its spanFunc.
// A nil upper makes makeUpper fall back to undUpper.
upperFunc = []struct {
upper mapFunc
span spanFunc
}{
{nil, nil}, // und
{nil, nil}, // af
{aztrUpper(upper), isUpper}, // az
{elUpper, noSpan}, // el
{ltUpper(upper), noSpan}, // lt
{nil, nil}, // nl
{aztrUpper(upper), isUpper}, // tr
}
// Precomputed transformers for the root locale; they carry no per-call state.
undUpper transform.SpanningTransformer = &undUpperCaser{}
undLower transform.SpanningTransformer = &undLowerCaser{}
undLowerIgnoreSigma transform.SpanningTransformer = &undLowerIgnoreSigmaCaser{}
// lowerFunc holds, per language, the lower-casing mapFunc. A nil entry makes
// makeLower fall back to undLower or undLowerIgnoreSigma.
lowerFunc = []mapFunc{
nil, // und
nil, // af
aztrLower, // az
nil, // el
ltLower, // lt
nil, // nl
aztrLower, // tr
}
// titleInfos holds, per language, the functions makeTitle installs into a
// titleCaser: the title and lower mappers, the span check for title-cased
// runes, and an optional rewrite hook (nil when the language has none).
titleInfos = []struct {
title mapFunc
lower mapFunc
titleSpan spanFunc
rewrite func(*context)
}{
{title, lower, isTitle, nil}, // und
{title, lower, isTitle, afnlRewrite}, // af
{aztrUpper(title), aztrLower, isTitle, nil}, // az
{title, lower, isTitle, nil}, // el
{ltUpper(title), ltLower, noSpan, nil}, // lt
{nlTitle, lower, nlTitleSpan, afnlRewrite}, // nl
{aztrUpper(title), aztrLower, isTitle, nil}, // tr
}
)
// makeUpper returns the upper-casing transformer for the language matched by
// t. Languages without a tailored mapper share the precomputed undUpper;
// the others get a fresh simpleCaser built from their upperFunc entry.
func makeUpper(t language.Tag, o options) transform.SpanningTransformer {
	_, idx, _ := matcher.Match(t)
	entry := upperFunc[idx]
	if entry.upper != nil {
		return &simpleCaser{f: entry.upper, span: entry.span}
	}
	return undUpper
}
// makeLower returns the lower-casing transformer for the language matched by
// t. Without a tailored mapper it returns one of the precomputed root-locale
// transformers; with one it returns either a simpleCaser (final sigma
// handling disabled via o.ignoreFinalSigma) or a lowerCaser whose mid-word
// mapper adds final sigma handling.
func makeLower(t language.Tag, o options) transform.SpanningTransformer {
	_, idx, _ := matcher.Match(t)
	mapper := lowerFunc[idx]
	switch {
	case mapper == nil && o.ignoreFinalSigma:
		return undLowerIgnoreSigma
	case mapper == nil:
		return undLower
	case o.ignoreFinalSigma:
		return &simpleCaser{f: mapper, span: isLower}
	default:
		return &lowerCaser{
			first:   mapper,
			midWord: finalSigma(mapper),
		}
	}
}
// makeTitle returns the title-casing transformer for the language matched by
// t. The mapper applied to the non-initial letters of a word depends on the
// options: a plain copy when o.noLower is set, the language's lower mapper
// when final sigma handling is disabled, and otherwise that mapper wrapped
// with final sigma handling.
func makeTitle(t language.Tag, o options) transform.SpanningTransformer {
	_, idx, _ := matcher.Match(t)
	cfg := &titleInfos[idx]

	var rest mapFunc
	switch {
	case o.noLower:
		rest = (*context).copy
	case o.ignoreFinalSigma:
		rest = cfg.lower
	default:
		rest = finalSigma(cfg.lower)
	}

	return &titleCaser{
		title:     cfg.title,
		lower:     rest,
		titleSpan: cfg.titleSpan,
		rewrite:   cfg.rewrite,
	}
}
// noSpan is the spanFunc used for casers that do not implement span
// detection: it sets the context error to transform.ErrEndOfSpan and returns
// false, so a Span loop using it stops at the first rune.
func noSpan(c *context) bool {
c.err = transform.ErrEndOfSpan
return false
}
// TODO: consider a similar special case for the fast majority lower case. This
// is a bit more involved so will require some more precise benchmarking to
// justify it.

// undUpperCaser implements the Transformer interface for doing an upper case
// mapping for the root locale (und). It eliminates the need for an allocation
// as it prevents escaping by not using function pointers.
type undUpperCaser struct{ transform.NopResetter }

// Transform upper-cases src into dst rune by rune, checkpointing after every
// rune, and returns the context's result.
func (t undUpperCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
	ctx := context{dst: dst, src: src, atEOF: atEOF}
	for {
		if !ctx.next() {
			break
		}
		upper(&ctx)
		ctx.checkpoint()
	}
	return ctx.ret()
}

// Span advances over src for as long as isUpper holds for the current rune,
// checkpointing after every accepted rune, and returns the context's span
// result.
func (t undUpperCaser) Span(src []byte, atEOF bool) (n int, err error) {
	ctx := context{src: src, atEOF: atEOF}
	for ctx.next() {
		if !isUpper(&ctx) {
			break
		}
		ctx.checkpoint()
	}
	return ctx.retSpan()
}
// undLowerIgnoreSigmaCaser implements the Transformer interface for doing
// a lower case mapping for the root locale (und) ignoring final sigma
// handling. This casing algorithm is used in some performance-critical packages
// like secure/precis and x/net/http/idna, which warrants its special-casing.
type undLowerIgnoreSigmaCaser struct{ transform.NopResetter }

// Transform lower-cases src into dst rune by rune. It checkpoints after every
// rune for which lower returns true and stops at the first rune for which it
// returns false.
func (t undLowerIgnoreSigmaCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
	ctx := context{dst: dst, src: src, atEOF: atEOF}
	for ctx.next() {
		if !lower(&ctx) {
			break
		}
		ctx.checkpoint()
	}
	return ctx.ret()
}

// Span implements a generic lower-casing. This is possible as isLower works
// for all lowercasing variants. All lowercase variants only vary in how they
// transform a non-lowercase letter. They will never change an already lowercase
// letter. In addition, there is no state.
func (t undLowerIgnoreSigmaCaser) Span(src []byte, atEOF bool) (n int, err error) {
	ctx := context{src: src, atEOF: atEOF}
	for ctx.next() {
		if !isLower(&ctx) {
			break
		}
		ctx.checkpoint()
	}
	return ctx.retSpan()
}
// simpleCaser implements the Transformer interface for doing a case operation
// on a rune-by-rune basis: f maps a single rune and span reports whether a
// rune is already in the target form.
type simpleCaser struct {
	context
	f    mapFunc
	span spanFunc
}

// Transform applies f to every rune of src, writing to dst. It checkpoints
// after each rune for which f returns true and stops at the first rune for
// which it returns false.
func (t *simpleCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
	ctx := context{dst: dst, src: src, atEOF: atEOF}
	for ctx.next() {
		if !t.f(&ctx) {
			break
		}
		ctx.checkpoint()
	}
	return ctx.ret()
}

// Span advances over src for as long as the span function accepts the
// current rune, checkpointing after every accepted rune.
func (t *simpleCaser) Span(src []byte, atEOF bool) (n int, err error) {
	ctx := context{src: src, atEOF: atEOF}
	for ctx.next() {
		if !t.span(&ctx) {
			break
		}
		ctx.checkpoint()
	}
	return ctx.retSpan()
}
// undLowerCaser implements the Transformer interface for doing a lower case
// mapping for the root locale (und). Unlike undLowerIgnoreSigmaCaser, its
// Transform tracks word boundaries and applies the final sigma rule to a Σ
// that occurs inside a word.
// NOTE(review): the earlier wording ("ignoring final sigma handling", used by
// secure/precis and x/net/http/idna) duplicated the comment of
// undLowerIgnoreSigmaCaser — confirm which callers rely on this type.
type undLowerCaser struct{ transform.NopResetter }
// Transform lower-cases src into dst for the root locale. isInterWord records
// whether the scan is currently between words: the first cased rune starts a
// word and is mapped with lower; within a word, a rune that is neither cased
// nor case-ignorable is copied and ends the word, a Σ is handled by
// finalSigmaBody and every other rune is mapped with lower. The loop stops at
// the first write that reports failure, before checkpointing that rune.
func (t undLowerCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
c := context{dst: dst, src: src, atEOF: atEOF}
for isInterWord := true; c.next(); {
if isInterWord {
if c.info.isCased() {
if !lower(&c) {
break
}
isInterWord = false
} else if !c.copy() {
break
}
} else {
if c.info.isNotCasedAndNotCaseIgnorable() {
if !c.copy() {
break
}
isInterWord = true
} else if !c.hasPrefix("Σ") {
if !lower(&c) {
break
}
} else if !finalSigmaBody(&c) {
break
}
}
c.checkpoint()
}
return c.ret()
}
// Span advances over src for as long as isLower holds for the current rune,
// checkpointing after every accepted rune, and returns the context's span
// result.
func (t undLowerCaser) Span(src []byte, atEOF bool) (n int, err error) {
	ctx := context{src: src, atEOF: atEOF}
	for ctx.next() {
		if !isLower(&ctx) {
			break
		}
		ctx.checkpoint()
	}
	return ctx.retSpan()
}
// lowerCaser implements the Transformer interface. The default Unicode lower
// casing requires different treatment for the first and subsequent characters
// of a word, most notably to handle the Greek final Sigma.
//
// first maps the first cased rune of a word and midWord maps the runes after
// it. The embedded undLowerIgnoreSigmaCaser contributes its Span method;
// Transform is overridden below.
type lowerCaser struct {
undLowerIgnoreSigmaCaser
context
first, midWord mapFunc
}
// Transform lower-cases src into dst. isInterWord records whether the scan is
// between words: the first cased rune of a word is mapped with t.first and
// later runes of the word with t.midWord; a rune that is neither cased nor
// case-ignorable is copied and ends the word. The loop stops at the first
// write that reports failure, before checkpointing that rune.
func (t *lowerCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
// The context lives in the receiver rather than on the stack.
t.context = context{dst: dst, src: src, atEOF: atEOF}
c := &t.context
for isInterWord := true; c.next(); {
if isInterWord {
if c.info.isCased() {
if !t.first(c) {
break
}
isInterWord = false
} else if !c.copy() {
break
}
} else {
if c.info.isNotCasedAndNotCaseIgnorable() {
if !c.copy() {
break
}
isInterWord = true
} else if !t.midWord(c) {
break
}
}
c.checkpoint()
}
return c.ret()
}
// titleCaser implements the Transformer interface. Title casing algorithms
// distinguish between the first letter of a word and subsequent letters of the
// same word. It uses state to avoid requiring a potentially infinite lookahead.
//
// The state is the isMidWord flag of the embedded context, which Transform
// and Span carry over from one call to the next.
type titleCaser struct {
context
// rune mappings used by the actual casing algorithms.
// title maps the first cased rune of a word.
title mapFunc
// lower maps the cased runes that follow within the same word.
lower mapFunc
// titleSpan is the Span counterpart of title.
titleSpan spanFunc
// rewrite, when non-nil, is called for every rune before it is mapped.
rewrite func(*context)
}
// Transform implements the standard Unicode title case algorithm as defined in
// Chapter 3 of The Unicode Standard:
// toTitlecase(X): Find the word boundaries in X according to Unicode Standard
// Annex #29, "Unicode Text Segmentation." For each word boundary, find the
// first cased character F following the word boundary. If F exists, map F to
// Titlecase_Mapping(F); then map all characters C between F and the following
// word boundary to Lowercase_Mapping(C).
func (t *titleCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
// Carry isMidWord over from the previous call so that a word split across
// two calls is still treated as one word.
t.context = context{dst: dst, src: src, atEOF: atEOF, isMidWord: t.isMidWord}
c := &t.context
if !c.next() {
return c.ret()
}
for {
// Capture the info of the current rune first: the mapping functions below
// may advance the context to the next rune.
p := c.info
if t.rewrite != nil {
t.rewrite(c)
}
wasMid := p.isMid()
// Break out of this loop on failure to ensure we do not modify the
// state incorrectly.
if p.isCased() {
if !c.isMidWord {
// First cased rune of a word.
if !t.title(c) {
break
}
c.isMidWord = true
} else if !t.lower(c) {
break
}
} else if !c.copy() {
break
} else if p.isBreak() {
c.isMidWord = false
}
// As we save the state of the transformer, it is safe to call
// checkpoint after any successful write.
// No checkpoint is taken directly after a Mid rune inside a word;
// presumably because whether the word continues depends on the next rune.
if !(c.isMidWord && wasMid) {
c.checkpoint()
}
if !c.next() {
break
}
// Two consecutive Mid runes end the word.
if wasMid && c.info.isMid() {
c.isMidWord = false
}
}
return c.ret()
}
// Span mirrors Transform without writing any output: it walks src with the
// same word-state logic and stops at the first cased rune that is not already
// in the expected form (titleSpan for the first cased rune of a word, isLower
// for the rest).
func (t *titleCaser) Span(src []byte, atEOF bool) (n int, err error) {
// Carry isMidWord over from the previous call, as Transform does.
t.context = context{src: src, atEOF: atEOF, isMidWord: t.isMidWord}
c := &t.context
if !c.next() {
return c.retSpan()
}
for {
// Capture the info of the current rune before the span functions can
// advance the context.
p := c.info
if t.rewrite != nil {
t.rewrite(c)
}
wasMid := p.isMid()
// Break out of this loop on failure to ensure we do not modify the
// state incorrectly.
if p.isCased() {
if !c.isMidWord {
if !t.titleSpan(c) {
break
}
c.isMidWord = true
} else if !isLower(c) {
break
}
} else if p.isBreak() {
c.isMidWord = false
}
// As we save the state of the transformer, it is safe to call
// checkpoint after any successful write.
if !(c.isMidWord && wasMid) {
c.checkpoint()
}
if !c.next() {
break
}
// Two consecutive Mid runes end the word.
if wasMid && c.info.isMid() {
c.isMidWord = false
}
}
return c.retSpan()
}
// finalSigma adds Greek final Sigma handling to another casing function. The
// returned mapper delegates to f for every rune except Σ; for Σ it calls
// finalSigmaBody, which decides whether the lowercased sigma should be σ or ς
// by looking ahead for case-ignorables and a cased letter.
func finalSigma(f mapFunc) mapFunc {
	return func(c *context) bool {
		if c.hasPrefix("Σ") {
			return finalSigmaBody(c)
		}
		return f(c)
	}
}
// finalSigmaBody lower-cases the Σ at the current position. It first writes
// the final form ς and then looks ahead, copying case-ignorable runes to the
// output. If the first rune that is not case-ignorable is cased, the ς already
// written is patched into σ. It returns false when the source runs out before
// that decision can be made, and true otherwise, including when the lookahead
// limit of maxIgnorable+1 runes is reached.
func finalSigmaBody(c *context) bool {
// Current rune must be Σ.
// ::NFD();
// # 03A3; 03C2; 03A3; 03A3; Final_Sigma; # GREEK CAPITAL LETTER SIGMA
// Σ } [:case-ignorable:]* [:cased:] → σ;
// [:cased:] [:case-ignorable:]* { Σ → ς;
// ::Any-Lower;
// ::NFC();
// Remember where ς is written so that it can be patched to σ later.
p := c.pDst
c.writeString("ς")
// TODO: we should do this here, but right now this will never have an
// effect as this is called when the prefix is Sigma, whereas Dutch and
// Afrikaans only test for an apostrophe.
//
// if t.rewrite != nil {
// t.rewrite(c)
// }
// We need to do one more iteration after maxIgnorable, as a cased
// letter is not an ignorable and may modify the result.
wasMid := false
for i := 0; i < maxIgnorable+1; i++ {
if !c.next() {
return false
}
if !c.info.isCaseIgnorable() {
// All Midword runes are also case ignorable, so we are
// guaranteed to have a letter or word break here. As we are
// unreading the rune, there is no need to unset c.isMidWord;
// the title caser will handle this.
if c.info.isCased() {
// p+1 is guaranteed to be in bounds: if writing ς was
// successful, p+1 will contain the second byte of ς. If not,
// this function will have returned after c.next returned false.
c.dst[p+1]++ // ς → σ
}
c.unreadRune()
return true
}
// A case ignorable may also introduce a word break, so we may need
// to continue searching even after detecting a break.
isMid := c.info.isMid()
if (wasMid && isMid) || c.info.isBreak() {
c.isMidWord = false
}
wasMid = isMid
c.copy()
}
return true
}
// finalSigmaSpan would be the same as isLower.
// elUpper implements Greek upper casing, which entails removing a predefined
// set of non-blocked modifiers. Note that these accents should not be removed
// for title casing!
// Example: "Οδός" -> "ΟΔΟΣ".
//
// It is used as a mapFunc: it upper-cases the current rune and, when that
// rune is Greek, also consumes the modifiers that follow it. The return value
// is the mapFunc result (whether a checkpoint is possible).
func elUpper(c *context) bool {
// From CLDR:
// [:Greek:] [^[:ccc=Not_Reordered:][:ccc=Above:]]*? { [\u0313\u0314\u0301\u0300\u0306\u0342\u0308\u0304] → ;
// [:Greek:] [^[:ccc=Not_Reordered:][:ccc=Iota_Subscript:]]*? { \u0345 → ;
r, _ := utf8.DecodeRune(c.src[c.pSrc:])
oldPDst := c.pDst
if !upper(c) {
return false
}
// Only Greek runes get their modifiers stripped.
if !unicode.Is(unicode.Greek, r) {
return true
}
// i counts the modifiers seen so far against the maxIgnorable limit.
i := 0
// Take the properties of the uppercased rune that is already written to the
// destination. This saves us the trouble of having to uppercase the
// decomposed rune again.
if b := norm.NFD.Properties(c.dst[oldPDst:]).Decomposition(); b != nil {
// Restore the destination position and process the decomposed rune.
r, sz := utf8.DecodeRune(b)
if r <= 0xFF { // See A.6.1
return true
}
c.pDst = oldPDst
// Insert the first rune and ignore the modifiers. See A.6.2.
c.writeBytes(b[:sz])
i = len(b[sz:]) / 2 // Greek modifiers are always of length 2.
}
for ; i < maxIgnorable && c.next(); i++ {
switch r, _ := utf8.DecodeRune(c.src[c.pSrc:]); r {
// Above and Iota Subscript
case 0x0300, // U+0300 COMBINING GRAVE ACCENT
0x0301, // U+0301 COMBINING ACUTE ACCENT
0x0304, // U+0304 COMBINING MACRON
0x0306, // U+0306 COMBINING BREVE
0x0308, // U+0308 COMBINING DIAERESIS
0x0313, // U+0313 COMBINING COMMA ABOVE
0x0314, // U+0314 COMBINING REVERSED COMMA ABOVE
0x0342, // U+0342 COMBINING GREEK PERISPOMENI
0x0345: // U+0345 COMBINING GREEK YPOGEGRAMMENI
// No-op. Gobble the modifier.
default:
switch v, _ := trie.lookup(c.src[c.pSrc:]); info(v).cccType() {
case cccZero:
// Not a modifier: put the rune back for the caller and stop.
c.unreadRune()
return true
// We don't need to test for IotaSubscript as the only rune that
// qualifies (U+0345) was already excluded in the switch statement
// above. See A.4.
case cccAbove:
return c.copy()
default:
// Some other modifier. We're still allowed to gobble Greek
// modifiers after this.
c.copy()
}
}
}
// The loop ended either because the lookahead limit was reached (true) or
// because c.next() returned false (false).
return i == maxIgnorable
}
// TODO: implement elUpperSpan (low-priority: complex and infrequent).
// ltLower is the Lithuanian lower-casing mapFunc. It lower-cases the current
// rune and, for I and J (including precomposed forms that decompose to I or J
// plus modifiers), inserts U+0307 COMBINING DOT ABOVE before a following
// modifier with CCC Above, as described by the CLDR rules quoted below. The
// return value is the mapFunc result (whether a checkpoint is possible).
func ltLower(c *context) bool {
// From CLDR:
// # Introduce an explicit dot above when lowercasing capital I's and J's
// # whenever there are more accents above.
// # (of the accents used in Lithuanian: grave, acute, tilde above, and ogonek)
// # 0049; 0069 0307; 0049; 0049; lt More_Above; # LATIN CAPITAL LETTER I
// # 004A; 006A 0307; 004A; 004A; lt More_Above; # LATIN CAPITAL LETTER J
// # 012E; 012F 0307; 012E; 012E; lt More_Above; # LATIN CAPITAL LETTER I WITH OGONEK
// # 00CC; 0069 0307 0300; 00CC; 00CC; lt; # LATIN CAPITAL LETTER I WITH GRAVE
// # 00CD; 0069 0307 0301; 00CD; 00CD; lt; # LATIN CAPITAL LETTER I WITH ACUTE
// # 0128; 0069 0307 0303; 0128; 0128; lt; # LATIN CAPITAL LETTER I WITH TILDE
// ::NFD();
// I } [^[:ccc=Not_Reordered:][:ccc=Above:]]* [:ccc=Above:] → i \u0307;
// J } [^[:ccc=Not_Reordered:][:ccc=Above:]]* [:ccc=Above:] → j \u0307;
// I \u0328 (Į) } [^[:ccc=Not_Reordered:][:ccc=Above:]]* [:ccc=Above:] → i \u0328 \u0307;
// I \u0300 (Ì) → i \u0307 \u0300;
// I \u0301 (Í) → i \u0307 \u0301;
// I \u0303 (Ĩ) → i \u0307 \u0303;
// ::Any-Lower();
// ::NFC();
// i counts the modifiers seen so far against the maxIgnorable limit.
i := 0
if r := c.src[c.pSrc]; r < utf8.RuneSelf {
// ASCII: only 'I' and 'J' need the lookahead below.
lower(c)
if r != 'I' && r != 'J' {
return true
}
} else {
p := norm.NFD.Properties(c.src[c.pSrc:])
if d := p.Decomposition(); len(d) >= 3 && (d[0] == 'I' || d[0] == 'J') {
// UTF-8 optimization: the decomposition will only have an above
// modifier if the last rune of the decomposition is in [U+300-U+311].
// In all other cases, a decomposition starting with I is always
// an I followed by modifiers that are not cased themselves. See A.2.
if d[1] == 0xCC && d[2] <= 0x91 { // A.2.4.
if !c.writeBytes(d[:1]) {
return false
}
c.dst[c.pDst-1] += 'a' - 'A' // lower
// Assumption: modifier never changes on lowercase. See A.1.
// Assumption: all modifiers added have CCC = Above. See A.2.3.
return c.writeString("\u0307") && c.writeBytes(d[1:])
}
// In all other cases the additional modifiers will have a CCC
// that is less than 230 (Above). We will insert the U+0307, if
// needed, after these modifiers so that a string in FCD form
// will remain so. See A.2.2.
lower(c)
i = 1
} else {
return lower(c)
}
}
for ; i < maxIgnorable && c.next(); i++ {
switch c.info.cccType() {
case cccZero:
// Not a modifier: put the rune back for the caller and stop.
c.unreadRune()
return true
case cccAbove:
return c.writeString("\u0307") && c.copy() // See A.1.
default:
c.copy() // See A.1.
}
}
// The loop ended either because the lookahead limit was reached (true) or
// because c.next() returned false (false).
return i == maxIgnorable
}
// ltLowerSpan would be the same as isLower.
// ltUpper wraps the mapFunc f with the Lithuanian upper-casing tailoring: the
// current rune is mapped with f and, when it is a Soft_Dotted rune, a
// following U+0307 COMBINING DOT ABOVE is dropped from the output. For an
// output of plain 'I' followed by U+0307 and one of a few common accents, the
// precomposed capital letter is written instead.
func ltUpper(f mapFunc) mapFunc {
return func(c *context) bool {
// Unicode:
// 0307; 0307; ; ; lt After_Soft_Dotted; # COMBINING DOT ABOVE
//
// From CLDR:
// # Remove \u0307 following soft-dotteds (i, j, and the like), with possible
// # intervening non-230 marks.
// ::NFD();
// [:Soft_Dotted:] [^[:ccc=Not_Reordered:][:ccc=Above:]]* { \u0307 → ;
// ::Any-Upper();
// ::NFC();
// TODO: See A.5. A soft-dotted rune never has an exception. This would
// allow us to overload the exception bit and encode this property in
// info. Need to measure performance impact of this.
r, _ := utf8.DecodeRune(c.src[c.pSrc:])
oldPDst := c.pDst
if !f(c) {
return false
}
if !unicode.Is(unicode.Soft_Dotted, r) {
return true
}
// We don't need to do an NFD normalization, as a soft-dotted rune never
// contains U+0307. See A.3.
i := 0
for ; i < maxIgnorable && c.next(); i++ {
switch c.info.cccType() {
case cccZero:
// Not a modifier: put the rune back for the caller and stop.
c.unreadRune()
return true
case cccAbove:
if c.hasPrefix("\u0307") {
// We don't do a full NFC, but rather combine runes for
// some of the common cases. (Returning NFC or
// preserving normal form is neither a requirement nor
// a possibility anyway).
// Skip U+0307 by advancing to the rune after it.
if !c.next() {
return false
}
// Only a single 'I' written so far, followed by a rune whose first
// UTF-8 byte is 0xCC: pick the precomposed form by the second byte.
if c.dst[oldPDst] == 'I' && c.pDst == oldPDst+1 && c.src[c.pSrc] == 0xcc {
s := ""
switch c.src[c.pSrc+1] {
case 0x80: // U+0300 COMBINING GRAVE ACCENT
s = "\u00cc" // U+00CC LATIN CAPITAL LETTER I WITH GRAVE
case 0x81: // U+0301 COMBINING ACUTE ACCENT
s = "\u00cd" // U+00CD LATIN CAPITAL LETTER I WITH ACUTE
case 0x83: // U+0303 COMBINING TILDE
s = "\u0128" // U+0128 LATIN CAPITAL LETTER I WITH TILDE
case 0x88: // U+0308 COMBINING DIAERESIS
s = "\u00cf" // U+00CF LATIN CAPITAL LETTER I WITH DIAERESIS
default:
}
if s != "" {
c.pDst = oldPDst
return c.writeString(s)
}
}
}
return c.copy()
default:
c.copy()
}
}
// The loop ended either because the lookahead limit was reached (true) or
// because c.next() returned false (false).
return i == maxIgnorable
}
}
// TODO: implement ltUpperSpan (low priority: complex and infrequent).
// aztrUpper wraps the mapFunc f with the Azeri/Turkish upper-casing
// tailoring: a source byte 'i' is written as İ, and every other rune is
// handed to f unchanged.
func aztrUpper(f mapFunc) mapFunc {
	return func(c *context) bool {
		if c.src[c.pSrc] != 'i' {
			return f(c)
		}
		// i→İ;
		return c.writeString("İ")
	}
}
// aztrLower is the Azeri/Turkish lower-casing mapFunc. İ becomes i; a capital
// I becomes i when a U+0307 COMBINING DOT ABOVE follows it (the U+0307 is
// dropped) and dotless ı otherwise; every other rune is mapped with lower.
// The named result done carries the mapFunc result (whether a checkpoint is
// possible) for the dotless-ı path.
func aztrLower(c *context) (done bool) {
// From CLDR:
// # I and i-dotless; I-dot and i are case pairs in Turkish and Azeri
// # 0130; 0069; 0130; 0130; tr; # LATIN CAPITAL LETTER I WITH DOT ABOVE
// İ→i;
// # When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into i.
// # This matches the behavior of the canonically equivalent I-dot_above
// # 0307; ; 0307; 0307; tr After_I; # COMBINING DOT ABOVE
// # When lowercasing, unless an I is before a dot_above, it turns into a dotless i.
// # 0049; 0131; 0049; 0049; tr Not_Before_Dot; # LATIN CAPITAL LETTER I
// I([^[:ccc=Not_Reordered:][:ccc=Above:]]*)\u0307 → i$1 ;
// I→ı ;
// ::Any-Lower();
if c.hasPrefix("\u0130") { // İ
return c.writeString("i")
}
if c.src[c.pSrc] != 'I' {
return lower(c)
}
// We ignore the lower-case I for now, but insert it later when we know
// which form we need.
// start is the source offset just past the I; the runes skipped by the loop
// below are written from there once the form of the i is known.
start := c.pSrc + c.sz
i := 0
Loop:
// We check for up to n ignorables before \u0307. As \u0307 is an
// ignorable as well, n is maxIgnorable-1.
for ; i < maxIgnorable && c.next(); i++ {
switch c.info.cccType() {
case cccAbove:
if c.hasPrefix("\u0307") {
return c.writeString("i") && c.writeBytes(c.src[start:c.pSrc]) // ignore U+0307
}
done = true
break Loop
case cccZero:
c.unreadRune()
done = true
break Loop
default:
// We'll write this rune after we know which starter to use.
}
}
if i == maxIgnorable {
done = true
}
// No U+0307 was found: write dotless ı followed by the skipped runes. done
// stays false only when the loop ended because c.next() returned false.
return c.writeString("ı") && c.writeBytes(c.src[start:c.pSrc+c.sz]) && done
}
// aztrLowerSpan would be the same as isLower.
// nlTitle is the Dutch title-casing mapFunc. A word-initial "i" or "I" is
// written as "I" and, when directly followed by "j" or "J", that letter is
// written as "J" as well, so that an initial "ij" becomes "IJ". Every other
// rune is mapped with title.
func nlTitle(c *context) bool {
	// From CLDR:
	// # Special titlecasing for Dutch initial "ij".
	// ::Any-Title();
	// # Fix up Ij at the beginning of a "word" (per Any-Title, not UAX #29)
	// [:^WB=ALetter:] [:WB=Extend:]* [[:WB=MidLetter:][:WB=MidNumLet:]]? { Ij } → IJ ;
	switch c.src[c.pSrc] {
	case 'I', 'i':
		// Handled below.
	default:
		return title(c)
	}

	if !c.writeString("I") {
		return false
	}
	if !c.next() {
		return false
	}

	switch c.src[c.pSrc] {
	case 'j', 'J':
		return c.writeString("J")
	}
	// Not part of an "ij": leave the rune for the caller.
	c.unreadRune()
	return true
}
// nlTitleSpan is the Span counterpart of nlTitle. For a rune other than "I"
// it defers to isTitle. After an "I" it returns false when the input ends or
// a lowercase "j" follows, consumes a following "J", and puts any other rune
// back for the caller.
func nlTitleSpan(c *context) bool {
	// From CLDR:
	// # Special titlecasing for Dutch initial "ij".
	// ::Any-Title();
	// # Fix up Ij at the beginning of a "word" (per Any-Title, not UAX #29)
	// [:^WB=ALetter:] [:WB=Extend:]* [[:WB=MidLetter:][:WB=MidNumLet:]]? { Ij } → IJ ;
	if c.src[c.pSrc] != 'I' {
		return isTitle(c)
	}
	if !c.next() {
		return false
	}
	switch c.src[c.pSrc] {
	case 'j':
		return false
	case 'J':
		// Already title-cased "IJ": keep the J consumed.
	default:
		c.unreadRune()
	}
	return true
}
// afnlRewrite is the rewrite hook installed for the Afrikaans and Dutch title
// casers. When the current rune is an apostrophe — the ASCII form (U+0027) or
// the typographic RIGHT SINGLE QUOTATION MARK (U+2019) — it marks the context
// as mid-word, so that the title caser lower-cases the next cased rune
// instead of title-casing it.
//
// Not part of CLDR, but see http://unicode.org/cldr/trac/ticket/7078.
func afnlRewrite(c *context) {
	// U+2019 is spelled as an escape on purpose: as a raw character it is
	// easily lost, and an empty prefix would presumably match every input
	// and mark all runes as mid-word.
	if c.hasPrefix("'") || c.hasPrefix("\u2019") {
		c.isMidWord = true
	}
}

File diff suppressed because it is too large Load Diff

View File

@ -1,215 +0,0 @@
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
package cases
// This file contains definitions for interpreting the trie value of the case
// trie generated by "go run gen*.go". It is shared by both the generator
// program and the resultant package. Sharing is achieved by the generator
// copying gen_trieval.go to trieval.go and changing what's above this comment.
// info holds case information for a single rune. It is the value returned
// by a trie lookup. Most mapping information can be stored in a single 16-bit
// value. If not, for example when a rune is mapped to multiple runes, the value
// stores some basic case data and an index into an array with additional data.
//
// The per-rune values have the following format:
//
// if (exception) {
// 15..5 unsigned exception index
// 4 unused
// } else {
// 15..8 XOR pattern or index to XOR pattern for case mapping
// Only 13..8 are used for XOR patterns.
// 7 inverseFold (fold to upper, not to lower)
// 6 index: interpret the XOR pattern as an index
// or isMid if case mode is cIgnorableUncased.
// 5..4 CCC: zero (normal or break), above or other
// }
// 3 exception: interpret this value as an exception index
// (TODO: is this bit necessary? Probably implied from case mode.)
// 2..0 case mode
//
// For the non-exceptional cases, a rune must be either uncased, lowercase or
// uppercase. If the rune is cased, the XOR pattern maps either a lowercase
// rune to uppercase or an uppercase rune to lowercase (applied to the 10
// least-significant bits of the rune).
//
// See the definitions below for a more detailed description of the various
// bits.
type info uint16
const (
// casedMask selects the two low bits of the case mode; isCased reports true
// when either of them is set.
casedMask = 0x0003
// fullCasedMask selects the complete 3-bit case mode (bits 2..0).
fullCasedMask = 0x0007
// ignorableMask and ignorableValue select bits 2..1 and the pattern 10x,
// which covers the two case-ignorable modes (100 and 101).
ignorableMask = 0x0006
ignorableValue = 0x0004
inverseFoldBit = 1 << 7
// isMidBit shares bit 6 with xorIndexBit; isMid only honors it when the
// case mode is cIgnorableUncased.
isMidBit = 1 << 6
exceptionBit = 1 << 3
// exceptionShift and numExceptionBits describe the exception index stored
// in bits 15..5.
exceptionShift = 5
numExceptionBits = 11
xorIndexBit = 1 << 6
xorShift = 8
// There is no mapping if all xor bits and the exception bit are zero.
// Note that 0xff80 also includes bit 7 (inverseFoldBit).
hasMappingMask = 0xff80 | exceptionBit
)
// The case mode bits encodes the case type of a rune. This includes uncased,
// title, upper and lower case and case ignorable. (For a definition of these
// terms see Chapter 3 of The Unicode Standard Core Specification.) In some rare
// cases, a rune can be both cased and case-ignorable. This is encoded by
// cIgnorableCased. A rune of this type is always lower case. Some runes are
// cased while not having a mapping.
//
// A common pattern for scripts in the Unicode standard is for upper and lower
// case runes to alternate for increasing rune values (e.g. the accented Latin
// ranges starting from U+0100 and U+1E00 among others and some Cyrillic
// characters). We use this property by defining a cXORCase mode, where the case
// mode (always upper or lower case) is derived from the rune value. As the XOR
// pattern for case mappings is often identical for successive runes, using
// cXORCase can result in large series of identical trie values. This, in turn,
// allows us to better compress the trie blocks.
// Case mode values, stored in bits 2..0 of an info value. The trailing
// comments give the bit pattern of each mode.
const (
cUncased info = iota // 000
cTitle // 001
cLower // 010
cUpper // 011
cIgnorableUncased // 100
cIgnorableCased // 101 // lower case if mappings exist
cXORCase // 11x // case is cLower | ((rune&1) ^ x)
// maxCaseMode equals cUpper (011).
// NOTE(review): presumably the highest plain cased mode — confirm against
// its users, which are not visible here.
maxCaseMode = cUpper
)
// isCased reports whether either of the two low case-mode bits (casedMask) is
// set. This excludes exactly the modes 000 (cUncased) and 100
// (cIgnorableUncased).
func (c info) isCased() bool {
return c&casedMask != 0
}
// isCaseIgnorable reports whether bits 2..1 of the case mode are 10, i.e.
// whether the mode is cIgnorableUncased (100) or cIgnorableCased (101).
func (c info) isCaseIgnorable() bool {
return c&ignorableMask == ignorableValue
}
// isNotCasedAndNotCaseIgnorable reports whether all three case-mode bits are
// zero, i.e. whether the mode is cUncased.
func (c info) isNotCasedAndNotCaseIgnorable() bool {
return c&fullCasedMask == 0
}
// isCaseIgnorableAndNotCased reports whether the case mode is exactly
// cIgnorableUncased.
func (c info) isCaseIgnorableAndNotCased() bool {
return c&fullCasedMask == cIgnorableUncased
}
// isMid reports whether the case mode is cIgnorableUncased and isMidBit is
// set. Bit 6 only means "mid" in that mode; it doubles as xorIndexBit
// otherwise.
func (c info) isMid() bool {
return c&(fullCasedMask|isMidBit) == isMidBit|cIgnorableUncased
}
// The case mapping implementation will need to know about various Canonical
// Combining Class (CCC) values. We encode two of these in the trie value:
// cccZero (0) and cccAbove (230). If the value is cccOther, it means that
// CCC(r) > 0, but not 230. A value of cccBreak means that CCC(r) == 0 and that
// the rune also has the break category Break (see below).
// CCC type values as stored in bits 5..4 of an info value. With iota << 4 the
// four values are 0x00, 0x10, 0x20 and 0x30, so cccMask is 0x30.
const (
cccBreak info = iota << 4
cccZero
cccAbove
cccOther
cccMask = cccBreak | cccZero | cccAbove | cccOther
)
// Numeric canonical combining class values, named after the class they stand
// for; above (230) is the class that cccAbove encodes.
// NOTE(review): presumably used by the table generator — no use is visible
// here.
const (
starter = 0
above = 230
iotaSubscript = 240
)
// The exceptions slice holds data that does not fit in a normal info entry.
// The entry is pointed to by the exception index in an entry. It has the
// following format:
//
// Header
// byte 0:
// 7..6 unused
// 5..4 CCC type (same bits as entry)
// 3 unused
// 2..0 length of fold
//
// byte 1:
// 7..6 unused
// 5..3 length of 1st mapping of case type
// 2..0 length of 2nd mapping of case type
//
// case 1st 2nd
// lower -> upper, title
// upper -> lower, title
// title -> lower, upper
//
// Lengths with the value 0x7 indicate no value and implies no change.
// A length of 0 indicates a mapping to zero-length string.
//
// Body bytes:
// case folding bytes
// lowercase mapping bytes
// uppercase mapping bytes
// titlecase mapping bytes
// closure mapping bytes (for NFKC_Casefold). (TODO)
//
// Fallbacks:
// missing fold -> lower
// missing title -> upper
// all missing -> original rune
//
// exceptions starts with a dummy byte to enforce that there is no zero index
// value.
// lengthMask and lengthBits describe the 3-bit length fields of an exception
// header.
// NOTE(review): the format description says a length of 0x7 means "no
// change", while noChange is 0 — confirm how noChange is used.
const (
lengthMask = 0x07
lengthBits = 3
noChange = 0
)
// References to generated trie.
var trie = newCaseTrie(0)
// sparse exposes the generated sparseValues and sparseOffsets tables through
// the sparseBlocks lookup type.
var sparse = sparseBlocks{
values: sparseValues[:],
offsets: sparseOffsets[:],
}
// Sparse block lookup code.
// valueRange is one entry of a sparse block: every byte in the inclusive
// range [lo, hi] maps to value.
type valueRange struct {
	value  uint16
	lo, hi byte
}

// sparseBlocks holds all sparse blocks back to back. Block n occupies
// values[offsets[n]:offsets[n+1]].
type sparseBlocks struct {
	values  []valueRange
	offsets []uint16
}

// lookup binary-searches block n for the range that contains b and returns
// the value of that range. It returns 0 when no range in the block contains b.
// The search relies on the ranges of a block being ordered by their bounds.
func (s *sparseBlocks) lookup(n uint32, b byte) uint16 {
	start, end := s.offsets[n], s.offsets[n+1]
	for start < end {
		mid := start + (end-start)/2
		entry := s.values[mid]
		switch {
		case b < entry.lo:
			// b can only be in the lower half.
			end = mid
		case b > entry.hi:
			// b can only be in the upper half.
			start = mid + 1
		default:
			return entry.value
		}
	}
	return 0
}
// lastRuneForTesting is the last rune (U+1FFFF) used for testing. Everything
// after this is boring.
// NOTE(review): presumably tests iterate over runes up to this bound; the
// tests are not visible here — confirm.
const lastRuneForTesting = rune(0x1FFFF)

View File

@ -1,32 +0,0 @@
# Bazel build rules for the vendored golang.org/x/text/internal package.
load("@io_bazel_rules_go//go:def.bzl", "go_library")
# The Go library itself, importable as golang.org/x/text/internal and mapped
# into the Kubernetes vendor tree.
go_library(
name = "go_default_library",
srcs = [
"internal.go",
"match.go",
"tables.go",
],
importmap = "k8s.io/kubernetes/vendor/golang.org/x/text/internal",
importpath = "golang.org/x/text/internal",
visibility = ["//visibility:public"],
deps = ["//vendor/golang.org/x/text/language:go_default_library"],
)
# Every file of this package; private to the package and tagged automanaged.
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
# The sources of this package plus those of its tag and utf8internal
# sub-packages.
filegroup(
name = "all-srcs",
srcs = [
":package-srcs",
"//vendor/golang.org/x/text/internal/tag:all-srcs",
"//vendor/golang.org/x/text/internal/utf8internal:all-srcs",
],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)

View File

@ -1,52 +0,0 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
package main
import (
"log"
"golang.org/x/text/internal/gen"
"golang.org/x/text/language"
"golang.org/x/text/unicode/cldr"
)
// main generates tables.go for package internal. It decodes the CLDR core
// archive and writes the Parent table, which maps the compact index of each
// CLDR locale to the compact index of its nearest ancestor that has one.
func main() {
	archive := gen.OpenCLDRCoreZip()
	defer archive.Close()

	decoder := new(cldr.Decoder)
	data, err := decoder.DecodeZip(archive)
	if err != nil {
		log.Fatalf("DecodeZip: %v", err)
	}

	out := gen.NewCodeWriter()
	defer out.WriteGoFile("tables.go", "internal")

	// Build the parents table. Every slot starts at 0, the index used for und.
	table := make([]uint16, language.NumCompactTags)
	for _, locale := range data.Locales() {
		tag := language.MustParse(locale)
		self, hasIndex := language.CompactIndex(tag)
		if !hasIndex {
			continue
		}
		// Walk up the parent chain until an ancestor with a compact index is
		// found; keep 0 (und) when the chain reaches Und first.
		ancestor := 0
		for p := tag.Parent(); p != language.Und; p = p.Parent() {
			idx, found := language.CompactIndex(p)
			if found {
				ancestor = idx
				break
			}
		}
		table[self] = uint16(ancestor)
	}

	out.WriteComment(`
Parent maps a compact index of a tag to the compact index of the parent of
this tag.`)
	out.WriteVar("Parent", table)
}

View File

@ -1,51 +0,0 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:generate go run gen.go
// Package internal contains non-exported functionality that are used by
// packages in the text repository.
package internal // import "golang.org/x/text/internal"
import (
"sort"
"golang.org/x/text/language"
)
// SortTags sorts tags in place, ordering them by their String form.
func SortTags(tags []language.Tag) {
	byString := sorter(tags)
	sort.Sort(byString)
}

// sorter adapts a slice of tags to sort.Interface, comparing tags by the
// result of their String method.
type sorter []language.Tag

// Len reports the number of tags.
func (ts sorter) Len() int { return len(ts) }

// Swap exchanges the tags at positions a and b.
func (ts sorter) Swap(a, b int) { ts[a], ts[b] = ts[b], ts[a] }

// Less reports whether the tag at a renders to a smaller string than the tag
// at b.
func (ts sorter) Less(a, b int) bool {
	left, right := ts[a].String(), ts[b].String()
	return left < right
}
// UniqueTags sorts tags in place, drops tags whose String form repeats, and
// returns the prefix of tags that holds the unique entries. The input slice
// is modified.
func UniqueTags(tags []language.Tag) []language.Tag {
	if len(tags) <= 1 {
		return tags
	}
	SortTags(tags)
	// last is the position of the most recently kept tag. Because the slice
	// is sorted, a tag is new exactly when it compares greater than that one.
	last := 0
	for _, tag := range tags[1:] {
		if tags[last].String() < tag.String() {
			last++
			tags[last] = tag
		}
	}
	return tags[:last+1]
}

View File

@ -1,67 +0,0 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package internal
// This file contains matchers that implement CLDR inheritance.
//
// See http://unicode.org/reports/tr35/#Locale_Inheritance.
//
// Some of the inheritance described in this document is already handled by
// the cldr package.
import (
"golang.org/x/text/language"
)
// TODO: consider if (some of the) matching algorithm needs to be public after
// getting some feel about what is generic and what is specific.
// NewInheritanceMatcher returns a matcher that matches based on the
// inheritance chain.
//
// The matcher uses canonicalization and the parent relationship to find a
// match. The resulting match will always be either Und or a language with the
// same language and script as the requested language. It will not match
// languages for which there is understood to be mutual or one-directional
// intelligibility.
//
// A Match will indicate an Exact match if the language matches after
// canonicalization and High if the matched tag is a parent.
//
// Each tag of t is stored under its canonical form, or under the tag itself
// when canonicalization fails. When two tags share a key, the later position
// in t wins.
func NewInheritanceMatcher(t []language.Tag) *InheritanceMatcher {
	m := &InheritanceMatcher{index: make(map[language.Tag]int)}
	for pos, supported := range t {
		key := supported
		if canon, err := language.All.Canonicalize(supported); err == nil {
			key = canon
		}
		m.index[key] = pos
	}
	return m
}
// InheritanceMatcher matches requested tags against a fixed set of supported
// tags by walking up each requested tag's parent chain.
type InheritanceMatcher struct {
// index maps each supported tag (canonicalized where possible) to its
// position in the slice given to NewInheritanceMatcher.
index map[language.Tag]int
}
// Match returns the first supported tag found for any of the wanted tags,
// together with its index and a confidence. The wanted tags are tried in
// order. Each one is canonicalized (falling back to the tag itself on error)
// and then looked up, followed by its parents up to and including Und. A hit
// on the canonical tag itself is Exact and a hit on an ancestor is High. When
// nothing matches, Match returns (Und, 0, No).
func (m InheritanceMatcher) Match(want ...language.Tag) (language.Tag, int, language.Confidence) {
	for _, requested := range want {
		candidate, err := language.All.Canonicalize(requested)
		if err != nil {
			candidate = requested
		}
		// The confidence drops to High as soon as the walk moves to a parent.
		for conf := language.Exact; ; conf = language.High {
			if pos, found := m.index[candidate]; found {
				return candidate, pos, conf
			}
			if candidate == language.Und {
				break
			}
			candidate = candidate.Parent()
		}
	}
	return language.Und, 0, language.No
}

View File

@ -1,117 +0,0 @@
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
package internal
// Parent maps a compact index of a tag to the compact index of the parent of
// this tag.
//
// The generator (gen.go) starts every entry at 0, which it uses for und, and
// overrides it only when an ancestor with a compact index exists. An entry of
// 0x0000 therefore means that the parent is und.
var Parent = []uint16{ // 754 elements
// Entry 0 - 3F
0x0000, 0x0053, 0x00e5, 0x0000, 0x0003, 0x0003, 0x0000, 0x0006,
0x0000, 0x0008, 0x0000, 0x000a, 0x0000, 0x000c, 0x000c, 0x000c,
0x000c, 0x000c, 0x000c, 0x000c, 0x000c, 0x000c, 0x000c, 0x000c,
0x000c, 0x000c, 0x000c, 0x000c, 0x000c, 0x000c, 0x000c, 0x000c,
0x000c, 0x000c, 0x000c, 0x000c, 0x000c, 0x000c, 0x000c, 0x000c,
0x000c, 0x0000, 0x0000, 0x002a, 0x0000, 0x002c, 0x0000, 0x002e,
0x0000, 0x0000, 0x0031, 0x0030, 0x0030, 0x0000, 0x0035, 0x0000,
0x0037, 0x0000, 0x0039, 0x0000, 0x003b, 0x0000, 0x003d, 0x0000,
// Entry 40 - 7F
0x0000, 0x0040, 0x0000, 0x0042, 0x0042, 0x0000, 0x0045, 0x0045,
0x0000, 0x0048, 0x0000, 0x004a, 0x0000, 0x0000, 0x004d, 0x004c,
0x004c, 0x0000, 0x0051, 0x0051, 0x0051, 0x0051, 0x0000, 0x0056,
0x0000, 0x0058, 0x0000, 0x005a, 0x0000, 0x005c, 0x005c, 0x0000,
0x005f, 0x0000, 0x0061, 0x0000, 0x0063, 0x0000, 0x0065, 0x0065,
0x0000, 0x0068, 0x0000, 0x006a, 0x006a, 0x006a, 0x006a, 0x006a,
0x006a, 0x006a, 0x0000, 0x0072, 0x0000, 0x0074, 0x0000, 0x0076,
0x0000, 0x0000, 0x0079, 0x0000, 0x007b, 0x0000, 0x007d, 0x0000,
// Entry 80 - BF
0x007f, 0x007f, 0x0000, 0x0082, 0x0082, 0x0000, 0x0085, 0x0086,
0x0086, 0x0086, 0x0085, 0x0087, 0x0086, 0x0086, 0x0086, 0x0085,
0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0087, 0x0086,
0x0086, 0x0086, 0x0086, 0x0087, 0x0086, 0x0087, 0x0086, 0x0086,
0x0087, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086,
0x0086, 0x0086, 0x0085, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086,
0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086,
0x0086, 0x0086, 0x0086, 0x0086, 0x0085, 0x0086, 0x0085, 0x0086,
// Entry C0 - FF
0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0087,
0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0085,
0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0087, 0x0086, 0x0086,
0x0087, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086,
0x0086, 0x0086, 0x0086, 0x0086, 0x0085, 0x0085, 0x0086, 0x0086,
0x0085, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0000, 0x00ee,
0x0000, 0x00f0, 0x00f1, 0x00f1, 0x00f1, 0x00f1, 0x00f1, 0x00f1,
0x00f1, 0x00f1, 0x00f1, 0x00f0, 0x00f1, 0x00f0, 0x00f0, 0x00f1,
// Entry 100 - 13F
0x00f1, 0x00f0, 0x00f1, 0x00f1, 0x00f1, 0x00f1, 0x00f0, 0x00f1,
0x00f1, 0x00f1, 0x00f1, 0x00f1, 0x00f1, 0x0000, 0x010d, 0x0000,
0x010f, 0x0000, 0x0111, 0x0000, 0x0113, 0x0113, 0x0000, 0x0116,
0x0116, 0x0116, 0x0116, 0x0000, 0x011b, 0x0000, 0x011d, 0x0000,
0x011f, 0x011f, 0x0000, 0x0122, 0x0122, 0x0122, 0x0122, 0x0122,
0x0122, 0x0122, 0x0122, 0x0122, 0x0122, 0x0122, 0x0122, 0x0122,
0x0122, 0x0122, 0x0122, 0x0122, 0x0122, 0x0122, 0x0122, 0x0122,
0x0122, 0x0122, 0x0122, 0x0122, 0x0122, 0x0122, 0x0122, 0x0122,
// Entry 140 - 17F
0x0122, 0x0122, 0x0122, 0x0122, 0x0122, 0x0122, 0x0122, 0x0122,
0x0122, 0x0122, 0x0122, 0x0122, 0x0122, 0x0122, 0x0122, 0x0122,
0x0122, 0x0000, 0x0151, 0x0000, 0x0153, 0x0000, 0x0155, 0x0000,
0x0157, 0x0000, 0x0159, 0x0000, 0x015b, 0x015b, 0x015b, 0x0000,
0x015f, 0x0000, 0x0000, 0x0162, 0x0000, 0x0164, 0x0000, 0x0166,
0x0166, 0x0166, 0x0000, 0x016a, 0x0000, 0x016c, 0x0000, 0x016e,
0x0000, 0x0170, 0x0170, 0x0000, 0x0173, 0x0000, 0x0175, 0x0000,
0x0177, 0x0000, 0x0179, 0x0000, 0x017b, 0x0000, 0x017d, 0x0000,
// Entry 180 - 1BF
0x017f, 0x0000, 0x0181, 0x0181, 0x0181, 0x0181, 0x0000, 0x0000,
0x0187, 0x0000, 0x0000, 0x018a, 0x0000, 0x018c, 0x0000, 0x0000,
0x018f, 0x0000, 0x0191, 0x0000, 0x0000, 0x0194, 0x0000, 0x0000,
0x0197, 0x0000, 0x0199, 0x0000, 0x019b, 0x0000, 0x019d, 0x0000,
0x019f, 0x0000, 0x01a1, 0x0000, 0x01a3, 0x0000, 0x01a5, 0x0000,
0x01a7, 0x0000, 0x01a9, 0x0000, 0x01ab, 0x01ab, 0x0000, 0x01ae,
0x0000, 0x01b0, 0x0000, 0x01b2, 0x0000, 0x01b4, 0x0000, 0x01b6,
0x0000, 0x0000, 0x01b9, 0x0000, 0x01bb, 0x0000, 0x01bd, 0x0000,
// Entry 1C0 - 1FF
0x01bf, 0x0000, 0x01c1, 0x0000, 0x01c3, 0x0000, 0x01c5, 0x01c5,
0x01c5, 0x01c5, 0x0000, 0x01ca, 0x0000, 0x01cc, 0x01cc, 0x0000,
0x01cf, 0x0000, 0x01d1, 0x0000, 0x01d3, 0x0000, 0x01d5, 0x0000,
0x01d7, 0x0000, 0x01d9, 0x01d9, 0x0000, 0x01dc, 0x0000, 0x01de,
0x0000, 0x01e0, 0x0000, 0x01e2, 0x0000, 0x01e4, 0x0000, 0x01e6,
0x0000, 0x01e8, 0x0000, 0x01ea, 0x0000, 0x01ec, 0x0000, 0x01ee,
0x01ee, 0x01ee, 0x0000, 0x01f2, 0x0000, 0x01f4, 0x0000, 0x01f6,
0x0000, 0x01f8, 0x0000, 0x0000, 0x01fb, 0x0000, 0x01fd, 0x01fd,
// Entry 200 - 23F
0x0000, 0x0200, 0x0000, 0x0202, 0x0202, 0x0000, 0x0205, 0x0205,
0x0000, 0x0208, 0x0208, 0x0208, 0x0208, 0x0208, 0x0208, 0x0208,
0x0000, 0x0210, 0x0000, 0x0212, 0x0000, 0x0214, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x021a, 0x0000, 0x0000, 0x021d, 0x0000,
0x021f, 0x021f, 0x0000, 0x0222, 0x0000, 0x0224, 0x0224, 0x0000,
0x0000, 0x0228, 0x0227, 0x0227, 0x0000, 0x0000, 0x022d, 0x0000,
0x022f, 0x0000, 0x0231, 0x0000, 0x023d, 0x0233, 0x023d, 0x023d,
0x023d, 0x023d, 0x023d, 0x023d, 0x023d, 0x0233, 0x023d, 0x023d,
// Entry 240 - 27F
0x0000, 0x0240, 0x0240, 0x0240, 0x0000, 0x0244, 0x0000, 0x0246,
0x0000, 0x0248, 0x0248, 0x0000, 0x024b, 0x0000, 0x024d, 0x024d,
0x024d, 0x024d, 0x024d, 0x024d, 0x0000, 0x0254, 0x0000, 0x0256,
0x0000, 0x0258, 0x0000, 0x025a, 0x0000, 0x025c, 0x0000, 0x0000,
0x025f, 0x025f, 0x025f, 0x0000, 0x0263, 0x0000, 0x0265, 0x0000,
0x0267, 0x0000, 0x0000, 0x026a, 0x0269, 0x0269, 0x0000, 0x026e,
0x0000, 0x0270, 0x0000, 0x0272, 0x0000, 0x0000, 0x0000, 0x0000,
0x0277, 0x0000, 0x0000, 0x027a, 0x0000, 0x027c, 0x027c, 0x027c,
// Entry 280 - 2BF
0x027c, 0x0000, 0x0281, 0x0281, 0x0281, 0x0000, 0x0285, 0x0285,
0x0285, 0x0285, 0x0285, 0x0000, 0x028b, 0x028b, 0x028b, 0x028b,
0x0000, 0x0000, 0x0000, 0x0000, 0x0293, 0x0293, 0x0293, 0x0000,
0x0297, 0x0297, 0x0297, 0x0297, 0x0000, 0x0000, 0x029d, 0x029d,
0x029d, 0x029d, 0x0000, 0x02a2, 0x0000, 0x02a4, 0x02a4, 0x0000,
0x02a7, 0x0000, 0x02a9, 0x02a9, 0x0000, 0x0000, 0x02ad, 0x0000,
0x0000, 0x02b0, 0x0000, 0x02b2, 0x02b2, 0x0000, 0x0000, 0x02b6,
0x0000, 0x02b8, 0x0000, 0x02ba, 0x0000, 0x02bc, 0x0000, 0x02be,
// Entry 2C0 - 2FF
0x02be, 0x0000, 0x0000, 0x02c2, 0x0000, 0x02c4, 0x02c1, 0x02c1,
0x0000, 0x0000, 0x02c9, 0x02c8, 0x02c8, 0x0000, 0x0000, 0x02ce,
0x0000, 0x02d0, 0x0000, 0x02d2, 0x0000, 0x0000, 0x02d5, 0x0000,
0x0000, 0x0000, 0x02d9, 0x0000, 0x02db, 0x0000, 0x02dd, 0x0000,
0x02df, 0x02df, 0x0000, 0x02e2, 0x0000, 0x02e4, 0x0000, 0x02e6,
0x02e6, 0x02e6, 0x02e6, 0x02e6, 0x0000, 0x02ec, 0x02ed, 0x02ec,
0x0000, 0x02f0,
} // Size: 1532 bytes
// Total table size 1532 bytes (1KiB); checksum: 90718A2

View File

@ -1,43 +0,0 @@
# Bazel rules for the vendored golang.org/x/text/secure/precis package.
load("@io_bazel_rules_go//go:def.bzl", "go_library")
# Library target for the package; its deps are other vendored
# golang.org/x/text packages.
go_library(
name = "go_default_library",
srcs = [
"class.go",
"context.go",
"doc.go",
"nickname.go",
"options.go",
"profile.go",
"profiles.go",
"tables.go",
"transformer.go",
"trieval.go",
],
importmap = "k8s.io/kubernetes/vendor/golang.org/x/text/secure/precis",
importpath = "golang.org/x/text/secure/precis",
visibility = ["//visibility:public"],
deps = [
"//vendor/golang.org/x/text/cases:go_default_library",
"//vendor/golang.org/x/text/language:go_default_library",
"//vendor/golang.org/x/text/runes:go_default_library",
"//vendor/golang.org/x/text/secure/bidirule:go_default_library",
"//vendor/golang.org/x/text/transform:go_default_library",
"//vendor/golang.org/x/text/unicode/norm:go_default_library",
"//vendor/golang.org/x/text/width:go_default_library",
],
)
# Every file under this directory; visible only inside this package.
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
# Public alias for package-srcs; this package lists no subpackage groups.
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)

View File

@ -1,36 +0,0 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package precis
import (
"unicode/utf8"
)
// TODO: Add contextual character rules from Appendix A of RFC5892.
// A class is a set of characters that match certain derived properties. The
// PRECIS framework defines two classes: The Freeform class and the Identifier
// class. The freeform class should be used for profiles where expressiveness is
// prioritized over safety such as nicknames or passwords. The identifier class
// should be used for profiles where safety is the first priority such as
// addressable network labels and usernames.
type class struct {
// validFrom is the lowest derived property that the class accepts; Contains
// reports true for runes whose property is at least this value.
validFrom property
}
// Contains satisfies the runes.Set interface and returns whether the given rune
// is a member of the class. A rune is a member when the value stored for it in
// dpTrie, converted to a property, is at least c.validFrom.
func (c class) Contains(r rune) bool {
	var buf [utf8.UTFMax]byte
	size := utf8.EncodeRune(buf[:], r)
	v, _ := dpTrie.lookup(buf[:size])
	return property(v) >= c.validFrom
}
var (
// identifier accepts only runes whose property is at least pValid.
identifier = &class{validFrom: pValid}
// freeform accepts runes whose property is at least idDisOrFreePVal.
freeform = &class{validFrom: idDisOrFreePVal}
)

View File

@ -1,139 +0,0 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package precis
import "errors"
// This file contains tables and code related to context rules.
// catBitmap is a set of context-state bits, one per constant below.
type catBitmap uint16
const (
// These bits, once set depending on the current value, are never unset.
bJapanese catBitmap = 1 << iota
bArabicIndicDigit
bExtendedArabicIndicDigit
// These bits are set on each iteration depending on the current value.
bJoinStart
bJoinMid
bJoinEnd
bVirama
bLatinSmallL
bGreek
bHebrew
// These bits indicate which of the permanent bits need to be set at the
// end of the checks.
bMustHaveJapn
// permanent is the set of bits that init adds to the keep mask of every
// transition, so they survive from one rune to the next.
permanent = bJapanese | bArabicIndicDigit | bExtendedArabicIndicDigit | bMustHaveJapn
)
// finalShift equals the bit position of bMustHaveJapn (1 << 10).
// NOTE(review): presumably used to line that bit up with bJapanese in the
// final check; its use is not visible in this part of the file — confirm.
const finalShift = 10
// errContext is the error returned by the rule functions in
// categoryTransitions when a contextual rule is violated.
var errContext = errors.New("precis: contextual rule violated")
// init completes the categoryTransitions table: every transition keeps the
// permanent bits, and every termination bit is also an accepted bit.
func init() {
	// These required bits are filled in programmatically because setting
	// them by hand on every table entry is too error prone.
	for i := range categoryTransitions {
		tr := &categoryTransitions[i]
		tr.keep |= permanent
		tr.accept |= tr.term
	}
}
// categoryTransitions is indexed by category. Each entry describes how seeing
// a rune of that category changes the context state bitmap, and optionally
// provides a rule that validates the rune against the bits set before it.
// Categories without an entry get the zero value. init later adds the
// permanent bits to every keep mask and the term bits to every accept mask.
var categoryTransitions = []struct {
keep catBitmap // mask selecting which bits to keep from the previous state
set catBitmap // mask for which bits to set for this transition
// These bitmaps are used for rules that require lookahead.
// term&accept == term must be true, which is enforced programmatically.
term catBitmap // bits accepted as termination condition
accept catBitmap // bits that pass, but not sufficient as termination
// The rule function cannot take a *context as an argument, as it would
// cause the context to escape, adding significant overhead.
rule func(beforeBits catBitmap) (doLookahead bool, err error)
}{
joiningL: {set: bJoinStart},
joiningD: {set: bJoinStart | bJoinEnd},
joiningT: {keep: bJoinStart, set: bJoinMid},
joiningR: {set: bJoinEnd},
viramaModifier: {set: bVirama},
viramaJoinT: {set: bVirama | bJoinMid},
latinSmallL: {set: bLatinSmallL},
greek: {set: bGreek},
greekJoinT: {set: bGreek | bJoinMid},
hebrew: {set: bHebrew},
hebrewJoinT: {set: bHebrew | bJoinMid},
japanese: {set: bJapanese},
katakanaMiddleDot: {set: bMustHaveJapn},
zeroWidthNonJoiner: {
term: bJoinEnd,
accept: bJoinMid,
// Accepted outright after a virama; otherwise a join start must precede
// it and lookahead is requested.
rule: func(before catBitmap) (doLookAhead bool, err error) {
if before&bVirama != 0 {
return false, nil
}
if before&bJoinStart == 0 {
return false, errContext
}
return true, nil
},
},
zeroWidthJoiner: {
// Valid only when the virama bit is set beforehand; never needs lookahead.
rule: func(before catBitmap) (doLookAhead bool, err error) {
if before&bVirama == 0 {
err = errContext
}
return false, err
},
},
middleDot: {
term: bLatinSmallL,
// Must follow a Latin small l; lookahead is requested with bLatinSmallL
// as the termination bit.
rule: func(before catBitmap) (doLookAhead bool, err error) {
if before&bLatinSmallL == 0 {
return false, errContext
}
return true, nil
},
},
greekLowerNumeralSign: {
set: bGreek,
term: bGreek,
// No requirement on what precedes; lookahead is always requested with
// bGreek as the termination bit.
rule: func(before catBitmap) (doLookAhead bool, err error) {
return true, nil
},
},
hebrewPreceding: {
set: bHebrew,
// Valid only when the Hebrew bit is set beforehand.
rule: func(before catBitmap) (doLookAhead bool, err error) {
if before&bHebrew == 0 {
err = errContext
}
return false, err
},
},
arabicIndicDigit: {
set: bArabicIndicDigit,
// Invalid once an Extended Arabic-Indic digit has been seen.
rule: func(before catBitmap) (doLookAhead bool, err error) {
if before&bExtendedArabicIndicDigit != 0 {
err = errContext
}
return false, err
},
},
extendedArabicIndicDigit: {
set: bExtendedArabicIndicDigit,
// Invalid once an Arabic-Indic digit has been seen.
rule: func(before catBitmap) (doLookAhead bool, err error) {
if before&bArabicIndicDigit != 0 {
err = errContext
}
return false, err
},
},
}

View File

@ -1,14 +0,0 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package precis contains types and functions for the preparation,
// enforcement, and comparison of internationalized strings ("PRECIS") as
// defined in RFC 7564. It also contains several pre-defined profiles for
// passwords, nicknames, and usernames as defined in RFC 7613 and RFC 7700.
//
// BE ADVISED: This package is under construction and the API may change in
// backwards incompatible ways and without notice.
package precis // import "golang.org/x/text/secure/precis"
//go:generate go run gen.go gen_trieval.go

View File

@ -1,310 +0,0 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Unicode table generator.
// Data read from the web.
// +build ignore
package main
import (
"flag"
"log"
"unicode"
"unicode/utf8"
"golang.org/x/text/internal/gen"
"golang.org/x/text/internal/triegen"
"golang.org/x/text/internal/ucd"
"golang.org/x/text/unicode/norm"
"golang.org/x/text/unicode/rangetable"
)
// outputFile is the path the generated tables are written to.
var outputFile = flag.String("output", "tables.go", "output file for generated tables; default tables.go")
// assigned and disallowedRunes are filled in by main: assigned from
// rangetable.Assigned(unicode.Version), disallowedRunes from the runes
// collected out of the UCD property files.
var assigned, disallowedRunes *unicode.RangeTable
// runeCategory records the category assigned to each rune so far.
var runeCategory = map[rune]category{}
// overrides maps a category that a rune already has to the combined category
// it gets when setCategory additionally assigns joiningT to it.
var overrides = map[category]category{
viramaModifier: viramaJoinT,
greek: greekJoinT,
hebrew: hebrewJoinT,
}
// setCategory records cat as the category of r. A rune that already has a
// category may only additionally receive joiningT, and only when overrides
// has a combined category for the existing one; that combined category is
// stored instead. Any other second assignment aborts the generator.
func setCategory(r rune, cat category) {
	prev, seen := runeCategory[r]
	if seen {
		combined, canCombine := overrides[prev]
		if cat != joiningT || !canCombine {
			log.Fatalf("%U: multiple categories for rune (%v and %v)", r, prev, cat)
		}
		cat = combined
	}
	runeCategory[r] = cat
}
func init() {
if numCategories > 1<<propShift {
log.Fatalf("Number of categories is %d; may at most be %d", numCategories, 1<<propShift)
}
}
// main loads the Unicode data files, derives the disallowed rune set and the
// per-rune categories, writes the property trie, and repackages
// gen_trieval.go as trieval.go. The order of the category steps matters:
// setCategory rejects most second assignments, and the exceptions loop
// overwrites earlier categories.
func main() {
gen.Init()
// Load data
runes := []rune{}
// PrecisIgnorableProperties: https://tools.ietf.org/html/rfc7564#section-9.13
ucd.Parse(gen.OpenUCDFile("DerivedCoreProperties.txt"), func(p *ucd.Parser) {
if p.String(1) == "Default_Ignorable_Code_Point" {
runes = append(runes, p.Rune(0))
}
})
ucd.Parse(gen.OpenUCDFile("PropList.txt"), func(p *ucd.Parser) {
switch p.String(1) {
case "Noncharacter_Code_Point":
runes = append(runes, p.Rune(0))
}
})
// OldHangulJamo: https://tools.ietf.org/html/rfc5892#section-2.9
ucd.Parse(gen.OpenUCDFile("HangulSyllableType.txt"), func(p *ucd.Parser) {
switch p.String(1) {
case "L", "V", "T":
runes = append(runes, p.Rune(0))
}
})
// Everything collected above is treated as disallowed by writeTables.
disallowedRunes = rangetable.New(runes...)
assigned = rangetable.Assigned(unicode.Version)
// Load category data.
runeCategory['l'] = latinSmallL
ucd.Parse(gen.OpenUCDFile("UnicodeData.txt"), func(p *ucd.Parser) {
const cccVirama = 9
if p.Int(ucd.CanonicalCombiningClass) == cccVirama {
setCategory(p.Rune(0), viramaModifier)
}
})
ucd.Parse(gen.OpenUCDFile("Scripts.txt"), func(p *ucd.Parser) {
switch p.String(1) {
case "Greek":
setCategory(p.Rune(0), greek)
case "Hebrew":
setCategory(p.Rune(0), hebrew)
case "Hiragana", "Katakana", "Han":
setCategory(p.Rune(0), japanese)
}
})
// Set the rule categories associated with exceptions. This overrides any
// previously set categories. The original categories are manually
// reintroduced in the categoryTransitions table.
for r, e := range exceptions {
if e.cat != 0 {
runeCategory[r] = e.cat
}
}
// Joining types are applied last, so setCategory can combine joiningT with
// an existing virama, Greek or Hebrew category through overrides.
cat := map[string]category{
"L": joiningL,
"D": joiningD,
"T": joiningT,
"R": joiningR,
}
ucd.Parse(gen.OpenUCDFile("extracted/DerivedJoiningType.txt"), func(p *ucd.Parser) {
switch v := p.String(1); v {
case "L", "D", "T", "R":
setCategory(p.Rune(0), cat[v])
}
})
writeTables()
gen.Repackage("gen_trieval.go", "trieval.go", "precis")
}
// exception overrides the derived property of a rune and optionally assigns
// it a context-rule category.
type exception struct {
	prop property
	cat  category
}

// init programmatically adds the ten Arabic-Indic digits (from U+0660) and
// the ten Extended Arabic-Indic digits (from U+06F0) to the exceptions map.
// See the comment in the exceptions map below for why these are marked
// disallowed.
func init() {
	digitRanges := []struct {
		first rune
		cat   category
	}{
		{0x0660, arabicIndicDigit},
		{0x06F0, extendedArabicIndicDigit},
	}
	for _, dr := range digitRanges {
		for r := dr.first; r < dr.first+10; r++ {
			exceptions[r] = exception{prop: disallowed, cat: dr.cat}
		}
	}
}
// The Exceptions class as defined in RFC 5892
// https://tools.ietf.org/html/rfc5892#section-2.6
//
// NOTE: the init function above runs after this literal is evaluated and
// overwrites the entries for U+0660..U+0669 and U+06F0..U+06F9 with
// prop: disallowed plus a digit category, so the pValid entries listed for
// those runes below do not survive into the generated table.
var exceptions = map[rune]exception{
0x00DF: {prop: pValid},
0x03C2: {prop: pValid},
0x06FD: {prop: pValid},
0x06FE: {prop: pValid},
0x0F0B: {prop: pValid},
0x3007: {prop: pValid},
// ContextO|J rules are marked as disallowed, taking a "guilty until proven
// innocent" approach. The main reason for this is that the check for
// whether a context rule should be applied can be moved to the logic for
// handling disallowed runes, taking it off the common path. The exception to
// this rule is for katakanaMiddleDot, as the rule logic is handled without
// using a rule function.
// ContextJ (Join control)
0x200C: {prop: disallowed, cat: zeroWidthNonJoiner},
0x200D: {prop: disallowed, cat: zeroWidthJoiner},
// ContextO
0x00B7: {prop: disallowed, cat: middleDot},
0x0375: {prop: disallowed, cat: greekLowerNumeralSign},
0x05F3: {prop: disallowed, cat: hebrewPreceding}, // punctuation Geresh
0x05F4: {prop: disallowed, cat: hebrewPreceding}, // punctuation Gershayim
0x30FB: {prop: pValid, cat: katakanaMiddleDot},
// These are officially ContextO. They are listed here as valid, but see the
// NOTE above: init replaces these digit entries before the tables are
// written.
0x0660: {prop: pValid},
0x0661: {prop: pValid},
0x0662: {prop: pValid},
0x0663: {prop: pValid},
0x0664: {prop: pValid},
0x0665: {prop: pValid},
0x0666: {prop: pValid},
0x0667: {prop: pValid},
0x0668: {prop: pValid},
0x0669: {prop: pValid},
0x06F0: {prop: pValid},
0x06F1: {prop: pValid},
0x06F2: {prop: pValid},
0x06F3: {prop: pValid},
0x06F4: {prop: pValid},
0x06F5: {prop: pValid},
0x06F6: {prop: pValid},
0x06F7: {prop: pValid},
0x06F8: {prop: pValid},
0x06F9: {prop: pValid},
0x0640: {prop: disallowed},
0x07FA: {prop: disallowed},
0x302E: {prop: disallowed},
0x302F: {prop: disallowed},
0x3031: {prop: disallowed},
0x3032: {prop: disallowed},
0x3033: {prop: disallowed},
0x3034: {prop: disallowed},
0x3035: {prop: disallowed},
0x303B: {prop: disallowed},
}
// LetterDigits: https://tools.ietf.org/html/rfc5892#section-2.1
// r in {Ll, Lu, Lo, Nd, Lm, Mn, Mc}.
//
// isLetterDigits reports whether r belongs to one of those general categories.
func isLetterDigits(r rune) bool {
	tables := []*unicode.RangeTable{
		unicode.Ll, unicode.Lu, unicode.Lm, unicode.Lo, // Letters
		unicode.Mn, unicode.Mc, // Modifiers
		unicode.Nd, // Digits
	}
	for _, tbl := range tables {
		if unicode.Is(tbl, r) {
			return true
		}
	}
	return false
}
// isIdDisAndFreePVal reports whether r belongs to one of the general
// categories listed below: OtherLetterDigits, Spaces, Symbols or Punctuation.
func isIdDisAndFreePVal(r rune) bool {
	tables := []*unicode.RangeTable{
		// OtherLetterDigits: https://tools.ietf.org/html/rfc7564#section-9.18
		// r in {Lt, Nl, No, Me}
		unicode.Lt, unicode.Nl, unicode.No, // Other letters / numbers
		unicode.Me, // Modifiers

		// Spaces: https://tools.ietf.org/html/rfc7564#section-9.14
		// r in {Zs}
		unicode.Zs,

		// Symbols: https://tools.ietf.org/html/rfc7564#section-9.15
		// r in {Sm, Sc, Sk, So}
		unicode.Sm, unicode.Sc, unicode.Sk, unicode.So,

		// Punctuation: https://tools.ietf.org/html/rfc7564#section-9.16
		// r in {Pc, Pd, Ps, Pe, Pi, Pf, Po}
		unicode.Pc, unicode.Pd, unicode.Ps, unicode.Pe,
		unicode.Pi, unicode.Pf, unicode.Po,
	}
	for _, tbl := range tables {
		if unicode.Is(tbl, r) {
			return true
		}
	}
	return false
}
// HasCompat: https://tools.ietf.org/html/rfc7564#section-9.17
//
// hasCompat reports whether r, taken as a one-rune string, is not already in
// NFKC normal form.
func hasCompat(r rune) bool {
	s := string(r)
	return !norm.NFKC.IsNormalString(s)
}
// From https://tools.ietf.org/html/rfc5892:
//
// If .cp. .in. Exceptions Then Exceptions(cp);
// Else If .cp. .in. BackwardCompatible Then BackwardCompatible(cp);
// Else If .cp. .in. Unassigned Then UNASSIGNED;
// Else If .cp. .in. ASCII7 Then PVALID;
// Else If .cp. .in. JoinControl Then CONTEXTJ;
// Else If .cp. .in. OldHangulJamo Then DISALLOWED;
// Else If .cp. .in. PrecisIgnorableProperties Then DISALLOWED;
// Else If .cp. .in. Controls Then DISALLOWED;
// Else If .cp. .in. HasCompat Then ID_DIS or FREE_PVAL;
// Else If .cp. .in. LetterDigits Then PVALID;
// Else If .cp. .in. OtherLetterDigits Then ID_DIS or FREE_PVAL;
// Else If .cp. .in. Spaces Then ID_DIS or FREE_PVAL;
// Else If .cp. .in. Symbols Then ID_DIS or FREE_PVAL;
// Else If .cp. .in. Punctuation Then ID_DIS or FREE_PVAL;
// Else DISALLOWED;
//
// writeTables computes the property and category of every valid rune below
// unicode.MaxRune, inserts the combined value into the derivedProperties trie
// and writes the generated trie to the output file.
func writeTables() {
	propTrie := triegen.NewTrie("derivedProperties")
	w := gen.NewCodeWriter()
	defer w.WriteGoFile(*outputFile, "precis")
	gen.WriteUnicodeVersion(w)

	// Iterate over all the runes...
	for r := rune(0); r < unicode.MaxRune; r++ {
		if !utf8.ValidRune(r) {
			continue
		}

		// An exception decides the property outright; otherwise the first
		// matching rule below wins.
		exc, isException := exceptions[r]
		prop := exc.prop
		if !isException {
			switch {
			case !unicode.In(r, assigned):
				prop = unassigned
			case r >= 0x0021 && r <= 0x007e: // Is ASCII 7
				prop = pValid
			case unicode.In(r, disallowedRunes, unicode.Cc):
				prop = disallowed
			case hasCompat(r):
				prop = idDisOrFreePVal
			case isLetterDigits(r):
				prop = pValid
			case isIdDisAndFreePVal(r):
				prop = idDisOrFreePVal
			default:
				prop = disallowed
			}
		}

		// Disallowed runes only carry the category given by an exception (the
		// zero category when there is none); all others use runeCategory.
		cat := exc.cat
		if prop != disallowed {
			cat = runeCategory[r]
		}
		propTrie.Insert(r, uint64(prop)|uint64(cat))
	}

	sz, err := propTrie.Gen(w)
	if err != nil {
		log.Fatal(err)
	}
	w.Size += sz
}

View File

@ -1,68 +0,0 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
package main
// entry is the entry of a trie table
// 7..6 property (unassigned, disallowed, maybe, valid)
// 5..0 category
type entry uint8
const (
propShift = 6 // bit position where the property field starts
propMask = 0xc0 // selects bits 7..6, the property
catMask = 0x3f // selects bits 5..0, the category
)
// property returns the property bits of e, left in bit positions 7..6. The
// property constants are defined pre-shifted by propShift, so the result
// compares directly against them.
func (e entry) property() property { return property(e & propMask) }
// category returns the category stored in the low six bits of e.
func (e entry) category() category { return category(e & catMask) }
// property is the derived property of a rune, stored in the top two bits of
// an entry (each constant is its ordinal shifted left by propShift).
type property uint8
// The order of these constants matter. A Profile may consider runes to be
// allowed either from pValid or idDisOrFreePVal.
const (
unassigned property = iota << propShift
disallowed
idDisOrFreePVal // disallowed for Identifier, pValid for FreeForm
pValid
)
// compute permutations of all properties and specialCategories.
// category is the special rune type of a rune, stored in the low six bits of
// an entry.
type category uint8
const (
other category = iota
// Special rune types
joiningL
joiningD
joiningT
joiningR
viramaModifier
viramaJoinT // Virama + JoiningT
latinSmallL // U+006c
greek
greekJoinT // Greek + JoiningT
hebrew
hebrewJoinT // Hebrew + JoiningT
japanese // hiragana, katakana, han
// Special rune types associated with contextual rules defined in
// https://tools.ietf.org/html/rfc5892#appendix-A.
// ContextJ
zeroWidthNonJoiner // rule 1
zeroWidthJoiner // rule 2
// ContextO
middleDot // rule 3
greekLowerNumeralSign // rule 4
hebrewPreceding // rule 5 and 6
katakanaMiddleDot // rule 7
arabicIndicDigit // rule 8
extendedArabicIndicDigit // rule 9
// numCategories is the number of categories defined above.
numCategories
)

View File

@ -1,70 +0,0 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package precis
import (
"unicode"
"unicode/utf8"
"golang.org/x/text/transform"
)
// nickAdditionalMapping holds the state of the transformer that implements
// the additional mapping rule for nicknames.
type nickAdditionalMapping struct {
	// TODO: This transformer needs to be stateless somehow…
	notStart  bool // set once a non-space rune has been copied to the output
	prevSpace bool // set while the most recently read rune was a space
}

// Reset returns the transformer to its initial state, clearing both flags.
func (t *nickAdditionalMapping) Reset() {
	*t = nickAdditionalMapping{}
}
// Transform implements transform.Transformer. It maps every Zs rune to an
// ASCII space, drops leading and trailing spaces, and collapses interior runs
// of spaces to a single space. All other bytes, including invalid UTF-8, are
// copied through unchanged.
//
// nDst and nSrc always count only bytes that were completely written and
// consumed. Transform returns transform.ErrShortDst when dst cannot hold the
// next rune plus any pending separator, and transform.ErrShortSrc when src
// ends in a truncated rune and atEOF is false. In both cases the state is left
// untouched for the unprocessed rune, so the call can be retried.
func (t *nickAdditionalMapping) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
	// RFC 7700 §2.1. Rules
	//
	// 2. Additional Mapping Rule: The additional mapping rule consists of
	//    the following sub-rules.
	//
	//    1. Any instances of non-ASCII space MUST be mapped to ASCII
	//       space (U+0020); a non-ASCII space is any Unicode code point
	//       having a general category of "Zs", naturally with the
	//       exception of U+0020.
	//
	//    2. Any instances of the ASCII space character at the beginning
	//       or end of a nickname MUST be removed (e.g., "stpeter " is
	//       mapped to "stpeter").
	//
	//    3. Interior sequences of more than one ASCII space character
	//       MUST be mapped to a single ASCII space character (e.g.,
	//       "St  Peter" is mapped to "St Peter").
	for nSrc < len(src) {
		r, size := utf8.DecodeRune(src[nSrc:])
		// DecodeRune reports a truncated encoding as (RuneError, 1), never as
		// size 0, so FullRune is needed to tell it apart from invalid input.
		if r == utf8.RuneError && size == 1 && !atEOF && !utf8.FullRune(src[nSrc:]) {
			return nDst, nSrc, transform.ErrShortSrc
		}

		if unicode.Is(unicode.Zs, r) {
			// Spaces are only remembered; one is emitted later if a non-space
			// rune follows, which also drops trailing spaces.
			t.prevSpace = true
			nSrc += size
			continue
		}

		// Reserve room for the separator and the rune together so that a
		// short dst never leaves a half-written result behind.
		needSpace := t.prevSpace && t.notStart
		need := size
		if needSpace {
			need++
		}
		if len(dst)-nDst < need {
			return nDst, nSrc, transform.ErrShortDst
		}
		if needSpace {
			dst[nDst] = ' '
			nDst++
		}
		nDst += copy(dst[nDst:], src[nSrc:nSrc+size])
		t.prevSpace = false
		t.notStart = true
		nSrc += size
	}
	return nDst, nSrc, nil
}

View File

@ -1,153 +0,0 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package precis
import (
"golang.org/x/text/cases"
"golang.org/x/text/language"
"golang.org/x/text/runes"
"golang.org/x/text/transform"
"golang.org/x/text/unicode/norm"
)
// An Option is used to define the behavior and rules of a Profile.
type Option func(*options)
// options collects the settings that Option functions apply. getOpts builds
// one from a list of Options and defaults norm to NFC when it is unset.
type options struct {
// Preparation options
foldWidth bool
// Enforcement options
asciiLower bool
cases transform.SpanningTransformer
disallow runes.Set
norm transform.SpanningTransformer
// additional holds one factory per additional mapping, in the order given
// to AdditionalMapping.
additional []func() transform.SpanningTransformer
width transform.SpanningTransformer
disallowEmpty bool
bidiRule bool
// Comparison options
ignorecase bool
}
// getOpts applies the given options in order to a zero options value and
// returns the result. The normalization form defaults to NFC when no option
// set one.
func getOpts(o ...Option) (res options) {
	for i := range o {
		o[i](&res)
	}
	if res.norm != nil {
		return res
	}
	// Using a SpanningTransformer, instead of norm.Form prevents an allocation
	// down the road.
	res.norm = norm.NFC
	return res
}
var (
// IgnoreCase causes the profile to perform a case insensitive
// comparison during the PRECIS comparison step.
IgnoreCase Option = ignoreCase
// FoldWidth causes the profile to map non-canonical wide and
// narrow variants to their decomposition mapping. This is useful for
// profiles that are based on the identifier class which would otherwise
// disallow such characters.
FoldWidth Option = foldWidth
// DisallowEmpty causes the enforcement step to return an error if
// the resulting string would be empty.
DisallowEmpty Option = disallowEmpty
// BidiRule causes the Bidi Rule defined in RFC 5893 to be
// applied.
BidiRule Option = bidiRule
)
// Implementations of the exported options above; each sets one boolean field
// of options.
var (
ignoreCase = func(o *options) {
o.ignorecase = true
}
foldWidth = func(o *options) {
o.foldWidth = true
}
disallowEmpty = func(o *options) {
o.disallowEmpty = true
}
bidiRule = func(o *options) {
o.bidiRule = true
}
)
// TODO: move this logic to package transform
// spanWrap adds a Span method to a plain transform.Transformer so that it can
// be used where a transform.SpanningTransformer is required.
type spanWrap struct{ transform.Transformer }
// Span always reports that the span ends at offset 0 with
// transform.ErrEndOfSpan, whatever the input.
// NOTE(review): presumably this makes callers fall back to Transform for the
// whole input — confirm against the callers.
func (s spanWrap) Span(src []byte, atEOF bool) (n int, err error) {
return 0, transform.ErrEndOfSpan
}
// TODO: allow different types? For instance:
// func() transform.Transformer
// func() transform.SpanningTransformer
// func([]byte) bool // validation only
//
// Also, would be great if we could detect if a transformer is reentrant.

// The AdditionalMapping option defines the additional mapping rule for the
// Profile by applying Transformer's in sequence.
//
// Each function in t is called once when the option is applied, to find out
// whether it produces a transform.SpanningTransformer. Functions that do not
// are wrapped in spanWrap. The resulting factories are appended to the
// profile's additional mappings in the order given.
func AdditionalMapping(t ...func() transform.Transformer) Option {
	return func(o *options) {
		for _, f := range t {
			// Copy the loop variable: the closures below outlive the
			// iteration, and before Go 1.22 they would otherwise all share
			// one f and end up calling the last mapping only.
			f := f
			var sf func() transform.SpanningTransformer
			if _, ok := f().(transform.SpanningTransformer); ok {
				sf = func() transform.SpanningTransformer {
					return f().(transform.SpanningTransformer)
				}
			} else {
				sf = func() transform.SpanningTransformer {
					return spanWrap{f()}
				}
			}
			o.additional = append(o.additional, sf)
		}
	}
}
// The Norm option defines a Profile's normalization rule. Defaults to NFC.
func Norm(f norm.Form) Option {
	setNorm := func(cfg *options) {
		cfg.norm = f
	}
	return setNorm
}
// The FoldCase option defines a Profile's case mapping rule. Options can be
// provided to determine the type of case folding used. Applying it also sets
// the asciiLower flag.
func FoldCase(opts ...cases.Option) Option {
	setFold := func(cfg *options) {
		cfg.asciiLower = true
		cfg.cases = cases.Fold(opts...)
	}
	return setFold
}
// The LowerCase option defines a Profile's case mapping rule. Options can be
// provided to determine the type of lowercasing used. Applying it also sets
// the asciiLower flag.
//
// cases.HandleFinalSigma(false) is always passed first, so any of the given
// options may override it.
func LowerCase(opts ...cases.Option) Option {
	return func(o *options) {
		o.asciiLower = true
		// Build the list in a local slice. Reassigning the captured opts
		// would prepend one more HandleFinalSigma on every application of
		// the same Option.
		all := make([]cases.Option, 0, len(opts)+1)
		all = append(all, cases.HandleFinalSigma(false))
		all = append(all, opts...)
		o.cases = cases.Lower(language.Und, all...)
	}
}
// The Disallow option further restricts a Profile's allowed characters beyond
// what is disallowed by the underlying string class.
func Disallow(set runes.Set) Option {
	setDisallow := func(cfg *options) {
		cfg.disallow = set
	}
	return setDisallow
}

View File

@ -1,378 +0,0 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package precis
import (
"bytes"
"errors"
"unicode/utf8"
"golang.org/x/text/cases"
"golang.org/x/text/language"
"golang.org/x/text/runes"
"golang.org/x/text/secure/bidirule"
"golang.org/x/text/transform"
"golang.org/x/text/width"
)
var (
// errDisallowedRune is returned by the checker and by enforce when the input
// contains something the profile does not permit.
errDisallowedRune = errors.New("precis: disallowed rune encountered")
)
// dpTrie is the package-wide trie that checker.span queries to obtain the
// table entry and byte size for the next piece of input.
// NOTE(review): the meaning of the 0 argument is defined by
// newDerivedPropertiesTrie, which is not visible here.
var dpTrie = newDerivedPropertiesTrie(0)
// A Profile represents a set of rules for normalizing and validating strings in
// the PRECIS framework.
type Profile struct {
// options is embedded; it carries the settings built by getOpts (width
// folding, additional mappings, case mapping, normalization, bidi rule,
// extra disallowed set, empty-string rule).
options
// class is the string class the profile is based on; the constructors in
// this file use identifier or freeform.
class *class
}
// NewIdentifier creates a new PRECIS profile based on the Identifier string
// class. Profiles created from this class are suitable for use where safety is
// prioritized over expressiveness like network identifiers, user accounts, chat
// rooms, and file names.
//
// opts are folded into the profile's options by getOpts.
func NewIdentifier(opts ...Option) *Profile {
	prof := new(Profile)
	prof.options = getOpts(opts...)
	prof.class = identifier
	return prof
}
// NewFreeform creates a new PRECIS profile based on the Freeform string class.
// Profiles created from this class are suitable for use where expressiveness is
// prioritized over safety like passwords, and display-elements such as
// nicknames in a chat room.
//
// opts are folded into the profile's options by getOpts.
func NewFreeform(opts ...Option) *Profile {
	prof := new(Profile)
	prof.options = getOpts(opts...)
	prof.class = freeform
	return prof
}
// NewTransformer creates a new transform.Transformer that performs the PRECIS
// preparation and enforcement steps on the given UTF-8 encoded bytes.
//
// The stages are chained in this order: optional width folding, the additional
// mappings, the optional case mapping, normalization, the optional bidi rule,
// and finally a checker built from the profile's allowed set.
func (p *Profile) NewTransformer() *Transformer {
	o := &p.options

	// These transforms are applied in the order defined in
	// https://tools.ietf.org/html/rfc7564#section-7
	var stages []transform.Transformer
	if o.foldWidth {
		stages = append(stages, width.Fold)
	}
	for _, newMapping := range o.additional {
		stages = append(stages, newMapping())
	}
	if o.cases != nil {
		stages = append(stages, o.cases)
	}
	stages = append(stages, o.norm)
	if o.bidiRule {
		stages = append(stages, bidirule.New())
	}
	stages = append(stages, &checker{p: p, allowed: p.Allowed()})

	// TODO: Add the disallow empty rule with a dummy transformer?

	return &Transformer{transform.Chain(stages...)}
}
// errEmptyString is returned by enforce when the profile has disallowEmpty set
// and the enforced result has length zero.
var errEmptyString = errors.New("precis: transformation resulted in empty string")
// buffers holds the working state for one enforcement run: the current data
// and two scratch slices that successive transformations alternate between.
type buffers struct {
	src  []byte    // current data; initially the caller's input
	buf  [2][]byte // scratch storage, selected by the low bit of next
	next int       // number of transformations that actually rewrote src
}

// apply runs t over b.src. If t.Span reports anything other than
// transform.ErrEndOfSpan, that result is returned and b is left unchanged.
// Otherwise the already-spanned prefix is copied into the next scratch slice,
// the remainder is transformed onto it with transform.Append, and b.src is
// switched to the result.
func (b *buffers) apply(t transform.SpanningTransformer) (err error) {
	done, spanErr := t.Span(b.src, true)
	if spanErr != transform.ErrEndOfSpan {
		return spanErr
	}

	slot := b.next & 1
	if b.buf[slot] == nil {
		b.buf[slot] = make([]byte, 0, 8+len(b.src)+len(b.src)>>2)
	}

	// The prefix [0, done) needs no change; only the tail is transformed.
	head := append(b.buf[slot][:0], b.src[:done]...)
	out, _, err := transform.Append(t, head, b.src[done:])
	b.src = out
	b.buf[slot] = out
	b.next++
	return err
}
// Pre-allocate transformers when possible. In some cases this avoids allocation.
var (
// foldWidthT is the shared width-folding transformer that enforce applies when
// width folding is requested.
foldWidthT transform.SpanningTransformer = width.Fold
// lowerCaseT is the shared lower-casing transformer that enforce applies when
// building comparison keys for profiles with ignorecase set.
lowerCaseT transform.SpanningTransformer = cases.Lower(language.Und, cases.HandleFinalSigma(false))
)
// TODO: make this a method on profile.

// enforce runs the preparation and enforcement steps of profile p over src and
// returns the resulting bytes. When comparing is true, the extra steps used
// for comparison keys are applied as well: for profiles with ignorecase set,
// the input is additionally width-folded and lower-cased.
//
// The returned slice is either src itself (nothing had to be rewritten, in
// which case b.next is still 0) or one of b's internal buffers. src is never
// modified.
//
// An error is returned when an additional mapping fails, when the bidi rule is
// violated (bidirule.ErrInvalid), when the checker rejects the input, when a
// rune is in the profile's disallow set (errDisallowedRune), or when the
// result is empty and the profile disallows that (errEmptyString).
func (b *buffers) enforce(p *Profile, src []byte, comparing bool) (str []byte, err error) {
	b.src = src

	ascii := true
	for _, c := range src {
		if c >= utf8.RuneSelf {
			ascii = false
			break
		}
	}
	// ASCII fast path.
	if ascii {
		for _, f := range p.options.additional {
			if err = b.apply(f()); err != nil {
				return nil, err
			}
		}
		switch {
		case p.options.asciiLower || (comparing && p.options.ignorecase):
			for i, c := range b.src {
				if c < 'A' || 'Z' < c {
					continue
				}
				if b.next == 0 {
					// No transformation has rewritten the data yet, so b.src
					// is still the caller's slice. Move to a private copy
					// before the first write so the input is left untouched,
					// and bump next so callers can tell the result no longer
					// aliases src. The range above keeps reading from the
					// old slice, whose contents are identical.
					b.buf[0] = append(b.buf[0][:0], b.src...)
					b.src = b.buf[0]
					b.next++
				}
				b.src[i] = c ^ 1<<5 // flip bit 5: 'A'..'Z' -> 'a'..'z'
			}
		case p.options.cases != nil:
			// NOTE(review): the error from apply is discarded here; confirm
			// the case transformer cannot fail on ASCII input.
			b.apply(p.options.cases)
		}
		c := checker{p: p}
		if _, err := c.span(b.src, true); err != nil {
			return nil, err
		}
		if p.disallow != nil {
			// Input is pure ASCII, so every byte is a complete rune.
			for _, c := range b.src {
				if p.disallow.Contains(rune(c)) {
					return nil, errDisallowedRune
				}
			}
		}
		if p.options.disallowEmpty && len(b.src) == 0 {
			return nil, errEmptyString
		}
		return b.src, nil
	}

	// These transforms are applied in the order defined in
	// https://tools.ietf.org/html/rfc7564#section-7

	// NOTE(review): the errors from the foldWidthT, cases, lowerCaseT and norm
	// applications below are discarded; only additional mappings propagate
	// theirs. Confirm this is intended.

	// TODO: allow different width transforms options.
	if p.options.foldWidth || (p.options.ignorecase && comparing) {
		b.apply(foldWidthT)
	}
	for _, f := range p.options.additional {
		if err = b.apply(f()); err != nil {
			return nil, err
		}
	}
	if p.options.cases != nil {
		b.apply(p.options.cases)
	}
	if comparing && p.options.ignorecase {
		b.apply(lowerCaseT)
	}
	b.apply(p.norm)
	if p.options.bidiRule && !bidirule.Valid(b.src) {
		return nil, bidirule.ErrInvalid
	}
	c := checker{p: p}
	if _, err := c.span(b.src, true); err != nil {
		return nil, err
	}
	if p.disallow != nil {
		for i := 0; i < len(b.src); {
			r, size := utf8.DecodeRune(b.src[i:])
			if p.disallow.Contains(r) {
				return nil, errDisallowedRune
			}
			i += size
		}
	}
	if p.options.disallowEmpty && len(b.src) == 0 {
		return nil, errEmptyString
	}
	return b.src, nil
}
// Append appends the result of applying p to src writing the result to dst.
// It returns an error if the input string is invalid.
//
// On error the returned slice is nil rather than dst.
func (p *Profile) Append(dst, src []byte) ([]byte, error) {
	var scratch buffers
	enforced, err := scratch.enforce(p, src, false)
	if err == nil {
		return append(dst, enforced...), nil
	}
	return nil, err
}
// processBytes enforces p on b (building a comparison key when key is true)
// and returns the result in a slice that does not share storage with b.
func processBytes(p *Profile, b []byte, key bool) ([]byte, error) {
	var scratch buffers
	out, err := scratch.enforce(p, b, key)
	switch {
	case err != nil:
		return nil, err
	case scratch.next != 0:
		// At least one transformation rewrote the data, so out already lives
		// in one of scratch's own buffers.
		return out, nil
	}
	// Nothing was rewritten: out is still the caller's slice, so hand back a
	// copy instead.
	dup := make([]byte, len(out))
	copy(dup, out)
	return dup, nil
}
// Bytes returns a new byte slice with the result of applying the profile to b.
// It delegates to processBytes without the comparison-key steps.
func (p *Profile) Bytes(b []byte) ([]byte, error) {
	const asKey = false
	return processBytes(p, b, asKey)
}
// AppendCompareKey appends the result of applying p to src (including any
// optional rules to make strings comparable or useful in a map key such as
// applying lowercasing) writing the result to dst. It returns an error if the
// input string is invalid.
//
// On error the returned slice is nil rather than dst.
func (p *Profile) AppendCompareKey(dst, src []byte) ([]byte, error) {
	var scratch buffers
	key, err := scratch.enforce(p, src, true)
	if err == nil {
		return append(dst, key...), nil
	}
	return nil, err
}
// processString enforces p on s (building a comparison key when key is true)
// and returns the result as a string, or "" together with the error when
// enforcement fails.
func processString(p *Profile, s string, key bool) (string, error) {
	var scratch buffers
	raw := []byte(s)
	out, err := scratch.enforce(p, raw, key)
	if err != nil {
		return "", err
	}
	return string(out), nil
}
// String returns a string with the result of applying the profile to s.
// It delegates to processString without the comparison-key steps.
func (p *Profile) String(s string) (string, error) {
	const asKey = false
	return processString(p, s, asKey)
}
// CompareKey returns a string that can be used for comparison, hashing, or
// collation. It delegates to processString with the comparison-key steps
// enabled.
func (p *Profile) CompareKey(s string) (string, error) {
	const asKey = true
	return processString(p, s, asKey)
}
// Compare enforces both strings, and then compares them for bit-string identity
// (byte-for-byte equality). If either string cannot be enforced, the comparison
// is false.
func (p *Profile) Compare(a, b string) bool {
	// Each operand gets its own zero-valued working state, so the first key
	// cannot be overwritten while the second is being computed.
	var stateA, stateB buffers

	keyA, err := stateA.enforce(p, []byte(a), true)
	if err != nil {
		return false
	}
	keyB, err := stateB.enforce(p, []byte(b), true)
	if err != nil {
		return false
	}
	return bytes.Compare(keyA, keyB) == 0
}
// Allowed returns a runes.Set containing every rune that is a member of the
// underlying profile's string class and not disallowed by any profile specific
// rules.
//
// When the profile has no extra disallow set, the class itself is returned.
func (p *Profile) Allowed() runes.Set {
	if p.options.disallow == nil {
		return p.class
	}
	return runes.Predicate(func(r rune) bool {
		if !p.class.Contains(r) {
			return false
		}
		return !p.options.disallow.Contains(r)
	})
}
// checker validates input against a profile's string class and tracks the
// state needed by the context rules while scanning.
type checker struct {
	p       *Profile  // profile whose class decides validity
	allowed runes.Set // allowed set supplied by NewTransformer

	beforeBits catBitmap // categories recorded so far
	termBits   catBitmap // categories that end an open lookahead; 0 when none is open
	acceptBits catBitmap // categories permitted while a lookahead is open
}

// Reset clears the three context bitmaps.
func (c *checker) Reset() {
	c.beforeBits, c.termBits, c.acceptBits = 0, 0, 0
}
// span scans src from the beginning and checks each looked-up entry against
// the profile's class and the category context rules. It returns the number of
// bytes accepted before scanning stopped, plus:
//   - transform.ErrShortSrc if a lookup yields size 0 and atEOF is false,
//   - errDisallowedRune if a lookup yields size 0 at EOF, or an entry is below
//     the class's validity threshold and has no context rule,
//   - the error from a context rule, or errContext for a broken or unfinished
//     lookahead sequence,
//   - nil otherwise.
// The checker's bitmaps are updated as a side effect.
func (c *checker) span(src []byte, atEOF bool) (n int, err error) {
for n < len(src) {
// Fetch the table entry and byte length for the input at offset n, and the
// transition record for that entry's category.
e, sz := dpTrie.lookup(src[n:])
d := categoryTransitions[category(e&catMask)]
// NOTE(review): sz == 0 presumably means an incomplete or invalid encoding;
// confirm against dpTrie.lookup.
if sz == 0 {
if !atEOF {
return n, transform.ErrShortSrc
}
return n, errDisallowedRune
}
doLookAhead := false
// Entries below the class's validFrom threshold are acceptable only when
// their category carries a context rule and that rule passes.
if property(e) < c.p.class.validFrom {
if d.rule == nil {
return n, errDisallowedRune
}
doLookAhead, err = d.rule(c.beforeBits)
if err != nil {
return n, err
}
}
// Fold this entry's category into the running bitmap: first drop the bits
// the transition does not keep, then add the bits it sets.
c.beforeBits &= d.keep
c.beforeBits |= d.set
if c.termBits != 0 {
// We are currently in an unterminated lookahead.
if c.beforeBits&c.termBits != 0 {
// A terminating category was seen: the lookahead is closed.
c.termBits = 0
c.acceptBits = 0
} else if c.beforeBits&c.acceptBits == 0 {
// Invalid continuation of the unterminated lookahead sequence.
return n, errContext
}
}
if doLookAhead {
if c.termBits != 0 {
// A previous lookahead run has not been terminated yet.
return n, errContext
}
// Open a new lookahead using this transition's terminator/accept sets.
c.termBits = d.term
c.acceptBits = d.accept
}
n += sz
}
// Final check: every bit of the bitmap shifted right by finalShift must also
// be set in the bitmap itself, and no lookahead may still be open.
if m := c.beforeBits >> finalShift; c.beforeBits&m != m || c.termBits != 0 {
err = errContext
}
return n, err
}
// TODO: we may get rid of this transform if transform.Chain understands
// something like a Spanner interface.

// Transform validates src with span and copies the accepted prefix to dst
// unchanged. When dst is shorter than src, only the part that fits is examined
// and a nil or ErrShortSrc result is turned into transform.ErrShortDst.
func (c checker) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
	truncated := len(dst) < len(src)
	if truncated {
		// Only look at what can be written; the cut-off input is by
		// definition not the end of the stream.
		src = src[:len(dst)]
		atEOF = false
	}

	nSrc, err = c.span(src, atEOF)
	nDst = copy(dst, src[:nSrc])

	if truncated && (err == nil || err == transform.ErrShortSrc) {
		err = transform.ErrShortDst
	}
	return nDst, nSrc, err
}

View File

@ -1,78 +0,0 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package precis
import (
"unicode"
"golang.org/x/text/runes"
"golang.org/x/text/transform"
"golang.org/x/text/unicode/norm"
)
var (
// Nickname implements the Nickname profile specified in RFC 7700.
// The nickname profile is not idempotent and may need to be applied multiple
// times before being used for comparisons.
Nickname *Profile = nickname
// UsernameCaseMapped implements the UsernameCaseMapped profile specified in
// RFC 7613.
UsernameCaseMapped *Profile = usernameCaseMap
// UsernameCasePreserved implements the UsernameCasePreserved profile
// specified in RFC 7613.
UsernameCasePreserved *Profile = usernameNoCaseMap
// OpaqueString implements the OpaqueString profile defined in RFC 7613 for
// passwords and other secure labels.
OpaqueString *Profile = opaquestring
)
// Backing definitions for the exported profiles. Each one pairs a string class
// with the options listed in its getOpts call.
var (
// nickname: freeform class with the nickAdditionalMapping additional mapping
// (a new instance per use), IgnoreCase, NFKC normalization, and
// DisallowEmpty.
nickname = &Profile{
options: getOpts(
AdditionalMapping(func() transform.Transformer {
return &nickAdditionalMapping{}
}),
IgnoreCase,
Norm(norm.NFKC),
DisallowEmpty,
),
class: freeform,
}
// usernameCaseMap: identifier class with FoldWidth, LowerCase, NFC
// normalization, and BidiRule.
usernameCaseMap = &Profile{
options: getOpts(
FoldWidth,
LowerCase(),
Norm(norm.NFC),
BidiRule,
),
class: identifier,
}
// usernameNoCaseMap: identifier class with FoldWidth, NFC normalization, and
// BidiRule; no case mapping option is set.
usernameNoCaseMap = &Profile{
options: getOpts(
FoldWidth,
Norm(norm.NFC),
BidiRule,
),
class: identifier,
}
// opaquestring: freeform class with the shared mapSpaces additional mapping,
// NFC normalization, and DisallowEmpty.
opaquestring = &Profile{
options: getOpts(
AdditionalMapping(func() transform.Transformer {
return mapSpaces
}),
Norm(norm.NFC),
DisallowEmpty,
),
class: freeform,
}
)
// mapSpaces is a shared value of a runes.Map transformer. It rewrites every
// rune in the Unicode Zs (space separator) category to an ASCII space and
// passes all other runes through unchanged.
var mapSpaces transform.Transformer = runes.Map(func(r rune) rune {
	isSpaceSeparator := unicode.Is(unicode.Zs, r)
	if !isSpaceSeparator {
		return r
	}
	return ' '
})

File diff suppressed because it is too large Load Diff

View File

@ -1,32 +0,0 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package precis
import "golang.org/x/text/transform"
// Transformer implements the transform.Transformer interface.
// It wraps another transformer and adds Bytes and String convenience methods.
type Transformer struct {
	t transform.Transformer
}

// Reset implements the transform.Transformer interface.
func (tr Transformer) Reset() {
	tr.t.Reset()
}

// Transform implements the transform.Transformer interface.
// It forwards to the wrapped transformer.
func (tr Transformer) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
	nDst, nSrc, err = tr.t.Transform(dst, src, atEOF)
	return nDst, nSrc, err
}

// Bytes returns a new byte slice with the result of applying t to b.
// The byte count and error reported by transform.Bytes are discarded.
func (tr Transformer) Bytes(b []byte) []byte {
	out, _, _ := transform.Bytes(tr, b)
	return out
}

// String returns a string with the result of applying t to s.
// The byte count and error reported by transform.String are discarded.
func (tr Transformer) String(s string) string {
	out, _, _ := transform.String(tr, s)
	return out
}

View File

@ -1,64 +0,0 @@
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
package precis
// entry is a single value stored in the trie table. Its bits are laid out as:
//     7..6   property (unassigned, disallowed, maybe, valid)
//     5..0   category
type entry uint8

const (
	propShift = 6
	propMask  = 0xc0
	catMask   = 0x3f
)

// property returns the property bits of e, left in place in bits 7..6 so the
// result compares directly against the property constants below.
func (e entry) property() property { return property(e) & propMask }

// category returns the category stored in bits 5..0 of e.
func (e entry) category() category { return category(e) & catMask }

// property is the derived property of a code point, stored pre-shifted into
// the top two bits.
type property uint8

// The order of these constants matter. A Profile may consider runes to be
// allowed either from pValid or idDisOrFreePVal.
const (
	unassigned      property = iota << propShift
	disallowed               // never allowed
	idDisOrFreePVal          // disallowed for Identifier, pValid for FreeForm
	pValid                   // valid in every class
)

// compute permutations of all properties and specialCategories.

// category identifies the special rune class an entry belongs to.
type category uint8

const (
	other category = iota

	// Special rune types
	joiningL
	joiningD
	joiningT
	joiningR
	viramaModifier
	viramaJoinT // Virama + JoiningT
	latinSmallL // U+006c
	greek
	greekJoinT // Greek + JoiningT
	hebrew
	hebrewJoinT // Hebrew + JoiningT
	japanese    // hiragana, katakana, han

	// Special rune types associated with contextual rules defined in
	// https://tools.ietf.org/html/rfc5892#appendix-A.

	// ContextJ (join controls, per RFC 5892)
	zeroWidthNonJoiner // rule 1
	zeroWidthJoiner    // rule 2

	// ContextO (per RFC 5892)
	middleDot                // rule 3
	greekLowerNumeralSign    // rule 4
	hebrewPreceding          // rule 5 and 6
	katakanaMiddleDot        // rule 7
	arabicIndicDigit         // rule 8
	extendedArabicIndicDigit // rule 9

	numCategories
)

5
vendor/modules.txt vendored
View File

@ -60,7 +60,7 @@ github.com/Microsoft/hcsshim/internal/wclayer
github.com/NYTimes/gziphandler
# github.com/Nvveen/Gotty v0.0.0-20120604004816-cd527374f1e5 => github.com/Nvveen/Gotty v0.0.0-20120604004816-cd527374f1e5
github.com/Nvveen/Gotty
# github.com/PuerkitoBio/purell v1.1.0 => github.com/PuerkitoBio/purell v1.0.0
# github.com/PuerkitoBio/purell v1.1.0 => github.com/PuerkitoBio/purell v1.1.0
github.com/PuerkitoBio/purell
# github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 => github.com/PuerkitoBio/urlesc v0.0.0-20160726150825-5bd2802263f2
github.com/PuerkitoBio/urlesc
@ -957,7 +957,6 @@ golang.org/x/sys/windows
golang.org/x/sys/windows/registry
golang.org/x/sys/windows/svc
# golang.org/x/text v0.3.0 => golang.org/x/text v0.0.0-20170810154203-b19bf474d317
golang.org/x/text/cases
golang.org/x/text/encoding
golang.org/x/text/encoding/charmap
golang.org/x/text/encoding/htmlindex
@ -968,13 +967,11 @@ golang.org/x/text/encoding/korean
golang.org/x/text/encoding/simplifiedchinese
golang.org/x/text/encoding/traditionalchinese
golang.org/x/text/encoding/unicode
golang.org/x/text/internal
golang.org/x/text/internal/tag
golang.org/x/text/internal/utf8internal
golang.org/x/text/language
golang.org/x/text/runes
golang.org/x/text/secure/bidirule
golang.org/x/text/secure/precis
golang.org/x/text/transform
golang.org/x/text/unicode/bidi
golang.org/x/text/unicode/norm