mirror of https://github.com/k3s-io/k3s

commit d85823b051 ("update godep")
parent 3ec4cd423e
Godeps/Godeps.json
@@ -762,6 +762,11 @@
 		{
 			"ImportPath": "github.com/mitchellh/mapstructure",
 			"Rev": "740c764bc6149d3f1806231418adb9f52c11bcbf"
 		},
+		{
+			"ImportPath": "github.com/mvdan/xurls",
+			"Comment": "v0.8.0-14-g1b768d7",
+			"Rev": "1b768d7c393abd8e8dda1458385a57becd4b2d4e"
+		},
 		{
 			"ImportPath": "github.com/mxk/go-flowrate/flowrate",
 			"Rev": "cca7078d478f8520f85629ad7c68962d31ed7682"
Godeps/_workspace/src/github.com/mvdan/xurls/.gitignore (new file)
@@ -0,0 +1,3 @@
cmd/xurls/xurls
generate/tldsgen/tldsgen
generate/regexgen/regexgen
Godeps/_workspace/src/github.com/mvdan/xurls/.travis.yml (new file)
@@ -0,0 +1,5 @@
language: go

go:
  - 1.4.3
  - 1.5.1
Godeps/_workspace/src/github.com/mvdan/xurls/LICENSE (new file)
@@ -0,0 +1,27 @@
Copyright (c) 2015, Daniel Martí. All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:

   * Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
   * Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
   * Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Godeps/_workspace/src/github.com/mvdan/xurls/README.md (new file)
@@ -0,0 +1,31 @@
# xurls

[![GoDoc](https://godoc.org/github.com/mvdan/xurls?status.svg)](https://godoc.org/github.com/mvdan/xurls) [![Travis](https://travis-ci.org/mvdan/xurls.svg?branch=master)](https://travis-ci.org/mvdan/xurls)

Extract urls from text using regular expressions.

	go get github.com/mvdan/xurls

```go
import "github.com/mvdan/xurls"

func main() {
	xurls.Relaxed.FindString("Do gophers live in golang.org?")
	// "golang.org"
	xurls.Relaxed.FindAllString("foo.com is http://foo.com/.", -1)
	// []string{"foo.com", "http://foo.com/"}
	xurls.Strict.FindAllString("foo.com is http://foo.com/.", -1)
	// []string{"http://foo.com/"}
}
```

#### cmd/xurls

Reads text and prints one url per line.

	go get github.com/mvdan/xurls/cmd/xurls

```shell
$ echo "Do gophers live in http://golang.org?" | xurls
http://golang.org
```
Godeps/_workspace/src/github.com/mvdan/xurls/cmd/xurls/main.go (new file)
@@ -0,0 +1,83 @@
// Copyright (c) 2015, Daniel Martí <mvdan@mvdan.cc>
// See LICENSE for licensing information

package main

import (
	"bufio"
	"flag"
	"fmt"
	"os"
	"regexp"

	"github.com/mvdan/xurls"
)

var (
	matching = flag.String("m", "", "")
	relaxed  = flag.Bool("r", false, "")
)

func init() {
	flag.Usage = func() {
		p := func(format string, a ...interface{}) {
			fmt.Fprintf(os.Stderr, format, a...)
		}
		p("Usage: xurls [-h] [files]\n\n")
		p("If no files are given, it reads from standard input.\n\n")
		p("   -m <regexp>   only match urls whose scheme matches a regexp\n")
		p("                   example: 'https?://|mailto:'\n")
		p("   -r            also match urls without a scheme (relaxed)\n")
	}
}

func scanPath(re *regexp.Regexp, path string) error {
	r := os.Stdin
	if path != "-" {
		f, err := os.Open(path)
		if err != nil {
			return err
		}
		defer f.Close()
		r = f
	}
	scanner := bufio.NewScanner(r)
	scanner.Split(bufio.ScanWords)
	for scanner.Scan() {
		word := scanner.Text()
		for _, match := range re.FindAllString(word, -1) {
			fmt.Println(match)
		}
	}
	return scanner.Err()
}

func main() {
	flag.Parse()
	if *relaxed && *matching != "" {
		errExit(fmt.Errorf("-r and -m at the same time don't make much sense"))
	}
	re := xurls.Strict
	if *relaxed {
		re = xurls.Relaxed
	} else if *matching != "" {
		var err error
		if re, err = xurls.StrictMatchingScheme(*matching); err != nil {
			errExit(err)
		}
	}
	args := flag.Args()
	if len(args) == 0 {
		args = []string{"-"}
	}
	for _, path := range args {
		if err := scanPath(re, path); err != nil {
			errExit(err)
		}
	}
}

func errExit(err error) {
	fmt.Fprintf(os.Stderr, "%v\n", err)
	os.Exit(1)
}
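The -m flag above delegates to xurls.StrictMatchingScheme, which appears later in this diff. A minimal, standalone sketch of the same filtering at the library level; the input string is made up, and the scheme pattern is the one suggested in the usage text:

```go
package main

import (
	"fmt"

	"github.com/mvdan/xurls"
)

func main() {
	// Only match urls whose scheme is http(s) or mailto, as with `xurls -m`.
	re, err := xurls.StrictMatchingScheme(`https?://|mailto:`)
	if err != nil {
		panic(err)
	}
	fmt.Println(re.FindAllString("see ftp://a.com, https://b.com and mailto:x@y.com", -1))
	// [https://b.com mailto:x@y.com]
}
```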
Godeps/_workspace/src/github.com/mvdan/xurls/generate/regexgen/main.go (new file, 70 lines, marked generated/vendored)
@@ -0,0 +1,70 @@
// Copyright (c) 2015, Daniel Martí <mvdan@mvdan.cc>
// See LICENSE for licensing information

package main

import (
	"log"
	"os"
	"sort"
	"strings"
	"text/template"

	"golang.org/x/net/idna"

	"github.com/mvdan/xurls"
)

const path = "regex.go"

var regexTmpl = template.Must(template.New("regex").Parse(`// Generated by regexgen

package xurls

const ({{ range $key, $value := . }}
	{{$key}} = ` + "`" + `{{$value}}` + "`" + `{{end}}
)
`))

func writeRegex(tlds []string) error {
	allTldsSet := make(map[string]struct{})
	add := func(tld string) {
		if _, e := allTldsSet[tld]; e {
			log.Fatalf("Duplicate TLD: %s", tld)
		}
		allTldsSet[tld] = struct{}{}
	}
	for _, tldlist := range [...][]string{tlds, xurls.PseudoTLDs} {
		for _, tld := range tldlist {
			add(tld)
			asciiTld, err := idna.ToASCII(tld)
			if err != nil {
				return err
			}
			if asciiTld != tld {
				add(asciiTld)
			}
		}
	}
	var allTlds []string
	for tld := range allTldsSet {
		allTlds = append(allTlds, tld)
	}
	sort.Strings(allTlds)
	f, err := os.Create(path)
	if err != nil {
		return err
	}
	defer f.Close()
	return regexTmpl.Execute(f, map[string]string{
		"gtld       ": `(?i)(` + strings.Join(allTlds, `|`) + `)(?-i)`,
		"otherScheme": `(?i)(` + strings.Join(xurls.SchemesNoAuthority, `|`) + `)(?-i):`,
	})
}

func main() {
	log.Printf("Generating %s...", path)
	if err := writeRegex(xurls.TLDs); err != nil {
		log.Fatalf("Could not write %s: %v", path, err)
	}
}
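writeRegex also folds in the ASCII (punycode) spelling of internationalized TLDs through idna.ToASCII, so both forms end up in the generated alternation. A tiny sketch of that call; the TLD shown is just an illustration:

```go
package main

import (
	"fmt"

	"golang.org/x/net/idna"
)

func main() {
	// The Unicode TLD and its punycode form both get added by writeRegex.
	ascii, err := idna.ToASCII("中国")
	if err != nil {
		panic(err)
	}
	fmt.Println(ascii)
	// xn--fiqs8s
}
```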
Godeps/_workspace/src/github.com/mvdan/xurls/generate/tldsgen/main.go (new file, 140 lines, marked generated/vendored)
@@ -0,0 +1,140 @@
// Copyright (c) 2015, Daniel Martí <mvdan@mvdan.cc>
// See LICENSE for licensing information

package main

import (
	"bufio"
	"errors"
	"log"
	"net/http"
	"os"
	"regexp"
	"sort"
	"strings"
	"sync"
	"text/template"
)

const path = "tlds.go"

var tldsTmpl = template.Must(template.New("tlds").Parse(`// Generated by tldsgen

package xurls

// TLDs is a sorted list of all public top-level domains.
//
// Sources:{{range $_, $url := .URLs}}
// * {{$url}}{{end}}
var TLDs = []string{
{{range $_, $tld := .TLDs}}` + "\t`" + `{{$tld}}` + "`" + `,
{{end}}}
`))

func cleanTld(tld string) string {
	tld = strings.ToLower(tld)
	if strings.HasPrefix(tld, "xn--") {
		return ""
	}
	return tld
}

func fetchFromURL(url, pat string) {
	defer wg.Done()
	log.Printf("Fetching %s", url)
	resp, err := http.Get(url)
	if err == nil && resp.StatusCode >= 400 {
		err = errors.New(resp.Status)
	}
	if err != nil {
		errChan <- err
		return
	}
	defer resp.Body.Close()
	scanner := bufio.NewScanner(resp.Body)
	re := regexp.MustCompile(pat)
	for scanner.Scan() {
		line := scanner.Text()
		tld := re.FindString(line)
		tld = cleanTld(tld)
		if tld == "" {
			continue
		}
		tldChan <- tld
	}
	if err := scanner.Err(); err != nil {
		errChan <- err
	}
}

var (
	wg      sync.WaitGroup
	tldChan = make(chan string)
	errChan = make(chan error)
)

func tldList() ([]string, []string, error) {
	var urls []string
	fromURL := func(url, pat string) {
		urls = append(urls, url)
		wg.Add(1)
		go fetchFromURL(url, pat)
	}
	fromURL("https://data.iana.org/TLD/tlds-alpha-by-domain.txt",
		`^[^#]+$`)
	fromURL("https://publicsuffix.org/list/effective_tld_names.dat",
		`^[^/.]+$`)

	tldSet := make(map[string]struct{})
	anyError := false
	go func() {
		for {
			select {
			case tld := <-tldChan:
				tldSet[tld] = struct{}{}
			case err := <-errChan:
				log.Printf("%v", err)
				anyError = true
			}
		}
	}()
	wg.Wait()

	if anyError {
		return nil, nil, errors.New("there were some errors while fetching the TLDs")
	}

	tlds := make([]string, 0, len(tldSet))
	for tld := range tldSet {
		tlds = append(tlds, tld)
	}

	sort.Strings(tlds)
	return tlds, urls, nil
}

func writeTlds(tlds, urls []string) error {
	f, err := os.Create(path)
	if err != nil {
		return err
	}
	defer f.Close()
	return tldsTmpl.Execute(f, struct {
		TLDs []string
		URLs []string
	}{
		TLDs: tlds,
		URLs: urls,
	})
}

func main() {
	tlds, urls, err := tldList()
	if err != nil {
		log.Fatalf("Could not get TLD list: %v", err)
	}
	log.Printf("Generating %s...", path)
	if err := writeTlds(tlds, urls); err != nil {
		log.Fatalf("Could not write path: %v", err)
	}
}
File diff suppressed because one or more lines are too long
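Judging by the file order and the path constant in regexgen above, the suppressed diff here is presumably the generated regex.go, whose constants are single very long lines. Per the regexgen template, its shape is roughly as follows; the gtld entries are illustrative, not the full list:

```go
// Generated by regexgen

package xurls

const (
	// The real gtld alternation contains every public TLD plus PseudoTLDs;
	// only a few illustrative entries are shown here.
	gtld        = `(?i)(ac|com|onion|org|zone)(?-i)`
	otherScheme = `(?i)(bitcoin|file|magnet|mailto|sms|tel|xmpp)(?-i):`
)
```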
@@ -0,0 +1,14 @@
package xurls

// SchemesNoAuthority is a sorted list of some well-known url schemes that are
// followed by ":" instead of "://". Since these are more prone to false
// positives, we limit their matching.
var SchemesNoAuthority = []string{
	`bitcoin`, // Bitcoin
	`file`,    // Files
	`magnet`,  // Torrent magnets
	`mailto`,  // Mail
	`sms`,     // SMS
	`tel`,     // Telephone
	`xmpp`,    // XMPP
}
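These schemes feed the generated otherScheme constant, which is why Strict can match urls that have no "//" authority part. A small sketch with a made-up address:

```go
package main

import (
	"fmt"

	"github.com/mvdan/xurls"
)

func main() {
	// mailto: has no "//" authority, but it is listed in SchemesNoAuthority,
	// so Strict still matches it.
	fmt.Println(xurls.Strict.FindString("write to mailto:gopher@example.com please"))
	// mailto:gopher@example.com
}
```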
File diff suppressed because it is too large
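This suppressed file is presumably the generated tlds.go. From the tldsgen template and the two source URLs above, it looks roughly like this; the entries shown are illustrative:

```go
// Generated by tldsgen

package xurls

// TLDs is a sorted list of all public top-level domains.
//
// Sources:
// * https://data.iana.org/TLD/tlds-alpha-by-domain.txt
// * https://publicsuffix.org/list/effective_tld_names.dat
var TLDs = []string{
	`ac`,
	`academy`,
	// ... one entry per known TLD, sorted ...
	`zone`,
	`zw`,
}
```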
@@ -0,0 +1,22 @@
package xurls

// PseudoTLDs is a sorted list of some widely used unofficial TLDs.
//
// Sources:
// * https://en.wikipedia.org/wiki/Pseudo-top-level_domain
// * https://en.wikipedia.org/wiki/Category:Pseudo-top-level_domains
// * https://tools.ietf.org/html/draft-grothoff-iesg-special-use-p2p-names-00
// * https://www.iana.org/assignments/special-use-domain-names/special-use-domain-names.xhtml
var PseudoTLDs = []string{
	`bit`,       // Namecoin
	`example`,   // Example domain
	`exit`,      // Tor exit node
	`gnu`,       // GNS by public key
	`i2p`,       // I2P network
	`invalid`,   // Invalid domain
	`local`,     // Local network
	`localhost`, // Local network
	`onion`,     // Tor hidden services
	`test`,      // Test domain
	`zkey`,      // GNS domain name
}
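regexgen folds PseudoTLDs into the same generated gtld alternation as the real TLDs, so these names match like ordinary domains. A small sketch; the address is made up:

```go
package main

import (
	"fmt"

	"github.com/mvdan/xurls"
)

func main() {
	// .onion is in PseudoTLDs, so Relaxed matches it even without a scheme.
	fmt.Println(xurls.Relaxed.FindString("the hidden service lives at example.onion today"))
	// example.onion
}
```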
@@ -0,0 +1,66 @@
// Copyright (c) 2015, Daniel Martí <mvdan@mvdan.cc>
// See LICENSE for licensing information

// Package xurls extracts urls from plain text using regular expressions.
package xurls

import "regexp"

//go:generate go run generate/tldsgen/main.go
//go:generate go run generate/regexgen/main.go

const (
	letter    = `\p{L}`
	number    = `\p{N}`
	iriChar   = letter + number
	currency  = `\p{Sc}`
	otherSymb = `\p{So}`
	endChar   = iriChar + `/\-+_&~*%=#` + currency
	midChar   = endChar + `@.,:;'?!|` + otherSymb
	wellParen = `\([` + midChar + `]*(\([` + midChar + `]*\)[` + midChar + `]*)*\)`
	wellBrack = `\[[` + midChar + `]*(\[[` + midChar + `]*\][` + midChar + `]*)*\]`
	wellBrace = `\{[` + midChar + `]*(\{[` + midChar + `]*\}[` + midChar + `]*)*\}`
	wellAll   = wellParen + `|` + wellBrack + `|` + wellBrace
	pathCont  = `([` + midChar + `]*(` + wellAll + `|[` + endChar + `])+)+`
	comScheme = `[a-zA-Z][a-zA-Z.\-+]*://`
	scheme    = `(` + comScheme + `|` + otherScheme + `)`

	iri      = `[` + iriChar + `]([` + iriChar + `\-]*[` + iriChar + `])?`
	domain   = `(` + iri + `\.)+`
	octet    = `(25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9][0-9]|[0-9])`
	ipv4Addr = `\b` + octet + `\.` + octet + `\.` + octet + `\.` + octet + `\b`
	ipv6Addr = `([0-9a-fA-F]{1,4}:([0-9a-fA-F]{1,4}:([0-9a-fA-F]{1,4}:([0-9a-fA-F]{1,4}:([0-9a-fA-F]{1,4}:[0-9a-fA-F]{0,4}|:[0-9a-fA-F]{1,4})?|(:[0-9a-fA-F]{1,4}){0,2})|(:[0-9a-fA-F]{1,4}){0,3})|(:[0-9a-fA-F]{1,4}){0,4})|:(:[0-9a-fA-F]{1,4}){0,5})((:[0-9a-fA-F]{1,4}){2}|:(25[0-5]|(2[0-4]|1[0-9]|[1-9])?[0-9])(\.(25[0-5]|(2[0-4]|1[0-9]|[1-9])?[0-9])){3})|(([0-9a-fA-F]{1,4}:){1,6}|:):[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){7}:`
	ipAddr   = `(` + ipv4Addr + `|` + ipv6Addr + `)`
	site     = domain + gtld
	hostName = `(` + site + `|` + ipAddr + `)`
	port     = `(:[0-9]*)?`
	path     = `(/|/` + pathCont + `?|\b|$)`
	webURL   = hostName + port + path

	strict  = `(\b` + scheme + pathCont + `)`
	relaxed = `(` + strict + `|` + webURL + `)`
)

var (
	// Relaxed matches all the urls it can find.
	Relaxed = regexp.MustCompile(relaxed)
	// Strict only matches urls with a scheme to avoid false positives.
	Strict = regexp.MustCompile(strict)
)

func init() {
	Relaxed.Longest()
	Strict.Longest()
}

// StrictMatchingScheme produces a regexp that matches urls like Strict but
// whose scheme matches the given regular expression.
func StrictMatchingScheme(exp string) (*regexp.Regexp, error) {
	strictMatching := `(\b(?i)(` + exp + `)(?-i)` + pathCont + `)`
	re, err := regexp.Compile(strictMatching)
	if err != nil {
		return nil, err
	}
	re.Longest()
	return re, nil
}
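A short sketch of how the webURL pieces above behave in practice, with made-up hosts: Relaxed picks up bare domains and IP addresses (with optional port and path), while Strict requires a scheme:

```go
package main

import (
	"fmt"

	"github.com/mvdan/xurls"
)

func main() {
	// 10.0.0.1:3000 exercises the ipv4Addr branch; example.com:8080/x the
	// domain+gtld branch, plus port and path.
	text := "dashboards at 10.0.0.1:3000 and example.com:8080/x"
	fmt.Println(xurls.Relaxed.FindAllString(text, -1))
	// [10.0.0.1:3000 example.com:8080/x]
	fmt.Println(xurls.Strict.FindAllString(text, -1))
	// [] (no scheme present, so Strict matches nothing)
}
```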