update godep

pull/6/head
Chao Xu 2016-01-09 17:04:15 -08:00
parent 3ec4cd423e
commit d85823b051
13 changed files with 2029 additions and 0 deletions

5
Godeps/Godeps.json generated
View File

@ -762,6 +762,11 @@
"ImportPath": "github.com/mitchellh/mapstructure",
"Rev": "740c764bc6149d3f1806231418adb9f52c11bcbf"
},
{
"ImportPath": "github.com/mvdan/xurls",
"Comment": "v0.8.0-14-g1b768d7",
"Rev": "1b768d7c393abd8e8dda1458385a57becd4b2d4e"
},
{
"ImportPath": "github.com/mxk/go-flowrate/flowrate",
"Rev": "cca7078d478f8520f85629ad7c68962d31ed7682"

View File

@ -0,0 +1,3 @@
cmd/xurls/xurls
generate/tldsgen/tldsgen
generate/regexgen/regexgen

View File

@ -0,0 +1,5 @@
language: go
go:
- 1.4.3
- 1.5.1

27
Godeps/_workspace/src/github.com/mvdan/xurls/LICENSE generated vendored Normal file
View File

@ -0,0 +1,27 @@
Copyright (c) 2015, Daniel Martí. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

31
Godeps/_workspace/src/github.com/mvdan/xurls/README.md generated vendored Normal file
View File

@ -0,0 +1,31 @@
# xurls
[![GoDoc](https://godoc.org/github.com/mvdan/xurls?status.svg)](https://godoc.org/github.com/mvdan/xurls) [![Travis](https://travis-ci.org/mvdan/xurls.svg?branch=master)](https://travis-ci.org/mvdan/xurls)
Extract urls from text using regular expressions.
go get github.com/mvdan/xurls
```go
import "github.com/mvdan/xurls"
func main() {
xurls.Relaxed.FindString("Do gophers live in golang.org?")
// "golang.org"
xurls.Relaxed.FindAllString("foo.com is http://foo.com/.", -1)
// []string{"foo.com", "http://foo.com/"}
xurls.Strict.FindAllString("foo.com is http://foo.com/.", -1)
// []string{"http://foo.com/"}
}
```
#### cmd/xurls
Reads text and prints one url per line.
go get github.com/mvdan/xurls/cmd/xurls
```shell
$ echo "Do gophers live in http://golang.org?" | xurls
http://golang.org
```

View File

@ -0,0 +1,83 @@
// Copyright (c) 2015, Daniel Martí <mvdan@mvdan.cc>
// See LICENSE for licensing information
package main
import (
"bufio"
"flag"
"fmt"
"os"
"regexp"
"github.com/mvdan/xurls"
)
// matching holds the value of the -m flag: a regular expression that the
// scheme of every reported url must match. Empty means "not set".
var matching = flag.String("m", "", "")

// relaxed holds the value of the -r flag: when true, urls without a scheme
// are reported as well.
var relaxed = flag.Bool("r", false, "")
// init replaces the default flag usage message with a hand-written one that
// is printed to standard error.
func init() {
	usageLines := [...]string{
		"Usage: xurls [-h] [files]\n\n",
		"If no files are given, it reads from standard input.\n\n",
		" -m <regexp> only match urls whose scheme matches a regexp\n",
		" example: 'https?://|mailto:'\n",
		" -r also match urls without a scheme (relaxed)\n",
	}
	flag.Usage = func() {
		// None of the lines contain formatting verbs, so they are
		// written out verbatim, one write per line.
		for _, line := range usageLines {
			fmt.Fprint(os.Stderr, line)
		}
	}
}
// scanPath reads whitespace-separated words from the file at path (or from
// standard input when path is "-") and prints every match of re found in
// each word to standard output, one per line.
//
// It returns the error from opening the file, or any error reported by the
// scanner once reading stops.
func scanPath(re *regexp.Regexp, path string) error {
	var input *os.File
	switch path {
	case "-":
		input = os.Stdin
	default:
		opened, err := os.Open(path)
		if err != nil {
			return err
		}
		defer opened.Close()
		input = opened
	}

	words := bufio.NewScanner(input)
	words.Split(bufio.ScanWords)
	for words.Scan() {
		found := re.FindAllString(words.Text(), -1)
		for i := range found {
			fmt.Println(found[i])
		}
	}
	return words.Err()
}
func main() {
flag.Parse()
if *relaxed && *matching != "" {
errExit(fmt.Errorf("-r and -m at the same time don't make much sense"))
}
re := xurls.Strict
if *relaxed {
re = xurls.Relaxed
} else if *matching != "" {
var err error
if re, err = xurls.StrictMatchingScheme(*matching); err != nil {
errExit(err)
}
}
args := flag.Args()
if len(args) == 0 {
args = []string{"-"}
}
for _, path := range args {
if err := scanPath(re, path); err != nil {
errExit(err)
}
}
}
// errExit prints err on its own line to standard error and terminates the
// process with exit status 1.
func errExit(err error) {
	const failureStatus = 1
	fmt.Fprintf(os.Stderr, "%v\n", err)
	os.Exit(failureStatus)
}

View File

@ -0,0 +1,70 @@
// Copyright (c) 2015, Daniel Martí <mvdan@mvdan.cc>
// See LICENSE for licensing information
package main
import (
"log"
"os"
"sort"
"strings"
"text/template"
"golang.org/x/net/idna"
"github.com/mvdan/xurls"
)
// path is the name of the Go source file this generator writes.
const path = "regex.go"
// regexTmpl renders the generated file: a const block in package xurls with
// one constant per entry of the map passed to Execute. Each map key becomes
// the constant name and each value becomes a raw (backtick-quoted) string
// literal. The backticks are spliced in with "`" because a raw string
// literal cannot itself contain a backtick.
var regexTmpl = template.Must(template.New("regex").Parse(`// Generated by regexgen
package xurls
const ({{ range $key, $value := . }}
{{$key}} = ` + "`" + `{{$value}}` + "`" + `{{end}}
)
`))
// writeRegex generates the file named by path, containing the regular
// expression fragments built from tlds, xurls.PseudoTLDs and
// xurls.SchemesNoAuthority.
//
// Every TLD is added together with its ASCII (IDNA) form when that differs.
// A TLD that shows up twice aborts the program via log.Fatalf. The combined
// list is sorted before being joined into a case-insensitive alternation.
//
// It returns any error from the IDNA conversion, from creating the file,
// from executing the template, or from closing the file.
func writeRegex(tlds []string) error {
	allTldsSet := make(map[string]struct{})
	add := func(tld string) {
		if _, e := allTldsSet[tld]; e {
			log.Fatalf("Duplicate TLD: %s", tld)
		}
		allTldsSet[tld] = struct{}{}
	}
	for _, tldlist := range [...][]string{tlds, xurls.PseudoTLDs} {
		for _, tld := range tldlist {
			add(tld)
			asciiTld, err := idna.ToASCII(tld)
			if err != nil {
				return err
			}
			if asciiTld != tld {
				add(asciiTld)
			}
		}
	}

	allTlds := make([]string, 0, len(allTldsSet))
	for tld := range allTldsSet {
		allTlds = append(allTlds, tld)
	}
	// Map iteration order is random; sort for deterministic output.
	sort.Strings(allTlds)

	f, err := os.Create(path)
	if err != nil {
		return err
	}
	if err := regexTmpl.Execute(f, map[string]string{
		"gtld ": `(?i)(` + strings.Join(allTlds, `|`) + `)(?-i)`,
		"otherScheme": `(?i)(` + strings.Join(xurls.SchemesNoAuthority, `|`) + `)(?-i):`,
	}); err != nil {
		f.Close()
		return err
	}
	// A failed Close on a written file can mean lost data, so report it
	// instead of discarding it in a defer.
	return f.Close()
}
// main regenerates the file named by path from xurls.TLDs and exits with a
// fatal log message if that fails.
func main() {
	log.Printf("Generating %s...", path)
	err := writeRegex(xurls.TLDs)
	if err == nil {
		return
	}
	log.Fatalf("Could not write %s: %v", path, err)
}

View File

@ -0,0 +1,140 @@
// Copyright (c) 2015, Daniel Martí <mvdan@mvdan.cc>
// See LICENSE for licensing information
package main
import (
"bufio"
"errors"
"log"
"net/http"
"os"
"regexp"
"sort"
"strings"
"sync"
"text/template"
)
// path is the name of the Go source file this generator writes.
const path = "tlds.go"
// tldsTmpl renders the generated file: a TLDs string slice in package xurls.
// The template reads two fields from its data: URLs, listed in the doc
// comment as the sources, and TLDs, emitted one per line as raw
// (backtick-quoted) string literals. The backticks are spliced in with "`"
// because a raw string literal cannot itself contain a backtick.
var tldsTmpl = template.Must(template.New("tlds").Parse(`// Generated by tldsgen
package xurls
// TLDs is a sorted list of all public top-level domains.
//
// Sources:{{range $_, $url := .URLs}}
// * {{$url}}{{end}}
var TLDs = []string{
{{range $_, $tld := .TLDs}}` + "\t`" + `{{$tld}}` + "`" + `,
{{end}}}
`))
// cleanTld normalises a TLD candidate to lower case. Candidates whose
// lower-cased form starts with "xn--" are rejected by returning the empty
// string.
func cleanTld(tld string) string {
	if lowered := strings.ToLower(tld); !strings.HasPrefix(lowered, "xn--") {
		return lowered
	}
	return ""
}
// fetchFromURL downloads url and scans the response line by line. For each
// line, the first match of the regular expression pat is passed through
// cleanTld and, if the result is non-empty, sent on tldChan.
//
// Failures (request errors, HTTP status codes >= 400 and scanner errors) are
// sent on errChan. The function signals wg when it returns, so the caller
// must have called wg.Add beforehand. pat must be a valid regular expression;
// regexp.MustCompile panics otherwise.
func fetchFromURL(url, pat string) {
	defer wg.Done()
	log.Printf("Fetching %s", url)
	resp, err := http.Get(url)
	if err != nil {
		errChan <- err
		return
	}
	// Close the body on every path, including error statuses, so the
	// connection is not leaked.
	defer resp.Body.Close()
	if resp.StatusCode >= 400 {
		errChan <- errors.New(resp.Status)
		return
	}
	scanner := bufio.NewScanner(resp.Body)
	re := regexp.MustCompile(pat)
	for scanner.Scan() {
		line := scanner.Text()
		tld := re.FindString(line)
		tld = cleanTld(tld)
		if tld == "" {
			continue
		}
		tldChan <- tld
	}
	if err := scanner.Err(); err != nil {
		errChan <- err
	}
}
// wg counts the fetchFromURL goroutines that are still running.
var wg sync.WaitGroup

// tldChan carries every TLD found by the fetchers. It is unbuffered.
var tldChan = make(chan string)

// errChan carries errors hit by the fetchers. It is unbuffered.
var errChan = make(chan error)
// tldList fetches the TLD lists from their upstream sources concurrently and
// returns the sorted, de-duplicated TLDs together with the list of source
// URLs that were queried.
//
// If any fetcher reports an error, the error is logged and tldList returns
// nil slices and a generic error.
func tldList() ([]string, []string, error) {
	var urls []string
	fromURL := func(url, pat string) {
		urls = append(urls, url)
		wg.Add(1)
		go fetchFromURL(url, pat)
	}
	fromURL("https://data.iana.org/TLD/tlds-alpha-by-domain.txt",
		`^[^#]+$`)
	fromURL("https://publicsuffix.org/list/effective_tld_names.dat",
		`^[^/.]+$`)

	tldSet := make(map[string]struct{})
	anyError := false

	// The collector owns tldSet and anyError until it closes drained.
	stop := make(chan struct{})
	drained := make(chan struct{})
	go func() {
		defer close(drained)
		for {
			select {
			case tld := <-tldChan:
				tldSet[tld] = struct{}{}
			case err := <-errChan:
				log.Printf("%v", err)
				anyError = true
			case <-stop:
				return
			}
		}
	}()

	// Once every fetcher is done, every value has been received because the
	// channels are unbuffered, but the collector may still be storing the
	// last one. Ask it to stop and wait, so that reading tldSet and anyError
	// below does not race with it.
	wg.Wait()
	close(stop)
	<-drained

	if anyError {
		return nil, nil, errors.New("there were some errors while fetching the TLDs")
	}
	tlds := make([]string, 0, len(tldSet))
	for tld := range tldSet {
		tlds = append(tlds, tld)
	}
	sort.Strings(tlds)
	return tlds, urls, nil
}
// writeTlds generates the file named by path by executing tldsTmpl with the
// given TLDs and source URLs.
//
// It returns any error from creating the file, executing the template, or
// closing the file.
func writeTlds(tlds, urls []string) error {
	f, err := os.Create(path)
	if err != nil {
		return err
	}
	data := struct {
		TLDs []string
		URLs []string
	}{
		TLDs: tlds,
		URLs: urls,
	}
	if err := tldsTmpl.Execute(f, data); err != nil {
		f.Close()
		return err
	}
	// A failed Close on a written file can mean lost data, so report it
	// instead of discarding it in a defer.
	return f.Close()
}
// main fetches the TLD list and regenerates the file named by path, exiting
// with a fatal log message if either step fails.
func main() {
	tlds, urls, err := tldList()
	if err != nil {
		log.Fatalf("Could not get TLD list: %v", err)
	}
	log.Printf("Generating %s...", path)
	if err := writeTlds(tlds, urls); err != nil {
		// Report the actual file name rather than the literal word "path".
		log.Fatalf("Could not write %s: %v", path, err)
	}
}

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,14 @@
package xurls
// SchemesNoAuthority lists, in sorted order, a handful of well-known url
// schemes that are written with a bare ":" rather than "://". Matching is
// restricted to these because such schemes are more prone to false
// positives.
var SchemesNoAuthority = []string{
	"bitcoin", // Bitcoin
	"file",    // Files
	"magnet",  // Torrent magnets
	"mailto",  // Mail
	"sms",     // SMS
	"tel",     // Telephone
	"xmpp",    // XMPP
}

1555
Godeps/_workspace/src/github.com/mvdan/xurls/tlds.go generated vendored Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,22 @@
package xurls
// PseudoTLDs lists, in sorted order, some widely used unofficial TLDs.
//
// Sources:
// * https://en.wikipedia.org/wiki/Pseudo-top-level_domain
// * https://en.wikipedia.org/wiki/Category:Pseudo-top-level_domains
// * https://tools.ietf.org/html/draft-grothoff-iesg-special-use-p2p-names-00
// * https://www.iana.org/assignments/special-use-domain-names/special-use-domain-names.xhtml
var PseudoTLDs = []string{
	"bit",       // Namecoin
	"example",   // Example domain
	"exit",      // Tor exit node
	"gnu",       // GNS by public key
	"i2p",       // I2P network
	"invalid",   // Invalid domain
	"local",     // Local network
	"localhost", // Local network
	"onion",     // Tor hidden services
	"test",      // Test domain
	"zkey",      // GNS domain name
}

66
Godeps/_workspace/src/github.com/mvdan/xurls/xurls.go generated vendored Normal file
View File

@ -0,0 +1,66 @@
// Copyright (c) 2015, Daniel Martí <mvdan@mvdan.cc>
// See LICENSE for licensing information
// Package xurls extracts urls from plain text using regular expressions.
package xurls
import "regexp"
//go:generate go run generate/tldsgen/main.go
//go:generate go run generate/regexgen/main.go
// Building blocks for the url regular expressions. Each constant is a regexp
// fragment; later constants are assembled by concatenating earlier ones.
// gtld and otherScheme are not declared in this block — presumably they come
// from the generated regex.go (see the go:generate directives); confirm.
const (
// Unicode classes, written without brackets so that they can be combined
// inside a single character class below.
letter = `\p{L}`
number = `\p{N}`
iriChar = letter + number
currency = `\p{Sc}`
otherSymb = `\p{So}`
// endChar is the set of characters a path may end with; midChar adds
// punctuation and symbols that are only accepted before the end.
endChar = iriChar + `/\-+_&~*%=#` + currency
midChar = endChar + `@.,:;'?!|` + otherSymb
// Balanced (), [] and {} groups, each allowing one level of nesting.
wellParen = `\([` + midChar + `]*(\([` + midChar + `]*\)[` + midChar + `]*)*\)`
wellBrack = `\[[` + midChar + `]*(\[[` + midChar + `]*\][` + midChar + `]*)*\]`
wellBrace = `\{[` + midChar + `]*(\{[` + midChar + `]*\}[` + midChar + `]*)*\}`
wellAll = wellParen + `|` + wellBrack + `|` + wellBrace
// pathCont is a run of midChar characters that must finish with either a
// balanced group or an endChar, repeated one or more times.
pathCont = `([` + midChar + `]*(` + wellAll + `|[` + endChar + `])+)+`
// comScheme is any scheme followed by "://"; scheme also accepts the
// alternatives in otherScheme.
comScheme = `[a-zA-Z][a-zA-Z.\-+]*://`
scheme = `(` + comScheme + `|` + otherScheme + `)`
// iri is a single host label: it starts and ends with an iriChar and may
// contain hyphens in between. domain is one or more labels, each followed
// by a dot.
iri = `[` + iriChar + `]([` + iriChar + `\-]*[` + iriChar + `])?`
domain = `(` + iri + `\.)+`
// octet matches a decimal number from 0 to 255.
octet = `(25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9][0-9]|[0-9])`
ipv4Addr = `\b` + octet + `\.` + octet + `\.` + octet + `\.` + octet + `\b`
// ipv6Addr is meant to match IPv6 addresses, including forms that end in
// an embedded dotted-quad.
ipv6Addr = `([0-9a-fA-F]{1,4}:([0-9a-fA-F]{1,4}:([0-9a-fA-F]{1,4}:([0-9a-fA-F]{1,4}:([0-9a-fA-F]{1,4}:[0-9a-fA-F]{0,4}|:[0-9a-fA-F]{1,4})?|(:[0-9a-fA-F]{1,4}){0,2})|(:[0-9a-fA-F]{1,4}){0,3})|(:[0-9a-fA-F]{1,4}){0,4})|:(:[0-9a-fA-F]{1,4}){0,5})((:[0-9a-fA-F]{1,4}){2}|:(25[0-5]|(2[0-4]|1[0-9]|[1-9])?[0-9])(\.(25[0-5]|(2[0-4]|1[0-9]|[1-9])?[0-9])){3})|(([0-9a-fA-F]{1,4}:){1,6}|:):[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){7}:`
ipAddr = `(` + ipv4Addr + `|` + ipv6Addr + `)`
// site is a domain followed by a known TLD; hostName is a site or an IP
// address.
site = domain + gtld
hostName = `(` + site + `|` + ipAddr + `)`
// port is an optional ":" followed by zero or more digits.
port = `(:[0-9]*)?`
// path is "/", "/" followed by an optional pathCont, a word boundary, or
// the end of the text.
path = `(/|/` + pathCont + `?|\b|$)`
webURL = hostName + port + path
// strict requires a scheme; relaxed additionally accepts scheme-less urls.
strict = `(\b` + scheme + pathCont + `)`
relaxed = `(` + strict + `|` + webURL + `)`
)
// Relaxed matches all the urls it can find.
var Relaxed = regexp.MustCompile(relaxed)

// Strict only matches urls with a scheme to avoid false positives.
var Strict = regexp.MustCompile(strict)
func init() {
Relaxed.Longest()
Strict.Longest()
}
// StrictMatchingScheme builds a regexp that matches urls the way Strict does,
// except that the scheme must match exp (compared case-insensitively) instead
// of the built-in scheme pattern. The returned regexp uses leftmost-longest
// matching.
//
// It returns an error if the combined expression does not compile.
func StrictMatchingScheme(exp string) (*regexp.Regexp, error) {
	compiled, err := regexp.Compile(`(\b(?i)(` + exp + `)(?-i)` + pathCont + `)`)
	if err == nil {
		compiled.Longest()
		return compiled, nil
	}
	return nil, err
}