mirror of https://github.com/k3s-io/k3s
vendor misspell
parent
948f28a74c
commit
48434b44c4
|
@ -6,6 +6,7 @@
|
|||
"github.com/onsi/ginkgo/ginkgo",
|
||||
"github.com/jteeuwen/go-bindata/go-bindata",
|
||||
"github.com/tools/godep",
|
||||
"github.com/client9/misspell/cmd/misspell",
|
||||
"./..."
|
||||
],
|
||||
"Deps": [
|
||||
|
@ -354,6 +355,16 @@
|
|||
"ImportPath": "github.com/chai2010/gettext-go/gettext/po",
|
||||
"Rev": "c6fed771bfd517099caf0f7a961671fa8ed08723"
|
||||
},
|
||||
{
|
||||
"ImportPath": "github.com/client9/misspell",
|
||||
"Comment": "v0.3.0-7-g9ce5d97",
|
||||
"Rev": "9ce5d979ffdaca6385988d7ad1079a33ec942d20"
|
||||
},
|
||||
{
|
||||
"ImportPath": "github.com/client9/misspell/cmd/misspell",
|
||||
"Comment": "v0.3.0-7-g9ce5d97",
|
||||
"Rev": "9ce5d979ffdaca6385988d7ad1079a33ec942d20"
|
||||
},
|
||||
{
|
||||
"ImportPath": "github.com/cloudflare/cfssl/auth",
|
||||
"Comment": "1.2.0",
|
||||
|
|
|
@ -11452,6 +11452,66 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
================================================================================
|
||||
|
||||
|
||||
================================================================================
|
||||
= vendor/github.com/client9/misspell licensed under: =
|
||||
|
||||
The MIT License (MIT)
|
||||
|
||||
Copyright (c) 2015-2017 Nick Galbreath
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
|
||||
|
||||
= vendor/github.com/client9/misspell/LICENSE 387f6b7d6741c8a7f4f7e3c2bbdf97e4
|
||||
================================================================================
|
||||
|
||||
|
||||
================================================================================
|
||||
= vendor/github.com/client9/misspell/cmd/misspell licensed under: =
|
||||
|
||||
The MIT License (MIT)
|
||||
|
||||
Copyright (c) 2015-2017 Nick Galbreath
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
|
||||
|
||||
= vendor/github.com/client9/misspell/LICENSE 387f6b7d6741c8a7f4f7e3c2bbdf97e4
|
||||
================================================================================
|
||||
|
||||
|
||||
================================================================================
|
||||
= vendor/github.com/cloudflare/cfssl/auth licensed under: =
|
||||
|
||||
|
|
|
@ -58,6 +58,7 @@ REQUIRED_BINS=(
|
|||
"github.com/onsi/ginkgo/ginkgo"
|
||||
"github.com/jteeuwen/go-bindata/go-bindata"
|
||||
"github.com/tools/godep"
|
||||
"github.com/client9/misspell/cmd/misspell"
|
||||
"./..."
|
||||
)
|
||||
|
||||
|
|
|
@ -47,6 +47,7 @@ filegroup(
|
|||
"//vendor/github.com/beorn7/perks/quantile:all-srcs",
|
||||
"//vendor/github.com/blang/semver:all-srcs",
|
||||
"//vendor/github.com/chai2010/gettext-go/gettext:all-srcs",
|
||||
"//vendor/github.com/client9/misspell:all-srcs",
|
||||
"//vendor/github.com/cloudflare/cfssl/auth:all-srcs",
|
||||
"//vendor/github.com/cloudflare/cfssl/certdb:all-srcs",
|
||||
"//vendor/github.com/cloudflare/cfssl/config:all-srcs",
|
||||
|
|
|
@ -0,0 +1,33 @@
|
|||
dist/
|
||||
bin/
|
||||
|
||||
# editor turds
|
||||
*~
|
||||
*.gz
|
||||
*.bz2
|
||||
*.csv
|
||||
|
||||
# Compiled Object files, Static and Dynamic libs (Shared Objects)
|
||||
*.o
|
||||
*.a
|
||||
*.so
|
||||
|
||||
# Folders
|
||||
_obj
|
||||
_test
|
||||
|
||||
# Architecture specific extensions/prefixes
|
||||
*.[568vq]
|
||||
[568vq].out
|
||||
|
||||
*.cgo1.go
|
||||
*.cgo2.c
|
||||
_cgo_defun.c
|
||||
_cgo_gotypes.go
|
||||
_cgo_export.*
|
||||
|
||||
_testmain.go
|
||||
|
||||
*.exe
|
||||
*.test
|
||||
*.prof
|
|
@ -0,0 +1,12 @@
|
|||
sudo: required
|
||||
dist: trusty
|
||||
group: edge
|
||||
language: go
|
||||
go:
|
||||
- 1.8.3
|
||||
git:
|
||||
depth: 1
|
||||
script:
|
||||
- make -e ci
|
||||
after_success:
|
||||
- test -n "$TRAVIS_TAG" && ./scripts/goreleaser.sh
|
|
@ -0,0 +1,35 @@
|
|||
load("@io_bazel_rules_go//go:def.bzl", "go_library")
|
||||
|
||||
go_library(
|
||||
name = "go_default_library",
|
||||
srcs = [
|
||||
"ascii.go",
|
||||
"case.go",
|
||||
"legal.go",
|
||||
"mime.go",
|
||||
"notwords.go",
|
||||
"replace.go",
|
||||
"stringreplacer.go",
|
||||
"url.go",
|
||||
"words.go",
|
||||
],
|
||||
importpath = "github.com/client9/misspell",
|
||||
visibility = ["//visibility:public"],
|
||||
)
|
||||
|
||||
filegroup(
|
||||
name = "package-srcs",
|
||||
srcs = glob(["**"]),
|
||||
tags = ["automanaged"],
|
||||
visibility = ["//visibility:private"],
|
||||
)
|
||||
|
||||
filegroup(
|
||||
name = "all-srcs",
|
||||
srcs = [
|
||||
":package-srcs",
|
||||
"//vendor/github.com/client9/misspell/cmd/misspell:all-srcs",
|
||||
],
|
||||
tags = ["automanaged"],
|
||||
visibility = ["//visibility:public"],
|
||||
)
|
|
@ -0,0 +1,37 @@
|
|||
FROM golang:1.8.3-alpine
|
||||
|
||||
# cache buster
|
||||
RUN echo 4
|
||||
|
||||
# git is needed for "go get" below
|
||||
RUN apk add --no-cache git make
|
||||
|
||||
# these are my standard testing / linting tools
|
||||
RUN /bin/true \
|
||||
&& go get -u github.com/golang/dep/cmd/dep \
|
||||
&& go get -u github.com/alecthomas/gometalinter \
|
||||
&& gometalinter --install \
|
||||
&& rm -rf /go/src /go/pkg
|
||||
#
|
||||
# * SCOWL word list
|
||||
#
|
||||
# Downloads
|
||||
# http://wordlist.aspell.net/dicts/
|
||||
# --> http://app.aspell.net/create
|
||||
#
|
||||
|
||||
# use en_US large size
|
||||
# use regular size for others
|
||||
ENV SOURCE_US_BIG http://app.aspell.net/create?max_size=70&spelling=US&max_variant=2&diacritic=both&special=hacker&special=roman-numerals&download=wordlist&encoding=utf-8&format=inline
|
||||
|
||||
# should be able to tell the difference between English variations using this
|
||||
ENV SOURCE_US http://app.aspell.net/create?max_size=60&spelling=US&max_variant=1&diacritic=both&download=wordlist&encoding=utf-8&format=inline
|
||||
ENV SOURCE_GB_ISE http://app.aspell.net/create?max_size=60&spelling=GBs&max_variant=2&diacritic=both&download=wordlist&encoding=utf-8&format=inline
|
||||
ENV SOURCE_GB_IZE http://app.aspell.net/create?max_size=60&spelling=GBz&max_variant=2&diacritic=both&download=wordlist&encoding=utf-8&format=inline
|
||||
ENV SOURCE_CA http://app.aspell.net/create?max_size=60&spelling=CA&max_variant=2&diacritic=both&download=wordlist&encoding=utf-8&format=inline
|
||||
|
||||
RUN /bin/true \
|
||||
&& mkdir /scowl-wl \
|
||||
&& wget -O /scowl-wl/words-US-60.txt ${SOURCE_US} \
|
||||
&& wget -O /scowl-wl/words-GB-ise-60.txt ${SOURCE_GB_ISE}
|
||||
|
|
@ -0,0 +1,15 @@
|
|||
# This file is autogenerated, do not edit; changes may be undone by the next 'dep ensure'.
|
||||
|
||||
|
||||
[[projects]]
|
||||
name = "github.com/gobwas/glob"
|
||||
packages = [".","compiler","match","syntax","syntax/ast","syntax/lexer","util/runes","util/strings"]
|
||||
revision = "bea32b9cd2d6f55753d94a28e959b13f0244797a"
|
||||
version = "v0.2.2"
|
||||
|
||||
[solve-meta]
|
||||
analyzer-name = "dep"
|
||||
analyzer-version = 1
|
||||
inputs-digest = "e481c81c87260652d25840e1d95d6e530331c095d64d84422a166f37ae0a77d3"
|
||||
solver-name = "gps-cdcl"
|
||||
solver-version = 1
|
|
@ -0,0 +1,26 @@
|
|||
|
||||
# Gopkg.toml example
|
||||
#
|
||||
# Refer to https://github.com/golang/dep/blob/master/docs/Gopkg.toml.md
|
||||
# for detailed Gopkg.toml documentation.
|
||||
#
|
||||
# required = ["github.com/user/thing/cmd/thing"]
|
||||
# ignored = ["github.com/user/project/pkgX", "bitbucket.org/user/project/pkgA/pkgY"]
|
||||
#
|
||||
# [[constraint]]
|
||||
# name = "github.com/user/project"
|
||||
# version = "1.0.0"
|
||||
#
|
||||
# [[constraint]]
|
||||
# name = "github.com/user/project2"
|
||||
# branch = "dev"
|
||||
# source = "github.com/myfork/project2"
|
||||
#
|
||||
# [[override]]
|
||||
# name = "github.com/x/y"
|
||||
# version = "2.4.0"
|
||||
|
||||
|
||||
[[constraint]]
|
||||
name = "github.com/gobwas/glob"
|
||||
version = "0.2.2"
|
|
@ -0,0 +1,22 @@
|
|||
The MIT License (MIT)
|
||||
|
||||
Copyright (c) 2015-2017 Nick Galbreath
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
|
|
@ -0,0 +1,84 @@
|
|||
CONTAINER=nickg/misspell
|
||||
|
||||
install: ## install misspell into GOPATH/bin
|
||||
go install ./cmd/misspell
|
||||
|
||||
build: hooks ## build and lint misspell
|
||||
go install ./cmd/misspell
|
||||
gometalinter \
|
||||
--vendor \
|
||||
--deadline=60s \
|
||||
--disable-all \
|
||||
--enable=vet \
|
||||
--enable=golint \
|
||||
--enable=gofmt \
|
||||
--enable=goimports \
|
||||
--enable=gosimple \
|
||||
--enable=staticcheck \
|
||||
--enable=ineffassign \
|
||||
--exclude=/usr/local/go/src/net/lookup_unix.go \
|
||||
./...
|
||||
go test .
|
||||
|
||||
test: ## run all tests
|
||||
go test .
|
||||
|
||||
# the grep in line 2 is to remove misspellings in the spelling dictionary
|
||||
# that trigger false positives!!
|
||||
falsepositives: /scowl-wl
|
||||
cat /scowl-wl/words-US-60.txt | \
|
||||
grep -i -v -E "payed|Tyre|Euclidian|nonoccurence|dependancy|reenforced|accidently|surprize|dependance|idealogy|binominal|causalities|conquerer|withing|casette|analyse|analogue|dialogue|paralyse|catalogue|archaeolog|clarinettist|catalyses|cancell|chisell|ageing|cataloguing" | \
|
||||
misspell -debug -error
|
||||
cat /scowl-wl/words-GB-ise-60.txt | \
|
||||
grep -v -E "payed|nonoccurence|withing" | \
|
||||
misspell -locale=UK -debug -error
|
||||
# cat /scowl-wl/words-GB-ize-60.txt | \
|
||||
# grep -v -E "withing" | \
|
||||
# misspell -debug -error
|
||||
# cat /scowl-wl/words-CA-60.txt | \
|
||||
# grep -v -E "withing" | \
|
||||
# misspell -debug -error
|
||||
|
||||
bench: ## run benchmarks
|
||||
go test -bench '.*'
|
||||
|
||||
clean: ## clean up time
|
||||
rm -rf dist/ bin/
|
||||
go clean ./...
|
||||
git gc --aggressive
|
||||
|
||||
ci: ## run test like travis-ci does, requires docker
|
||||
docker run --rm \
|
||||
-v $(PWD):/go/src/github.com/client9/misspell \
|
||||
-w /go/src/github.com/client9/misspell \
|
||||
${CONTAINER} \
|
||||
make build falsepositives
|
||||
|
||||
docker-build: ## build a docker test image
|
||||
docker build -t ${CONTAINER} .
|
||||
|
||||
docker-pull: ## pull latest test image
|
||||
docker pull ${CONTAINER}
|
||||
|
||||
docker-console: ## log into the test image
|
||||
docker run --rm -it \
|
||||
-v $(PWD):/go/src/github.com/client9/misspell \
|
||||
-w /go/src/github.com/client9/misspell \
|
||||
${CONTAINER} sh
|
||||
|
||||
.git/hooks/pre-commit: scripts/pre-commit.sh
|
||||
cp -f scripts/pre-commit.sh .git/hooks/pre-commit
|
||||
.git/hooks/commit-msg: scripts/commit-msg.sh
|
||||
cp -f scripts/commit-msg.sh .git/hooks/commit-msg
|
||||
hooks: .git/hooks/pre-commit .git/hooks/commit-msg ## install git precommit hooks
|
||||
|
||||
# Command targets that never produce a file of the same name. The original
# list named a non-existent "console" target (the real one is docker-console)
# and omitted most of the command targets defined in this Makefile.
.PHONY: help install build test falsepositives bench clean ci docker-build docker-pull docker-console hooks
|
||||
|
||||
# https://www.client9.com/self-documenting-makefiles/
|
||||
help:
|
||||
@awk -F ':|##' '/^[^\t].+?:.*?##/ {\
|
||||
printf "\033[36m%-30s\033[0m %s\n", $$1, $$NF \
|
||||
}' $(MAKEFILE_LIST)
|
||||
.DEFAULT_GOAL=help
|
||||
# Declare help as a phony target ('.PHONY=help' only assigned a variable named .PHONY).
.PHONY: help
|
||||
|
|
@ -0,0 +1,424 @@
|
|||
[![Build Status](https://travis-ci.org/client9/misspell.svg?branch=master)](https://travis-ci.org/client9/misspell) [![Go Report Card](https://goreportcard.com/badge/github.com/client9/misspell)](https://goreportcard.com/report/github.com/client9/misspell) [![GoDoc](https://godoc.org/github.com/client9/misspell?status.svg)](https://godoc.org/github.com/client9/misspell) [![Coverage](http://gocover.io/_badge/github.com/client9/misspell)](http://gocover.io/github.com/client9/misspell) [![license](https://img.shields.io/badge/license-MIT-blue.svg?style=flat)](https://raw.githubusercontent.com/client9/misspell/master/LICENSE)
|
||||
|
||||
Correct commonly misspelled English words... quickly.
|
||||
|
||||
### Install
|
||||
|
||||
|
||||
If you just want a binary and to start using `misspell`:
|
||||
|
||||
```
|
||||
curl -L -o ./install-misspell.sh https://git.io/misspell
|
||||
sh ./install-misspell.sh
|
||||
```
|
||||
|
||||
|
||||
Both will install as `./bin/misspell`. You can adjust the download location using the `-b` flag. File a ticket if you want another platform supported.
|
||||
|
||||
|
||||
If you use [Go](https://golang.org/), the best way to run `misspell` is by using [gometalinter](#gometalinter). Otherwise, install `misspell` the old-fashioned way:
|
||||
|
||||
```
|
||||
go get -u github.com/client9/misspell/cmd/misspell
|
||||
```
|
||||
|
||||
and misspell will be in your `GOPATH`
|
||||
|
||||
|
||||
Also if you like to live dangerously, one could do
|
||||
|
||||
```bash
|
||||
curl -L https://git.io/misspell | bash
|
||||
```
|
||||
|
||||
### Usage
|
||||
|
||||
|
||||
```bash
|
||||
$ misspell all.html your.txt important.md files.go
|
||||
your.txt:42:10 found "langauge" a misspelling of "language"
|
||||
|
||||
# ^ file, line, column
|
||||
```
|
||||
|
||||
```
|
||||
$ misspell -help
|
||||
Usage of misspell:
|
||||
-debug
|
||||
Debug matching, very slow
|
||||
-error
|
||||
Exit with 2 if misspelling found
|
||||
-f string
|
||||
'csv', 'sqlite3' or custom Golang template for output
|
||||
-i string
|
||||
ignore the following corrections, comma separated
|
||||
-j int
|
||||
Number of workers, 0 = number of CPUs
|
||||
-legal
|
||||
Show legal information and exit
|
||||
-locale string
|
||||
Correct spellings using locale preferences for US or UK. Default is to use a neutral variety of English. Setting locale to US will correct the British spelling of 'colour' to 'color'
|
||||
-o string
|
||||
output file or [stderr|stdout|] (default "stdout")
|
||||
-q Do not emit misspelling output
|
||||
-source string
|
||||
Source mode: auto=guess, go=golang source, text=plain or markdown-like text (default "auto")
|
||||
-w Overwrite file with corrections (default is just to display)
|
||||
```
|
||||
|
||||
## FAQ
|
||||
|
||||
* [Automatic Corrections](#correct)
|
||||
* [Converting UK spellings to US](#locale)
|
||||
* [Using pipes and stdin](#stdin)
|
||||
* [Golang special support](#golang)
|
||||
* [gometalinter support](#gometalinter)
|
||||
* [CSV Output](#csv)
|
||||
* [Using SQLite3](#sqlite)
|
||||
* [Changing output format](#output)
|
||||
* [Checking a folder recursively](#recursive)
|
||||
* [Performance](#performance)
|
||||
* [Known Issues](#issues)
|
||||
* [Debugging](#debug)
|
||||
* [False Negatives and missing words](#missing)
|
||||
* [Origin of Word Lists](#words)
|
||||
* [Software License](#license)
|
||||
* [Problem statement](#problem)
|
||||
* [Other spelling correctors](#others)
|
||||
* [Other ideas](#otherideas)
|
||||
|
||||
<a name="correct"></a>
|
||||
### How can I make the corrections automatically?
|
||||
|
||||
Just add the `-w` flag!
|
||||
|
||||
```
|
||||
$ misspell -w all.html your.txt important.md files.go
|
||||
your.txt:9:21:corrected "langauge" to "language"
|
||||
|
||||
# ^ File is rewritten only if a misspelling is found
|
||||
```
|
||||
|
||||
<a name="locale"></a>
|
||||
### How do I convert British spellings to American (or vice-versa)?
|
||||
|
||||
Add the `-locale US` flag!
|
||||
|
||||
```bash
|
||||
$ misspell -locale US important.txt
|
||||
important.txt:10:20 found "colour" a misspelling of "color"
|
||||
```
|
||||
|
||||
Add the `-locale UK` flag!
|
||||
|
||||
```bash
|
||||
$ echo "My favorite color is blue" | misspell -locale UK
|
||||
stdin:1:3:found "favorite color" a misspelling of "favourite colour"
|
||||
```
|
||||
|
||||
Help is appreciated as I'm neither British nor an
|
||||
expert in the English language.
|
||||
|
||||
<a name="recursive"></a>
|
||||
### How do you check an entire folder recursively?
|
||||
|
||||
Just list a directory you'd like to check
|
||||
|
||||
```bash
|
||||
misspell .
|
||||
misspell aDirectory anotherDirectory aFile
|
||||
```
|
||||
|
||||
You can also run misspell recursively using the following shell tricks:
|
||||
|
||||
```bash
|
||||
misspell directory/**/*
|
||||
```
|
||||
|
||||
or
|
||||
|
||||
```bash
|
||||
find . -type f | xargs misspell
|
||||
```
|
||||
|
||||
You can select a type of file as well. The following example selects all `.txt` files that are *not* in the `vendor` directory:
|
||||
|
||||
```bash
|
||||
find . -type f -name '*.txt' | grep -v vendor/ | xargs misspell -error
|
||||
```
|
||||
|
||||
<a name="stdin"></a>
|
||||
### Can I use pipes or `stdin` for input?
|
||||
|
||||
Yes!
|
||||
|
||||
Print messages to `stderr` only:
|
||||
|
||||
```bash
|
||||
$ echo "zeebra" | misspell
|
||||
stdin:1:0:found "zeebra" a misspelling of "zebra"
|
||||
```
|
||||
|
||||
Print messages to `stderr`, and corrected text to `stdout`:
|
||||
|
||||
```bash
|
||||
$ echo "zeebra" | misspell -w
|
||||
stdin:1:0:corrected "zeebra" to "zebra"
|
||||
zebra
|
||||
```
|
||||
|
||||
Only print the corrected text to `stdout`:
|
||||
|
||||
```bash
|
||||
$ echo "zeebra" | misspell -w -q
|
||||
zebra
|
||||
```
|
||||
|
||||
<a name="golang"></a>
|
||||
### Are there special rules for golang source files?
|
||||
|
||||
Yes! If the file ends in `.go`, then misspell will only check spelling in
|
||||
comments.
|
||||
|
||||
If you want to force a file to be checked as a golang source, use `-source=go`
|
||||
on the command line. Conversely, you can check a golang source as if it were
|
||||
pure text by using `-source=text`. You might want to do this since many
|
||||
variable names have misspellings in them!
|
||||
|
||||
### Can I check only comments in other programming languages?
|
||||
|
||||
I'm told that using `-source=go` works well for ruby, javascript, java, c and
|
||||
c++.
|
||||
|
||||
It doesn't work well for python and bash.
|
||||
|
||||
<a name="gometalinter"></a>
|
||||
### Does this work with gometalinter?
|
||||
|
||||
[gometalinter](https://github.com/alecthomas/gometalinter) runs
|
||||
multiple golang linters. Starting on [2016-06-12](https://github.com/alecthomas/gometalinter/pull/134)
|
||||
gometalinter supports `misspell` natively but it is disabled by default.
|
||||
|
||||
```bash
|
||||
# update your copy of gometalinter
|
||||
go get -u github.com/alecthomas/gometalinter
|
||||
|
||||
# install updates and misspell
|
||||
gometalinter --install --update
|
||||
```
|
||||
|
||||
To use, just enable `misspell`
|
||||
|
||||
```
|
||||
gometalinter --enable misspell ./...
|
||||
```
|
||||
|
||||
Note that gometalinter only checks golang files, and uses the default options
|
||||
of `misspell`
|
||||
|
||||
You may wish to run this on your plaintext (.txt) and/or markdown files too.
|
||||
|
||||
|
||||
<a name="csv"></a>
|
||||
### How Can I Get CSV Output?
|
||||
|
||||
Using `-f csv`, the output is standard comma-separated values with headers in the first row.
|
||||
|
||||
```
|
||||
misspell -f csv *
|
||||
file,line,column,typo,corrected
|
||||
"README.md",9,22,langauge,language
|
||||
"README.md",47,25,langauge,language
|
||||
```
|
||||
|
||||
<a name="sqlite"></a>
|
||||
### How can I export to SQLite3?
|
||||
|
||||
Using `-f sqlite`, the output is a [sqlite3](https://www.sqlite.org/index.html) dump-file.
|
||||
|
||||
```bash
|
||||
$ misspell -f sqlite * > /tmp/misspell.sql
|
||||
$ cat /tmp/misspell.sql
|
||||
|
||||
PRAGMA foreign_keys=OFF;
|
||||
BEGIN TRANSACTION;
|
||||
CREATE TABLE misspell(
|
||||
"file" TEXT,
|
||||
"line" INTEGER,
|
||||
"column" INTEGER,
|
||||
"typo" TEXT,
|
||||
"corrected" TEXT
|
||||
);
|
||||
INSERT INTO misspell VALUES("install.txt",202,31,"immediatly","immediately");
|
||||
# etc...
|
||||
COMMIT;
|
||||
```
|
||||
|
||||
```bash
|
||||
$ sqlite3 -init /tmp/misspell.sql :memory: 'select count(*) from misspell'
|
||||
1
|
||||
```
|
||||
|
||||
With some tricks you can directly pipe output to sqlite3 by using `-init /dev/stdin`:
|
||||
|
||||
```
|
||||
misspell -f sqlite * | sqlite3 -init /dev/stdin -column -cmd '.width 60 15' ':memory:' \
|
||||
'select substr(file,35),typo,count(*) as count from misspell group by file, typo order by count desc;'
|
||||
```
|
||||
|
||||
<a name="ignore"></a>
|
||||
### How can I ignore rules?
|
||||
|
||||
Using the `-i "comma,separated,rules"` flag you can specify corrections to ignore.
|
||||
|
||||
For example, if you were to run `misspell -w -error -source=text` against a document that contains the string `Guy Finkelshteyn Braswell`, misspell would change the text to `Guy Finkelstheyn Bras well`. You can then
|
||||
determine the rules to ignore by reverting the change and running with the `-debug` flag. You can then see
|
||||
that the corrections were `htey -> they` and `aswell -> as well`. To ignore these two rules, you add `-i "htey,aswell"` to
|
||||
your command. With debug mode on, you can see it print the corrections, but it will no longer make them.
|
||||
|
||||
<a name="output"></a>
|
||||
### How can I change the output format?
|
||||
|
||||
Using the `-f template` flag you can pass in a
|
||||
[golang text template](https://golang.org/pkg/text/template/) to format the output.
|
||||
|
||||
One can use `printf "%q" VALUE` to safely quote a value.
|
||||
|
||||
The default template is compatible with [gometalinter](https://github.com/alecthomas/gometalinter)
|
||||
```
|
||||
{{ .Filename }}:{{ .Line }}:{{ .Column }}:corrected {{ printf "%q" .Original }} to "{{ printf "%q" .Corrected }}"
|
||||
```
|
||||
|
||||
To just print probable misspellings:
|
||||
|
||||
```
|
||||
-f '{{ .Original }}'
|
||||
```
|
||||
|
||||
<a name="problem"></a>
|
||||
### What problem does this solve?
|
||||
|
||||
This corrects commonly misspelled English words in computer source
|
||||
code, and other text-based formats (`.txt`, `.md`, etc).
|
||||
|
||||
It is designed to run quickly so it can be
|
||||
used as a [pre-commit hook](https://git-scm.com/book/en/v2/Customizing-Git-Git-Hooks)
|
||||
with minimal burden on the developer.
|
||||
|
||||
It does not work with binary formats (e.g. Word, etc).
|
||||
|
||||
It is not a complete spell-checking program nor a grammar checker.
|
||||
|
||||
<a name="others"></a>
|
||||
### What are other misspelling correctors and what's wrong with them?
|
||||
|
||||
Some other misspelling correctors:
|
||||
|
||||
* https://github.com/vlajos/misspell_fixer
|
||||
* https://github.com/lyda/misspell-check
|
||||
* https://github.com/lucasdemarchi/codespell
|
||||
|
||||
They all work but had problems that prevented me from using them at scale:
|
||||
|
||||
* slow, all of the above check one misspelling at a time (i.e. linear) using regexps
|
||||
* not MIT/Apache2 licensed (or equivalent)
|
||||
* have dependencies that don't work for me (python3, bash, linux sed, etc)
|
||||
* don't understand American vs. British English and sometimes makes unwelcome "corrections"
|
||||
|
||||
That said, they might be perfect for you and many have more features
|
||||
than this project!
|
||||
|
||||
<a name="performance"></a>
|
||||
### How fast is it?
|
||||
|
||||
Misspell is easily 100x to 1000x faster than other spelling correctors. You
|
||||
should be able to check and correct 1000 files in under 250ms.
|
||||
|
||||
This uses the mighty power of golang's
|
||||
[strings.Replacer](https://golang.org/pkg/strings/#Replacer) which is
|
||||
an implementation or variation of the
|
||||
[Aho–Corasick algorithm](https://en.wikipedia.org/wiki/Aho–Corasick_algorithm).
|
||||
This makes multiple substring matches *simultaneously*.
|
||||
|
||||
In addition this uses multiple CPU cores to work on multiple files.
|
||||
|
||||
<a name="issues"></a>
|
||||
### What problems does it have?
|
||||
|
||||
Unlike the other projects, this doesn't know what a "word" is. There may be
|
||||
more false positives and false negatives due to this. On the other hand, it
|
||||
sometimes catches things others don't.
|
||||
|
||||
Either way, please file bugs and we'll fix them!
|
||||
|
||||
Since it operates in parallel to make corrections, it can be non-obvious to
|
||||
determine exactly what word was corrected.
|
||||
|
||||
<a name="debug"></a>
|
||||
### It's making mistakes. How can I debug?
|
||||
|
||||
Run using `-debug` flag on the file you want. It should then print what word
|
||||
it is trying to correct. Then [file a
|
||||
bug](https://github.com/client9/misspell/issues) describing the problem.
|
||||
Thanks!
|
||||
|
||||
<a name="missing"></a>
|
||||
### Why is it making mistakes or missing items in golang files?
|
||||
|
||||
The matching function is *case-sensitive*, so variable names that are multiple
|
||||
words either in all-upper or all-lower case sometimes can cause false
|
||||
positives. For instance a variable named `bodyreader` could trigger a false
|
||||
positive since `yrea` is in the middle that could be corrected to `year`.
|
||||
Other problems happen if the variable name uses an English contraction that
|
||||
should use an apostrophe. The best way of fixing this is to use the
|
||||
[Effective Go naming
|
||||
conventions](https://golang.org/doc/effective_go.html#mixed-caps) and use
|
||||
[camelCase](https://en.wikipedia.org/wiki/CamelCase) for variable names. You
|
||||
can check your code using [golint](https://github.com/golang/lint)
|
||||
|
||||
<a name="license"></a>
|
||||
### What license is this?
|
||||
|
||||
The main code is [MIT](https://github.com/client9/misspell/blob/master/LICENSE).
|
||||
|
||||
Misspell also makes use of the Golang standard library and contains a modified version of Golang's [strings.Replacer](https://golang.org/pkg/strings/#Replacer)
|
||||
which are covered under a [BSD License](https://github.com/golang/go/blob/master/LICENSE). Type `misspell -legal` for more details or see [legal.go](https://github.com/client9/misspell/blob/master/legal.go)
|
||||
|
||||
<a name="words"></a>
|
||||
### Where do the word lists come from?
|
||||
|
||||
It started with a word list from
|
||||
[Wikipedia](https://en.wikipedia.org/wiki/Wikipedia:Lists_of_common_misspellings/For_machines).
|
||||
Unfortunately, this list had to be highly edited as many of the words are
|
||||
obsolete or based from mistakes on mechanical typewriters (I'm guessing).
|
||||
|
||||
Additional words were added based on actual mistakes seen in
|
||||
the wild (meaning self-generated).
|
||||
|
||||
Variations of UK and US spellings are based on many sources including:
|
||||
|
||||
* http://www.tysto.com/uk-us-spelling-list.html (with heavy editing, many are incorrect)
|
||||
* http://www.oxforddictionaries.com/us/words/american-and-british-spelling-american (excellent site but incomplete)
|
||||
* Diffing US and UK [scowl dictionaries](http://wordlist.aspell.net)
|
||||
|
||||
American English is more accepting of spelling variations than is British
|
||||
English, so "what is American or not" is subject to opinion. Corrections and help welcome.
|
||||
|
||||
<a name="otherideas"></a>
|
||||
### What are some other enhancements that could be done?
|
||||
|
||||
Here are some ideas for enhancements:
|
||||
|
||||
*Capitalization of proper nouns* could be done (e.g. weekday and month names, country names, language names)
|
||||
|
||||
*Opinionated US spellings* US English has a number of words with alternate
|
||||
spellings. Think [adviser vs.
|
||||
advisor](http://grammarist.com/spelling/adviser-advisor/). While "advisor" is not wrong, the opinionated US
|
||||
locale would correct "advisor" to "adviser".
|
||||
|
||||
*Versioning* Some type of versioning is needed so reporting mistakes and errors is easier.
|
||||
|
||||
*Feedback* Mistakes would be sent to some server for aggregation and feedback review.
|
||||
|
||||
*Contractions and Apostrophes* This would optionally correct "isnt" to
|
||||
"isn't", etc.
|
|
@ -0,0 +1,25 @@
|
|||
# Release HOWTO
|
||||
|
||||
since I forget.
|
||||
|
||||
|
||||
1. Review existing tags and pick new release number
|
||||
|
||||
```bash
|
||||
git tag
|
||||
```
|
||||
|
||||
2. Tag locally
|
||||
|
||||
```bash
|
||||
git tag -a v0.1.0 -m "First release"
|
||||
```
|
||||
|
||||
3. Push
|
||||
|
||||
```bash
|
||||
git push origin v0.1.0
|
||||
```
|
||||
|
||||
4. Verify release and edit notes. See https://github.com/client9/misspell/releases
|
||||
|
|
@ -0,0 +1,62 @@
|
|||
package misspell
|
||||
|
||||
// ByteToUpper converts an ASCII byte to upper case.
// Bytes outside 'a'..'z', including every byte >= 0x80, are returned
// unchanged. Uses a branchless algorithm.
func ByteToUpper(x byte) byte {
	// Force the high bit on so the two subtractions below never wrap.
	b := byte(0x80) | x
	// High bit of c is set iff the low 7 bits of x are >= 'a' (0x61).
	c := b - byte(0x61)
	// High bit of d is set iff the low 7 bits of x are <= 'z' (0x7a).
	d := ^(b - byte(0x7b))
	// Keep only the high-bit range flag, and only when x itself is 7-bit
	// ASCII. The mask has to be 0x80: masking with 0x7f discards the flag
	// and lets unrelated low bits leak into the result.
	e := (c & d) & (^x & 0x80)
	// 0x80 >> 2 == 0x20, the distance between 'a' and 'A'.
	return x - (e >> 2)
}
|
||||
|
||||
// ByteToLower maps an ASCII upper-case byte ('A'..'Z') to its lower-case
// form and returns every other byte unchanged. The computation is
// branch-free.
func ByteToLower(eax byte) byte {
	const low7 = byte(0x7f)

	// Two masked additions leave bit 7 set only when the low 7 bits of the
	// input lie in 'A'..'Z': the first pushes everything above 'Z' past bit 7
	// (where it is masked away), the second lifts 'A'..'Z' into bit 7.
	flag := (eax & low7) + 0x25
	flag = (flag & low7) + 0x1a

	// Drop the flag for non-ASCII input (high bit set in eax), then move it
	// from bit 7 down to bit 5, which is the ASCII case bit (0x20).
	caseBit := ((flag &^ eax) >> 2) & 0x20
	return eax + caseBit
}
|
||||
|
||||
// ByteEqualFold does ascii compare, case insensitive
|
||||
func ByteEqualFold(a, b byte) bool {
|
||||
return a == b || ByteToLower(a) == ByteToLower(b)
|
||||
}
|
||||
|
||||
// StringEqualFold reports whether s1 and s2 are equal when ASCII case is
// ignored. Bytes are compared one at a time; nothing outside 'A'..'Z' and
// 'a'..'z' is folded, which avoids the cost of the Unicode-aware routines in
// the standard library.
//
// Based on https://codereview.appspot.com/5180044/patch/14007/21002
func StringEqualFold(s1, s2 string) bool {
	if len(s1) != len(s2) {
		return false
	}
	const caseBit = 'a' - 'A'
	for i, n := 0, len(s1); i < n; i++ {
		x, y := s1[i], s2[i]
		if x == y {
			continue
		}
		// Differing bytes only match if forcing the case bit makes them
		// equal AND the result is really a letter.
		x |= caseBit
		y |= caseBit
		if x != y || x < 'a' || x > 'z' {
			return false
		}
	}
	return true
}
|
||||
|
||||
// StringHasPrefixFold is similar to strings.HasPrefix but comparison
|
||||
// is done ignoring ASCII case.
|
||||
// /
|
||||
func StringHasPrefixFold(s1, s2 string) bool {
|
||||
// prefix is bigger than input --> false
|
||||
if len(s1) < len(s2) {
|
||||
return false
|
||||
}
|
||||
if len(s1) == len(s2) {
|
||||
return StringEqualFold(s1, s2)
|
||||
}
|
||||
return StringEqualFold(s1[:len(s2)], s2)
|
||||
}
|
|
@ -0,0 +1,59 @@
|
|||
package misspell
|
||||
|
||||
import (
|
||||
"strings"
|
||||
)
|
||||
|
||||
// WordCase is an enum of various word casing styles.
type WordCase int

// Casing styles reported by CaseStyle. The classification is a simple
// ASCII-only heuristic and is not guaranteed to be right for every word.
const (
	CaseUnknown WordCase = iota
	CaseLower
	CaseUpper
	CaseTitle
)

// CaseStyle returns what case style a word is in.
//
// Only ASCII letters are counted. A word with no ASCII letters, or with a mix
// that is neither all-lower, all-upper nor "one leading capital", yields
// CaseUnknown.
func CaseStyle(word string) WordCase {
	upperCount := 0
	lowerCount := 0

	// This walks BYTES, not runes. That is sufficient here because only
	// ASCII letters are counted and no byte of a multi-byte UTF-8 sequence
	// falls in the ASCII range.
	for i := 0; i < len(word); i++ {
		ch := word[i]
		switch {
		case ch >= 'a' && ch <= 'z':
			lowerCount++
		case ch >= 'A' && ch <= 'Z':
			upperCount++
		}
	}

	switch {
	case upperCount != 0 && lowerCount == 0:
		return CaseUpper
	case upperCount == 0 && lowerCount != 0:
		return CaseLower
	case upperCount == 1 && lowerCount > 0 && word[0] >= 'A' && word[0] <= 'Z':
		return CaseTitle
	}
	return CaseUnknown
}

// CaseVariations returns the spellings of word that should be considered,
// given its casing style:
//
//   - CaseLower: the original, the title-cased form and the upper-cased form.
//   - CaseUpper: only the upper-cased form.
//   - anything else (title or mixed case): the original and the upper-cased
//     form.
func CaseVariations(word string, style WordCase) []string {
	switch style {
	case CaseLower:
		// Locate the end of the first rune rather than assuming it is one
		// byte long: slicing word[0:1] panics on "" and cuts a multi-byte
		// first character in half, producing invalid UTF-8.
		end := len(word)
		for i := range word {
			if i > 0 {
				end = i
				break
			}
		}
		return []string{word, strings.ToUpper(word[:end]) + word[end:], strings.ToUpper(word)}
	case CaseUpper:
		return []string{strings.ToUpper(word)}
	default:
		return []string{word, strings.ToUpper(word)}
	}
}
|
|
@ -0,0 +1,29 @@
|
|||
load("@io_bazel_rules_go//go:def.bzl", "go_binary", "go_library")
|
||||
|
||||
go_library(
|
||||
name = "go_default_library",
|
||||
srcs = ["main.go"],
|
||||
importpath = "github.com/client9/misspell/cmd/misspell",
|
||||
visibility = ["//visibility:private"],
|
||||
deps = ["//vendor/github.com/client9/misspell:go_default_library"],
|
||||
)
|
||||
|
||||
go_binary(
|
||||
name = "misspell",
|
||||
embed = [":go_default_library"],
|
||||
visibility = ["//visibility:public"],
|
||||
)
|
||||
|
||||
filegroup(
|
||||
name = "package-srcs",
|
||||
srcs = glob(["**"]),
|
||||
tags = ["automanaged"],
|
||||
visibility = ["//visibility:private"],
|
||||
)
|
||||
|
||||
filegroup(
|
||||
name = "all-srcs",
|
||||
srcs = [":package-srcs"],
|
||||
tags = ["automanaged"],
|
||||
visibility = ["//visibility:public"],
|
||||
)
|
|
@ -0,0 +1,326 @@
|
|||
// The misspell command corrects commonly misspelled English words in source files.
|
||||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"flag"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"log"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"strings"
|
||||
"text/template"
|
||||
"time"
|
||||
|
||||
"github.com/client9/misspell"
|
||||
)
|
||||
|
||||
var (
|
||||
defaultWrite *template.Template
|
||||
defaultRead *template.Template
|
||||
|
||||
stdout *log.Logger
|
||||
debug *log.Logger
|
||||
|
||||
version = "dev"
|
||||
)
|
||||
|
||||
const (
|
||||
// Note for gometalinter it must be "File:Line:Column: Msg"
|
||||
// note space between ": Msg"
|
||||
defaultWriteTmpl = `{{ .Filename }}:{{ .Line }}:{{ .Column }}: corrected "{{ .Original }}" to "{{ .Corrected }}"`
|
||||
defaultReadTmpl = `{{ .Filename }}:{{ .Line }}:{{ .Column }}: "{{ .Original }}" is a misspelling of "{{ .Corrected }}"`
|
||||
csvTmpl = `{{ printf "%q" .Filename }},{{ .Line }},{{ .Column }},{{ .Original }},{{ .Corrected }}`
|
||||
csvHeader = `file,line,column,typo,corrected`
|
||||
sqliteTmpl = `INSERT INTO misspell VALUES({{ printf "%q" .Filename }},{{ .Line }},{{ .Column }},{{ printf "%q" .Original }},{{ printf "%q" .Corrected }});`
|
||||
sqliteHeader = `PRAGMA foreign_keys=OFF;
|
||||
BEGIN TRANSACTION;
|
||||
CREATE TABLE misspell(
|
||||
"file" TEXT, "line" INTEGER, "column" INTEGER, "typo" TEXT, "corrected" TEXT
|
||||
);`
|
||||
sqliteFooter = "COMMIT;"
|
||||
)
|
||||
|
||||
func worker(writeit bool, r *misspell.Replacer, mode string, files <-chan string, results chan<- int) {
|
||||
count := 0
|
||||
for filename := range files {
|
||||
orig, err := misspell.ReadTextFile(filename)
|
||||
if err != nil {
|
||||
log.Println(err)
|
||||
continue
|
||||
}
|
||||
if len(orig) == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
debug.Printf("Processing %s", filename)
|
||||
|
||||
var updated string
|
||||
var changes []misspell.Diff
|
||||
|
||||
if mode == "go" {
|
||||
updated, changes = r.ReplaceGo(orig)
|
||||
} else {
|
||||
updated, changes = r.Replace(orig)
|
||||
}
|
||||
|
||||
if len(changes) == 0 {
|
||||
continue
|
||||
}
|
||||
count += len(changes)
|
||||
for _, diff := range changes {
|
||||
// add in filename
|
||||
diff.Filename = filename
|
||||
|
||||
// output can be done by doing multiple goroutines
|
||||
// and can clobber os.Stdout.
|
||||
//
|
||||
// the log package can be used simultaneously from multiple goroutines
|
||||
var output bytes.Buffer
|
||||
if writeit {
|
||||
defaultWrite.Execute(&output, diff)
|
||||
} else {
|
||||
defaultRead.Execute(&output, diff)
|
||||
}
|
||||
|
||||
// goroutine-safe print to os.Stdout
|
||||
stdout.Println(output.String())
|
||||
}
|
||||
|
||||
if writeit {
|
||||
ioutil.WriteFile(filename, []byte(updated), 0)
|
||||
}
|
||||
}
|
||||
results <- count
|
||||
}
|
||||
|
||||
func main() {
|
||||
t := time.Now()
|
||||
var (
|
||||
workers = flag.Int("j", 0, "Number of workers, 0 = number of CPUs")
|
||||
writeit = flag.Bool("w", false, "Overwrite file with corrections (default is just to display)")
|
||||
quietFlag = flag.Bool("q", false, "Do not emit misspelling output")
|
||||
outFlag = flag.String("o", "stdout", "output file or [stderr|stdout|]")
|
||||
format = flag.String("f", "", "'csv', 'sqlite3' or custom Golang template for output")
|
||||
ignores = flag.String("i", "", "ignore the following corrections, comma separated")
|
||||
locale = flag.String("locale", "", "Correct spellings using locale perferances for US or UK. Default is to use a neutral variety of English. Setting locale to US will correct the British spelling of 'colour' to 'color'")
|
||||
mode = flag.String("source", "auto", "Source mode: auto=guess, go=golang source, text=plain or markdown-like text")
|
||||
debugFlag = flag.Bool("debug", false, "Debug matching, very slow")
|
||||
exitError = flag.Bool("error", false, "Exit with 2 if misspelling found")
|
||||
showVersion = flag.Bool("v", false, "Show version and exit")
|
||||
|
||||
showLegal = flag.Bool("legal", false, "Show legal information and exit")
|
||||
)
|
||||
flag.Parse()
|
||||
|
||||
if *showVersion {
|
||||
fmt.Println(version)
|
||||
return
|
||||
}
|
||||
if *showLegal {
|
||||
fmt.Println(misspell.Legal)
|
||||
return
|
||||
}
|
||||
if *debugFlag {
|
||||
debug = log.New(os.Stderr, "DEBUG ", 0)
|
||||
} else {
|
||||
debug = log.New(ioutil.Discard, "", 0)
|
||||
}
|
||||
|
||||
r := misspell.Replacer{
|
||||
Replacements: misspell.DictMain,
|
||||
Debug: *debugFlag,
|
||||
}
|
||||
//
|
||||
// Figure out regional variations
|
||||
//
|
||||
switch strings.ToUpper(*locale) {
|
||||
case "":
|
||||
// nothing
|
||||
case "US":
|
||||
r.AddRuleList(misspell.DictAmerican)
|
||||
case "UK", "GB":
|
||||
r.AddRuleList(misspell.DictBritish)
|
||||
case "NZ", "AU", "CA":
|
||||
log.Fatalf("Help wanted. https://github.com/client9/misspell/issues/6")
|
||||
default:
|
||||
log.Fatalf("Unknown locale: %q", *locale)
|
||||
}
|
||||
|
||||
//
|
||||
// Stuff to ignore
|
||||
//
|
||||
if len(*ignores) > 0 {
|
||||
r.RemoveRule(strings.Split(*ignores, ","))
|
||||
}
|
||||
|
||||
//
|
||||
// Source input mode
|
||||
//
|
||||
switch *mode {
|
||||
case "auto":
|
||||
case "go":
|
||||
case "text":
|
||||
default:
|
||||
log.Fatalf("Mode must be one of auto=guess, go=golang source, text=plain or markdown-like text")
|
||||
}
|
||||
|
||||
//
|
||||
// Custom output
|
||||
//
|
||||
switch {
|
||||
case *format == "csv":
|
||||
tmpl := template.Must(template.New("csv").Parse(csvTmpl))
|
||||
defaultWrite = tmpl
|
||||
defaultRead = tmpl
|
||||
stdout.Println(csvHeader)
|
||||
case *format == "sqlite" || *format == "sqlite3":
|
||||
tmpl := template.Must(template.New("sqlite3").Parse(sqliteTmpl))
|
||||
defaultWrite = tmpl
|
||||
defaultRead = tmpl
|
||||
stdout.Println(sqliteHeader)
|
||||
case len(*format) > 0:
|
||||
t, err := template.New("custom").Parse(*format)
|
||||
if err != nil {
|
||||
log.Fatalf("Unable to compile log format: %s", err)
|
||||
}
|
||||
defaultWrite = t
|
||||
defaultRead = t
|
||||
default: // format == ""
|
||||
defaultWrite = template.Must(template.New("defaultWrite").Parse(defaultWriteTmpl))
|
||||
defaultRead = template.Must(template.New("defaultRead").Parse(defaultReadTmpl))
|
||||
}
|
||||
|
||||
// we cant't just write to os.Stdout directly since we have multiple goroutine
|
||||
// all writing at the same time causing broken output. Log is routine safe.
|
||||
// we see it so it doesn't use a prefix or include a time stamp.
|
||||
switch {
|
||||
case *quietFlag || *outFlag == "/dev/null":
|
||||
stdout = log.New(ioutil.Discard, "", 0)
|
||||
case *outFlag == "/dev/stderr" || *outFlag == "stderr":
|
||||
stdout = log.New(os.Stderr, "", 0)
|
||||
case *outFlag == "/dev/stdout" || *outFlag == "stdout":
|
||||
stdout = log.New(os.Stdout, "", 0)
|
||||
case *outFlag == "" || *outFlag == "-":
|
||||
stdout = log.New(os.Stdout, "", 0)
|
||||
default:
|
||||
fo, err := os.Create(*outFlag)
|
||||
if err != nil {
|
||||
log.Fatalf("unable to create outfile %q: %s", *outFlag, err)
|
||||
}
|
||||
defer fo.Close()
|
||||
stdout = log.New(fo, "", 0)
|
||||
}
|
||||
|
||||
//
|
||||
// Number of Workers / CPU to use
|
||||
//
|
||||
if *workers < 0 {
|
||||
log.Fatalf("-j must >= 0")
|
||||
}
|
||||
if *workers == 0 {
|
||||
*workers = runtime.NumCPU()
|
||||
}
|
||||
if *debugFlag {
|
||||
*workers = 1
|
||||
}
|
||||
|
||||
//
|
||||
// Done with Flags.
|
||||
// Compile the Replacer and process files
|
||||
//
|
||||
r.Compile()
|
||||
|
||||
args := flag.Args()
|
||||
debug.Printf("initialization complete in %v", time.Since(t))
|
||||
|
||||
// stdin/stdout
|
||||
if len(args) == 0 {
|
||||
// if we are working with pipes/stdin/stdout
|
||||
// there is no concurrency, so we can directly
|
||||
// send data to the writers
|
||||
var fileout io.Writer
|
||||
var errout io.Writer
|
||||
switch *writeit {
|
||||
case true:
|
||||
// if we ARE writing the corrected stream
|
||||
// the corrected stream goes to stdout
|
||||
// and the misspelling errors goes to stderr
|
||||
// so we can do something like this:
|
||||
// curl something | misspell -w | gzip > afile.gz
|
||||
fileout = os.Stdout
|
||||
errout = os.Stderr
|
||||
case false:
|
||||
// if we are not writing out the corrected stream
|
||||
// then work just like files. Misspelling errors
|
||||
// are sent to stdout
|
||||
fileout = ioutil.Discard
|
||||
errout = os.Stdout
|
||||
}
|
||||
count := 0
|
||||
next := func(diff misspell.Diff) {
|
||||
count++
|
||||
|
||||
// don't even evaluate the output templates
|
||||
if *quietFlag {
|
||||
return
|
||||
}
|
||||
diff.Filename = "stdin"
|
||||
if *writeit {
|
||||
defaultWrite.Execute(errout, diff)
|
||||
} else {
|
||||
defaultRead.Execute(errout, diff)
|
||||
}
|
||||
errout.Write([]byte{'\n'})
|
||||
|
||||
}
|
||||
err := r.ReplaceReader(os.Stdin, fileout, next)
|
||||
if err != nil {
|
||||
os.Exit(1)
|
||||
}
|
||||
switch *format {
|
||||
case "sqlite", "sqlite3":
|
||||
fileout.Write([]byte(sqliteFooter))
|
||||
}
|
||||
if count != 0 && *exitError {
|
||||
// error
|
||||
os.Exit(2)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
c := make(chan string, 64)
|
||||
results := make(chan int, *workers)
|
||||
|
||||
for i := 0; i < *workers; i++ {
|
||||
go worker(*writeit, &r, *mode, c, results)
|
||||
}
|
||||
|
||||
for _, filename := range args {
|
||||
filepath.Walk(filename, func(path string, info os.FileInfo, err error) error {
|
||||
if err == nil && !info.IsDir() {
|
||||
c <- path
|
||||
}
|
||||
return nil
|
||||
})
|
||||
}
|
||||
close(c)
|
||||
|
||||
count := 0
|
||||
for i := 0; i < *workers; i++ {
|
||||
changed := <-results
|
||||
count += changed
|
||||
}
|
||||
|
||||
switch *format {
|
||||
case "sqlite", "sqlite3":
|
||||
stdout.Println(sqliteFooter)
|
||||
}
|
||||
|
||||
if count != 0 && *exitError {
|
||||
os.Exit(2)
|
||||
}
|
||||
}
|
|
@ -0,0 +1,29 @@
|
|||
# goreleaser.yml
|
||||
# https://github.com/goreleaser/goreleaser
|
||||
build:
|
||||
main: cmd/misspell/main.go
|
||||
binary: misspell
|
||||
ldflags: -s -w -X main.version={{.Version}}
|
||||
goos:
|
||||
- darwin
|
||||
- linux
|
||||
- windows
|
||||
goarch:
|
||||
- amd64
|
||||
env:
|
||||
- CGO_ENABLED=0
|
||||
ignore:
|
||||
- goos: darwin
|
||||
goarch: 386
|
||||
- goos: windows
|
||||
goarch: 386
|
||||
|
||||
archive:
|
||||
name_template: "{{ .Binary }}_{{ .Version }}_{{ .Os }}_{{ .Arch }}"
|
||||
replacements:
|
||||
amd64: 64bit
|
||||
386: 32bit
|
||||
darwin: mac
|
||||
|
||||
snapshot:
|
||||
name_template: SNAPSHOT-{{.Commit}}
|
|
@ -0,0 +1,318 @@
|
|||
#!/bin/sh
|
||||
set -e
|
||||
# Code generated by godownloader. DO NOT EDIT.
|
||||
#
|
||||
|
||||
# usage prints the help text, naming the script as given in $1, and then
# terminates the script with exit status 2.
usage() {
  this=$1
  cat <<EOF
${this}: download go binaries for client9/misspell

Usage: ${this} [-b] bindir [version]
-b sets bindir or installation directory, default "./bin"
[version] is a version number from
https://github.com/client9/misspell/releases
If version is missing, then an attempt to find the latest will be found.

Generated by godownloader
https://github.com/goreleaser/godownloader

EOF
  exit 2
}
|
||||
|
||||
# parse_args processes the script's command line.
#   -b DIR   sets BINDIR (otherwise BINDIR from the environment, else ./bin)
#   -h / -?  prints usage and exits
# The first remaining argument, if any, becomes VERSION.
parse_args() {
  : "${BINDIR:=./bin}"
  while getopts "b:h?" arg; do
    case "$arg" in
      b)
        BINDIR="$OPTARG"
        ;;
      h | \?)
        usage "$0"
        ;;
    esac
  done
  shift $((OPTIND - 1))
  VERSION=$1
}
|
||||
# execute wraps all the destructive operations: download the tarball and the
# checksum file into a temp dir, verify the tarball, unpack it and install
# the binary into BINDIR.
#
# Keeping this in a function means that if a curl|bash pipeline is cut off
# by the network, either nothing happens or the script fails with a syntax
# error, instead of leaving half-done work.
execute() {
  TMPDIR=$(mktmpdir)

  echo "${PREFIX}: downloading ${TARBALL_URL}"
  http_download "${TMPDIR}/${TARBALL}" "${TARBALL_URL}"

  echo "${PREFIX}: verifying checksums"
  http_download "${TMPDIR}/${CHECKSUM}" "${CHECKSUM_URL}"
  hash_sha256_verify "${TMPDIR}/${TARBALL}" "${TMPDIR}/${CHECKSUM}"

  # unpack in a subshell so the caller's working directory is untouched
  (cd "${TMPDIR}" && untar "${TARBALL}")

  install -d "${BINDIR}"
  install "${TMPDIR}/${BINARY}" "${BINDIR}/"
  echo "${PREFIX}: installed as ${BINDIR}/${BINARY}"
}
|
||||
# is_supported_platform succeeds (status 0) when the os/arch pair in $1 is
# one this installer handles, and fails (status 1) otherwise.
is_supported_platform() {
  platform=$1
  case "$platform" in
    darwin/amd64 | linux/amd64)
      found=0
      ;;
    *)
      found=1
      ;;
  esac
  return $found
}
|
||||
# check_platform aborts the script with status 1 and an explanatory message
# when PLATFORM is not supported; otherwise it does nothing.
check_platform() {
  if ! is_supported_platform "$PLATFORM"; then
    echo "${PREFIX}: platform $PLATFORM is not supported.  Make sure this script is up-to-date and file request at https://github.com/${PREFIX}/issues/new"
    exit 1
  fi
}
|
||||
# adjust_version makes sure VERSION is set and normalised: when it is empty
# the latest release tag is looked up on GitHub, and a leading "v" is
# stripped.
adjust_version() {
  if test -z "${VERSION}"; then
    echo "$PREFIX: checking GitHub for latest version"
    VERSION=$(github_last_release "$OWNER/$REPO")
  fi
  # if version starts with 'v', remove it
  VERSION="${VERSION#v}"
}
|
||||
# adjust_format is the hook for changing FORMAT (tar.gz or zip) based on
# ARCH. Nothing needs changing here, so it simply succeeds.
adjust_format() {
  return 0
}
|
||||
# adjust_os rewrites OS to the spelling used in release archive names.
# Values without a replacement are left untouched. Always succeeds.
adjust_os() {
  case "${OS}" in
    386) OS=32bit ;;
    amd64) OS=64bit ;;
    darwin) OS=mac ;;
  esac
  return 0
}
|
||||
# adjust_arch rewrites ARCH to the spelling used in release archive names.
# Values without a replacement are left untouched. Always succeeds.
adjust_arch() {
  case "${ARCH}" in
    386) ARCH=32bit ;;
    amd64) ARCH=64bit ;;
    darwin) ARCH=mac ;;
  esac
  return 0
}
|
||||
|
||||
cat /dev/null <<EOF
|
||||
------------------------------------------------------------------------
|
||||
https://github.com/client9/shlib - portable posix shell functions
|
||||
Public domain - http://unlicense.org
|
||||
https://github.com/client9/shlib/blob/master/LICENSE.md
|
||||
but credit (and pull requests) appreciated.
|
||||
------------------------------------------------------------------------
|
||||
EOF
|
||||
# is_command succeeds when $1 names something the shell can run.
# Prints nothing on stdout.
is_command() {
  command -v "$1" 1>/dev/null
}
|
||||
# uname_os prints the kernel name reported by `uname -s`, lower-cased.
uname_os() {
  os=$(uname -s | tr '[:upper:]' '[:lower:]')
  printf '%s\n' "$os"
}
|
||||
# uname_arch prints the machine type from `uname -m`, translated to the
# architecture names used by the rest of this script. Unrecognised values are
# passed through unchanged.
uname_arch() {
  arch=$(uname -m)
  case $arch in
    x86_64) arch="amd64" ;;
    x86) arch="386" ;;
    i686) arch="386" ;;
    i386) arch="386" ;;
    aarch64) arch="arm64" ;;
    # 32-bit ARM must be spelled armvN: that is the form uname_arch_check
    # accepts, so "arm5"/"arm6"/"arm7" were always rejected as an internal
    # error on ARM hosts.
    armv5*) arch="armv5" ;;
    armv6*) arch="armv6" ;;
    armv7*) arch="armv7" ;;
  esac
  echo "${arch}"
}
|
||||
# uname_os_check succeeds when uname_os yields one of the GOOS values listed
# below; otherwise it prints a diagnostic and returns 1.
uname_os_check() {
  os=$(uname_os)
  case "$os" in
    darwin | dragonfly | freebsd | linux | android | nacl | netbsd | openbsd | plan9 | solaris | windows)
      return 0
      ;;
  esac
  echo "$0: uname_os_check: internal error '$(uname -s)' got converted to '$os' which is not a GOOS value. Please file bug at https://github.com/client9/shlib"
  return 1
}
|
||||
# uname_arch_check succeeds when uname_arch yields one of the architecture
# names listed below; otherwise it prints a diagnostic and returns 1.
uname_arch_check() {
  arch=$(uname_arch)
  case "$arch" in
    386 | amd64 | arm64 | armv5 | armv6 | armv7)
      return 0
      ;;
    ppc64 | ppc64le | mips | mipsle | mips64 | mips64le | s390x | amd64p32)
      return 0
      ;;
  esac
  echo "$0: uname_arch_check: internal error '$(uname -m)' got converted to '$arch' which is not a GOARCH value.  Please file bug report at https://github.com/client9/shlib"
  return 1
}
|
||||
# untar unpacks the archive named by $1 into the current directory, picking
# the tool from the file extension. Unknown extensions print a message and
# return 1.
untar() {
  tarball=$1
  case "${tarball}" in
    *.tar.gz)
      tar -xzf "${tarball}"
      ;;
    *.tgz)
      tar -xzf "${tarball}"
      ;;
    *.tar)
      tar -xf "${tarball}"
      ;;
    *.zip)
      unzip "${tarball}"
      ;;
    *)
      echo "Unknown archive format for ${tarball}"
      return 1
      ;;
  esac
}
|
||||
# mktmpdir prints the path of a directory for scratch files. An existing
# TMPDIR is reused (and created if missing); otherwise a fresh directory is
# made with mktemp.
mktmpdir() {
  if [ -z "$TMPDIR" ]; then
    TMPDIR="$(mktemp -d)"
  fi
  mkdir -p "${TMPDIR}"
  echo "${TMPDIR}"
}
|
||||
# http_download fetches URL $2 into file $1 using curl or, failing that,
# wget. An optional $3 is sent as an extra request header. Returns 1 when
# neither tool is installed.
http_download() {
  local_file=$1
  source_url=$2
  header=$3
  headerflag=''
  destflag=''
  if is_command curl; then
    cmd='curl --fail -sSL'
    destflag='-o'
    headerflag='-H'
  elif is_command wget; then
    cmd='wget -q'
    destflag='-O'
    headerflag='--header'
  else
    echo "http_download: unable to find wget or curl"
    return 1
  fi
  # $cmd, $destflag and $headerflag are intentionally unquoted so that they
  # split into separate words.
  if [ -n "$header" ]; then
    $cmd $headerflag "$header" $destflag "$local_file" "$source_url"
  else
    $cmd $destflag "$local_file" "$source_url"
  fi
}
|
||||
# github_api downloads URL $2 into file $1. For api.github.com URLs an
# Authorization header is added when GITHUB_TOKEN is set.
github_api() {
  local_file=$1
  source_url=$2
  header=""
  case "$source_url" in
    https://api.github.com*)
      if [ -n "$GITHUB_TOKEN" ]; then
        header="Authorization: token $GITHUB_TOKEN"
      fi
      ;;
  esac
  http_download "$local_file" "$source_url" "$header"
}
|
||||
# github_last_release prints the tag name of the latest release of the
# "owner/repo" given in $1, as reported by the GitHub API. Returns 1 when no
# tag name can be extracted from the response.
github_last_release() {
  owner_repo=$1
  giturl="https://api.github.com/repos/${owner_repo}/releases/latest"
  # "-" sends the response body to stdout
  html=$(github_api - "$giturl")
  version=$(echo "$html" | grep -m 1 "\"tag_name\":" | cut -f4 -d'"')
  if [ -z "$version" ]; then
    return 1
  fi
  echo "$version"
}
|
||||
# hash_sha256 prints the hex SHA-256 digest of the file $1 (default: stdin),
# using the first available of gsha256sum, sha256sum, shasum and openssl.
# Returns 1 when hashing fails or no suitable tool is installed.
hash_sha256() {
  TARGET=${1:-/dev/stdin}
  if is_command gsha256sum; then
    hash=$(gsha256sum "$TARGET") || return 1
    echo "$hash" | cut -d ' ' -f 1
  elif is_command sha256sum; then
    hash=$(sha256sum "$TARGET") || return 1
    echo "$hash" | cut -d ' ' -f 1
  elif is_command shasum; then
    hash=$(shasum -a 256 "$TARGET" 2>/dev/null) || return 1
    echo "$hash" | cut -d ' ' -f 1
  elif is_command openssl; then
    # The previous invocation ("openssl -dst openssl dgst ...", then
    # "cut -f a") was not a valid command line, so this fallback could never
    # work. `openssl dgst` prints "<ALGO>(<file>)= <hex>"; the digest is the
    # last space-separated word, even if the file name contains spaces.
    hash=$(openssl dgst -sha256 "$TARGET") || return 1
    echo "${hash##* }"
  else
    echo "hash_sha256: unable to find command to compute sha-256 hash"
    return 1
  fi
}
|
||||
# hash_sha256_verify checks file $1 against the checksum list in file $2.
# The expected digest is the first field of the line in $2 that mentions the
# base name of $1. Returns 1, with a message, when $2 is not given, no entry
# is found, or the digests differ.
hash_sha256_verify() {
  TARGET=$1
  checksums=$2
  if [ -z "$checksums" ]; then
    echo "hash_sha256_verify: checksum file not specified in arg2"
    return 1
  fi

  BASENAME=${TARGET##*/}
  want=$(grep "${BASENAME}" "${checksums}" 2>/dev/null | tr '\t' ' ' | cut -d ' ' -f 1)
  if [ -z "$want" ]; then
    echo "hash_sha256_verify: unable to find checksum for '${TARGET}' in '${checksums}'"
    return 1
  fi

  got=$(hash_sha256 "$TARGET")
  if [ "$want" = "$got" ]; then
    return 0
  fi
  echo "hash_sha256_verify: checksum for '$TARGET' did not verify ${want} vs $got"
  return 1
}
|
||||
cat /dev/null <<EOF
|
||||
------------------------------------------------------------------------
|
||||
End of functions from https://github.com/client9/shlib
|
||||
------------------------------------------------------------------------
|
||||
EOF
|
||||
|
||||
# --- project settings -------------------------------------------------------
OWNER=client9
REPO=misspell
BINARY=misspell
FORMAT=tar.gz

# --- host detection ---------------------------------------------------------
OS=$(uname_os)
ARCH=$(uname_arch)
PREFIX="$OWNER/$REPO"
PLATFORM="${OS}/${ARCH}"
GITHUB_DOWNLOAD="https://github.com/${OWNER}/${REPO}/releases/download"

uname_os_check "$OS"
uname_arch_check "$ARCH"

# --- command line, then per-platform adjustments ----------------------------
parse_args "$@"

check_platform

adjust_version

adjust_format

adjust_os

adjust_arch

echo "$PREFIX: found version ${VERSION} for ${OS}/${ARCH}"

# --- names and URLs of the release artifacts --------------------------------
NAME="${BINARY}_${VERSION}_${OS}_${ARCH}"
TARBALL="${NAME}.${FORMAT}"
TARBALL_URL="${GITHUB_DOWNLOAD}/v${VERSION}/${TARBALL}"
CHECKSUM="${REPO}_checksums.txt"
CHECKSUM_URL="${GITHUB_DOWNLOAD}/v${VERSION}/${CHECKSUM}"

# Adjust binary name if windows
if [ "$OS" = "windows" ]; then
  BINARY="${BINARY}.exe"
fi

execute
|
|
@ -0,0 +1,48 @@
|
|||
// Package misspell corrects commonly misspelled English words in source files.
|
||||
package misspell
|
||||
|
||||
// Legal provides licensing info: the notice for misspell itself, followed by
// the BSD license text covering the Go standard library code it includes.
// It is printed verbatim by the command's -legal flag.
const Legal = `
Except where noted below, the source code for misspell is
copyright Nick Galbreath and distribution is allowed under a
MIT license. See the following for details:

* https://github.com/client9/misspell/blob/master/LICENSE
* https://tldrlegal.com/license/mit-license

Misspell makes uses of the Golang standard library and
contains a modified version of Golang's strings.Replacer
which are covered under a BSD License.

* https://golang.org/pkg/strings/#Replacer
* https://golang.org/src/strings/replace.go
* https://github.com/golang/go/blob/master/LICENSE

Copyright (c) 2009 The Go Authors. All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:

* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
`
|
|
@ -0,0 +1,210 @@
|
|||
package misspell
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"net/http"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// binary is the set of file extensions treated as binary formats.
//
// The number of possible binary formats is very large; these are items that
// might be checked into a repo or be an artifact of a build. Additions
// welcome.
//
// Golang's internal table is very small and can't be relied on. Even then
// things like ".js" have a mime type of "application/javascript" which isn't
// very helpful.
//
// "[x]" means we have a sniff test and the suffix test should be eliminated.
var binary = map[string]bool{
	".a":     true, // [ ] archive
	".bin":   true, // [ ] binary
	".bz2":   true, // [ ] compression
	".class": true, // [x] Java class file
	".dll":   true, // [ ] shared library
	".exe":   true, // [ ] binary
	".gif":   true, // [ ] image
	".gpg":   true, // [x] text, but really all base64
	".gz":    true, // [ ] compression
	".ico":   true, // [ ] image
	".jar":   true, // [x] archive
	".jpeg":  true, // [ ] image
	".jpg":   true, // [ ] image
	".mp3":   true, // [ ] audio
	".mp4":   true, // [ ] video
	".mpeg":  true, // [ ] video
	".o":     true, // [ ] object file
	".pdf":   true, // [x] pdf
	".png":   true, // [x] image
	".pyc":   true, // [ ] Python bytecode
	".pyo":   true, // [ ] Python bytecode
	".so":    true, // [x] shared library
	".swp":   true, // [ ] vim swap file
	".tar":   true, // [ ] archive
	".tiff":  true, // [ ] image
	".woff":  true, // [ ] font
	".woff2": true, // [ ] font
	".xz":    true, // [ ] compression
	".z":     true, // [ ] compression
	".zip":   true, // [x] archive
}

// isBinaryFilename returns true if the file is likely to be binary, judged
// only by its (case-insensitive) extension.
//
// Better heuristics could be done here, in particular a binary file is
// unlikely to be UTF-8 encoded. However this is cheap and will solve the
// immediate need of making sure common binary formats are not corrupted by
// mistake.
func isBinaryFilename(s string) bool {
	ext := strings.ToLower(filepath.Ext(s))
	return binary[ext]
}
|
||||
|
||||
// scm is the set of directory names that source-control systems use for
// their private data.
var scm = map[string]bool{
	".bzr": true,
	".git": true,
	".hg":  true,
	".svn": true,
	"CVS":  true,
}

// isSCMPath returns true if the path is likely part of a (private) SCM
// directory, e.g. .git/something = true.
//
// Exception: any file whose base name contains "EDITMSG" is never treated as
// an SCM path. Normally nothing in .git is inspected, but COMMIT_EDITMSG and
// TAG_EDITMSG are the temp files git uses for commit and tag messages, and
// letting misspell read them is what makes commit-msg hooks possible.
// See https://git-scm.com/book/en/v2/Customizing-Git-Git-Hooks
func isSCMPath(s string) bool {
	if strings.Contains(filepath.Base(s), "EDITMSG") {
		return false
	}
	sep := string(filepath.Separator)
	for _, component := range strings.Split(filepath.Clean(s), sep) {
		if scm[component] {
			return true
		}
	}
	return false
}
|
||||
|
||||
// magicHeaders lists leading byte sequences that mark content as something
// that must not be spell-checked.
var magicHeaders = [][]byte{
	// Issue #68
	// PGP messages and signatures are "text" but really just
	// blobs of base64-text and should not be misspell-checked
	[]byte("-----BEGIN PGP MESSAGE-----"),
	[]byte("-----BEGIN PGP SIGNATURE-----"),

	// ELF
	{0x7f, 0x45, 0x4c, 0x46},

	// Postscript
	{0x25, 0x21, 0x50, 0x53},

	// PDF
	{0x25, 0x50, 0x44, 0x46},

	// Java class file
	// https://en.wikipedia.org/wiki/Java_class_file
	{0xCA, 0xFE, 0xBA, 0xBE},

	// PNG
	// https://en.wikipedia.org/wiki/Portable_Network_Graphics
	{0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a},

	// ZIP, JAR, ODF, OOXML
	{0x50, 0x4B, 0x03, 0x04},
	{0x50, 0x4B, 0x05, 0x06},
	{0x50, 0x4B, 0x07, 0x08},
}

// isTextFile reports whether raw looks like UTF-8 text worth checking.
//
// Content starting with any of magicHeaders is rejected outright. Otherwise
// the verdict comes from http.DetectContentType: any text/ type with a utf-8
// charset is accepted. DetectContentType sometimes returns charset=utf-16
// for XML stuff, in which case the content is ignored.
func isTextFile(raw []byte) bool {
	for i := range magicHeaders {
		if bytes.HasPrefix(raw, magicHeaders[i]) {
			return false
		}
	}

	contentType := http.DetectContentType(raw)
	if !strings.HasPrefix(contentType, "text/") {
		return false
	}
	return strings.HasSuffix(contentType, "charset=utf-8")
}
|
||||
|
||||
// ReadTextFile returns the contents of a file, first testing if it is a text file
|
||||
// returns ("", nil) if not a text file
|
||||
// returns ("", error) if error
|
||||
// returns (string, nil) if text
|
||||
//
|
||||
// unfortunately, in worse case, this does
|
||||
// 1 stat
|
||||
// 1 open,read,close of 512 bytes
|
||||
// 1 more stat,open, read everything, close (via ioutil.ReadAll)
|
||||
// This could be kinder to the filesystem.
|
||||
//
|
||||
// This uses some heuristics of the file's extension (e.g. .zip, .txt) and
|
||||
// uses a sniffer to determine if the file is text or not.
|
||||
// Using file extensions isn't great, but probably
|
||||
// good enough for real-world use.
|
||||
// Golang's built in sniffer is problematic for differnet reasons. It's
|
||||
// optimized for HTML, and is very limited in detection. It would be good
|
||||
// to explicitly add some tests for ELF/DWARF formats to make sure we never
|
||||
// corrupt binary files.
|
||||
func ReadTextFile(filename string) (string, error) {
|
||||
if isBinaryFilename(filename) {
|
||||
return "", nil
|
||||
}
|
||||
|
||||
if isSCMPath(filename) {
|
||||
return "", nil
|
||||
}
|
||||
|
||||
fstat, err := os.Stat(filename)
|
||||
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("Unable to stat %q: %s", filename, err)
|
||||
}
|
||||
|
||||
// directory: nothing to do.
|
||||
if fstat.IsDir() {
|
||||
return "", nil
|
||||
}
|
||||
|
||||
// avoid reading in multi-gig files
|
||||
// if input is large, read the first 512 bytes to sniff type
|
||||
// if not-text, then exit
|
||||
isText := false
|
||||
if fstat.Size() > 50000 {
|
||||
fin, err := os.Open(filename)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("Unable to open large file %q: %s", filename, err)
|
||||
}
|
||||
defer fin.Close()
|
||||
buf := make([]byte, 512)
|
||||
_, err = io.ReadFull(fin, buf)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("Unable to read 512 bytes from %q: %s", filename, err)
|
||||
}
|
||||
if !isTextFile(buf) {
|
||||
return "", nil
|
||||
}
|
||||
|
||||
// set so we don't double check this file
|
||||
isText = true
|
||||
}
|
||||
|
||||
// read in whole file
|
||||
raw, err := ioutil.ReadFile(filename)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("Unable to read all %q: %s", filename, err)
|
||||
}
|
||||
|
||||
if !isText && !isTextFile(raw) {
|
||||
return "", nil
|
||||
}
|
||||
return string(raw), nil
|
||||
}
|
|
@ -0,0 +1,85 @@
|
|||
package misspell
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"regexp"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// reEmail matches email-like strings followed by one non-letter character.
var reEmail = regexp.MustCompile(`[a-zA-Z0-9_.%+-]+@[a-zA-Z0-9-.]+\.[a-zA-Z]{2,6}[^a-zA-Z]`)

// reHost matches host-like strings such as "foobar.com".
var reHost = regexp.MustCompile(`[a-zA-Z0-9-.]+\.[a-zA-Z]+`)

// reBackslash matches a backslash followed by one lowercase ASCII letter.
var reBackslash = regexp.MustCompile(`\\[a-z]`)
|
||||
|
||||
// RemovePath blanks out embedded file system paths such as /foo/bar or
// /static/myimg.png. Each removed path is replaced by the same number of
// spaces, so byte offsets in the result line up with the input.
//
// A path is only recognised when the '/' starts the string or is preceded
// by whitespace, another '/', '[' or '('. It then runs up to the next
// whitespace (or the matching ']' / ')' or a newline for the bracketed
// forms). If no terminator follows, the rest of the string is kept as is.
//
// TODO: windows style
func RemovePath(s string) string {
	var out bytes.Buffer
	for len(s) > 0 {
		slash := strings.IndexByte(s, '/')
		if slash < 0 {
			out.WriteString(s)
			break
		}

		// pos is the byte in front of the slash, or the slash itself when
		// it is the very first byte.
		pos := slash
		if pos > 0 {
			pos--
		}

		var terminators string
		switch s[pos] {
		case '[':
			terminators = "]\n"
		case '(':
			terminators = ")\n"
		case '/', ' ', '\n', '\t', '\r':
			terminators = " \n\r\t"
		default:
			// The slash is glued to an ordinary character (e.g. "and/or"):
			// copy through the slash and keep scanning.
			out.WriteString(s[:pos+2])
			s = s[pos+2:]
			continue
		}

		span := strings.IndexAny(s[pos+1:], terminators)
		if span == -1 {
			out.WriteString(s)
			break
		}
		out.WriteString(s[:pos+1])
		out.WriteString(strings.Repeat(" ", span))
		s = s[pos+span+1:]
	}
	return out.String()
}
|
||||
|
||||
// replaceWithBlanks returns a run of spaces whose byte length equals
// len(s).
func replaceWithBlanks(s string) string {
	return string(bytes.Repeat([]byte{' '}, len(s)))
}
|
||||
|
||||
// RemoveEmail remove email-like strings, e.g. "nickg+junk@xfoobar.com", "nickg@xyz.abc123.biz"
|
||||
func RemoveEmail(s string) string {
|
||||
return reEmail.ReplaceAllStringFunc(s, replaceWithBlanks)
|
||||
}
|
||||
|
||||
// RemoveHost removes host-like strings "foobar.com" "abc123.fo1231.biz"
|
||||
func RemoveHost(s string) string {
|
||||
return reHost.ReplaceAllStringFunc(s, replaceWithBlanks)
|
||||
}
|
||||
|
||||
// RemoveBackslashEscapes removes characters that are preceeded by a backslash
|
||||
// commonly found in printf format stringd "\nto"
|
||||
func removeBackslashEscapes(s string) string {
|
||||
return reBackslash.ReplaceAllStringFunc(s, replaceWithBlanks)
|
||||
}
|
||||
|
||||
// RemoveNotWords blanks out all the not words
|
||||
func RemoveNotWords(s string) string {
|
||||
// do most selective/specific first
|
||||
return removeBackslashEscapes(RemoveHost(RemoveEmail(RemovePath(StripURL(s)))))
|
||||
}
|
|
@ -0,0 +1,246 @@
|
|||
package misspell
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"io"
|
||||
"regexp"
|
||||
"strings"
|
||||
"text/scanner"
|
||||
)
|
||||
|
||||
// max returns the larger of x and y.
func max(x, y int) int {
	if y >= x {
		return y
	}
	return x
}
|
||||
|
||||
// inArray reports whether needle is exactly equal to some element of
// haystack.
func inArray(haystack []string, needle string) bool {
	for i := 0; i < len(haystack); i++ {
		if haystack[i] == needle {
			return true
		}
	}
	return false
}
|
||||
|
||||
// wordRegexp matches a run of ASCII letters, digits and apostrophes; it is
// what this package treats as one word when re-checking a line.
var wordRegexp = regexp.MustCompile(`[a-zA-Z0-9']+`)
|
||||
|
||||
// Diff describes one correction made within a single line.
type Diff struct {
	// Filename is not populated by the code in this file.
	// FullLine is the complete original line the word was found in.
	Filename, FullLine string

	// Line is the 1-based line number; Column is the byte offset of
	// Original within FullLine.
	Line, Column int

	// Original is the word as it appeared; Corrected is its replacement.
	Original, Corrected string
}
|
||||
|
||||
// Replacer is the main struct for spelling correction
|
||||
type Replacer struct {
|
||||
Replacements []string
|
||||
Debug bool
|
||||
engine *StringReplacer
|
||||
corrected map[string]string
|
||||
}
|
||||
|
||||
// New creates a new default Replacer using the main rule list
|
||||
func New() *Replacer {
|
||||
r := Replacer{
|
||||
Replacements: DictMain,
|
||||
}
|
||||
r.Compile()
|
||||
return &r
|
||||
}
|
||||
|
||||
// RemoveRule deletes existings rules.
|
||||
// TODO: make inplace to save memory
|
||||
func (r *Replacer) RemoveRule(ignore []string) {
|
||||
newwords := make([]string, 0, len(r.Replacements))
|
||||
for i := 0; i < len(r.Replacements); i += 2 {
|
||||
if inArray(ignore, r.Replacements[i]) {
|
||||
continue
|
||||
}
|
||||
newwords = append(newwords, r.Replacements[i:i+2]...)
|
||||
}
|
||||
r.engine = nil
|
||||
r.Replacements = newwords
|
||||
}
|
||||
|
||||
// AddRuleList appends new rules.
|
||||
// Input is in the same form as Strings.Replacer: [ old1, new1, old2, new2, ....]
|
||||
// Note: does not check for duplictes
|
||||
func (r *Replacer) AddRuleList(additions []string) {
|
||||
r.engine = nil
|
||||
r.Replacements = append(r.Replacements, additions...)
|
||||
}
|
||||
|
||||
// Compile compiles the rules. Required before using the Replace functions
|
||||
func (r *Replacer) Compile() {
|
||||
|
||||
r.corrected = make(map[string]string, len(r.Replacements)/2)
|
||||
for i := 0; i < len(r.Replacements); i += 2 {
|
||||
r.corrected[r.Replacements[i]] = r.Replacements[i+1]
|
||||
}
|
||||
r.engine = NewStringReplacer(r.Replacements...)
|
||||
}
|
||||
|
||||
/*
|
||||
line1 and line2 are different
|
||||
extract words from each line1
|
||||
|
||||
replace word -> newword
|
||||
if word == new-word
|
||||
continue
|
||||
if new-word in list of replacements
|
||||
continue
|
||||
new word not original, and not in list of replacements
|
||||
some substring got mixed up. UNdo
|
||||
*/
|
||||
func (r *Replacer) recheckLine(s string, lineNum int, buf io.Writer, next func(Diff)) {
|
||||
first := 0
|
||||
redacted := RemoveNotWords(s)
|
||||
|
||||
idx := wordRegexp.FindAllStringIndex(redacted, -1)
|
||||
for _, ab := range idx {
|
||||
word := s[ab[0]:ab[1]]
|
||||
newword := r.engine.Replace(word)
|
||||
if newword == word {
|
||||
// no replacement done
|
||||
continue
|
||||
}
|
||||
|
||||
// ignore camelCase words
|
||||
// https://github.com/client9/misspell/issues/113
|
||||
if CaseStyle(word) == CaseUnknown {
|
||||
continue
|
||||
}
|
||||
|
||||
if StringEqualFold(r.corrected[strings.ToLower(word)], newword) {
|
||||
// word got corrected into something we know
|
||||
io.WriteString(buf, s[first:ab[0]])
|
||||
io.WriteString(buf, newword)
|
||||
first = ab[1]
|
||||
next(Diff{
|
||||
FullLine: s,
|
||||
Line: lineNum,
|
||||
Original: word,
|
||||
Corrected: newword,
|
||||
Column: ab[0],
|
||||
})
|
||||
continue
|
||||
}
|
||||
// Word got corrected into something unknown. Ignore it
|
||||
}
|
||||
io.WriteString(buf, s[first:])
|
||||
}
|
||||
|
||||
// ReplaceGo is a specialized routine for correcting Golang source
|
||||
// files. Currently only checks comments, not identifiers for
|
||||
// spelling.
|
||||
func (r *Replacer) ReplaceGo(input string) (string, []Diff) {
|
||||
var s scanner.Scanner
|
||||
s.Init(strings.NewReader(input))
|
||||
s.Mode = scanner.ScanIdents | scanner.ScanFloats | scanner.ScanChars | scanner.ScanStrings | scanner.ScanRawStrings | scanner.ScanComments
|
||||
lastPos := 0
|
||||
output := ""
|
||||
Loop:
|
||||
for {
|
||||
switch s.Scan() {
|
||||
case scanner.Comment:
|
||||
origComment := s.TokenText()
|
||||
newComment := r.engine.Replace(origComment)
|
||||
|
||||
if origComment != newComment {
|
||||
// s.Pos().Offset is the end of the current token
|
||||
// subtract len(origComment) to get the start of the token
|
||||
offset := s.Pos().Offset
|
||||
output = output + input[lastPos:offset-len(origComment)] + newComment
|
||||
lastPos = offset
|
||||
}
|
||||
case scanner.EOF:
|
||||
break Loop
|
||||
}
|
||||
}
|
||||
|
||||
if lastPos == 0 {
|
||||
// no changes, no copies
|
||||
return input, nil
|
||||
}
|
||||
if lastPos < len(input) {
|
||||
output = output + input[lastPos:]
|
||||
}
|
||||
diffs := make([]Diff, 0, 8)
|
||||
buf := bytes.NewBuffer(make([]byte, 0, max(len(input), len(output))+100))
|
||||
// faster that making a bytes.Buffer and bufio.ReadString
|
||||
outlines := strings.SplitAfter(output, "\n")
|
||||
inlines := strings.SplitAfter(input, "\n")
|
||||
for i := 0; i < len(inlines); i++ {
|
||||
if inlines[i] == outlines[i] {
|
||||
buf.WriteString(outlines[i])
|
||||
continue
|
||||
}
|
||||
r.recheckLine(inlines[i], i+1, buf, func(d Diff) {
|
||||
diffs = append(diffs, d)
|
||||
})
|
||||
}
|
||||
|
||||
return buf.String(), diffs
|
||||
|
||||
}
|
||||
|
||||
// Replace is corrects misspellings in input, returning corrected version
|
||||
// along with a list of diffs.
|
||||
func (r *Replacer) Replace(input string) (string, []Diff) {
|
||||
output := r.engine.Replace(input)
|
||||
if input == output {
|
||||
return input, nil
|
||||
}
|
||||
diffs := make([]Diff, 0, 8)
|
||||
buf := bytes.NewBuffer(make([]byte, 0, max(len(input), len(output))+100))
|
||||
// faster that making a bytes.Buffer and bufio.ReadString
|
||||
outlines := strings.SplitAfter(output, "\n")
|
||||
inlines := strings.SplitAfter(input, "\n")
|
||||
for i := 0; i < len(inlines); i++ {
|
||||
if inlines[i] == outlines[i] {
|
||||
buf.WriteString(outlines[i])
|
||||
continue
|
||||
}
|
||||
r.recheckLine(inlines[i], i+1, buf, func(d Diff) {
|
||||
diffs = append(diffs, d)
|
||||
})
|
||||
}
|
||||
|
||||
return buf.String(), diffs
|
||||
}
|
||||
|
||||
// ReplaceReader applies spelling corrections to a reader stream. Diffs are
|
||||
// emitted through a callback.
|
||||
func (r *Replacer) ReplaceReader(raw io.Reader, w io.Writer, next func(Diff)) error {
|
||||
var (
|
||||
err error
|
||||
line string
|
||||
lineNum int
|
||||
)
|
||||
reader := bufio.NewReader(raw)
|
||||
for err == nil {
|
||||
lineNum++
|
||||
line, err = reader.ReadString('\n')
|
||||
|
||||
// if it's EOF, then line has the last line
|
||||
// don't like the check of err here and
|
||||
// in for loop
|
||||
if err != nil && err != io.EOF {
|
||||
return err
|
||||
}
|
||||
// easily 5x faster than regexp+map
|
||||
if line == r.engine.Replace(line) {
|
||||
io.WriteString(w, line)
|
||||
continue
|
||||
}
|
||||
// but it can be inaccurate, so we need to double check
|
||||
r.recheckLine(line, lineNum, w, next)
|
||||
}
|
||||
return nil
|
||||
}
|
|
@ -0,0 +1,336 @@
|
|||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package misspell
|
||||
|
||||
import (
|
||||
"io"
|
||||
// "log"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// StringReplacer replaces a list of strings with replacements.
|
||||
// It is safe for concurrent use by multiple goroutines.
|
||||
type StringReplacer struct {
|
||||
r replacer
|
||||
}
|
||||
|
||||
// replacer is the interface that a replacement algorithm needs to implement.
|
||||
type replacer interface {
|
||||
Replace(s string) string
|
||||
WriteString(w io.Writer, s string) (n int, err error)
|
||||
}
|
||||
|
||||
// NewStringReplacer returns a new Replacer from a list of old, new string pairs.
|
||||
// Replacements are performed in order, without overlapping matches.
|
||||
func NewStringReplacer(oldnew ...string) *StringReplacer {
|
||||
if len(oldnew)%2 == 1 {
|
||||
panic("strings.NewReplacer: odd argument count")
|
||||
}
|
||||
|
||||
return &StringReplacer{r: makeGenericReplacer(oldnew)}
|
||||
}
|
||||
|
||||
// Replace returns a copy of s with all replacements performed.
|
||||
func (r *StringReplacer) Replace(s string) string {
|
||||
return r.r.Replace(s)
|
||||
}
|
||||
|
||||
// WriteString writes s to w with all replacements performed.
|
||||
func (r *StringReplacer) WriteString(w io.Writer, s string) (n int, err error) {
|
||||
return r.r.WriteString(w, s)
|
||||
}
|
||||
|
||||
// trieNode is one node of a lookup trie holding prioritized key/value
// pairs. Both keys and values may be empty. As an example, a trie built
// from the keys "ax", "ay", "bcbc", "x" and "xy" could consist of eight
// nodes:
//
//	n0  -
//	n1  a-
//	n2  .x+
//	n3  .y+
//	n4  b-
//	n5  .cbc+
//	n6  x+
//	n7  .y+
//
// n0 is the root, with children n1, n4 and n6. n1 has children n2 and n3,
// n4 has the single child n5, and n6 has the single child n7. A trailing
// "-" (n0, n1, n4) marks a partial key; a trailing "+" (n2, n3, n5, n6,
// n7) marks a complete key.
type trieNode struct {
	// value belongs to this node's key/value pair and is empty when the
	// node is not a complete key.
	value string

	// priority ranks this node's key/value pair; higher wins. Matching is
	// therefore not necessarily shortest- or longest-first. It is positive
	// for a complete key and zero otherwise, which is what the "+" and "-"
	// markers in the example above stand for.
	priority int

	// Children are represented in one of three ways:
	//  * all remaining fields zero: no children.
	//  * prefix and next non-zero: exactly one child, stored in next.
	//  * table non-zero: table holds all the children.
	//
	// A single child is normally stored as prefix/next, except for the
	// root node, which always uses a table for lookup efficiency.

	// prefix is the key text separating this node from next. In the
	// example above, n4 has prefix "cbc" and its next node is n5; n5 has
	// no children, so its prefix, next and table are all zero.
	prefix string
	next   *trieNode

	// table is indexed by the next key byte after that byte has been
	// remapped through genericReplacer.mapping into a dense index. In the
	// example above the keys use only 'a', 'b', 'c', 'x' and 'y', which
	// remap to 0, 1, 2, 3 and 4; every other byte remaps to 5, and
	// genericReplacer.tableSize is 5. Node n0's table is then
	// []*trieNode{ 0:n1, 1:n4, 3:n6 }, the 0, 1 and 3 being the remapped
	// 'a', 'b' and 'x'.
	table []*trieNode
}
|
||||
|
||||
func (t *trieNode) add(key, val string, priority int, r *genericReplacer) {
|
||||
if key == "" {
|
||||
if t.priority == 0 {
|
||||
t.value = val
|
||||
t.priority = priority
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
if t.prefix != "" {
|
||||
// Need to split the prefix among multiple nodes.
|
||||
var n int // length of the longest common prefix
|
||||
for ; n < len(t.prefix) && n < len(key); n++ {
|
||||
if t.prefix[n] != key[n] {
|
||||
break
|
||||
}
|
||||
}
|
||||
if n == len(t.prefix) {
|
||||
t.next.add(key[n:], val, priority, r)
|
||||
} else if n == 0 {
|
||||
// First byte differs, start a new lookup table here. Looking up
|
||||
// what is currently t.prefix[0] will lead to prefixNode, and
|
||||
// looking up key[0] will lead to keyNode.
|
||||
var prefixNode *trieNode
|
||||
if len(t.prefix) == 1 {
|
||||
prefixNode = t.next
|
||||
} else {
|
||||
prefixNode = &trieNode{
|
||||
prefix: t.prefix[1:],
|
||||
next: t.next,
|
||||
}
|
||||
}
|
||||
keyNode := new(trieNode)
|
||||
t.table = make([]*trieNode, r.tableSize)
|
||||
t.table[r.mapping[t.prefix[0]]] = prefixNode
|
||||
t.table[r.mapping[key[0]]] = keyNode
|
||||
t.prefix = ""
|
||||
t.next = nil
|
||||
keyNode.add(key[1:], val, priority, r)
|
||||
} else {
|
||||
// Insert new node after the common section of the prefix.
|
||||
next := &trieNode{
|
||||
prefix: t.prefix[n:],
|
||||
next: t.next,
|
||||
}
|
||||
t.prefix = t.prefix[:n]
|
||||
t.next = next
|
||||
next.add(key[n:], val, priority, r)
|
||||
}
|
||||
} else if t.table != nil {
|
||||
// Insert into existing table.
|
||||
m := r.mapping[key[0]]
|
||||
if t.table[m] == nil {
|
||||
t.table[m] = new(trieNode)
|
||||
}
|
||||
t.table[m].add(key[1:], val, priority, r)
|
||||
} else {
|
||||
t.prefix = key
|
||||
t.next = new(trieNode)
|
||||
t.next.add("", val, priority, r)
|
||||
}
|
||||
}
|
||||
|
||||
func (r *genericReplacer) lookup(s string, ignoreRoot bool) (val string, keylen int, found bool) {
|
||||
// Iterate down the trie to the end, and grab the value and keylen with
|
||||
// the highest priority.
|
||||
bestPriority := 0
|
||||
node := &r.root
|
||||
n := 0
|
||||
for node != nil {
|
||||
if node.priority > bestPriority && !(ignoreRoot && node == &r.root) {
|
||||
bestPriority = node.priority
|
||||
val = node.value
|
||||
keylen = n
|
||||
found = true
|
||||
}
|
||||
|
||||
if s == "" {
|
||||
break
|
||||
}
|
||||
if node.table != nil {
|
||||
index := r.mapping[ByteToLower(s[0])]
|
||||
if int(index) == r.tableSize {
|
||||
break
|
||||
}
|
||||
node = node.table[index]
|
||||
s = s[1:]
|
||||
n++
|
||||
} else if node.prefix != "" && StringHasPrefixFold(s, node.prefix) {
|
||||
n += len(node.prefix)
|
||||
s = s[len(node.prefix):]
|
||||
node = node.next
|
||||
} else {
|
||||
break
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// genericReplacer is the fully generic algorithm.
|
||||
// It's used as a fallback when nothing faster can be used.
|
||||
type genericReplacer struct {
|
||||
root trieNode
|
||||
// tableSize is the size of a trie node's lookup table. It is the number
|
||||
// of unique key bytes.
|
||||
tableSize int
|
||||
// mapping maps from key bytes to a dense index for trieNode.table.
|
||||
mapping [256]byte
|
||||
}
|
||||
|
||||
func makeGenericReplacer(oldnew []string) *genericReplacer {
|
||||
r := new(genericReplacer)
|
||||
// Find each byte used, then assign them each an index.
|
||||
for i := 0; i < len(oldnew); i += 2 {
|
||||
key := strings.ToLower(oldnew[i])
|
||||
for j := 0; j < len(key); j++ {
|
||||
r.mapping[key[j]] = 1
|
||||
}
|
||||
}
|
||||
|
||||
for _, b := range r.mapping {
|
||||
r.tableSize += int(b)
|
||||
}
|
||||
|
||||
var index byte
|
||||
for i, b := range r.mapping {
|
||||
if b == 0 {
|
||||
r.mapping[i] = byte(r.tableSize)
|
||||
} else {
|
||||
r.mapping[i] = index
|
||||
index++
|
||||
}
|
||||
}
|
||||
// Ensure root node uses a lookup table (for performance).
|
||||
r.root.table = make([]*trieNode, r.tableSize)
|
||||
|
||||
for i := 0; i < len(oldnew); i += 2 {
|
||||
r.root.add(strings.ToLower(oldnew[i]), oldnew[i+1], len(oldnew)-i, r)
|
||||
}
|
||||
return r
|
||||
}
|
||||
|
||||
// appendSliceWriter is a byte slice that grows by appending whatever is
// written to it.
type appendSliceWriter []byte

// Write appends p to the slice, satisfying io.Writer. It never fails.
func (w *appendSliceWriter) Write(p []byte) (int, error) {
	grown := append(*w, p...)
	*w = grown
	return len(p), nil
}

// WriteString appends s directly, avoiding a string->[]byte conversion.
// It never fails.
func (w *appendSliceWriter) WriteString(s string) (int, error) {
	grown := append(*w, s...)
	*w = grown
	return len(s), nil
}
|
||||
|
||||
// stringWriterIface is implemented by writers that can accept a string
// directly.
type stringWriterIface interface{ WriteString(string) (int, error) }
|
||||
|
||||
// stringWriter adapts a plain io.Writer so that it offers WriteString.
type stringWriter struct{ w io.Writer }

// WriteString converts s to bytes and forwards it to the wrapped writer,
// returning that writer's result.
func (w stringWriter) WriteString(s string) (int, error) {
	data := []byte(s)
	return w.w.Write(data)
}
|
||||
|
||||
func getStringWriter(w io.Writer) stringWriterIface {
|
||||
sw, ok := w.(stringWriterIface)
|
||||
if !ok {
|
||||
sw = stringWriter{w}
|
||||
}
|
||||
return sw
|
||||
}
|
||||
|
||||
func (r *genericReplacer) Replace(s string) string {
|
||||
buf := make(appendSliceWriter, 0, len(s))
|
||||
r.WriteString(&buf, s)
|
||||
return string(buf)
|
||||
}
|
||||
|
||||
func (r *genericReplacer) WriteString(w io.Writer, s string) (n int, err error) {
|
||||
sw := getStringWriter(w)
|
||||
var last, wn int
|
||||
var prevMatchEmpty bool
|
||||
for i := 0; i <= len(s); {
|
||||
// Fast path: s[i] is not a prefix of any pattern.
|
||||
if i != len(s) && r.root.priority == 0 {
|
||||
index := int(r.mapping[ByteToLower(s[i])])
|
||||
if index == r.tableSize || r.root.table[index] == nil {
|
||||
i++
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
// Ignore the empty match iff the previous loop found the empty match.
|
||||
val, keylen, match := r.lookup(s[i:], prevMatchEmpty)
|
||||
prevMatchEmpty = match && keylen == 0
|
||||
if match {
|
||||
orig := s[i : i+keylen]
|
||||
switch CaseStyle(orig) {
|
||||
case CaseUnknown:
|
||||
// pretend we didn't match
|
||||
// i++
|
||||
// continue
|
||||
case CaseUpper:
|
||||
val = strings.ToUpper(val)
|
||||
case CaseLower:
|
||||
val = strings.ToLower(val)
|
||||
case CaseTitle:
|
||||
if len(val) < 2 {
|
||||
val = strings.ToUpper(val)
|
||||
} else {
|
||||
val = strings.ToUpper(val[:1]) + strings.ToLower(val[1:])
|
||||
}
|
||||
}
|
||||
wn, err = sw.WriteString(s[last:i])
|
||||
n += wn
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
//log.Printf("%d: Going to correct %q with %q", i, s[i:i+keylen], val)
|
||||
wn, err = sw.WriteString(val)
|
||||
n += wn
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
i += keylen
|
||||
last = i
|
||||
continue
|
||||
}
|
||||
i++
|
||||
}
|
||||
if last != len(s) {
|
||||
wn, err = sw.WriteString(s[last:])
|
||||
n += wn
|
||||
}
|
||||
return
|
||||
}
|
|
@ -0,0 +1,421 @@
|
|||
// Copyright 2009 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package misspell_test
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
. "github.com/client9/misspell"
|
||||
)
|
||||
|
||||
var htmlEscaper = NewStringReplacer(
|
||||
"&", "&",
|
||||
"<", "<",
|
||||
">", ">",
|
||||
`"`, """,
|
||||
"'", "'",
|
||||
)
|
||||
|
||||
var htmlUnescaper = NewStringReplacer(
|
||||
"&", "&",
|
||||
"<", "<",
|
||||
">", ">",
|
||||
""", `"`,
|
||||
"'", "'",
|
||||
)
|
||||
|
||||
// oldHTMLEscape is the http package's old HTML escaping function: five
// sequential strings.Replace passes. '&' is handled first so that the
// ampersands introduced by the later passes are not escaped again.
func oldHTMLEscape(s string) string {
	s = strings.Replace(s, "&", "&amp;", -1)
	s = strings.Replace(s, "<", "&lt;", -1)
	s = strings.Replace(s, ">", "&gt;", -1)
	s = strings.Replace(s, `"`, "&#34;", -1)
	s = strings.Replace(s, "'", "&#39;", -1)
	return s
}
|
||||
|
||||
// capitalLetters is a replacer with two single-byte rules: "a" -> "A" and
// "b" -> "B".
var capitalLetters = NewStringReplacer("a", "A", "b", "B")
|
||||
|
||||
// TestReplacer tests the replacer implementations.
|
||||
func TestReplacer(t *testing.T) {
|
||||
type testCase struct {
|
||||
r *StringReplacer
|
||||
in, out string
|
||||
}
|
||||
var testCases []testCase
|
||||
|
||||
// str converts 0xff to "\xff". This isn't just string(b) since that converts to UTF-8.
|
||||
str := func(b byte) string {
|
||||
return string([]byte{b})
|
||||
}
|
||||
var s []string
|
||||
|
||||
// inc maps "\x00"->"\x01", ..., "a"->"b", "b"->"c", ..., "\xff"->"\x00".
|
||||
for i := 0; i < 256; i++ {
|
||||
s = append(s, str(byte(i)), str(byte(i+1)))
|
||||
}
|
||||
inc := NewStringReplacer(s...)
|
||||
|
||||
// Test cases with 1-byte old strings, 1-byte new strings.
|
||||
testCases = append(testCases,
|
||||
testCase{capitalLetters, "brad", "BrAd"},
|
||||
testCase{capitalLetters, strings.Repeat("a", (32<<10)+123), strings.Repeat("A", (32<<10)+123)},
|
||||
testCase{capitalLetters, "", ""},
|
||||
|
||||
testCase{inc, "brad", "csbe"},
|
||||
testCase{inc, "\x00\xff", "\x01\x00"},
|
||||
testCase{inc, "", ""},
|
||||
|
||||
testCase{NewStringReplacer("a", "1", "a", "2"), "brad", "br1d"},
|
||||
)
|
||||
|
||||
// repeat maps "a"->"a", "b"->"bb", "c"->"ccc", ...
|
||||
s = nil
|
||||
for i := 0; i < 256; i++ {
|
||||
n := i + 1 - 'a'
|
||||
if n < 1 {
|
||||
n = 1
|
||||
}
|
||||
s = append(s, str(byte(i)), strings.Repeat(str(byte(i)), n))
|
||||
}
|
||||
repeat := NewStringReplacer(s...)
|
||||
|
||||
// Test cases with 1-byte old strings, variable length new strings.
|
||||
testCases = append(testCases,
|
||||
testCase{htmlEscaper, "No changes", "No changes"},
|
||||
testCase{htmlEscaper, "I <3 escaping & stuff", "I <3 escaping & stuff"},
|
||||
testCase{htmlEscaper, "&&&", "&&&"},
|
||||
testCase{htmlEscaper, "", ""},
|
||||
|
||||
testCase{repeat, "brad", "bbrrrrrrrrrrrrrrrrrradddd"},
|
||||
testCase{repeat, "abba", "abbbba"},
|
||||
testCase{repeat, "", ""},
|
||||
|
||||
testCase{NewStringReplacer("a", "11", "a", "22"), "brad", "br11d"},
|
||||
)
|
||||
|
||||
// The remaining test cases have variable length old strings.
|
||||
|
||||
testCases = append(testCases,
|
||||
testCase{htmlUnescaper, "&amp;", "&"},
|
||||
testCase{htmlUnescaper, "<b>HTML's neat</b>", "<b>HTML's neat</b>"},
|
||||
testCase{htmlUnescaper, "", ""},
|
||||
|
||||
testCase{NewStringReplacer("a", "1", "a", "2", "xxx", "xxx"), "brad", "br1d"},
|
||||
|
||||
testCase{NewStringReplacer("a", "1", "aa", "2", "aaa", "3"), "aaaa", "1111"},
|
||||
|
||||
testCase{NewStringReplacer("aaa", "3", "aa", "2", "a", "1"), "aaaa", "31"},
|
||||
)
|
||||
|
||||
// gen1 has multiple old strings of variable length. There is no
|
||||
// overall non-empty common prefix, but some pairwise common prefixes.
|
||||
gen1 := NewStringReplacer(
|
||||
"aaa", "3[aaa]",
|
||||
"aa", "2[aa]",
|
||||
"a", "1[a]",
|
||||
"i", "i",
|
||||
"longerst", "most long",
|
||||
"longer", "medium",
|
||||
"long", "short",
|
||||
"xx", "xx",
|
||||
"x", "X",
|
||||
"X", "Y",
|
||||
"Y", "Z",
|
||||
)
|
||||
testCases = append(testCases,
|
||||
testCase{gen1, "fooaaabar", "foo3[aaa]b1[a]r"},
|
||||
testCase{gen1, "long, longerst, longer", "short, most long, medium"},
|
||||
testCase{gen1, "xxxxx", "xxxxX"},
|
||||
testCase{gen1, "XiX", "YiY"},
|
||||
testCase{gen1, "", ""},
|
||||
)
|
||||
|
||||
// gen2 has multiple old strings with no pairwise common prefix.
|
||||
gen2 := NewStringReplacer(
|
||||
"roses", "red",
|
||||
"violets", "blue",
|
||||
"sugar", "sweet",
|
||||
)
|
||||
testCases = append(testCases,
|
||||
testCase{gen2, "roses are red, violets are blue...", "red are red, blue are blue..."},
|
||||
testCase{gen2, "", ""},
|
||||
)
|
||||
|
||||
// gen3 has multiple old strings with an overall common prefix.
|
||||
gen3 := NewStringReplacer(
|
||||
"abracadabra", "poof",
|
||||
"abracadabrakazam", "splat",
|
||||
"abraham", "lincoln",
|
||||
"abrasion", "scrape",
|
||||
"abraham", "isaac",
|
||||
)
|
||||
testCases = append(testCases,
|
||||
testCase{gen3, "abracadabrakazam abraham", "poofkazam lincoln"},
|
||||
testCase{gen3, "abrasion abracad", "scrape abracad"},
|
||||
testCase{gen3, "abba abram abrasive", "abba abram abrasive"},
|
||||
testCase{gen3, "", ""},
|
||||
)
|
||||
|
||||
// foo{1,2,3,4} have multiple old strings with an overall common prefix
|
||||
// and 1- or 2- byte extensions from the common prefix.
|
||||
foo1 := NewStringReplacer(
|
||||
"foo1", "A",
|
||||
"foo2", "B",
|
||||
"foo3", "C",
|
||||
)
|
||||
foo2 := NewStringReplacer(
|
||||
"foo1", "A",
|
||||
"foo2", "B",
|
||||
"foo31", "C",
|
||||
"foo32", "D",
|
||||
)
|
||||
foo3 := NewStringReplacer(
|
||||
"foo11", "A",
|
||||
"foo12", "B",
|
||||
"foo31", "C",
|
||||
"foo32", "D",
|
||||
)
|
||||
foo4 := NewStringReplacer(
|
||||
"foo12", "B",
|
||||
"foo32", "D",
|
||||
)
|
||||
testCases = append(testCases,
|
||||
testCase{foo1, "fofoofoo12foo32oo", "fofooA2C2oo"},
|
||||
testCase{foo1, "", ""},
|
||||
|
||||
testCase{foo2, "fofoofoo12foo32oo", "fofooA2Doo"},
|
||||
testCase{foo2, "", ""},
|
||||
|
||||
testCase{foo3, "fofoofoo12foo32oo", "fofooBDoo"},
|
||||
testCase{foo3, "", ""},
|
||||
|
||||
testCase{foo4, "fofoofoo12foo32oo", "fofooBDoo"},
|
||||
testCase{foo4, "", ""},
|
||||
)
|
||||
|
||||
// genAll maps "\x00\x01\x02...\xfe\xff" to "[all]", amongst other things.
|
||||
allBytes := make([]byte, 256)
|
||||
for i := range allBytes {
|
||||
allBytes[i] = byte(i)
|
||||
}
|
||||
allString := string(allBytes)
|
||||
genAll := NewStringReplacer(
|
||||
allString, "[all]",
|
||||
"\xff", "[ff]",
|
||||
"\x00", "[00]",
|
||||
)
|
||||
testCases = append(testCases,
|
||||
testCase{genAll, allString, "[all]"},
|
||||
testCase{genAll, "a\xff" + allString + "\x00", "a[ff][all][00]"},
|
||||
testCase{genAll, "", ""},
|
||||
)
|
||||
|
||||
// Test cases with empty old strings.
|
||||
|
||||
blankToX1 := NewStringReplacer("", "X")
|
||||
blankToX2 := NewStringReplacer("", "X", "", "")
|
||||
blankHighPriority := NewStringReplacer("", "X", "o", "O")
|
||||
blankLowPriority := NewStringReplacer("o", "O", "", "X")
|
||||
blankNoOp1 := NewStringReplacer("", "")
|
||||
blankNoOp2 := NewStringReplacer("", "", "", "A")
|
||||
blankFoo := NewStringReplacer("", "X", "foobar", "R", "foobaz", "Z")
|
||||
testCases = append(testCases,
|
||||
testCase{blankToX1, "foo", "XfXoXoX"},
|
||||
testCase{blankToX1, "", "X"},
|
||||
|
||||
testCase{blankToX2, "foo", "XfXoXoX"},
|
||||
testCase{blankToX2, "", "X"},
|
||||
|
||||
testCase{blankHighPriority, "oo", "XOXOX"},
|
||||
testCase{blankHighPriority, "ii", "XiXiX"},
|
||||
testCase{blankHighPriority, "oiio", "XOXiXiXOX"},
|
||||
testCase{blankHighPriority, "iooi", "XiXOXOXiX"},
|
||||
testCase{blankHighPriority, "", "X"},
|
||||
|
||||
testCase{blankLowPriority, "oo", "OOX"},
|
||||
testCase{blankLowPriority, "ii", "XiXiX"},
|
||||
testCase{blankLowPriority, "oiio", "OXiXiOX"},
|
||||
testCase{blankLowPriority, "iooi", "XiOOXiX"},
|
||||
testCase{blankLowPriority, "", "X"},
|
||||
|
||||
testCase{blankNoOp1, "foo", "foo"},
|
||||
testCase{blankNoOp1, "", ""},
|
||||
|
||||
testCase{blankNoOp2, "foo", "foo"},
|
||||
testCase{blankNoOp2, "", ""},
|
||||
|
||||
testCase{blankFoo, "foobarfoobaz", "XRXZX"},
|
||||
testCase{blankFoo, "foobar-foobaz", "XRX-XZX"},
|
||||
testCase{blankFoo, "", "X"},
|
||||
)
|
||||
|
||||
// single string replacer
|
||||
|
||||
abcMatcher := NewStringReplacer("abc", "[match]")
|
||||
|
||||
testCases = append(testCases,
|
||||
testCase{abcMatcher, "", ""},
|
||||
testCase{abcMatcher, "ab", "ab"},
|
||||
testCase{abcMatcher, "abc", "[match]"},
|
||||
testCase{abcMatcher, "abcd", "[match]d"},
|
||||
testCase{abcMatcher, "cabcabcdabca", "c[match][match]d[match]a"},
|
||||
)
|
||||
|
||||
// Issue 6659 cases (more single string replacer)
|
||||
|
||||
noHello := NewStringReplacer("Hello", "")
|
||||
testCases = append(testCases,
|
||||
testCase{noHello, "Hello", ""},
|
||||
testCase{noHello, "Hellox", "x"},
|
||||
testCase{noHello, "xHello", "x"},
|
||||
testCase{noHello, "xHellox", "xx"},
|
||||
)
|
||||
|
||||
// No-arg test cases.
|
||||
|
||||
nop := NewStringReplacer()
|
||||
testCases = append(testCases,
|
||||
testCase{nop, "abc", "abc"},
|
||||
testCase{nop, "", ""},
|
||||
)
|
||||
|
||||
// Run the test cases.
|
||||
|
||||
for i, tc := range testCases {
|
||||
if s := tc.r.Replace(tc.in); s != tc.out {
|
||||
t.Errorf("%d. strings.Replace(%q) = %q, want %q", i, tc.in, s, tc.out)
|
||||
}
|
||||
var buf bytes.Buffer
|
||||
n, err := tc.r.WriteString(&buf, tc.in)
|
||||
if err != nil {
|
||||
t.Errorf("%d. WriteString: %v", i, err)
|
||||
continue
|
||||
}
|
||||
got := buf.String()
|
||||
if got != tc.out {
|
||||
t.Errorf("%d. WriteString(%q) wrote %q, want %q", i, tc.in, got, tc.out)
|
||||
continue
|
||||
}
|
||||
if n != len(tc.out) {
|
||||
t.Errorf("%d. WriteString(%q) wrote correct string but reported %d bytes; want %d (%q)",
|
||||
i, tc.in, n, len(tc.out), tc.out)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// errWriter is a stateless stub whose Write method always fails. Its method
// set matches io.Writer, so it can stand in wherever a failing writer is
// needed.
type errWriter struct{}

// Write ignores p and reports that nothing was written, together with an
// "unwritable" error.
func (errWriter) Write(p []byte) (n int, err error) {
	err = fmt.Errorf("unwritable")
	return 0, err
}
|
||||
|
||||
func BenchmarkGenericNoMatch(b *testing.B) {
|
||||
str := strings.Repeat("A", 100) + strings.Repeat("B", 100)
|
||||
generic := NewStringReplacer("a", "A", "b", "B", "12", "123") // varying lengths forces generic
|
||||
for i := 0; i < b.N; i++ {
|
||||
generic.Replace(str)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkGenericMatch1(b *testing.B) {
|
||||
str := strings.Repeat("a", 100) + strings.Repeat("b", 100)
|
||||
generic := NewStringReplacer("a", "A", "b", "B", "12", "123")
|
||||
for i := 0; i < b.N; i++ {
|
||||
generic.Replace(str)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkGenericMatch2(b *testing.B) {
|
||||
str := strings.Repeat("It's <b>HTML</b>!", 100)
|
||||
for i := 0; i < b.N; i++ {
|
||||
htmlUnescaper.Replace(str)
|
||||
}
|
||||
}
|
||||
|
||||
func benchmarkSingleString(b *testing.B, pattern, text string) {
|
||||
r := NewStringReplacer(pattern, "[match]")
|
||||
b.SetBytes(int64(len(text)))
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
r.Replace(text)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkSingleMaxSkipping(b *testing.B) {
|
||||
benchmarkSingleString(b, strings.Repeat("b", 25), strings.Repeat("a", 10000))
|
||||
}
|
||||
|
||||
func BenchmarkSingleLongSuffixFail(b *testing.B) {
|
||||
benchmarkSingleString(b, "b"+strings.Repeat("a", 500), strings.Repeat("a", 1002))
|
||||
}
|
||||
|
||||
func BenchmarkSingleMatch(b *testing.B) {
|
||||
benchmarkSingleString(b, "abcdef", strings.Repeat("abcdefghijklmno", 1000))
|
||||
}
|
||||
|
||||
func BenchmarkByteByteNoMatch(b *testing.B) {
|
||||
str := strings.Repeat("A", 100) + strings.Repeat("B", 100)
|
||||
for i := 0; i < b.N; i++ {
|
||||
capitalLetters.Replace(str)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkByteByteMatch(b *testing.B) {
|
||||
str := strings.Repeat("a", 100) + strings.Repeat("b", 100)
|
||||
for i := 0; i < b.N; i++ {
|
||||
capitalLetters.Replace(str)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkByteStringMatch(b *testing.B) {
|
||||
str := "<" + strings.Repeat("a", 99) + strings.Repeat("b", 99) + ">"
|
||||
for i := 0; i < b.N; i++ {
|
||||
htmlEscaper.Replace(str)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkHTMLEscapeNew(b *testing.B) {
|
||||
str := "I <3 to escape HTML & other text too."
|
||||
for i := 0; i < b.N; i++ {
|
||||
htmlEscaper.Replace(str)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkHTMLEscapeOld(b *testing.B) {
|
||||
str := "I <3 to escape HTML & other text too."
|
||||
for i := 0; i < b.N; i++ {
|
||||
oldHTMLEscape(str)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkByteStringReplacerWriteString(b *testing.B) {
|
||||
str := strings.Repeat("I <3 to escape HTML & other text too.", 100)
|
||||
buf := new(bytes.Buffer)
|
||||
for i := 0; i < b.N; i++ {
|
||||
htmlEscaper.WriteString(buf, str)
|
||||
buf.Reset()
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkByteReplacerWriteString(b *testing.B) {
|
||||
str := strings.Repeat("abcdefghijklmnopqrstuvwxyz", 100)
|
||||
buf := new(bytes.Buffer)
|
||||
for i := 0; i < b.N; i++ {
|
||||
capitalLetters.WriteString(buf, str)
|
||||
buf.Reset()
|
||||
}
|
||||
}
|
||||
|
||||
// BenchmarkByteByteReplaces is the baseline the byte-to-byte replacer
// benchmarks are compared against: it upper-cases the same 200-byte
// "a"/"b" input using two chained strings.Replace calls instead of a
// replacer.
func BenchmarkByteByteReplaces(b *testing.B) {
	input := strings.Repeat("a", 100) + strings.Repeat("b", 100)
	for n := 0; n < b.N; n++ {
		// First pass handles "a", second pass handles "b"; the result is
		// discarded because only the elapsed time matters.
		upperA := strings.Replace(input, "a", "A", -1)
		strings.Replace(upperA, "b", "B", -1)
	}
}
|
|
@ -0,0 +1,17 @@
|
|||
package misspell
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
)
|
||||
|
||||
// urlPattern is the URL-matching expression compiled into reURL. It is
// adapted from the @imme_emosol entry (54 chars) listed at
// https://mathiasbynens.be/demo/url-regex:
//
//	@(https?|ftp)://(-\.)?([^\s/?\.#-]+\.?)+(/[^\s]*)?$@iS
//
// That original has trouble with dashes in hostnames, so this version drops
// "-" from the excluded host characters. It also omits the trailing "$"
// anchor and carries the case-insensitive flag inline as (?i).
const urlPattern = `(?i)(https?|ftp)://(-\.)?([^\s/?\.#]+\.?)+(/[^\s]*)?`

// reURL matches http, https and ftp URLs anywhere in a string.
var reURL = regexp.MustCompile(urlPattern)
|
||||
|
||||
// StripURL attemps to replace URLs with blank spaces, e.g.
|
||||
// "xxx http://foo.com/ yyy -> "xxx yyyy"
|
||||
func StripURL(s string) string {
|
||||
return reURL.ReplaceAllStringFunc(s, replaceWithBlanks)
|
||||
}
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue