Allow downstream projects to use faster regexp (#10251)

* Add benchmark for FastRegexMatcher

Signed-off-by: Bryan Boreham <bjboreham@gmail.com>

* Use modified regexp package with optimisations

See https://github.com/grafana/regexp/tree/speedup#readme

Includes the following changes proposed upstream:
* [regexp: allow patterns with no alternates to be one-pass](https://go-review.googlesource.com/c/go/+/353711)
* [regexp: speed up onepass prefix check](https://go-review.googlesource.com/c/go/+/354909)
* [regexp: handle prefix string with fold-case](https://go-review.googlesource.com/c/go/+/358756)
* [regexp: avoid copying each instruction executed](https://go-review.googlesource.com/c/go/+/355789)
* [regexp: allow prefix string anchored at beginning](https://go-review.googlesource.com/c/go/+/377294)

Signed-off-by: Bryan Boreham <bjboreham@gmail.com>

* Use regexp code identical to upstream Go

Change `grafana/regexp` import to use `main` branch.

This means Prometheus is not using the proposed optimisations, but
downstream users of Prometheus code are able to `replace` the library
with the `speedup` branch which does.

Signed-off-by: Bryan Boreham <bjboreham@gmail.com>
pull/10242/head
Bryan Boreham 2022-02-08 10:03:20 +00:00 committed by GitHub
parent 8d8ce641a4
commit 579331446a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 47 additions and 3 deletions

1
go.mod
View File

@ -31,6 +31,7 @@ require (
github.com/google/pprof v0.0.0-20211214055906-6f57359322fd
github.com/gophercloud/gophercloud v0.24.0
github.com/gorilla/mux v1.8.0 // indirect
github.com/grafana/regexp v0.0.0-20220202152315-e74e38789280
github.com/grpc-ecosystem/grpc-gateway v1.16.0
github.com/hashicorp/consul/api v1.12.0
github.com/hashicorp/go-hclog v0.12.2 // indirect

2
go.sum
View File

@ -672,6 +672,8 @@ github.com/gorilla/mux v1.8.0/go.mod h1:DVbg23sWSpFRCP0SfiEN6jmj59UnW/n46BH5rLB7
github.com/gorilla/websocket v0.0.0-20170926233335-4201258b820c/go.mod h1:E7qHFY5m1UJ88s3WnNqhKjPHQ0heANvMoAMk2YaljkQ=
github.com/gorilla/websocket v1.4.0/go.mod h1:E7qHFY5m1UJ88s3WnNqhKjPHQ0heANvMoAMk2YaljkQ=
github.com/gorilla/websocket v1.4.2/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
github.com/grafana/regexp v0.0.0-20220202152315-e74e38789280 h1:MOND6wXrwVXEzmL2bZ+Jcbgycwt1LD5q6NQbqz/Nlic=
github.com/grafana/regexp v0.0.0-20220202152315-e74e38789280/go.mod h1:M5qHK+eWfAv8VR/265dIuEpL3fNfeC21tXXp9itM24A=
github.com/gregjones/httpcache v0.0.0-20180305231024-9cad4c3443a7/go.mod h1:FecbI9+v66THATjSRHfNgh1IVFe/9kFxbXtjV0ctIMA=
github.com/grpc-ecosystem/go-grpc-middleware v1.0.0/go.mod h1:FiyG127CGDf3tlThmgyCl78X/SZQqEOJBCDaAfeWzPs=
github.com/grpc-ecosystem/go-grpc-middleware v1.0.1-0.20190118093823-f849b5445de4/go.mod h1:FiyG127CGDf3tlThmgyCl78X/SZQqEOJBCDaAfeWzPs=

View File

@ -14,9 +14,10 @@
package labels
import (
"regexp"
"regexp/syntax"
"strings"
"github.com/grafana/regexp"
"github.com/grafana/regexp/syntax"
)
type FastRegexMatcher struct {

View File

@ -14,9 +14,10 @@
package labels
import (
"regexp/syntax"
"strings"
"testing"
"github.com/grafana/regexp/syntax"
"github.com/stretchr/testify/require"
)
@ -96,3 +97,42 @@ func TestOptimizeConcatRegex(t *testing.T) {
require.Equal(t, c.contains, contains)
}
}
func BenchmarkFastRegexMatcher(b *testing.B) {
var (
x = strings.Repeat("x", 50)
y = "foo" + x
z = x + "foo"
)
regexes := []string{
"foo",
"^foo",
"(foo|bar)",
"foo.*",
".*foo",
"^.*foo$",
"^.+foo$",
".*",
".+",
"foo.+",
".+foo",
".*foo.*",
"(?i:foo)",
"(prometheus|api_prom)_api_v1_.+",
"((fo(bar))|.+foo)",
}
for _, r := range regexes {
r := r
b.Run(r, func(b *testing.B) {
m, err := NewFastRegexMatcher(r)
require.NoError(b, err)
b.ResetTimer()
for i := 0; i < b.N; i++ {
_ = m.MatchString(x)
_ = m.MatchString(y)
_ = m.MatchString(z)
}
})
}
}