mirror of https://github.com/prometheus/prometheus
[ENHANCEMENT] Optimize regexps with multiple prefixes (#13843)
For example, `foo.*|bar.*|baz.*`. Instead of checking each one in turn, we build a map of prefixes, then check the smaller set that could match the string supplied. Signed-off-by: Bryan Boreham <bjboreham@gmail.com> * Improve testing and readability. Address review comments on #13843. Signed-off-by: Marco Pracucci <marco@pracucci.com> pull/14425/head
parent
ba948e94fb
commit
82a8c6abe2
|
@ -28,7 +28,7 @@ const (
|
||||||
maxSetMatches = 256
|
maxSetMatches = 256
|
||||||
|
|
||||||
// The minimum number of alternate values a regex should have to trigger
|
// The minimum number of alternate values a regex should have to trigger
|
||||||
// the optimization done by optimizeEqualStringMatchers() and so use a map
|
// the optimization done by optimizeEqualOrPrefixStringMatchers() and so use a map
|
||||||
// to match values instead of iterating over a list. This value has
|
// to match values instead of iterating over a list. This value has
|
||||||
// been computed running BenchmarkOptimizeEqualStringMatchers.
|
// been computed running BenchmarkOptimizeEqualStringMatchers.
|
||||||
minEqualMultiStringMatcherMapThreshold = 16
|
minEqualMultiStringMatcherMapThreshold = 16
|
||||||
|
@ -337,7 +337,7 @@ func optimizeAlternatingLiterals(s string) (StringMatcher, []string) {
|
||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
multiMatcher := newEqualMultiStringMatcher(true, estimatedAlternates)
|
multiMatcher := newEqualMultiStringMatcher(true, estimatedAlternates, 0, 0)
|
||||||
|
|
||||||
for end := strings.IndexByte(s, '|'); end > -1; end = strings.IndexByte(s, '|') {
|
for end := strings.IndexByte(s, '|'); end > -1; end = strings.IndexByte(s, '|') {
|
||||||
// Split the string into the next literal and the remainder
|
// Split the string into the next literal and the remainder
|
||||||
|
@ -412,7 +412,7 @@ func stringMatcherFromRegexp(re *syntax.Regexp) StringMatcher {
|
||||||
clearBeginEndText(re)
|
clearBeginEndText(re)
|
||||||
|
|
||||||
m := stringMatcherFromRegexpInternal(re)
|
m := stringMatcherFromRegexpInternal(re)
|
||||||
m = optimizeEqualStringMatchers(m, minEqualMultiStringMatcherMapThreshold)
|
m = optimizeEqualOrPrefixStringMatchers(m, minEqualMultiStringMatcherMapThreshold)
|
||||||
|
|
||||||
return m
|
return m
|
||||||
}
|
}
|
||||||
|
@ -732,17 +732,20 @@ func (m *equalStringMatcher) Matches(s string) bool {
|
||||||
type multiStringMatcherBuilder interface {
|
type multiStringMatcherBuilder interface {
|
||||||
StringMatcher
|
StringMatcher
|
||||||
add(s string)
|
add(s string)
|
||||||
|
addPrefix(prefix string, prefixCaseSensitive bool, matcher StringMatcher)
|
||||||
setMatches() []string
|
setMatches() []string
|
||||||
}
|
}
|
||||||
|
|
||||||
func newEqualMultiStringMatcher(caseSensitive bool, estimatedSize int) multiStringMatcherBuilder {
|
func newEqualMultiStringMatcher(caseSensitive bool, estimatedSize, estimatedPrefixes, minPrefixLength int) multiStringMatcherBuilder {
|
||||||
// If the estimated size is low enough, it's faster to use a slice instead of a map.
|
// If the estimated size is low enough, it's faster to use a slice instead of a map.
|
||||||
if estimatedSize < minEqualMultiStringMatcherMapThreshold {
|
if estimatedSize < minEqualMultiStringMatcherMapThreshold && estimatedPrefixes == 0 {
|
||||||
return &equalMultiStringSliceMatcher{caseSensitive: caseSensitive, values: make([]string, 0, estimatedSize)}
|
return &equalMultiStringSliceMatcher{caseSensitive: caseSensitive, values: make([]string, 0, estimatedSize)}
|
||||||
}
|
}
|
||||||
|
|
||||||
return &equalMultiStringMapMatcher{
|
return &equalMultiStringMapMatcher{
|
||||||
values: make(map[string]struct{}, estimatedSize),
|
values: make(map[string]struct{}, estimatedSize),
|
||||||
|
prefixes: make(map[string][]StringMatcher, estimatedPrefixes),
|
||||||
|
minPrefixLen: minPrefixLength,
|
||||||
caseSensitive: caseSensitive,
|
caseSensitive: caseSensitive,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -758,6 +761,10 @@ func (m *equalMultiStringSliceMatcher) add(s string) {
|
||||||
m.values = append(m.values, s)
|
m.values = append(m.values, s)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (m *equalMultiStringSliceMatcher) addPrefix(_ string, _ bool, _ StringMatcher) {
|
||||||
|
panic("not implemented")
|
||||||
|
}
|
||||||
|
|
||||||
func (m *equalMultiStringSliceMatcher) setMatches() []string {
|
func (m *equalMultiStringSliceMatcher) setMatches() []string {
|
||||||
return m.values
|
return m.values
|
||||||
}
|
}
|
||||||
|
@ -779,12 +786,17 @@ func (m *equalMultiStringSliceMatcher) Matches(s string) bool {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
// equalMultiStringMapMatcher matches a string exactly against a map of valid values.
|
// equalMultiStringMapMatcher matches a string exactly against a map of valid values
|
||||||
|
// or against a set of prefix matchers.
|
||||||
type equalMultiStringMapMatcher struct {
|
type equalMultiStringMapMatcher struct {
|
||||||
// values contains values to match a string against. If the matching is case insensitive,
|
// values contains values to match a string against. If the matching is case insensitive,
|
||||||
// the values here must be lowercase.
|
// the values here must be lowercase.
|
||||||
values map[string]struct{}
|
values map[string]struct{}
|
||||||
|
// prefixes maps strings, all of length minPrefixLen, to sets of matchers to check the rest of the string.
|
||||||
|
// If the matching is case insensitive, prefixes are all lowercase.
|
||||||
|
prefixes map[string][]StringMatcher
|
||||||
|
// minPrefixLen can be zero, meaning there are no prefix matchers.
|
||||||
|
minPrefixLen int
|
||||||
caseSensitive bool
|
caseSensitive bool
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -796,8 +808,27 @@ func (m *equalMultiStringMapMatcher) add(s string) {
|
||||||
m.values[s] = struct{}{}
|
m.values[s] = struct{}{}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (m *equalMultiStringMapMatcher) addPrefix(prefix string, prefixCaseSensitive bool, matcher StringMatcher) {
|
||||||
|
if m.minPrefixLen == 0 {
|
||||||
|
panic("addPrefix called when no prefix length defined")
|
||||||
|
}
|
||||||
|
if len(prefix) < m.minPrefixLen {
|
||||||
|
panic("addPrefix called with a too short prefix")
|
||||||
|
}
|
||||||
|
if m.caseSensitive != prefixCaseSensitive {
|
||||||
|
panic("addPrefix called with a prefix whose case sensitivity is different than the expected one")
|
||||||
|
}
|
||||||
|
|
||||||
|
s := prefix[:m.minPrefixLen]
|
||||||
|
if !m.caseSensitive {
|
||||||
|
s = strings.ToLower(s)
|
||||||
|
}
|
||||||
|
|
||||||
|
m.prefixes[s] = append(m.prefixes[s], matcher)
|
||||||
|
}
|
||||||
|
|
||||||
func (m *equalMultiStringMapMatcher) setMatches() []string {
|
func (m *equalMultiStringMapMatcher) setMatches() []string {
|
||||||
if len(m.values) >= maxSetMatches {
|
if len(m.values) >= maxSetMatches || len(m.prefixes) > 0 {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -813,8 +844,17 @@ func (m *equalMultiStringMapMatcher) Matches(s string) bool {
|
||||||
s = toNormalisedLower(s)
|
s = toNormalisedLower(s)
|
||||||
}
|
}
|
||||||
|
|
||||||
_, ok := m.values[s]
|
if _, ok := m.values[s]; ok {
|
||||||
return ok
|
return true
|
||||||
|
}
|
||||||
|
if m.minPrefixLen > 0 && len(s) >= m.minPrefixLen {
|
||||||
|
for _, matcher := range m.prefixes[s[:m.minPrefixLen]] {
|
||||||
|
if matcher.Matches(s) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
// toNormalisedLower normalise the input string using "Unicode Normalization Form D" and then convert
|
// toNormalisedLower normalise the input string using "Unicode Normalization Form D" and then convert
|
||||||
|
@ -897,20 +937,24 @@ func (m trueMatcher) Matches(_ string) bool {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
// optimizeEqualStringMatchers optimize a specific case where all matchers are made by an
|
// optimizeEqualOrPrefixStringMatchers optimize a specific case where all matchers are made by an
|
||||||
// alternation (orStringMatcher) of strings checked for equality (equalStringMatcher). In
|
// alternation (orStringMatcher) of strings checked for equality (equalStringMatcher) or
|
||||||
// this specific case, when we have many strings to match against we can use a map instead
|
// with a literal prefix (literalPrefixSensitiveStringMatcher or literalPrefixInsensitiveStringMatcher).
|
||||||
|
//
|
||||||
|
// In this specific case, when we have many strings to match against we can use a map instead
|
||||||
// of iterating over the list of strings.
|
// of iterating over the list of strings.
|
||||||
func optimizeEqualStringMatchers(input StringMatcher, threshold int) StringMatcher {
|
func optimizeEqualOrPrefixStringMatchers(input StringMatcher, threshold int) StringMatcher {
|
||||||
var (
|
var (
|
||||||
caseSensitive bool
|
caseSensitive bool
|
||||||
caseSensitiveSet bool
|
caseSensitiveSet bool
|
||||||
numValues int
|
numValues int
|
||||||
|
numPrefixes int
|
||||||
|
minPrefixLength int
|
||||||
)
|
)
|
||||||
|
|
||||||
// Analyse the input StringMatcher to count the number of occurrences
|
// Analyse the input StringMatcher to count the number of occurrences
|
||||||
// and ensure all of them have the same case sensitivity.
|
// and ensure all of them have the same case sensitivity.
|
||||||
analyseCallback := func(matcher *equalStringMatcher) bool {
|
analyseEqualMatcherCallback := func(matcher *equalStringMatcher) bool {
|
||||||
// Ensure we don't have mixed case sensitivity.
|
// Ensure we don't have mixed case sensitivity.
|
||||||
if caseSensitiveSet && caseSensitive != matcher.caseSensitive {
|
if caseSensitiveSet && caseSensitive != matcher.caseSensitive {
|
||||||
return false
|
return false
|
||||||
|
@ -923,34 +967,55 @@ func optimizeEqualStringMatchers(input StringMatcher, threshold int) StringMatch
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
if !findEqualStringMatchers(input, analyseCallback) {
|
analysePrefixMatcherCallback := func(prefix string, prefixCaseSensitive bool, matcher StringMatcher) bool {
|
||||||
|
// Ensure we don't have mixed case sensitivity.
|
||||||
|
if caseSensitiveSet && caseSensitive != prefixCaseSensitive {
|
||||||
|
return false
|
||||||
|
} else if !caseSensitiveSet {
|
||||||
|
caseSensitive = prefixCaseSensitive
|
||||||
|
caseSensitiveSet = true
|
||||||
|
}
|
||||||
|
if numPrefixes == 0 || len(prefix) < minPrefixLength {
|
||||||
|
minPrefixLength = len(prefix)
|
||||||
|
}
|
||||||
|
|
||||||
|
numPrefixes++
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
if !findEqualOrPrefixStringMatchers(input, analyseEqualMatcherCallback, analysePrefixMatcherCallback) {
|
||||||
return input
|
return input
|
||||||
}
|
}
|
||||||
|
|
||||||
// If the number of values found is less than the threshold, then we should skip the optimization.
|
// If the number of values and prefixes found is less than the threshold, then we should skip the optimization.
|
||||||
if numValues < threshold {
|
if (numValues + numPrefixes) < threshold {
|
||||||
return input
|
return input
|
||||||
}
|
}
|
||||||
|
|
||||||
// Parse again the input StringMatcher to extract all values and storing them.
|
// Parse again the input StringMatcher to extract all values and storing them.
|
||||||
// We can skip the case sensitivity check because we've already checked it and
|
// We can skip the case sensitivity check because we've already checked it and
|
||||||
// if the code reach this point then it means all matchers have the same case sensitivity.
|
// if the code reach this point then it means all matchers have the same case sensitivity.
|
||||||
multiMatcher := newEqualMultiStringMatcher(caseSensitive, numValues)
|
multiMatcher := newEqualMultiStringMatcher(caseSensitive, numValues, numPrefixes, minPrefixLength)
|
||||||
|
|
||||||
// Ignore the return value because we already iterated over the input StringMatcher
|
// Ignore the return value because we already iterated over the input StringMatcher
|
||||||
// and it was all good.
|
// and it was all good.
|
||||||
findEqualStringMatchers(input, func(matcher *equalStringMatcher) bool {
|
findEqualOrPrefixStringMatchers(input, func(matcher *equalStringMatcher) bool {
|
||||||
multiMatcher.add(matcher.s)
|
multiMatcher.add(matcher.s)
|
||||||
return true
|
return true
|
||||||
|
}, func(prefix string, prefixCaseSensitive bool, matcher StringMatcher) bool {
|
||||||
|
multiMatcher.addPrefix(prefix, caseSensitive, matcher)
|
||||||
|
return true
|
||||||
})
|
})
|
||||||
|
|
||||||
return multiMatcher
|
return multiMatcher
|
||||||
}
|
}
|
||||||
|
|
||||||
// findEqualStringMatchers analyze the input StringMatcher and calls the callback for each
|
// findEqualOrPrefixStringMatchers analyze the input StringMatcher and calls the equalMatcherCallback for each
|
||||||
// equalStringMatcher found. Returns true if and only if the input StringMatcher is *only*
|
// equalStringMatcher found, and prefixMatcherCallback for each literalPrefixSensitiveStringMatcher and literalPrefixInsensitiveStringMatcher found.
|
||||||
// composed by an alternation of equalStringMatcher.
|
//
|
||||||
func findEqualStringMatchers(input StringMatcher, callback func(matcher *equalStringMatcher) bool) bool {
|
// Returns true if and only if the input StringMatcher is *only* composed by an alternation of equalStringMatcher and/or
|
||||||
|
// literal prefix matcher. Returns false if prefixMatcherCallback is nil and a literal prefix matcher is encountered.
|
||||||
|
func findEqualOrPrefixStringMatchers(input StringMatcher, equalMatcherCallback func(matcher *equalStringMatcher) bool, prefixMatcherCallback func(prefix string, prefixCaseSensitive bool, matcher StringMatcher) bool) bool {
|
||||||
orInput, ok := input.(orStringMatcher)
|
orInput, ok := input.(orStringMatcher)
|
||||||
if !ok {
|
if !ok {
|
||||||
return false
|
return false
|
||||||
|
@ -959,17 +1024,27 @@ func findEqualStringMatchers(input StringMatcher, callback func(matcher *equalSt
|
||||||
for _, m := range orInput {
|
for _, m := range orInput {
|
||||||
switch casted := m.(type) {
|
switch casted := m.(type) {
|
||||||
case orStringMatcher:
|
case orStringMatcher:
|
||||||
if !findEqualStringMatchers(m, callback) {
|
if !findEqualOrPrefixStringMatchers(m, equalMatcherCallback, prefixMatcherCallback) {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
case *equalStringMatcher:
|
case *equalStringMatcher:
|
||||||
if !callback(casted) {
|
if !equalMatcherCallback(casted) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
case *literalPrefixSensitiveStringMatcher:
|
||||||
|
if prefixMatcherCallback == nil || !prefixMatcherCallback(casted.prefix, true, casted) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
case *literalPrefixInsensitiveStringMatcher:
|
||||||
|
if prefixMatcherCallback == nil || !prefixMatcherCallback(casted.prefix, false, casted) {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
default:
|
default:
|
||||||
// It's not an equal string matcher, so we have to stop searching
|
// It's not an equal or prefix string matcher, so we have to stop searching
|
||||||
// cause this optimization can't be applied.
|
// cause this optimization can't be applied.
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
|
@ -71,6 +71,8 @@ var (
|
||||||
// A long case insensitive alternation.
|
// A long case insensitive alternation.
|
||||||
"(?i:(zQPbMkNO|NNSPdvMi|iWuuSoAl|qbvKMimS|IecrXtPa|seTckYqt|NxnyHkgB|fIDlOgKb|UhlWIygH|OtNoJxHG|cUTkFVIV|mTgFIHjr|jQkoIDtE|PPMKxRXl|AwMfwVkQ|CQyMrTQJ|BzrqxVSi|nTpcWuhF|PertdywG|ZZDgCtXN|WWdDPyyE|uVtNQsKk|BdeCHvPZ|wshRnFlH|aOUIitIp|RxZeCdXT|CFZMslCj|AVBZRDxl|IzIGCnhw|ythYuWiz|oztXVXhl|VbLkwqQx|qvaUgyVC|VawUjPWC|ecloYJuj|boCLTdSU|uPrKeAZx|hrMWLWBq|JOnUNHRM|rYnujkPq|dDEdZhIj|DRrfvugG|yEGfDxVV|YMYdJWuP|PHUQZNWM|AmKNrLis|zTxndVfn|FPsHoJnc|EIulZTua|KlAPhdzg|ScHJJCLt|NtTfMzME|eMCwuFdo|SEpJVJbR|cdhXZeCx|sAVtBwRh|kVFEVcMI|jzJrxraA|tGLHTell|NNWoeSaw|DcOKSetX|UXZAJyka|THpMphDP|rizheevl|kDCBRidd|pCZZRqyu|pSygkitl|SwZGkAaW|wILOrfNX|QkwVOerj|kHOMxPDr|EwOVycJv|AJvtzQFS|yEOjKYYB|LizIINLL|JBRSsfcG|YPiUqqNl|IsdEbvee|MjEpGcBm|OxXZVgEQ|xClXGuxa|UzRCGFEb|buJbvfvA|IPZQxRet|oFYShsMc|oBHffuHO|bzzKrcBR|KAjzrGCl|IPUsAVls|OGMUMbIU|gyDccHuR|bjlalnDd|ZLWjeMna|fdsuIlxQ|dVXtiomV|XxedTjNg|XWMHlNoA|nnyqArQX|opfkWGhb|wYtnhdYb))",
|
"(?i:(zQPbMkNO|NNSPdvMi|iWuuSoAl|qbvKMimS|IecrXtPa|seTckYqt|NxnyHkgB|fIDlOgKb|UhlWIygH|OtNoJxHG|cUTkFVIV|mTgFIHjr|jQkoIDtE|PPMKxRXl|AwMfwVkQ|CQyMrTQJ|BzrqxVSi|nTpcWuhF|PertdywG|ZZDgCtXN|WWdDPyyE|uVtNQsKk|BdeCHvPZ|wshRnFlH|aOUIitIp|RxZeCdXT|CFZMslCj|AVBZRDxl|IzIGCnhw|ythYuWiz|oztXVXhl|VbLkwqQx|qvaUgyVC|VawUjPWC|ecloYJuj|boCLTdSU|uPrKeAZx|hrMWLWBq|JOnUNHRM|rYnujkPq|dDEdZhIj|DRrfvugG|yEGfDxVV|YMYdJWuP|PHUQZNWM|AmKNrLis|zTxndVfn|FPsHoJnc|EIulZTua|KlAPhdzg|ScHJJCLt|NtTfMzME|eMCwuFdo|SEpJVJbR|cdhXZeCx|sAVtBwRh|kVFEVcMI|jzJrxraA|tGLHTell|NNWoeSaw|DcOKSetX|UXZAJyka|THpMphDP|rizheevl|kDCBRidd|pCZZRqyu|pSygkitl|SwZGkAaW|wILOrfNX|QkwVOerj|kHOMxPDr|EwOVycJv|AJvtzQFS|yEOjKYYB|LizIINLL|JBRSsfcG|YPiUqqNl|IsdEbvee|MjEpGcBm|OxXZVgEQ|xClXGuxa|UzRCGFEb|buJbvfvA|IPZQxRet|oFYShsMc|oBHffuHO|bzzKrcBR|KAjzrGCl|IPUsAVls|OGMUMbIU|gyDccHuR|bjlalnDd|ZLWjeMna|fdsuIlxQ|dVXtiomV|XxedTjNg|XWMHlNoA|nnyqArQX|opfkWGhb|wYtnhdYb))",
|
||||||
"(?i:(AAAAAAAAAAAAAAAAAAAAAAAA|BBBBBBBBBBBBBBBBBBBBBBBB|cccccccccccccccccccccccC|ſſſſſſſſſſſſſſſſſſſſſſſſS|SSSSSSSSSSSSSSSSSSSSSSSSſ))",
|
"(?i:(AAAAAAAAAAAAAAAAAAAAAAAA|BBBBBBBBBBBBBBBBBBBBBBBB|cccccccccccccccccccccccC|ſſſſſſſſſſſſſſſſſſſſſſſſS|SSSSSSSSSSSSSSSSSSSSSSSSſ))",
|
||||||
|
// A short case insensitive alternation where each entry ends with ".*".
|
||||||
|
"(?i:(zQPbMkNO.*|NNSPdvMi.*|iWuuSoAl.*))",
|
||||||
// A long case insensitive alternation where each entry ends with ".*".
|
// A long case insensitive alternation where each entry ends with ".*".
|
||||||
"(?i:(zQPbMkNO.*|NNSPdvMi.*|iWuuSoAl.*|qbvKMimS.*|IecrXtPa.*|seTckYqt.*|NxnyHkgB.*|fIDlOgKb.*|UhlWIygH.*|OtNoJxHG.*|cUTkFVIV.*|mTgFIHjr.*|jQkoIDtE.*|PPMKxRXl.*|AwMfwVkQ.*|CQyMrTQJ.*|BzrqxVSi.*|nTpcWuhF.*|PertdywG.*|ZZDgCtXN.*|WWdDPyyE.*|uVtNQsKk.*|BdeCHvPZ.*|wshRnFlH.*|aOUIitIp.*|RxZeCdXT.*|CFZMslCj.*|AVBZRDxl.*|IzIGCnhw.*|ythYuWiz.*|oztXVXhl.*|VbLkwqQx.*|qvaUgyVC.*|VawUjPWC.*|ecloYJuj.*|boCLTdSU.*|uPrKeAZx.*|hrMWLWBq.*|JOnUNHRM.*|rYnujkPq.*|dDEdZhIj.*|DRrfvugG.*|yEGfDxVV.*|YMYdJWuP.*|PHUQZNWM.*|AmKNrLis.*|zTxndVfn.*|FPsHoJnc.*|EIulZTua.*|KlAPhdzg.*|ScHJJCLt.*|NtTfMzME.*|eMCwuFdo.*|SEpJVJbR.*|cdhXZeCx.*|sAVtBwRh.*|kVFEVcMI.*|jzJrxraA.*|tGLHTell.*|NNWoeSaw.*|DcOKSetX.*|UXZAJyka.*|THpMphDP.*|rizheevl.*|kDCBRidd.*|pCZZRqyu.*|pSygkitl.*|SwZGkAaW.*|wILOrfNX.*|QkwVOerj.*|kHOMxPDr.*|EwOVycJv.*|AJvtzQFS.*|yEOjKYYB.*|LizIINLL.*|JBRSsfcG.*|YPiUqqNl.*|IsdEbvee.*|MjEpGcBm.*|OxXZVgEQ.*|xClXGuxa.*|UzRCGFEb.*|buJbvfvA.*|IPZQxRet.*|oFYShsMc.*|oBHffuHO.*|bzzKrcBR.*|KAjzrGCl.*|IPUsAVls.*|OGMUMbIU.*|gyDccHuR.*|bjlalnDd.*|ZLWjeMna.*|fdsuIlxQ.*|dVXtiomV.*|XxedTjNg.*|XWMHlNoA.*|nnyqArQX.*|opfkWGhb.*|wYtnhdYb.*))",
|
"(?i:(zQPbMkNO.*|NNSPdvMi.*|iWuuSoAl.*|qbvKMimS.*|IecrXtPa.*|seTckYqt.*|NxnyHkgB.*|fIDlOgKb.*|UhlWIygH.*|OtNoJxHG.*|cUTkFVIV.*|mTgFIHjr.*|jQkoIDtE.*|PPMKxRXl.*|AwMfwVkQ.*|CQyMrTQJ.*|BzrqxVSi.*|nTpcWuhF.*|PertdywG.*|ZZDgCtXN.*|WWdDPyyE.*|uVtNQsKk.*|BdeCHvPZ.*|wshRnFlH.*|aOUIitIp.*|RxZeCdXT.*|CFZMslCj.*|AVBZRDxl.*|IzIGCnhw.*|ythYuWiz.*|oztXVXhl.*|VbLkwqQx.*|qvaUgyVC.*|VawUjPWC.*|ecloYJuj.*|boCLTdSU.*|uPrKeAZx.*|hrMWLWBq.*|JOnUNHRM.*|rYnujkPq.*|dDEdZhIj.*|DRrfvugG.*|yEGfDxVV.*|YMYdJWuP.*|PHUQZNWM.*|AmKNrLis.*|zTxndVfn.*|FPsHoJnc.*|EIulZTua.*|KlAPhdzg.*|ScHJJCLt.*|NtTfMzME.*|eMCwuFdo.*|SEpJVJbR.*|cdhXZeCx.*|sAVtBwRh.*|kVFEVcMI.*|jzJrxraA.*|tGLHTell.*|NNWoeSaw.*|DcOKSetX.*|UXZAJyka.*|THpMphDP.*|rizheevl.*|kDCBRidd.*|pCZZRqyu.*|pSygkitl.*|SwZGkAaW.*|wILOrfNX.*|QkwVOerj.*|kHOMxPDr.*|EwOVycJv.*|AJvtzQFS.*|yEOjKYYB.*|LizIINLL.*|JBRSsfcG.*|YPiUqqNl.*|IsdEbvee.*|MjEpGcBm.*|OxXZVgEQ.*|xClXGuxa.*|UzRCGFEb.*|buJbvfvA.*|IPZQxRet.*|oFYShsMc.*|oBHffuHO.*|bzzKrcBR.*|KAjzrGCl.*|IPUsAVls.*|OGMUMbIU.*|gyDccHuR.*|bjlalnDd.*|ZLWjeMna.*|fdsuIlxQ.*|dVXtiomV.*|XxedTjNg.*|XWMHlNoA.*|nnyqArQX.*|opfkWGhb.*|wYtnhdYb.*))",
|
||||||
// A long case insensitive alternation where each entry starts with ".*".
|
// A long case insensitive alternation where each entry starts with ".*".
|
||||||
|
@ -686,7 +688,15 @@ func randStrings(randGenerator *rand.Rand, many, length int) []string {
|
||||||
return out
|
return out
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestOptimizeEqualStringMatchers(t *testing.T) {
|
func randStringsWithSuffix(randGenerator *rand.Rand, many, length int, suffix string) []string {
|
||||||
|
out := randStrings(randGenerator, many, length)
|
||||||
|
for i := range out {
|
||||||
|
out[i] += suffix
|
||||||
|
}
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestOptimizeEqualOrPrefixStringMatchers(t *testing.T) {
|
||||||
tests := map[string]struct {
|
tests := map[string]struct {
|
||||||
input StringMatcher
|
input StringMatcher
|
||||||
expectedValues []string
|
expectedValues []string
|
||||||
|
@ -767,7 +777,7 @@ func TestOptimizeEqualStringMatchers(t *testing.T) {
|
||||||
|
|
||||||
for testName, testData := range tests {
|
for testName, testData := range tests {
|
||||||
t.Run(testName, func(t *testing.T) {
|
t.Run(testName, func(t *testing.T) {
|
||||||
actualMatcher := optimizeEqualStringMatchers(testData.input, 0)
|
actualMatcher := optimizeEqualOrPrefixStringMatchers(testData.input, 0)
|
||||||
|
|
||||||
if testData.expectedValues == nil {
|
if testData.expectedValues == nil {
|
||||||
require.IsType(t, testData.input, actualMatcher)
|
require.IsType(t, testData.input, actualMatcher)
|
||||||
|
@ -783,8 +793,10 @@ func TestOptimizeEqualStringMatchers(t *testing.T) {
|
||||||
func TestNewEqualMultiStringMatcher(t *testing.T) {
|
func TestNewEqualMultiStringMatcher(t *testing.T) {
|
||||||
tests := map[string]struct {
|
tests := map[string]struct {
|
||||||
values []string
|
values []string
|
||||||
|
caseSensitivePrefixes []*literalPrefixSensitiveStringMatcher
|
||||||
caseSensitive bool
|
caseSensitive bool
|
||||||
expectedValuesMap map[string]struct{}
|
expectedValuesMap map[string]struct{}
|
||||||
|
expectedPrefixesMap map[string][]StringMatcher
|
||||||
expectedValuesList []string
|
expectedValuesList []string
|
||||||
}{
|
}{
|
||||||
"few case sensitive values": {
|
"few case sensitive values": {
|
||||||
|
@ -797,27 +809,47 @@ func TestNewEqualMultiStringMatcher(t *testing.T) {
|
||||||
caseSensitive: false,
|
caseSensitive: false,
|
||||||
expectedValuesList: []string{"a", "B"},
|
expectedValuesList: []string{"a", "B"},
|
||||||
},
|
},
|
||||||
|
"few case sensitive values and prefixes": {
|
||||||
|
values: []string{"a"},
|
||||||
|
caseSensitivePrefixes: []*literalPrefixSensitiveStringMatcher{{prefix: "B", right: anyStringWithoutNewlineMatcher{}}},
|
||||||
|
caseSensitive: true,
|
||||||
|
expectedValuesMap: map[string]struct{}{"a": {}},
|
||||||
|
expectedPrefixesMap: map[string][]StringMatcher{"B": {&literalPrefixSensitiveStringMatcher{prefix: "B", right: anyStringWithoutNewlineMatcher{}}}},
|
||||||
|
},
|
||||||
"many case sensitive values": {
|
"many case sensitive values": {
|
||||||
values: []string{"a", "B", "c", "D", "e", "F", "g", "H", "i", "L", "m", "N", "o", "P", "q", "r"},
|
values: []string{"a", "B", "c", "D", "e", "F", "g", "H", "i", "L", "m", "N", "o", "P", "q", "r"},
|
||||||
caseSensitive: true,
|
caseSensitive: true,
|
||||||
expectedValuesMap: map[string]struct{}{"a": {}, "B": {}, "c": {}, "D": {}, "e": {}, "F": {}, "g": {}, "H": {}, "i": {}, "L": {}, "m": {}, "N": {}, "o": {}, "P": {}, "q": {}, "r": {}},
|
expectedValuesMap: map[string]struct{}{"a": {}, "B": {}, "c": {}, "D": {}, "e": {}, "F": {}, "g": {}, "H": {}, "i": {}, "L": {}, "m": {}, "N": {}, "o": {}, "P": {}, "q": {}, "r": {}},
|
||||||
|
expectedPrefixesMap: map[string][]StringMatcher{},
|
||||||
},
|
},
|
||||||
"many case insensitive values": {
|
"many case insensitive values": {
|
||||||
values: []string{"a", "B", "c", "D", "e", "F", "g", "H", "i", "L", "m", "N", "o", "P", "q", "r"},
|
values: []string{"a", "B", "c", "D", "e", "F", "g", "H", "i", "L", "m", "N", "o", "P", "q", "r"},
|
||||||
caseSensitive: false,
|
caseSensitive: false,
|
||||||
expectedValuesMap: map[string]struct{}{"a": {}, "b": {}, "c": {}, "d": {}, "e": {}, "f": {}, "g": {}, "h": {}, "i": {}, "l": {}, "m": {}, "n": {}, "o": {}, "p": {}, "q": {}, "r": {}},
|
expectedValuesMap: map[string]struct{}{"a": {}, "b": {}, "c": {}, "d": {}, "e": {}, "f": {}, "g": {}, "h": {}, "i": {}, "l": {}, "m": {}, "n": {}, "o": {}, "p": {}, "q": {}, "r": {}},
|
||||||
|
expectedPrefixesMap: map[string][]StringMatcher{},
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
for testName, testData := range tests {
|
for testName, testData := range tests {
|
||||||
t.Run(testName, func(t *testing.T) {
|
t.Run(testName, func(t *testing.T) {
|
||||||
matcher := newEqualMultiStringMatcher(testData.caseSensitive, len(testData.values))
|
// To keep this test simple, we always assume a min prefix length of 1.
|
||||||
|
minPrefixLength := 0
|
||||||
|
if len(testData.caseSensitivePrefixes) > 0 {
|
||||||
|
minPrefixLength = 1
|
||||||
|
}
|
||||||
|
|
||||||
|
matcher := newEqualMultiStringMatcher(testData.caseSensitive, len(testData.values), len(testData.caseSensitivePrefixes), minPrefixLength)
|
||||||
for _, v := range testData.values {
|
for _, v := range testData.values {
|
||||||
matcher.add(v)
|
matcher.add(v)
|
||||||
}
|
}
|
||||||
if testData.expectedValuesMap != nil {
|
for _, p := range testData.caseSensitivePrefixes {
|
||||||
|
matcher.addPrefix(p.prefix, true, p)
|
||||||
|
}
|
||||||
|
|
||||||
|
if testData.expectedValuesMap != nil || testData.expectedPrefixesMap != nil {
|
||||||
require.IsType(t, &equalMultiStringMapMatcher{}, matcher)
|
require.IsType(t, &equalMultiStringMapMatcher{}, matcher)
|
||||||
require.Equal(t, testData.expectedValuesMap, matcher.(*equalMultiStringMapMatcher).values)
|
require.Equal(t, testData.expectedValuesMap, matcher.(*equalMultiStringMapMatcher).values)
|
||||||
|
require.Equal(t, testData.expectedPrefixesMap, matcher.(*equalMultiStringMapMatcher).prefixes)
|
||||||
require.Equal(t, testData.caseSensitive, matcher.(*equalMultiStringMapMatcher).caseSensitive)
|
require.Equal(t, testData.caseSensitive, matcher.(*equalMultiStringMapMatcher).caseSensitive)
|
||||||
}
|
}
|
||||||
if testData.expectedValuesList != nil {
|
if testData.expectedValuesList != nil {
|
||||||
|
@ -829,9 +861,32 @@ func TestNewEqualMultiStringMatcher(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestEqualMultiStringMapMatcher_addPrefix(t *testing.T) {
|
||||||
|
t.Run("should panic if the matcher is case sensitive but the prefix is not case sensitive", func(t *testing.T) {
|
||||||
|
matcher := newEqualMultiStringMatcher(true, 0, 1, 1)
|
||||||
|
|
||||||
|
require.Panics(t, func() {
|
||||||
|
matcher.addPrefix("a", false, &literalPrefixInsensitiveStringMatcher{
|
||||||
|
prefix: "a",
|
||||||
|
})
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("should panic if the matcher is not case sensitive but the prefix is case sensitive", func(t *testing.T) {
|
||||||
|
matcher := newEqualMultiStringMatcher(false, 0, 1, 1)
|
||||||
|
|
||||||
|
require.Panics(t, func() {
|
||||||
|
matcher.addPrefix("a", true, &literalPrefixSensitiveStringMatcher{
|
||||||
|
prefix: "a",
|
||||||
|
})
|
||||||
|
})
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
func TestEqualMultiStringMatcher_Matches(t *testing.T) {
|
func TestEqualMultiStringMatcher_Matches(t *testing.T) {
|
||||||
tests := map[string]struct {
|
tests := map[string]struct {
|
||||||
values []string
|
values []string
|
||||||
|
prefixes []StringMatcher
|
||||||
caseSensitive bool
|
caseSensitive bool
|
||||||
expectedMatches []string
|
expectedMatches []string
|
||||||
expectedNotMatches []string
|
expectedNotMatches []string
|
||||||
|
@ -848,6 +903,24 @@ func TestEqualMultiStringMatcher_Matches(t *testing.T) {
|
||||||
expectedMatches: []string{"a", "A", "b", "B"},
|
expectedMatches: []string{"a", "A", "b", "B"},
|
||||||
expectedNotMatches: []string{"c", "C"},
|
expectedNotMatches: []string{"c", "C"},
|
||||||
},
|
},
|
||||||
|
"few case sensitive prefixes": {
|
||||||
|
prefixes: []StringMatcher{
|
||||||
|
&literalPrefixSensitiveStringMatcher{prefix: "a", right: anyStringWithoutNewlineMatcher{}},
|
||||||
|
&literalPrefixSensitiveStringMatcher{prefix: "B", right: anyStringWithoutNewlineMatcher{}},
|
||||||
|
},
|
||||||
|
caseSensitive: true,
|
||||||
|
expectedMatches: []string{"a", "aX", "B", "BX"},
|
||||||
|
expectedNotMatches: []string{"A", "b"},
|
||||||
|
},
|
||||||
|
"few case insensitive prefixes": {
|
||||||
|
prefixes: []StringMatcher{
|
||||||
|
&literalPrefixInsensitiveStringMatcher{prefix: "a", right: anyStringWithoutNewlineMatcher{}},
|
||||||
|
&literalPrefixInsensitiveStringMatcher{prefix: "B", right: anyStringWithoutNewlineMatcher{}},
|
||||||
|
},
|
||||||
|
caseSensitive: false,
|
||||||
|
expectedMatches: []string{"a", "aX", "A", "AX", "b", "bX", "B", "BX"},
|
||||||
|
expectedNotMatches: []string{"c", "cX", "C", "CX"},
|
||||||
|
},
|
||||||
"many case sensitive values": {
|
"many case sensitive values": {
|
||||||
values: []string{"a", "B", "c", "D", "e", "F", "g", "H", "i", "L", "m", "N", "o", "P", "q", "r"},
|
values: []string{"a", "B", "c", "D", "e", "F", "g", "H", "i", "L", "m", "N", "o", "P", "q", "r"},
|
||||||
caseSensitive: true,
|
caseSensitive: true,
|
||||||
|
@ -860,14 +933,37 @@ func TestEqualMultiStringMatcher_Matches(t *testing.T) {
|
||||||
expectedMatches: []string{"a", "A", "b", "B"},
|
expectedMatches: []string{"a", "A", "b", "B"},
|
||||||
expectedNotMatches: []string{"x", "X"},
|
expectedNotMatches: []string{"x", "X"},
|
||||||
},
|
},
|
||||||
|
"mixed values and prefixes": {
|
||||||
|
values: []string{"a"},
|
||||||
|
prefixes: []StringMatcher{&literalPrefixSensitiveStringMatcher{prefix: "B", right: anyStringWithoutNewlineMatcher{}}},
|
||||||
|
caseSensitive: true,
|
||||||
|
expectedMatches: []string{"a", "B", "BX"},
|
||||||
|
expectedNotMatches: []string{"aX", "A", "b", "bX"},
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
for testName, testData := range tests {
|
for testName, testData := range tests {
|
||||||
t.Run(testName, func(t *testing.T) {
|
t.Run(testName, func(t *testing.T) {
|
||||||
matcher := newEqualMultiStringMatcher(testData.caseSensitive, len(testData.values))
|
// To keep this test simple, we always assume a min prefix length of 1.
|
||||||
|
minPrefixLength := 0
|
||||||
|
if len(testData.prefixes) > 0 {
|
||||||
|
minPrefixLength = 1
|
||||||
|
}
|
||||||
|
|
||||||
|
matcher := newEqualMultiStringMatcher(testData.caseSensitive, len(testData.values), len(testData.prefixes), minPrefixLength)
|
||||||
for _, v := range testData.values {
|
for _, v := range testData.values {
|
||||||
matcher.add(v)
|
matcher.add(v)
|
||||||
}
|
}
|
||||||
|
for _, p := range testData.prefixes {
|
||||||
|
switch m := p.(type) {
|
||||||
|
case *literalPrefixSensitiveStringMatcher:
|
||||||
|
matcher.addPrefix(m.prefix, true, p)
|
||||||
|
case *literalPrefixInsensitiveStringMatcher:
|
||||||
|
matcher.addPrefix(m.prefix, false, p)
|
||||||
|
default:
|
||||||
|
panic("Unexpected type in test case")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
for _, v := range testData.expectedMatches {
|
for _, v := range testData.expectedMatches {
|
||||||
require.True(t, matcher.Matches(v), "value: %s", v)
|
require.True(t, matcher.Matches(v), "value: %s", v)
|
||||||
|
@ -879,29 +975,33 @@ func TestEqualMultiStringMatcher_Matches(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestFindEqualStringMatchers(t *testing.T) {
|
func TestFindEqualOrPrefixStringMatchers(t *testing.T) {
|
||||||
type match struct {
|
type match struct {
|
||||||
s string
|
s string
|
||||||
caseSensitive bool
|
caseSensitive bool
|
||||||
}
|
}
|
||||||
|
|
||||||
// Utility to call findEqualStringMatchers() and collect all callback invocations.
|
// Utility to call findEqualOrPrefixStringMatchers() and collect all callback invocations.
|
||||||
findEqualStringMatchersAndCollectMatches := func(input StringMatcher) (matches []match, ok bool) {
|
findEqualOrPrefixStringMatchersAndCollectMatches := func(input StringMatcher) (matches []match, ok bool) {
|
||||||
ok = findEqualStringMatchers(input, func(matcher *equalStringMatcher) bool {
|
ok = findEqualOrPrefixStringMatchers(input, func(matcher *equalStringMatcher) bool {
|
||||||
matches = append(matches, match{matcher.s, matcher.caseSensitive})
|
matches = append(matches, match{matcher.s, matcher.caseSensitive})
|
||||||
return true
|
return true
|
||||||
|
}, func(prefix string, prefixCaseSensitive bool, right StringMatcher) bool {
|
||||||
|
matches = append(matches, match{prefix, prefixCaseSensitive})
|
||||||
|
return true
|
||||||
})
|
})
|
||||||
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
t.Run("empty matcher", func(t *testing.T) {
|
t.Run("empty matcher", func(t *testing.T) {
|
||||||
actualMatches, actualOk := findEqualStringMatchersAndCollectMatches(emptyStringMatcher{})
|
actualMatches, actualOk := findEqualOrPrefixStringMatchersAndCollectMatches(emptyStringMatcher{})
|
||||||
require.False(t, actualOk)
|
require.False(t, actualOk)
|
||||||
require.Empty(t, actualMatches)
|
require.Empty(t, actualMatches)
|
||||||
})
|
})
|
||||||
|
|
||||||
t.Run("concat of literal matchers (case sensitive)", func(t *testing.T) {
|
t.Run("concat of literal matchers (case sensitive)", func(t *testing.T) {
|
||||||
actualMatches, actualOk := findEqualStringMatchersAndCollectMatches(
|
actualMatches, actualOk := findEqualOrPrefixStringMatchersAndCollectMatches(
|
||||||
orStringMatcher{
|
orStringMatcher{
|
||||||
&equalStringMatcher{s: "test-1", caseSensitive: true},
|
&equalStringMatcher{s: "test-1", caseSensitive: true},
|
||||||
&equalStringMatcher{s: "test-2", caseSensitive: true},
|
&equalStringMatcher{s: "test-2", caseSensitive: true},
|
||||||
|
@ -913,7 +1013,7 @@ func TestFindEqualStringMatchers(t *testing.T) {
|
||||||
})
|
})
|
||||||
|
|
||||||
t.Run("concat of literal matchers (case insensitive)", func(t *testing.T) {
|
t.Run("concat of literal matchers (case insensitive)", func(t *testing.T) {
|
||||||
actualMatches, actualOk := findEqualStringMatchersAndCollectMatches(
|
actualMatches, actualOk := findEqualOrPrefixStringMatchersAndCollectMatches(
|
||||||
orStringMatcher{
|
orStringMatcher{
|
||||||
&equalStringMatcher{s: "test-1", caseSensitive: false},
|
&equalStringMatcher{s: "test-1", caseSensitive: false},
|
||||||
&equalStringMatcher{s: "test-2", caseSensitive: false},
|
&equalStringMatcher{s: "test-2", caseSensitive: false},
|
||||||
|
@ -925,7 +1025,7 @@ func TestFindEqualStringMatchers(t *testing.T) {
|
||||||
})
|
})
|
||||||
|
|
||||||
t.Run("concat of literal matchers (mixed case)", func(t *testing.T) {
|
t.Run("concat of literal matchers (mixed case)", func(t *testing.T) {
|
||||||
actualMatches, actualOk := findEqualStringMatchersAndCollectMatches(
|
actualMatches, actualOk := findEqualOrPrefixStringMatchersAndCollectMatches(
|
||||||
orStringMatcher{
|
orStringMatcher{
|
||||||
&equalStringMatcher{s: "test-1", caseSensitive: false},
|
&equalStringMatcher{s: "test-1", caseSensitive: false},
|
||||||
&equalStringMatcher{s: "test-2", caseSensitive: true},
|
&equalStringMatcher{s: "test-2", caseSensitive: true},
|
||||||
|
@ -935,11 +1035,59 @@ func TestFindEqualStringMatchers(t *testing.T) {
|
||||||
require.True(t, actualOk)
|
require.True(t, actualOk)
|
||||||
require.Equal(t, []match{{"test-1", false}, {"test-2", true}}, actualMatches)
|
require.Equal(t, []match{{"test-1", false}, {"test-2", true}}, actualMatches)
|
||||||
})
|
})
|
||||||
|
|
||||||
|
t.Run("concat of literal prefix matchers (case sensitive)", func(t *testing.T) {
|
||||||
|
actualMatches, actualOk := findEqualOrPrefixStringMatchersAndCollectMatches(
|
||||||
|
orStringMatcher{
|
||||||
|
&literalPrefixSensitiveStringMatcher{prefix: "test-1"},
|
||||||
|
&literalPrefixSensitiveStringMatcher{prefix: "test-2"},
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
require.True(t, actualOk)
|
||||||
|
require.Equal(t, []match{{"test-1", true}, {"test-2", true}}, actualMatches)
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("concat of literal prefix matchers (case insensitive)", func(t *testing.T) {
|
||||||
|
actualMatches, actualOk := findEqualOrPrefixStringMatchersAndCollectMatches(
|
||||||
|
orStringMatcher{
|
||||||
|
&literalPrefixInsensitiveStringMatcher{prefix: "test-1"},
|
||||||
|
&literalPrefixInsensitiveStringMatcher{prefix: "test-2"},
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
require.True(t, actualOk)
|
||||||
|
require.Equal(t, []match{{"test-1", false}, {"test-2", false}}, actualMatches)
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("concat of literal prefix matchers (mixed case)", func(t *testing.T) {
|
||||||
|
actualMatches, actualOk := findEqualOrPrefixStringMatchersAndCollectMatches(
|
||||||
|
orStringMatcher{
|
||||||
|
&literalPrefixInsensitiveStringMatcher{prefix: "test-1"},
|
||||||
|
&literalPrefixSensitiveStringMatcher{prefix: "test-2"},
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
require.True(t, actualOk)
|
||||||
|
require.Equal(t, []match{{"test-1", false}, {"test-2", true}}, actualMatches)
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("concat of literal string and prefix matchers (case sensitive)", func(t *testing.T) {
|
||||||
|
actualMatches, actualOk := findEqualOrPrefixStringMatchersAndCollectMatches(
|
||||||
|
orStringMatcher{
|
||||||
|
&equalStringMatcher{s: "test-1", caseSensitive: true},
|
||||||
|
&literalPrefixSensitiveStringMatcher{prefix: "test-2"},
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
require.True(t, actualOk)
|
||||||
|
require.Equal(t, []match{{"test-1", true}, {"test-2", true}}, actualMatches)
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
// This benchmark is used to find a good threshold to use to apply the optimization
|
// This benchmark is used to find a good threshold to use to apply the optimization
|
||||||
// done by optimizeEqualStringMatchers().
|
// done by optimizeEqualOrPrefixStringMatchers().
|
||||||
func BenchmarkOptimizeEqualStringMatchers(b *testing.B) {
|
func BenchmarkOptimizeEqualOrPrefixStringMatchers(b *testing.B) {
|
||||||
randGenerator := rand.New(rand.NewSource(time.Now().UnixNano()))
|
randGenerator := rand.New(rand.NewSource(time.Now().UnixNano()))
|
||||||
|
|
||||||
// Generate variable lengths random texts to match against.
|
// Generate variable lengths random texts to match against.
|
||||||
|
@ -949,12 +1097,20 @@ func BenchmarkOptimizeEqualStringMatchers(b *testing.B) {
|
||||||
|
|
||||||
for numAlternations := 2; numAlternations <= 256; numAlternations *= 2 {
|
for numAlternations := 2; numAlternations <= 256; numAlternations *= 2 {
|
||||||
for _, caseSensitive := range []bool{true, false} {
|
for _, caseSensitive := range []bool{true, false} {
|
||||||
b.Run(fmt.Sprintf("alternations: %d case sensitive: %t", numAlternations, caseSensitive), func(b *testing.B) {
|
for _, prefixMatcher := range []bool{true, false} {
|
||||||
|
b.Run(fmt.Sprintf("alternations: %d case sensitive: %t prefix matcher: %t", numAlternations, caseSensitive, prefixMatcher), func(b *testing.B) {
|
||||||
|
// If the test should run on prefix matchers, we add a wildcard matcher as suffix (prefix will be a literal).
|
||||||
|
suffix := ""
|
||||||
|
if prefixMatcher {
|
||||||
|
suffix = ".*"
|
||||||
|
}
|
||||||
|
|
||||||
// Generate a regex with the expected number of alternations.
|
// Generate a regex with the expected number of alternations.
|
||||||
re := strings.Join(randStrings(randGenerator, numAlternations, 10), "|")
|
re := strings.Join(randStringsWithSuffix(randGenerator, numAlternations, 10, suffix), "|")
|
||||||
if !caseSensitive {
|
if !caseSensitive {
|
||||||
re = "(?i:(" + re + "))"
|
re = "(?i:(" + re + "))"
|
||||||
}
|
}
|
||||||
|
b.Logf("regexp: %s", re)
|
||||||
|
|
||||||
parsed, err := syntax.Parse(re, syntax.Perl)
|
parsed, err := syntax.Parse(re, syntax.Perl)
|
||||||
require.NoError(b, err)
|
require.NoError(b, err)
|
||||||
|
@ -962,14 +1118,14 @@ func BenchmarkOptimizeEqualStringMatchers(b *testing.B) {
|
||||||
unoptimized := stringMatcherFromRegexpInternal(parsed)
|
unoptimized := stringMatcherFromRegexpInternal(parsed)
|
||||||
require.IsType(b, orStringMatcher{}, unoptimized)
|
require.IsType(b, orStringMatcher{}, unoptimized)
|
||||||
|
|
||||||
optimized := optimizeEqualStringMatchers(unoptimized, 0)
|
optimized := optimizeEqualOrPrefixStringMatchers(unoptimized, 0)
|
||||||
if numAlternations < minEqualMultiStringMatcherMapThreshold {
|
if numAlternations < minEqualMultiStringMatcherMapThreshold && !prefixMatcher {
|
||||||
require.IsType(b, &equalMultiStringSliceMatcher{}, optimized)
|
require.IsType(b, &equalMultiStringSliceMatcher{}, optimized)
|
||||||
} else {
|
} else {
|
||||||
require.IsType(b, &equalMultiStringMapMatcher{}, optimized)
|
require.IsType(b, &equalMultiStringMapMatcher{}, optimized)
|
||||||
}
|
}
|
||||||
|
|
||||||
b.Run("without optimizeEqualStringMatchers()", func(b *testing.B) {
|
b.Run("without optimizeEqualOrPrefixStringMatchers()", func(b *testing.B) {
|
||||||
for n := 0; n < b.N; n++ {
|
for n := 0; n < b.N; n++ {
|
||||||
for _, t := range texts {
|
for _, t := range texts {
|
||||||
unoptimized.Matches(t)
|
unoptimized.Matches(t)
|
||||||
|
@ -977,7 +1133,7 @@ func BenchmarkOptimizeEqualStringMatchers(b *testing.B) {
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|
||||||
b.Run("with optimizeEqualStringMatchers()", func(b *testing.B) {
|
b.Run("with optimizeEqualOrPrefixStringMatchers()", func(b *testing.B) {
|
||||||
for n := 0; n < b.N; n++ {
|
for n := 0; n < b.N; n++ {
|
||||||
for _, t := range texts {
|
for _, t := range texts {
|
||||||
optimized.Matches(t)
|
optimized.Matches(t)
|
||||||
|
@ -988,6 +1144,7 @@ func BenchmarkOptimizeEqualStringMatchers(b *testing.B) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestZeroOrOneCharacterStringMatcher(t *testing.T) {
|
func TestZeroOrOneCharacterStringMatcher(t *testing.T) {
|
||||||
t.Run("match newline", func(t *testing.T) {
|
t.Run("match newline", func(t *testing.T) {
|
||||||
|
@ -1204,10 +1361,16 @@ func visitStringMatcher(matcher StringMatcher, callback func(matcher StringMatch
|
||||||
}
|
}
|
||||||
|
|
||||||
// No nested matchers for the following ones.
|
// No nested matchers for the following ones.
|
||||||
|
case *equalMultiStringMapMatcher:
|
||||||
|
for _, prefixes := range casted.prefixes {
|
||||||
|
for _, matcher := range prefixes {
|
||||||
|
visitStringMatcher(matcher, callback)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
case emptyStringMatcher:
|
case emptyStringMatcher:
|
||||||
case *equalStringMatcher:
|
case *equalStringMatcher:
|
||||||
case *equalMultiStringSliceMatcher:
|
case *equalMultiStringSliceMatcher:
|
||||||
case *equalMultiStringMapMatcher:
|
|
||||||
case anyStringWithoutNewlineMatcher:
|
case anyStringWithoutNewlineMatcher:
|
||||||
case *anyNonEmptyStringMatcher:
|
case *anyNonEmptyStringMatcher:
|
||||||
case trueMatcher:
|
case trueMatcher:
|
||||||
|
|
Loading…
Reference in New Issue