From ec31acaf0241bb061ae2621a0b8ed4131bf1146f Mon Sep 17 00:00:00 2001 From: Marco Pracucci Date: Mon, 1 Jul 2024 10:12:50 +0200 Subject: [PATCH] FastRegexMatcher: small optimization for the literal prefix case Signed-off-by: Marco Pracucci --- model/labels/regexp.go | 48 +++++++++++++++++++++++++++---------- model/labels/regexp_test.go | 48 +++++++++++++++++++++---------------- 2 files changed, 63 insertions(+), 33 deletions(-) diff --git a/model/labels/regexp.go b/model/labels/regexp.go index 1e9db882b..767bd6942 100644 --- a/model/labels/regexp.go +++ b/model/labels/regexp.go @@ -549,11 +549,7 @@ func stringMatcherFromRegexpInternal(re *syntax.Regexp) StringMatcher { // Right matcher with 1 fixed set match. case left == nil && len(matches) == 1: - return &literalPrefixStringMatcher{ - prefix: matches[0], - prefixCaseSensitive: matchesCaseSensitive, - right: right, - } + return newLiteralPrefixStringMatcher(matches[0], matchesCaseSensitive, right) // Left matcher with 1 fixed set match. case right == nil && len(matches) == 1: @@ -631,21 +627,47 @@ func (m *containsStringMatcher) Matches(s string) bool { return false } -// literalPrefixStringMatcher matches a string with the given literal prefix and right side matcher. -type literalPrefixStringMatcher struct { - prefix string - prefixCaseSensitive bool +func newLiteralPrefixStringMatcher(prefix string, prefixCaseSensitive bool, right StringMatcher) StringMatcher { + if prefixCaseSensitive { + return &literalPrefixSensitiveStringMatcher{ + prefix: prefix, + right: right, + } + } + + return &literalPrefixInsensitiveStringMatcher{ + prefix: prefix, + right: right, + } +} + +// literalPrefixSensitiveStringMatcher matches a string with the given literal case-sensitive prefix and right side matcher. +type literalPrefixSensitiveStringMatcher struct { + prefix string // The matcher that must match the right side. Can be nil. right StringMatcher } -func (m *literalPrefixStringMatcher) Matches(s string) bool { - // Ensure the prefix matches. - if m.prefixCaseSensitive && !strings.HasPrefix(s, m.prefix) { +func (m *literalPrefixSensitiveStringMatcher) Matches(s string) bool { + if !strings.HasPrefix(s, m.prefix) { return false } - if !m.prefixCaseSensitive && !hasPrefixCaseInsensitive(s, m.prefix) { + + // Ensure the right side matches. + return m.right.Matches(s[len(m.prefix):]) +} + +// literalPrefixInsensitiveStringMatcher matches a string with the given literal case-insensitive prefix and right side matcher. +type literalPrefixInsensitiveStringMatcher struct { + prefix string + + // The matcher that must match the right side. Can be nil. + right StringMatcher +} + +func (m *literalPrefixInsensitiveStringMatcher) Matches(s string) bool { + if !hasPrefixCaseInsensitive(s, m.prefix) { return false } diff --git a/model/labels/regexp_test.go b/model/labels/regexp_test.go index 008eae702..fa5c96f42 100644 --- a/model/labels/regexp_test.go +++ b/model/labels/regexp_test.go @@ -376,7 +376,7 @@ func TestStringMatcherFromRegexp(t *testing.T) { {"10\\.0\\.(1|2)\\.+", nil}, {"10\\.0\\.(1|2).+", &containsStringMatcher{substrings: []string{"10.0.1", "10.0.2"}, left: nil, right: &anyNonEmptyStringMatcher{matchNL: false}}}, {"^.+foo", &literalSuffixStringMatcher{left: &anyNonEmptyStringMatcher{}, suffix: "foo", suffixCaseSensitive: true}}, - {"foo-.*$", &literalPrefixStringMatcher{prefix: "foo-", prefixCaseSensitive: true, right: anyStringWithoutNewlineMatcher{}}}, + {"foo-.*$", &literalPrefixSensitiveStringMatcher{prefix: "foo-", right: anyStringWithoutNewlineMatcher{}}}, {"(prometheus|api_prom)_api_v1_.+", &containsStringMatcher{substrings: []string{"prometheus_api_v1_", "api_prom_api_v1_"}, left: nil, right: &anyNonEmptyStringMatcher{matchNL: false}}}, {"^((.*)(bar|b|buzz)(.+)|foo)$", orStringMatcher([]StringMatcher{&containsStringMatcher{substrings: []string{"bar", "b", "buzz"}, left: anyStringWithoutNewlineMatcher{}, right: &anyNonEmptyStringMatcher{matchNL: false}}, &equalStringMatcher{s: "foo", caseSensitive: true}})}, {"((fo(bar))|.+foo)", orStringMatcher([]StringMatcher{orStringMatcher([]StringMatcher{&equalStringMatcher{s: "fobar", caseSensitive: true}}), &literalSuffixStringMatcher{suffix: "foo", suffixCaseSensitive: true, left: &anyNonEmptyStringMatcher{matchNL: false}}})}, @@ -391,15 +391,15 @@ func TestStringMatcherFromRegexp(t *testing.T) { {".*foo.*bar.*", nil}, {`\d*`, nil}, {".", nil}, - {"/|/bar.*", &literalPrefixStringMatcher{prefix: "/", prefixCaseSensitive: true, right: orStringMatcher{emptyStringMatcher{}, &literalPrefixStringMatcher{prefix: "bar", prefixCaseSensitive: true, right: anyStringWithoutNewlineMatcher{}}}}}, + {"/|/bar.*", &literalPrefixSensitiveStringMatcher{prefix: "/", right: orStringMatcher{emptyStringMatcher{}, &literalPrefixSensitiveStringMatcher{prefix: "bar", right: anyStringWithoutNewlineMatcher{}}}}}, // This one is not supported because `stringMatcherFromRegexp` is not reentrant for syntax.OpConcat. // It would make the code too complex to handle it. {"(.+)/(foo.*|bar$)", nil}, // Case sensitive alternate with same literal prefix and .* suffix. - {"(xyz-016a-ixb-dp.*|xyz-016a-ixb-op.*)", &literalPrefixStringMatcher{prefix: "xyz-016a-ixb-", prefixCaseSensitive: true, right: orStringMatcher{&literalPrefixStringMatcher{prefix: "dp", prefixCaseSensitive: true, right: anyStringWithoutNewlineMatcher{}}, &literalPrefixStringMatcher{prefix: "op", prefixCaseSensitive: true, right: anyStringWithoutNewlineMatcher{}}}}}, + {"(xyz-016a-ixb-dp.*|xyz-016a-ixb-op.*)", &literalPrefixSensitiveStringMatcher{prefix: "xyz-016a-ixb-", right: orStringMatcher{&literalPrefixSensitiveStringMatcher{prefix: "dp", right: anyStringWithoutNewlineMatcher{}}, &literalPrefixSensitiveStringMatcher{prefix: "op", right: anyStringWithoutNewlineMatcher{}}}}}, // Case insensitive alternate with same literal prefix and .* suffix. - {"(?i:(xyz-016a-ixb-dp.*|xyz-016a-ixb-op.*))", &literalPrefixStringMatcher{prefix: "XYZ-016A-IXB-", prefixCaseSensitive: false, right: orStringMatcher{&literalPrefixStringMatcher{prefix: "DP", prefixCaseSensitive: false, right: anyStringWithoutNewlineMatcher{}}, &literalPrefixStringMatcher{prefix: "OP", prefixCaseSensitive: false, right: anyStringWithoutNewlineMatcher{}}}}}, - {"(?i)(xyz-016a-ixb-dp.*|xyz-016a-ixb-op.*)", &literalPrefixStringMatcher{prefix: "XYZ-016A-IXB-", prefixCaseSensitive: false, right: orStringMatcher{&literalPrefixStringMatcher{prefix: "DP", prefixCaseSensitive: false, right: anyStringWithoutNewlineMatcher{}}, &literalPrefixStringMatcher{prefix: "OP", prefixCaseSensitive: false, right: anyStringWithoutNewlineMatcher{}}}}}, + {"(?i:(xyz-016a-ixb-dp.*|xyz-016a-ixb-op.*))", &literalPrefixInsensitiveStringMatcher{prefix: "XYZ-016A-IXB-", right: orStringMatcher{&literalPrefixInsensitiveStringMatcher{prefix: "DP", right: anyStringWithoutNewlineMatcher{}}, &literalPrefixInsensitiveStringMatcher{prefix: "OP", right: anyStringWithoutNewlineMatcher{}}}}}, + {"(?i)(xyz-016a-ixb-dp.*|xyz-016a-ixb-op.*)", &literalPrefixInsensitiveStringMatcher{prefix: "XYZ-016A-IXB-", right: orStringMatcher{&literalPrefixInsensitiveStringMatcher{prefix: "DP", right: anyStringWithoutNewlineMatcher{}}, &literalPrefixInsensitiveStringMatcher{prefix: "OP", right: anyStringWithoutNewlineMatcher{}}}}}, // Concatenated variable length selectors are not supported. {"foo.*.*", nil}, {"foo.+.+", nil}, @@ -408,9 +408,9 @@ func TestStringMatcherFromRegexp(t *testing.T) { {"aaa.?.?", nil}, {"aaa.?.*", nil}, // Regexps with ".?". - {"ext.?|xfs", orStringMatcher{&literalPrefixStringMatcher{prefix: "ext", prefixCaseSensitive: true, right: &zeroOrOneCharacterStringMatcher{matchNL: false}}, &equalStringMatcher{s: "xfs", caseSensitive: true}}}, - {"(?s)(ext.?|xfs)", orStringMatcher{&literalPrefixStringMatcher{prefix: "ext", prefixCaseSensitive: true, right: &zeroOrOneCharacterStringMatcher{matchNL: true}}, &equalStringMatcher{s: "xfs", caseSensitive: true}}}, - {"foo.?", &literalPrefixStringMatcher{prefix: "foo", prefixCaseSensitive: true, right: &zeroOrOneCharacterStringMatcher{matchNL: false}}}, + {"ext.?|xfs", orStringMatcher{&literalPrefixSensitiveStringMatcher{prefix: "ext", right: &zeroOrOneCharacterStringMatcher{matchNL: false}}, &equalStringMatcher{s: "xfs", caseSensitive: true}}}, + {"(?s)(ext.?|xfs)", orStringMatcher{&literalPrefixSensitiveStringMatcher{prefix: "ext", right: &zeroOrOneCharacterStringMatcher{matchNL: true}}, &equalStringMatcher{s: "xfs", caseSensitive: true}}}, + {"foo.?", &literalPrefixSensitiveStringMatcher{prefix: "foo", right: &zeroOrOneCharacterStringMatcher{matchNL: false}}}, {"f.?o", nil}, } { c := c @@ -480,10 +480,13 @@ func TestStringMatcherFromRegexp_LiteralPrefix(t *testing.T) { re := regexp.MustCompile("^" + c.pattern + "$") - // Pre-condition check: ensure it contains literalPrefixStringMatcher. + // Pre-condition check: ensure it contains literalPrefixSensitiveStringMatcher or literalPrefixInsensitiveStringMatcher. numPrefixMatchers := 0 visitStringMatcher(matcher, func(matcher StringMatcher) { - if _, ok := matcher.(*literalPrefixStringMatcher); ok { + if _, ok := matcher.(*literalPrefixSensitiveStringMatcher); ok { + numPrefixMatchers++ + } + if _, ok := matcher.(*literalPrefixInsensitiveStringMatcher); ok { numPrefixMatchers++ } }) @@ -1074,20 +1077,14 @@ func BenchmarkZeroOrOneCharacterStringMatcher(b *testing.B) { } } -func TestLiteralPrefixStringMatcher(t *testing.T) { - m := &literalPrefixStringMatcher{prefix: "mar", prefixCaseSensitive: true, right: &emptyStringMatcher{}} +func TestLiteralPrefixSensitiveStringMatcher(t *testing.T) { + m := &literalPrefixSensitiveStringMatcher{prefix: "mar", right: &emptyStringMatcher{}} require.True(t, m.Matches("mar")) require.False(t, m.Matches("marco")) require.False(t, m.Matches("ma")) require.False(t, m.Matches("mAr")) - m = &literalPrefixStringMatcher{prefix: "mar", prefixCaseSensitive: false, right: &emptyStringMatcher{}} - require.True(t, m.Matches("mar")) - require.False(t, m.Matches("marco")) - require.False(t, m.Matches("ma")) - require.True(t, m.Matches("mAr")) - - m = &literalPrefixStringMatcher{prefix: "mar", prefixCaseSensitive: true, right: &equalStringMatcher{s: "co", caseSensitive: false}} + m = &literalPrefixSensitiveStringMatcher{prefix: "mar", right: &equalStringMatcher{s: "co", caseSensitive: false}} require.True(t, m.Matches("marco")) require.True(t, m.Matches("marCO")) require.False(t, m.Matches("MARco")) @@ -1095,6 +1092,14 @@ func TestLiteralPrefixStringMatcher(t *testing.T) { require.False(t, m.Matches("marcopracucci")) } +func TestLiteralPrefixInsensitiveStringMatcher(t *testing.T) { + m := &literalPrefixInsensitiveStringMatcher{prefix: "mar", right: &emptyStringMatcher{}} + require.True(t, m.Matches("mar")) + require.False(t, m.Matches("marco")) + require.False(t, m.Matches("ma")) + require.True(t, m.Matches("mAr")) +} + func TestLiteralSuffixStringMatcher(t *testing.T) { m := &literalSuffixStringMatcher{left: &emptyStringMatcher{}, suffix: "co", suffixCaseSensitive: true} require.True(t, m.Matches("co")) @@ -1184,7 +1189,10 @@ func visitStringMatcher(matcher StringMatcher, callback func(matcher StringMatch visitStringMatcher(casted.right, callback) } - case *literalPrefixStringMatcher: + case *literalPrefixSensitiveStringMatcher: + visitStringMatcher(casted.right, callback) + + case *literalPrefixInsensitiveStringMatcher: visitStringMatcher(casted.right, callback) case *literalSuffixStringMatcher: