mirror of https://github.com/prometheus/prometheus
Merge pull request #9306 from dgl/zecke/fasta
promtool: Speed up checking for duplicate rules Running promtool check config on large rule files is rather slow. Improve this by adding a testcase, benchmark and changing the algorithm used to find duplicates.pull/9300/head
commit
c244fe27a3
|
@ -24,7 +24,7 @@ import (
|
|||
"net/url"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"reflect"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
@ -48,6 +48,7 @@ import (
|
|||
_ "github.com/prometheus/prometheus/discovery/install" // Register service discovery implementations.
|
||||
"github.com/prometheus/prometheus/discovery/kubernetes"
|
||||
"github.com/prometheus/prometheus/discovery/targetgroup"
|
||||
"github.com/prometheus/prometheus/pkg/labels"
|
||||
"github.com/prometheus/prometheus/pkg/rulefmt"
|
||||
"github.com/prometheus/prometheus/promql"
|
||||
)
|
||||
|
@ -471,8 +472,8 @@ func checkRules(filename string) (int, []error) {
|
|||
fmt.Printf("%d duplicate rule(s) found.\n", len(dRules))
|
||||
for _, n := range dRules {
|
||||
fmt.Printf("Metric: %s\nLabel(s):\n", n.metric)
|
||||
for i, l := range n.label {
|
||||
fmt.Printf("\t%s: %s\n", i, l)
|
||||
for _, l := range n.label {
|
||||
fmt.Printf("\t%s: %s\n", l.Name, l.Value)
|
||||
}
|
||||
}
|
||||
fmt.Println("Might cause inconsistency while recording expressions.")
|
||||
|
@ -483,29 +484,52 @@ func checkRules(filename string) (int, []error) {
|
|||
|
||||
type compareRuleType struct {
|
||||
metric string
|
||||
label map[string]string
|
||||
label labels.Labels
|
||||
}
|
||||
|
||||
type compareRuleTypes []compareRuleType
|
||||
|
||||
func (c compareRuleTypes) Len() int { return len(c) }
|
||||
func (c compareRuleTypes) Swap(i, j int) { c[i], c[j] = c[j], c[i] }
|
||||
func (c compareRuleTypes) Less(i, j int) bool { return compare(c[i], c[j]) < 0 }
|
||||
|
||||
func compare(a, b compareRuleType) int {
|
||||
if res := strings.Compare(a.metric, b.metric); res != 0 {
|
||||
return res
|
||||
}
|
||||
|
||||
return labels.Compare(a.label, b.label)
|
||||
}
|
||||
|
||||
func checkDuplicates(groups []rulefmt.RuleGroup) []compareRuleType {
|
||||
var duplicates []compareRuleType
|
||||
var rules compareRuleTypes
|
||||
|
||||
for _, group := range groups {
|
||||
for index, rule := range group.Rules {
|
||||
inst := compareRuleType{
|
||||
|
||||
for _, rule := range group.Rules {
|
||||
rules = append(rules, compareRuleType{
|
||||
metric: ruleMetric(rule),
|
||||
label: rule.Labels,
|
||||
}
|
||||
for i := 0; i < index; i++ {
|
||||
t := compareRuleType{
|
||||
metric: ruleMetric(group.Rules[i]),
|
||||
label: group.Rules[i].Labels,
|
||||
}
|
||||
if reflect.DeepEqual(t, inst) {
|
||||
duplicates = append(duplicates, t)
|
||||
}
|
||||
}
|
||||
label: labels.FromMap(rule.Labels),
|
||||
})
|
||||
}
|
||||
}
|
||||
if len(rules) < 2 {
|
||||
return duplicates
|
||||
}
|
||||
sort.Sort(rules)
|
||||
|
||||
last := rules[0]
|
||||
for i := 1; i < len(rules); i++ {
|
||||
if compare(last, rules[i]) == 0 {
|
||||
// Don't add a duplicated rule multiple times.
|
||||
if len(duplicates) == 0 || compare(last, duplicates[len(duplicates)-1]) != 0 {
|
||||
duplicates = append(duplicates, rules[i])
|
||||
}
|
||||
}
|
||||
last = rules[i]
|
||||
}
|
||||
|
||||
return duplicates
|
||||
}
|
||||
|
||||
|
|
|
@ -21,6 +21,8 @@ import (
|
|||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/prometheus/prometheus/pkg/labels"
|
||||
"github.com/prometheus/prometheus/pkg/rulefmt"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
|
@ -118,3 +120,46 @@ func TestCheckSDFile(t *testing.T) {
|
|||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestCheckDuplicates(t *testing.T) {
|
||||
cases := []struct {
|
||||
name string
|
||||
ruleFile string
|
||||
expectedDups []compareRuleType
|
||||
}{
|
||||
{
|
||||
name: "no duplicates",
|
||||
ruleFile: "./testdata/rules.yml",
|
||||
},
|
||||
{
|
||||
name: "duplicate in other group",
|
||||
ruleFile: "./testdata/rules_duplicates.yml",
|
||||
expectedDups: []compareRuleType{
|
||||
{
|
||||
metric: "job:test:count_over_time1m",
|
||||
label: labels.New(),
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range cases {
|
||||
c := test
|
||||
t.Run(c.name, func(t *testing.T) {
|
||||
rgs, err := rulefmt.ParseFile(c.ruleFile)
|
||||
require.Empty(t, err)
|
||||
dups := checkDuplicates(rgs.Groups)
|
||||
require.Equal(t, c.expectedDups, dups)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkCheckDuplicates(b *testing.B) {
|
||||
rgs, err := rulefmt.ParseFile("./testdata/rules_large.yml")
|
||||
require.Empty(b, err)
|
||||
b.ResetTimer()
|
||||
|
||||
for i := 0; i < b.N; i++ {
|
||||
checkDuplicates(rgs.Groups)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,24 @@
|
|||
# This is a rules file with duplicate expressions
|
||||
|
||||
groups:
|
||||
- name: base
|
||||
rules:
|
||||
- record: job:test:count_over_time1m
|
||||
expr: sum without(instance) (count_over_time(test[1m]))
|
||||
|
||||
# A recording rule that doesn't depend on input series.
|
||||
- record: fixed_data
|
||||
expr: 1
|
||||
|
||||
# Subquery with default resolution test.
|
||||
- record: suquery_interval_test
|
||||
expr: count_over_time(up[5m:])
|
||||
|
||||
# Duplicating
|
||||
- record: job:test:count_over_time1m
|
||||
expr: sum without(instance) (count_over_time(test[1m]))
|
||||
|
||||
- name: duplicate
|
||||
rules:
|
||||
- record: job:test:count_over_time1m
|
||||
expr: sum without(instance) (count_over_time(test[1m]))
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue