From 20e3c295aea5992a4d8ea8e7e45ed9c9d6639813 Mon Sep 17 00:00:00 2001 From: "Xiaochao Dong (@damnever)" Date: Fri, 24 Mar 2023 17:18:24 +0800 Subject: [PATCH 001/339] Optimize constant label pair adding with relabel.Replace Signed-off-by: Xiaochao Dong (@damnever) --- model/relabel/relabel.go | 15 +++++++++ model/relabel/relabel_test.go | 60 +++++++++++++++++++++++++++++++++++ 2 files changed, 75 insertions(+) diff --git a/model/relabel/relabel.go b/model/relabel/relabel.go index d29c3d07a..83b1a9147 100644 --- a/model/relabel/relabel.go +++ b/model/relabel/relabel.go @@ -267,6 +267,17 @@ func relabel(cfg *Config, lb *labels.Builder) (keep bool) { return false } case Replace: + // Fast path to add or delete label pair. + if val == "" && cfg.Regex == DefaultRelabelConfig.Regex && + !varInRegexTemplate(cfg.TargetLabel) && !varInRegexTemplate(cfg.Replacement) { + if !model.LabelName(cfg.TargetLabel).IsValid() || cfg.Replacement == "" { + lb.Del(cfg.TargetLabel) + } else { + lb.Set(cfg.TargetLabel, cfg.Replacement) + } + break + } + indexes := cfg.Regex.FindStringSubmatchIndex(val) // If there is no match no replacement must take place. 
if indexes == nil { @@ -316,3 +327,7 @@ func relabel(cfg *Config, lb *labels.Builder) (keep bool) { return true } + +func varInRegexTemplate(template string) bool { + return strings.Contains(template, "$") +} diff --git a/model/relabel/relabel_test.go b/model/relabel/relabel_test.go index 517b9b822..d3815afe6 100644 --- a/model/relabel/relabel_test.go +++ b/model/relabel/relabel_test.go @@ -15,6 +15,7 @@ package relabel import ( "fmt" + "sort" "testing" "github.com/prometheus/common/model" @@ -850,3 +851,62 @@ func BenchmarkRelabel(b *testing.B) { }) } } + +func BenchmarkRelabel_ReplaceAddLabel(b *testing.B) { + cfgs := []*Config{} + for k, v := range map[string]string{ + "wwwwww": "wwwwww", + "xxxxxxxxx": "xxxxxxxxx", + "yyyyyyyyyyyy": "yyyyyyyyyyyy", + "${0}": "dropped", + "dropped": "${0}", + } { + cfgs = append(cfgs, &Config{ + Action: DefaultRelabelConfig.Action, + Separator: DefaultRelabelConfig.Separator, + Regex: DefaultRelabelConfig.Regex, + TargetLabel: k, + Replacement: v, + }) + } + expectLset := labels.Labels{ + labels.Label{Name: "abcdefg01", Value: "hijklmn1"}, + labels.Label{Name: "abcdefg02", Value: "hijklmn2"}, + labels.Label{Name: "abcdefg03", Value: "hijklmn3"}, + labels.Label{Name: "abcdefg04", Value: "hijklmn4"}, + labels.Label{Name: "abcdefg05", Value: "hijklmn5"}, + labels.Label{Name: "abcdefg06", Value: "hijklmn6"}, + labels.Label{Name: "abcdefg07", Value: "hijklmn7"}, + labels.Label{Name: "abcdefg08", Value: "hijklmn8"}, + labels.Label{Name: "abcdefg09", Value: "hijklmn9"}, + labels.Label{Name: "abcdefg10", Value: "hijklmn10"}, + labels.Label{Name: "abcdefg11", Value: "hijklmn11"}, + labels.Label{Name: "abcdefg12", Value: "hijklmn12"}, + labels.Label{Name: "abcdefg13", Value: "hijklmn13"}, + labels.Label{Name: "wwwwww", Value: "wwwwww"}, + labels.Label{Name: "xxxxxxxxx", Value: "xxxxxxxxx"}, + labels.Label{Name: "yyyyyyyyyyyy", Value: "yyyyyyyyyyyy"}, + } + sort.Sort(expectLset) + + for i := 0; i < b.N; i++ { + lset := labels.Labels{ + 
labels.Label{Name: "abcdefg01", Value: "hijklmn1"}, + labels.Label{Name: "abcdefg02", Value: "hijklmn2"}, + labels.Label{Name: "abcdefg03", Value: "hijklmn3"}, + labels.Label{Name: "abcdefg04", Value: "hijklmn4"}, + labels.Label{Name: "abcdefg05", Value: "hijklmn5"}, + labels.Label{Name: "abcdefg06", Value: "hijklmn6"}, + labels.Label{Name: "abcdefg07", Value: "hijklmn7"}, + labels.Label{Name: "abcdefg08", Value: "hijklmn8"}, + labels.Label{Name: "abcdefg09", Value: "hijklmn9"}, + labels.Label{Name: "abcdefg10", Value: "hijklmn10"}, + labels.Label{Name: "abcdefg11", Value: "hijklmn11"}, + labels.Label{Name: "abcdefg12", Value: "hijklmn12"}, + labels.Label{Name: "abcdefg13", Value: "hijklmn13"}, + } + actual, _ := Process(lset, cfgs...) + var _ = actual + // require.Equal(b, actual, expectLset) + } +} From 2d0d3333712e596ec2f1f4e4596fdb7ae0466e6f Mon Sep 17 00:00:00 2001 From: "Xiaochao Dong (@damnever)" Date: Sat, 25 Mar 2023 10:42:20 +0800 Subject: [PATCH 002/339] Fix lint issue Signed-off-by: Xiaochao Dong (@damnever) --- model/relabel/relabel_test.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/model/relabel/relabel_test.go b/model/relabel/relabel_test.go index d3815afe6..bd51b9771 100644 --- a/model/relabel/relabel_test.go +++ b/model/relabel/relabel_test.go @@ -905,8 +905,7 @@ func BenchmarkRelabel_ReplaceAddLabel(b *testing.B) { labels.Label{Name: "abcdefg12", Value: "hijklmn12"}, labels.Label{Name: "abcdefg13", Value: "hijklmn13"}, } - actual, _ := Process(lset, cfgs...) - var _ = actual + _, _ = Process(lset, cfgs...) 
// require.Equal(b, actual, expectLset) } } From 1601b2a79e0116f9d3a3e30915a6899e73c96feb Mon Sep 17 00:00:00 2001 From: "Xiaochao Dong (@damnever)" Date: Wed, 29 Mar 2023 11:20:59 +0800 Subject: [PATCH 003/339] check new line in target Signed-off-by: Xiaochao Dong (@damnever) --- model/relabel/relabel.go | 5 +++++ model/relabel/relabel_test.go | 6 ++++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/model/relabel/relabel.go b/model/relabel/relabel.go index 83b1a9147..7607138b5 100644 --- a/model/relabel/relabel.go +++ b/model/relabel/relabel.go @@ -269,6 +269,7 @@ func relabel(cfg *Config, lb *labels.Builder) (keep bool) { case Replace: // Fast path to add or delete label pair. if val == "" && cfg.Regex == DefaultRelabelConfig.Regex && + !containsNewLine(cfg.TargetLabel) && !varInRegexTemplate(cfg.TargetLabel) && !varInRegexTemplate(cfg.Replacement) { if !model.LabelName(cfg.TargetLabel).IsValid() || cfg.Replacement == "" { lb.Del(cfg.TargetLabel) @@ -331,3 +332,7 @@ func relabel(cfg *Config, lb *labels.Builder) (keep bool) { func varInRegexTemplate(template string) bool { return strings.Contains(template, "$") } + +func containsNewLine(s string) bool { + return strings.Contains(s, "\r\n") || strings.Contains(s, "\n") +} diff --git a/model/relabel/relabel_test.go b/model/relabel/relabel_test.go index bd51b9771..86844cf28 100644 --- a/model/relabel/relabel_test.go +++ b/model/relabel/relabel_test.go @@ -858,6 +858,8 @@ func BenchmarkRelabel_ReplaceAddLabel(b *testing.B) { "wwwwww": "wwwwww", "xxxxxxxxx": "xxxxxxxxx", "yyyyyyyyyyyy": "yyyyyyyyyyyy", + "new\nline1": "dropped", + "new\r\nline2": "dropped", "${0}": "dropped", "dropped": "${0}", } { @@ -905,7 +907,7 @@ func BenchmarkRelabel_ReplaceAddLabel(b *testing.B) { labels.Label{Name: "abcdefg12", Value: "hijklmn12"}, labels.Label{Name: "abcdefg13", Value: "hijklmn13"}, } - _, _ = Process(lset, cfgs...) - // require.Equal(b, actual, expectLset) + actual, _ := Process(lset, cfgs...) 
+ require.Equal(b, actual, expectLset) } } From b3b5c0022e5c05088733519f104db2c8b5c54974 Mon Sep 17 00:00:00 2001 From: "Xiaochao Dong (@damnever)" Date: Mon, 25 Dec 2023 15:14:25 +0800 Subject: [PATCH 004/339] Simplify the logic as per the comments Signed-off-by: Xiaochao Dong (@damnever) --- model/relabel/relabel.go | 11 +---- model/relabel/relabel_test.go | 89 +++++++++++------------------------ 2 files changed, 29 insertions(+), 71 deletions(-) diff --git a/model/relabel/relabel.go b/model/relabel/relabel.go index 7607138b5..d169ed2f2 100644 --- a/model/relabel/relabel.go +++ b/model/relabel/relabel.go @@ -269,13 +269,8 @@ func relabel(cfg *Config, lb *labels.Builder) (keep bool) { case Replace: // Fast path to add or delete label pair. if val == "" && cfg.Regex == DefaultRelabelConfig.Regex && - !containsNewLine(cfg.TargetLabel) && !varInRegexTemplate(cfg.TargetLabel) && !varInRegexTemplate(cfg.Replacement) { - if !model.LabelName(cfg.TargetLabel).IsValid() || cfg.Replacement == "" { - lb.Del(cfg.TargetLabel) - } else { - lb.Set(cfg.TargetLabel, cfg.Replacement) - } + lb.Set(cfg.TargetLabel, cfg.Replacement) break } @@ -332,7 +327,3 @@ func relabel(cfg *Config, lb *labels.Builder) (keep bool) { func varInRegexTemplate(template string) bool { return strings.Contains(template, "$") } - -func containsNewLine(s string) bool { - return strings.Contains(s, "\r\n") || strings.Contains(s, "\n") -} diff --git a/model/relabel/relabel_test.go b/model/relabel/relabel_test.go index 86844cf28..7652798f5 100644 --- a/model/relabel/relabel_test.go +++ b/model/relabel/relabel_test.go @@ -15,7 +15,6 @@ package relabel import ( "fmt" - "sort" "testing" "github.com/prometheus/common/model" @@ -838,6 +837,34 @@ func BenchmarkRelabel(b *testing.B) { "__scrape_timeout__", "10s", "job", "kubernetes-pods"), }, + { + name: "static label pair", + config: ` + - replacement: wwwwww + target_label: wwwwww + - replacement: yyyyyyyyyyyy + target_label: xxxxxxxxx + - replacement: xxxxxxxxx + 
target_label: yyyyyyyyyyyy + - source_labels: ["something"] + target_label: with_source_labels + replacement: value + - replacement: dropped + target_label: ${0} + - replacement: ${0} + target_label: dropped`, + lbls: labels.FromStrings( + "abcdefg01", "hijklmn1", + "abcdefg02", "hijklmn2", + "abcdefg03", "hijklmn3", + "abcdefg04", "hijklmn4", + "abcdefg05", "hijklmn5", + "abcdefg06", "hijklmn6", + "abcdefg07", "hijklmn7", + "abcdefg08", "hijklmn8", + "job", "foo", + ), + }, } for i := range tests { err := yaml.UnmarshalStrict([]byte(tests[i].config), &tests[i].cfgs) @@ -851,63 +878,3 @@ func BenchmarkRelabel(b *testing.B) { }) } } - -func BenchmarkRelabel_ReplaceAddLabel(b *testing.B) { - cfgs := []*Config{} - for k, v := range map[string]string{ - "wwwwww": "wwwwww", - "xxxxxxxxx": "xxxxxxxxx", - "yyyyyyyyyyyy": "yyyyyyyyyyyy", - "new\nline1": "dropped", - "new\r\nline2": "dropped", - "${0}": "dropped", - "dropped": "${0}", - } { - cfgs = append(cfgs, &Config{ - Action: DefaultRelabelConfig.Action, - Separator: DefaultRelabelConfig.Separator, - Regex: DefaultRelabelConfig.Regex, - TargetLabel: k, - Replacement: v, - }) - } - expectLset := labels.Labels{ - labels.Label{Name: "abcdefg01", Value: "hijklmn1"}, - labels.Label{Name: "abcdefg02", Value: "hijklmn2"}, - labels.Label{Name: "abcdefg03", Value: "hijklmn3"}, - labels.Label{Name: "abcdefg04", Value: "hijklmn4"}, - labels.Label{Name: "abcdefg05", Value: "hijklmn5"}, - labels.Label{Name: "abcdefg06", Value: "hijklmn6"}, - labels.Label{Name: "abcdefg07", Value: "hijklmn7"}, - labels.Label{Name: "abcdefg08", Value: "hijklmn8"}, - labels.Label{Name: "abcdefg09", Value: "hijklmn9"}, - labels.Label{Name: "abcdefg10", Value: "hijklmn10"}, - labels.Label{Name: "abcdefg11", Value: "hijklmn11"}, - labels.Label{Name: "abcdefg12", Value: "hijklmn12"}, - labels.Label{Name: "abcdefg13", Value: "hijklmn13"}, - labels.Label{Name: "wwwwww", Value: "wwwwww"}, - labels.Label{Name: "xxxxxxxxx", Value: "xxxxxxxxx"}, - 
labels.Label{Name: "yyyyyyyyyyyy", Value: "yyyyyyyyyyyy"}, - } - sort.Sort(expectLset) - - for i := 0; i < b.N; i++ { - lset := labels.Labels{ - labels.Label{Name: "abcdefg01", Value: "hijklmn1"}, - labels.Label{Name: "abcdefg02", Value: "hijklmn2"}, - labels.Label{Name: "abcdefg03", Value: "hijklmn3"}, - labels.Label{Name: "abcdefg04", Value: "hijklmn4"}, - labels.Label{Name: "abcdefg05", Value: "hijklmn5"}, - labels.Label{Name: "abcdefg06", Value: "hijklmn6"}, - labels.Label{Name: "abcdefg07", Value: "hijklmn7"}, - labels.Label{Name: "abcdefg08", Value: "hijklmn8"}, - labels.Label{Name: "abcdefg09", Value: "hijklmn9"}, - labels.Label{Name: "abcdefg10", Value: "hijklmn10"}, - labels.Label{Name: "abcdefg11", Value: "hijklmn11"}, - labels.Label{Name: "abcdefg12", Value: "hijklmn12"}, - labels.Label{Name: "abcdefg13", Value: "hijklmn13"}, - } - actual, _ := Process(lset, cfgs...) - require.Equal(b, actual, expectLset) - } -} From aa3e58358b9ea5316dcdfdf69b607f56966efba2 Mon Sep 17 00:00:00 2001 From: Daniel Kimsey Date: Tue, 15 Nov 2022 15:30:12 -0600 Subject: [PATCH 005/339] consul: Add support for catalog list services filter This adds support for Consul's Catalog [List Services][^1] API's `filter` parameter added in 1.14.x. This parameter grants the operator more flexibility to do server-side filtering of the Catalog, before Prometheus subscribes for updates. Operators can use this to improve both the performance of Prometheus's Consul SD and reduce the impact of enumerating large catalogs. 
[^1]: https://developer.hashicorp.com/consul/api-docs/v1.14.x/catalog Signed-off-by: Daniel Kimsey --- discovery/consul/consul.go | 10 ++++++++-- discovery/consul/consul_test.go | 23 +++++++++++++++++++++++ docs/configuration/configuration.md | 9 ++++++--- 3 files changed, 37 insertions(+), 5 deletions(-) diff --git a/discovery/consul/consul.go b/discovery/consul/consul.go index 40eed7697..d75ad6a1b 100644 --- a/discovery/consul/consul.go +++ b/discovery/consul/consul.go @@ -113,8 +113,11 @@ type SDConfig struct { Services []string `yaml:"services,omitempty"` // A list of tags used to filter instances inside a service. Services must contain all tags in the list. ServiceTags []string `yaml:"tags,omitempty"` - // Desired node metadata. + // Desired node metadata. As of Consul 1.14, consider `filter` instead. NodeMeta map[string]string `yaml:"node_meta,omitempty"` + // Consul filter string + // See https://www.consul.io/api-docs/catalog#filtering-1, for syntax + Filter string `yaml:"filter,omitempty"` HTTPClientConfig config.HTTPClientConfig `yaml:",inline"` } @@ -174,6 +177,7 @@ type Discovery struct { watchedServices []string // Set of services which will be discovered. watchedTags []string // Tags used to filter instances of a service. watchedNodeMeta map[string]string + watchedFilter string allowStale bool refreshInterval time.Duration finalizer func() @@ -218,6 +222,7 @@ func NewDiscovery(conf *SDConfig, logger log.Logger, metrics discovery.Discovere watchedServices: conf.Services, watchedTags: conf.ServiceTags, watchedNodeMeta: conf.NodeMeta, + watchedFilter: conf.Filter, allowStale: conf.AllowStale, refreshInterval: time.Duration(conf.RefreshInterval), clientDatacenter: conf.Datacenter, @@ -361,13 +366,14 @@ func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { // entire list of services. 
func (d *Discovery) watchServices(ctx context.Context, ch chan<- []*targetgroup.Group, lastIndex *uint64, services map[string]func()) { catalog := d.client.Catalog() - level.Debug(d.logger).Log("msg", "Watching services", "tags", strings.Join(d.watchedTags, ",")) + level.Debug(d.logger).Log("msg", "Watching services", "tags", strings.Join(d.watchedTags, ","), "filter", d.watchedFilter) opts := &consul.QueryOptions{ WaitIndex: *lastIndex, WaitTime: watchTimeout, AllowStale: d.allowStale, NodeMeta: d.watchedNodeMeta, + Filter: d.watchedFilter, } t0 := time.Now() srvs, meta, err := catalog.Services(opts.WithContext(ctx)) diff --git a/discovery/consul/consul_test.go b/discovery/consul/consul_test.go index e3bc7938f..641895641 100644 --- a/discovery/consul/consul_test.go +++ b/discovery/consul/consul_test.go @@ -252,6 +252,8 @@ func newServer(t *testing.T) (*httptest.Server, *SDConfig) { case "/v1/catalog/services?index=1&wait=120000ms": time.Sleep(5 * time.Second) response = ServicesTestAnswer + case "/v1/catalog/services?filter=NodeMeta.rack_name+%3D%3D+%222304%22&index=1&wait=120000ms": + response = ServicesTestAnswer default: t.Errorf("Unhandled consul call: %s", r.URL) } @@ -369,6 +371,27 @@ func TestAllOptions(t *testing.T) { <-ch } +// Watch the test service with a specific tag and node-meta via Filter parameter. 
+func TestFilterOption(t *testing.T) { + stub, config := newServer(t) + defer stub.Close() + + config.Services = []string{"test"} + config.Filter = `NodeMeta.rack_name == "2304"` + config.Token = "fake-token" + + d := newDiscovery(t, config) + + ctx, cancel := context.WithCancel(context.Background()) + ch := make(chan []*targetgroup.Group) + go func() { + d.Run(ctx, ch) + close(ch) + }() + checkOneTarget(t, <-ch) + cancel() +} + func TestGetDatacenterShouldReturnError(t *testing.T) { for _, tc := range []struct { handler func(http.ResponseWriter, *http.Request) diff --git a/docs/configuration/configuration.md b/docs/configuration/configuration.md index d751a4084..24b913f70 100644 --- a/docs/configuration/configuration.md +++ b/docs/configuration/configuration.md @@ -716,14 +716,17 @@ The following meta labels are available on targets during [relabeling](#relabel_ services: [ - <string> ] -# See https://www.consul.io/api/catalog.html#list-nodes-for-service to know more -# about the possible filters that can be used. +# A Consul Filter expression used to filter the catalog results +# See https://www.consul.io/api-docs/catalog#list-services to know more +# about the filter expressions that can be used. +[ filter: <string> ] +# The `tags` and `node_meta` fields are deprecated in Consul in favor of `filter`. # An optional list of tags used to filter nodes for a given service. Services must contain all tags in the list. tags: [ - <string> ] -# Node metadata key/value pairs to filter nodes for a given service. +# Node metadata key/value pairs to filter nodes for a given service. As of Consul 1.14, consider `filter` instead. [ node_meta: [ <string>: <string> ...
] ] From f253d36361ef67228ecf3bfc3d8b359e05d35606 Mon Sep 17 00:00:00 2001 From: Seena Fallah Date: Tue, 18 Oct 2022 20:43:32 +0200 Subject: [PATCH 006/339] rule: allow merging labels from group level Support merging labels from groups to rule labels Signed-off-by: Seena Fallah --- cmd/promtool/main.go | 21 ++++++++-------- docs/configuration/alerting_rules.md | 2 ++ docs/configuration/recording_rules.md | 5 ++++ model/rulefmt/rulefmt.go | 25 +++++++++++++++---- model/rulefmt/rulefmt_test.go | 17 +++++++++++++ rules/manager.go | 19 ++++++++++++-- rules/manager_test.go | 25 ++++++++++++++++--- .../src/pages/alerts/AlertContents.tsx | 3 +++ .../src/pages/rules/RulesContent.tsx | 5 ++-- 9 files changed, 99 insertions(+), 23 deletions(-) diff --git a/cmd/promtool/main.go b/cmd/promtool/main.go index e1d275e97..93527dcdb 100644 --- a/cmd/promtool/main.go +++ b/cmd/promtool/main.go @@ -58,6 +58,7 @@ import ( _ "github.com/prometheus/prometheus/plugins" // Register plugins. "github.com/prometheus/prometheus/promql/parser" "github.com/prometheus/prometheus/promql/promqltest" + "github.com/prometheus/prometheus/rules" "github.com/prometheus/prometheus/scrape" "github.com/prometheus/prometheus/util/documentcli" ) @@ -889,30 +890,30 @@ func compare(a, b compareRuleType) int { func checkDuplicates(groups []rulefmt.RuleGroup) []compareRuleType { var duplicates []compareRuleType - var rules compareRuleTypes + var cRules compareRuleTypes for _, group := range groups { for _, rule := range group.Rules { - rules = append(rules, compareRuleType{ + cRules = append(cRules, compareRuleType{ metric: ruleMetric(rule), - label: labels.FromMap(rule.Labels), + label: rules.FromMaps(group.Labels, rule.Labels), }) } } - if len(rules) < 2 { + if len(cRules) < 2 { return duplicates } - sort.Sort(rules) + sort.Sort(cRules) - last := rules[0] - for i := 1; i < len(rules); i++ { - if compare(last, rules[i]) == 0 { + last := cRules[0] + for i := 1; i < len(cRules); i++ { + if compare(last, 
cRules[i]) == 0 { // Don't add a duplicated rule multiple times. if len(duplicates) == 0 || compare(last, duplicates[len(duplicates)-1]) != 0 { - duplicates = append(duplicates, rules[i]) + duplicates = append(duplicates, cRules[i]) } } - last = rules[i] + last = cRules[i] } return duplicates diff --git a/docs/configuration/alerting_rules.md b/docs/configuration/alerting_rules.md index 3c1ec84f0..4d7c75e49 100644 --- a/docs/configuration/alerting_rules.md +++ b/docs/configuration/alerting_rules.md @@ -21,6 +21,8 @@ An example rules file with an alert would be: ```yaml groups: - name: example + labels: + team: myteam rules: - alert: HighRequestLatency expr: job:request_latency_seconds:mean5m{job="myjob"} > 0.5 diff --git a/docs/configuration/recording_rules.md b/docs/configuration/recording_rules.md index 9aa226bbc..9a8e7a70c 100644 --- a/docs/configuration/recording_rules.md +++ b/docs/configuration/recording_rules.md @@ -89,6 +89,11 @@ name: <string> # Offset the rule evaluation timestamp of this particular group by the specified duration into the past. [ query_offset: <duration> | default = global.rule_query_offset ] +# Labels to add or overwrite before storing the result for its rules. +# Labels defined in <rule> will override the key if it has a collision. +labels: + [ <labelname>: <labelvalue> ] + rules: [ - <rule> ...
] ``` diff --git a/model/rulefmt/rulefmt.go b/model/rulefmt/rulefmt.go index bfb85ce74..ef6ac17fe 100644 --- a/model/rulefmt/rulefmt.go +++ b/model/rulefmt/rulefmt.go @@ -111,6 +111,20 @@ func (g *RuleGroups) Validate(node ruleGroups) (errs []error) { ) } + for k, v := range g.Labels { + if !model.LabelName(k).IsValid() || k == model.MetricNameLabel { + errs = append( + errs, fmt.Errorf("invalid label name: %s", k), + ) + } + + if !model.LabelValue(v).IsValid() { + errs = append( + errs, fmt.Errorf("invalid label value: %s", v), + ) + } + } + set[g.Name] = struct{}{} for i, r := range g.Rules { @@ -136,11 +150,12 @@ func (g *RuleGroups) Validate(node ruleGroups) (errs []error) { // RuleGroup is a list of sequentially evaluated recording and alerting rules. type RuleGroup struct { - Name string `yaml:"name"` - Interval model.Duration `yaml:"interval,omitempty"` - QueryOffset *model.Duration `yaml:"query_offset,omitempty"` - Limit int `yaml:"limit,omitempty"` - Rules []RuleNode `yaml:"rules"` + Name string `yaml:"name"` + Interval model.Duration `yaml:"interval,omitempty"` + QueryOffset *model.Duration `yaml:"query_offset,omitempty"` + Limit int `yaml:"limit,omitempty"` + Rules []RuleNode `yaml:"rules"` + Labels map[string]string `yaml:"labels,omitempty"` } // Rule describes an alerting or recording rule. 
diff --git a/model/rulefmt/rulefmt_test.go b/model/rulefmt/rulefmt_test.go index ef5008f4b..a46fcc898 100644 --- a/model/rulefmt/rulefmt_test.go +++ b/model/rulefmt/rulefmt_test.go @@ -108,6 +108,23 @@ groups: severity: "page" annotations: summary: "Instance {{ $labels.instance }} down" +`, + shouldPass: true, + }, + { + ruleString: ` +groups: +- name: example + labels: + team: myteam + rules: + - alert: InstanceDown + expr: up == 0 + for: 5m + labels: + severity: "page" + annotations: + summary: "Instance {{ $labels.instance }} down" `, shouldPass: true, }, diff --git a/rules/manager.go b/rules/manager.go index 9e5b33fbc..3ac14545b 100644 --- a/rules/manager.go +++ b/rules/manager.go @@ -312,13 +312,15 @@ func (m *Manager) LoadGroups( return nil, []error{fmt.Errorf("%s: %w", fn, err)} } + mLabels := FromMaps(rg.Labels, r.Labels) + if r.Alert.Value != "" { rules = append(rules, NewAlertingRule( r.Alert.Value, expr, time.Duration(r.For), time.Duration(r.KeepFiringFor), - labels.FromMap(r.Labels), + mLabels, labels.FromMap(r.Annotations), externalLabels, externalURL, @@ -330,7 +332,7 @@ func (m *Manager) LoadGroups( rules = append(rules, NewRecordingRule( r.Record.Value, expr, - labels.FromMap(r.Labels), + mLabels, )) } @@ -501,3 +503,16 @@ func (c sequentialRuleEvalController) Allow(_ context.Context, _ *Group, _ Rule) } func (c sequentialRuleEvalController) Done(_ context.Context) {} + +// FromMaps returns new sorted Labels from the given maps, overriding each other in order. +func FromMaps(maps ...map[string]string) labels.Labels { + mLables := make(map[string]string) + + for _, m := range maps { + for k, v := range m { + mLables[k] = v + } + } + + return labels.FromMap(mLables) +} diff --git a/rules/manager_test.go b/rules/manager_test.go index 51239e6c9..27e992ffa 100644 --- a/rules/manager_test.go +++ b/rules/manager_test.go @@ -853,10 +853,11 @@ type ruleGroupsTest struct { // ruleGroupTest forms a testing struct for running tests over rules. 
type ruleGroupTest struct { - Name string `yaml:"name"` - Interval model.Duration `yaml:"interval,omitempty"` - Limit int `yaml:"limit,omitempty"` - Rules []rulefmt.Rule `yaml:"rules"` + Name string `yaml:"name"` + Interval model.Duration `yaml:"interval,omitempty"` + Limit int `yaml:"limit,omitempty"` + Rules []rulefmt.Rule `yaml:"rules"` + Labels map[string]string `yaml:"labels,omitempty"` } func formatRules(r *rulefmt.RuleGroups) ruleGroupsTest { @@ -879,6 +880,7 @@ func formatRules(r *rulefmt.RuleGroups) ruleGroupsTest { Interval: g.Interval, Limit: g.Limit, Rules: rtmp, + Labels: g.Labels, }) } return ruleGroupsTest{ @@ -2154,3 +2156,18 @@ func optsFactory(storage storage.Storage, maxInflight, inflightQueries *atomic.I }, } } + +func TestLabels_FromMaps(t *testing.T) { + mLabels := FromMaps( + map[string]string{"aaa": "101", "bbb": "222"}, + map[string]string{"aaa": "111", "ccc": "333"}, + ) + + expected := labels.New( + labels.Label{Name: "aaa", Value: "111"}, + labels.Label{Name: "bbb", Value: "222"}, + labels.Label{Name: "ccc", Value: "333"}, + ) + + require.Equal(t, expected, mLabels, "unexpected labelset") +} diff --git a/web/ui/react-app/src/pages/alerts/AlertContents.tsx b/web/ui/react-app/src/pages/alerts/AlertContents.tsx index a619f69fc..c5aed9183 100644 --- a/web/ui/react-app/src/pages/alerts/AlertContents.tsx +++ b/web/ui/react-app/src/pages/alerts/AlertContents.tsx @@ -37,6 +37,7 @@ interface RuleGroup { file: string; rules: Rule[]; interval: number; + labels: Record; } const kvSearchRule = new KVSearch({ @@ -93,6 +94,7 @@ const AlertsContent: FC = ({ groups = [], statsCount }) => { name: group.name, interval: group.interval, rules: ruleFilterList.map((value) => value.original), + labels: group.labels, }); } } @@ -114,6 +116,7 @@ const AlertsContent: FC = ({ groups = [], statsCount }) => { name: group.name, interval: group.interval, rules: group.rules.filter((value) => filter[value.state]), + labels: group.labels, }; if (newGroup.rules.length > 0) 
{ result.push(newGroup); diff --git a/web/ui/react-app/src/pages/rules/RulesContent.tsx b/web/ui/react-app/src/pages/rules/RulesContent.tsx index 9bb866d47..cd42a337f 100644 --- a/web/ui/react-app/src/pages/rules/RulesContent.tsx +++ b/web/ui/react-app/src/pages/rules/RulesContent.tsx @@ -17,6 +17,7 @@ interface RuleGroup { rules: Rule[]; evaluationTime: string; lastEvaluation: string; + labels: Record; } export interface RulesMap { @@ -105,10 +106,10 @@ export const RulesContent: FC = ({ response }) => { keep_firing_for: {formatDuration(r.keepFiringFor * 1000)} )} - {r.labels && Object.keys(r.labels).length > 0 && ( + {Object.keys(Object.assign({ ...g.labels }, { ...r.labels })).length > 0 && (
labels: - {Object.entries(r.labels).map(([key, value]) => ( + {Object.entries(Object.assign({ ...g.labels }, { ...r.labels })).map(([key, value]) => (
{key}: {value}
From 33adbe47b14736bec9811eda9709fc4d6365d622 Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Fri, 23 Aug 2024 09:30:22 +0100 Subject: [PATCH 007/339] [PERF] TSDB: Grow postings by doubling Go's built-in append() grows larger slices with factor 1.3, which means we do a lot more allocating and copying for larger postings. Signed-off-by: Bryan Boreham --- tsdb/index/postings.go | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/tsdb/index/postings.go b/tsdb/index/postings.go index bfe74c323..5ed41f769 100644 --- a/tsdb/index/postings.go +++ b/tsdb/index/postings.go @@ -345,13 +345,22 @@ func (p *MemPostings) Add(id storage.SeriesRef, lset labels.Labels) { p.mtx.Unlock() } +func appendWithExponentialGrowth[T any](a []T, v T) []T { + if cap(a) < len(a)+1 { + newList := make([]T, len(a), len(a)*2+1) + copy(newList, a) + a = newList + } + return append(a, v) +} + func (p *MemPostings) addFor(id storage.SeriesRef, l labels.Label) { nm, ok := p.m[l.Name] if !ok { nm = map[string][]storage.SeriesRef{} p.m[l.Name] = nm } - list := append(nm[l.Value], id) + list := appendWithExponentialGrowth(nm[l.Value], id) nm[l.Value] = list if !p.ordered { From e67358d203864018ecbbe8c74c1cb3af3be4b2b4 Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Wed, 4 Sep 2024 15:39:05 +1000 Subject: [PATCH 008/339] histogram: include counter reset hint in test expression output Signed-off-by: Charles Korn --- model/histogram/float_histogram.go | 11 +++++++++++ promql/parser/parse_test.go | 16 ++++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/model/histogram/float_histogram.go b/model/histogram/float_histogram.go index 2a37ea66d..1777afdbf 100644 --- a/model/histogram/float_histogram.go +++ b/model/histogram/float_histogram.go @@ -230,6 +230,17 @@ func (h *FloatHistogram) TestExpression() string { res = append(res, fmt.Sprintf("custom_values:%g", m.CustomValues)) } + switch m.CounterResetHint { + case UnknownCounterReset: + // Unknown is the default, don't 
add anything. + case CounterReset: + res = append(res, fmt.Sprintf("counter_reset_hint:reset")) + case NotCounterReset: + res = append(res, fmt.Sprintf("counter_reset_hint:not_reset")) + case GaugeType: + res = append(res, fmt.Sprintf("counter_reset_hint:gauge")) + } + addBuckets := func(kind, bucketsKey, offsetKey string, buckets []float64, spans []Span) []string { if len(spans) > 1 { panic(fmt.Sprintf("histogram with multiple %s spans not supported", kind)) diff --git a/promql/parser/parse_test.go b/promql/parser/parse_test.go index 37748323c..40e680918 100644 --- a/promql/parser/parse_test.go +++ b/promql/parser/parse_test.go @@ -4385,6 +4385,22 @@ func TestHistogramTestExpression(t *testing.T) { }, expected: `{{offset:-3 buckets:[5.1 0 0 0 0 10 7] n_offset:-1 n_buckets:[4.1 5 0 0 7 8 9]}}`, }, + { + name: "known counter reset hint", + input: histogram.FloatHistogram{ + Schema: 1, + Sum: -0.3, + Count: 3.1, + ZeroCount: 7.1, + ZeroThreshold: 0.05, + PositiveBuckets: []float64{5.1, 10, 7}, + PositiveSpans: []histogram.Span{{Offset: -3, Length: 3}}, + NegativeBuckets: []float64{4.1, 5}, + NegativeSpans: []histogram.Span{{Offset: -5, Length: 2}}, + CounterResetHint: histogram.CounterReset, + }, + expected: `{{schema:1 count:3.1 sum:-0.3 z_bucket:7.1 z_bucket_w:0.05 counter_reset_hint:reset offset:-3 buckets:[5.1 10 7] n_offset:-5 n_buckets:[4.1 5]}}`, + }, } { t.Run(test.name, func(t *testing.T) { expression := test.input.TestExpression() From 90dc1b45dbb448f6ce0ff9349dcd06e76db4f525 Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Wed, 4 Sep 2024 15:47:07 +1000 Subject: [PATCH 009/339] promqltest: use test expression format for histograms in assertion failure messages Signed-off-by: Charles Korn --- promql/promqltest/test.go | 10 ++++++++-- promql/promqltest/test_test.go | 6 +++--- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/promql/promqltest/test.go b/promql/promqltest/test.go index 065e52e33..bab838862 100644 --- a/promql/promqltest/test.go 
+++ b/promql/promqltest/test.go @@ -779,7 +779,7 @@ func (ev *evalCmd) compareResult(result parser.Value) error { } if !compareNativeHistogram(expected.H.Compact(0), actual.H.Compact(0)) { - return fmt.Errorf("expected histogram value at index %v (t=%v) for %s to be %v, but got %v (result has %s)", i, actual.T, ev.metrics[hash], expected.H, actual.H, formatSeriesResult(s)) + return fmt.Errorf("expected histogram value at index %v (t=%v) for %s to be %v, but got %v (result has %s)", i, actual.T, ev.metrics[hash], expected.H.TestExpression(), actual.H.TestExpression(), formatSeriesResult(s)) } } } @@ -995,7 +995,13 @@ func formatSeriesResult(s promql.Series) string { histogramPlural = "" } - return fmt.Sprintf("%v float point%s %v and %v histogram point%s %v", len(s.Floats), floatPlural, s.Floats, len(s.Histograms), histogramPlural, s.Histograms) + histograms := make([]string, 0, len(s.Histograms)) + + for _, p := range s.Histograms { + histograms = append(histograms, fmt.Sprintf("%v @[%v]", p.H.TestExpression(), p.T)) + } + + return fmt.Sprintf("%v float point%s %v and %v histogram point%s %v", len(s.Floats), floatPlural, s.Floats, len(s.Histograms), histogramPlural, histograms) } // HistogramTestExpression returns TestExpression() for the given histogram or "" if the histogram is nil. 
diff --git a/promql/promqltest/test_test.go b/promql/promqltest/test_test.go index 49b43eb12..bd965b00b 100644 --- a/promql/promqltest/test_test.go +++ b/promql/promqltest/test_test.go @@ -381,7 +381,7 @@ load 5m eval range from 0 to 10m step 5m testmetric testmetric {{schema:-1 sum:4 count:1 buckets:[1] offset:1}} {{schema:-1 sum:7 count:1 buckets:[1] offset:1}} {{schema:-1 sum:8 count:1 buckets:[1] offset:1}} `, - expectedError: `error in eval testmetric (line 5): expected histogram value at index 1 (t=300000) for {__name__="testmetric"} to be {count:1, sum:7, (1,4]:1}, but got {count:1, sum:5, (1,4]:1} (result has 0 float points [] and 3 histogram points [{count:1, sum:4, (1,4]:1} @[0] {count:1, sum:5, (1,4]:1} @[300000] {count:1, sum:6, (1,4]:1} @[600000]])`, + expectedError: `error in eval testmetric (line 5): expected histogram value at index 1 (t=300000) for {__name__="testmetric"} to be {{schema:-1 count:1 sum:7 offset:1 buckets:[1]}}, but got {{schema:-1 count:1 sum:5 counter_reset_hint:not_reset offset:1 buckets:[1]}} (result has 0 float points [] and 3 histogram points [{{schema:-1 count:1 sum:4 offset:1 buckets:[1]}} @[0] {{schema:-1 count:1 sum:5 counter_reset_hint:not_reset offset:1 buckets:[1]}} @[300000] {{schema:-1 count:1 sum:6 counter_reset_hint:not_reset offset:1 buckets:[1]}} @[600000]])`, }, "range query with too many points for query time range": { input: testData + ` @@ -532,7 +532,7 @@ load 5m eval range from 0 to 5m step 5m testmetric testmetric 2 3 `, - expectedError: `error in eval testmetric (line 5): expected 2 float points and 0 histogram points for {__name__="testmetric"}, but got 0 float points [] and 2 histogram points [{count:0, sum:0} @[0] {count:0, sum:0} @[300000]]`, + expectedError: `error in eval testmetric (line 5): expected 2 float points and 0 histogram points for {__name__="testmetric"}, but got 0 float points [] and 2 histogram points [{{}} @[0] {{counter_reset_hint:not_reset}} @[300000]]`, }, "range query with expected 
mixed results": { input: ` @@ -552,7 +552,7 @@ load 5m eval range from 0 to 5m step 5m testmetric testmetric {{}} 3 `, - expectedError: `error in eval testmetric (line 5): expected float value at index 0 for {__name__="testmetric"} to have timestamp 300000, but it had timestamp 0 (result has 1 float point [3 @[0]] and 1 histogram point [{count:0, sum:0} @[300000]])`, + expectedError: `error in eval testmetric (line 5): expected float value at index 0 for {__name__="testmetric"} to have timestamp 300000, but it had timestamp 0 (result has 1 float point [3 @[0]] and 1 histogram point [{{}} @[300000]])`, }, "instant query with expected scalar result": { input: ` From 9b451abec735cbab73627f2c56689e8a8f4faa41 Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Wed, 4 Sep 2024 16:08:05 +1000 Subject: [PATCH 010/339] Make positive and negative bucket counts different in existing test cases Signed-off-by: Charles Korn --- promql/promqltest/testdata/native_histograms.test | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/promql/promqltest/testdata/native_histograms.test b/promql/promqltest/testdata/native_histograms.test index 71e102dce..4f60d2cbd 100644 --- a/promql/promqltest/testdata/native_histograms.test +++ b/promql/promqltest/testdata/native_histograms.test @@ -720,27 +720,27 @@ eval instant at 10m histogram_sum(scalar(histogram_fraction(-Inf, +Inf, sum(hist # Apply multiplication and division operator to histogram. 
load 10m - histogram_mul_div {{schema:0 count:21 sum:33 z_bucket:3 z_bucket_w:0.001 buckets:[3 3 3] n_buckets:[3 3 3]}}x1 + histogram_mul_div {{schema:0 count:30 sum:33 z_bucket:3 z_bucket_w:0.001 buckets:[3 3 3] n_buckets:[6 6 6]}}x1 float_series_3 3+0x1 float_series_0 0+0x1 eval instant at 10m histogram_mul_div*3 - {} {{schema:0 count:63 sum:99 z_bucket:9 z_bucket_w:0.001 buckets:[9 9 9] n_buckets:[9 9 9]}} + {} {{schema:0 count:90 sum:99 z_bucket:9 z_bucket_w:0.001 buckets:[9 9 9] n_buckets:[18 18 18]}} eval instant at 10m 3*histogram_mul_div - {} {{schema:0 count:63 sum:99 z_bucket:9 z_bucket_w:0.001 buckets:[9 9 9] n_buckets:[9 9 9]}} + {} {{schema:0 count:90 sum:99 z_bucket:9 z_bucket_w:0.001 buckets:[9 9 9] n_buckets:[18 18 18]}} eval instant at 10m histogram_mul_div*float_series_3 - {} {{schema:0 count:63 sum:99 z_bucket:9 z_bucket_w:0.001 buckets:[9 9 9] n_buckets:[9 9 9]}} + {} {{schema:0 count:90 sum:99 z_bucket:9 z_bucket_w:0.001 buckets:[9 9 9] n_buckets:[18 18 18]}} eval instant at 10m float_series_3*histogram_mul_div - {} {{schema:0 count:63 sum:99 z_bucket:9 z_bucket_w:0.001 buckets:[9 9 9] n_buckets:[9 9 9]}} + {} {{schema:0 count:90 sum:99 z_bucket:9 z_bucket_w:0.001 buckets:[9 9 9] n_buckets:[18 18 18]}} eval instant at 10m histogram_mul_div/3 - {} {{schema:0 count:7 sum:11 z_bucket:1 z_bucket_w:0.001 buckets:[1 1 1] n_buckets:[1 1 1]}} + {} {{schema:0 count:10 sum:11 z_bucket:1 z_bucket_w:0.001 buckets:[1 1 1] n_buckets:[2 2 2]}} eval instant at 10m histogram_mul_div/float_series_3 - {} {{schema:0 count:7 sum:11 z_bucket:1 z_bucket_w:0.001 buckets:[1 1 1] n_buckets:[1 1 1]}} + {} {{schema:0 count:10 sum:11 z_bucket:1 z_bucket_w:0.001 buckets:[1 1 1] n_buckets:[2 2 2]}} eval instant at 10m histogram_mul_div*0 {} {{schema:0 count:0 sum:0 z_bucket:0 z_bucket_w:0.001 buckets:[0 0 0] n_buckets:[0 0 0]}} From 113de6301c41c64819e8eeddc8c1f862d720b7af Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Wed, 4 Sep 2024 16:20:28 +1000 Subject: [PATCH 
011/339] Add failing test cases for unary negation and multiplication and division with negative scalars Signed-off-by: Charles Korn --- promql/promqltest/testdata/native_histograms.test | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/promql/promqltest/testdata/native_histograms.test b/promql/promqltest/testdata/native_histograms.test index 4f60d2cbd..ee2ae7759 100644 --- a/promql/promqltest/testdata/native_histograms.test +++ b/promql/promqltest/testdata/native_histograms.test @@ -727,6 +727,15 @@ load 10m eval instant at 10m histogram_mul_div*3 {} {{schema:0 count:90 sum:99 z_bucket:9 z_bucket_w:0.001 buckets:[9 9 9] n_buckets:[18 18 18]}} +eval instant at 10m histogram_mul_div*-1 + {} {{schema:0 count:30 sum:-33 z_bucket:3 z_bucket_w:0.001 buckets:[6 6 6] n_buckets:[3 3 3]}} + +eval instant at 10m -histogram_mul_div + {} {{schema:0 count:30 sum:-33 z_bucket:3 z_bucket_w:0.001 buckets:[6 6 6] n_buckets:[3 3 3]}} + +eval instant at 10m histogram_mul_div*-3 + {} {{schema:0 count:90 sum:-99 z_bucket:9 z_bucket_w:0.001 buckets:[18 18 18] n_buckets:[9 9 9]}} + eval instant at 10m 3*histogram_mul_div {} {{schema:0 count:90 sum:99 z_bucket:9 z_bucket_w:0.001 buckets:[9 9 9] n_buckets:[18 18 18]}} @@ -739,6 +748,9 @@ eval instant at 10m float_series_3*histogram_mul_div eval instant at 10m histogram_mul_div/3 {} {{schema:0 count:10 sum:11 z_bucket:1 z_bucket_w:0.001 buckets:[1 1 1] n_buckets:[2 2 2]}} +eval instant at 10m histogram_mul_div/-3 + {} {{schema:0 count:10 sum:-11 z_bucket:1 z_bucket_w:0.001 buckets:[2 2 2] n_buckets:[1 1 1]}} + eval instant at 10m histogram_mul_div/float_series_3 {} {{schema:0 count:10 sum:11 z_bucket:1 z_bucket_w:0.001 buckets:[1 1 1] n_buckets:[2 2 2]}} From 4da551578c691591d54bddf38fd8b1620c5faa73 Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Wed, 4 Sep 2024 16:33:18 +1000 Subject: [PATCH 012/339] Fix test broken by inclusion of `counter_reset_hint` Signed-off-by: Charles Korn --- cmd/promtool/testdata/unittest.yml | 4 
++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmd/promtool/testdata/unittest.yml b/cmd/promtool/testdata/unittest.yml index ff511729b..d6224d785 100644 --- a/cmd/promtool/testdata/unittest.yml +++ b/cmd/promtool/testdata/unittest.yml @@ -69,13 +69,13 @@ tests: eval_time: 2m exp_samples: - labels: "test_histogram_repeat" - histogram: "{{count:2 sum:3 buckets:[2]}}" + histogram: "{{count:2 sum:3 counter_reset_hint:not_reset buckets:[2]}}" - expr: test_histogram_increase eval_time: 2m exp_samples: - labels: "test_histogram_increase" - histogram: "{{count:4 sum:5.6 buckets:[4]}}" + histogram: "{{count:4 sum:5.6 counter_reset_hint:not_reset buckets:[4]}}" # Ensure a value is stale as soon as it is marked as such. - expr: test_stale From df750ef17018dd3f197f9726920ba67328055335 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 6 Sep 2024 15:58:34 +0000 Subject: [PATCH 013/339] build(deps): bump github.com/go-zookeeper/zk from 1.0.3 to 1.0.4 Bumps [github.com/go-zookeeper/zk](https://github.com/go-zookeeper/zk) from 1.0.3 to 1.0.4. - [Release notes](https://github.com/go-zookeeper/zk/releases) - [Commits](https://github.com/go-zookeeper/zk/compare/v1.0.3...v1.0.4) --- updated-dependencies: - dependency-name: github.com/go-zookeeper/zk dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index 845e3277b..667a6fa56 100644 --- a/go.mod +++ b/go.mod @@ -27,7 +27,7 @@ require ( github.com/go-kit/log v0.2.1 github.com/go-logfmt/logfmt v0.6.0 github.com/go-openapi/strfmt v0.23.0 - github.com/go-zookeeper/zk v1.0.3 + github.com/go-zookeeper/zk v1.0.4 github.com/gogo/protobuf v1.3.2 github.com/golang/snappy v0.0.4 github.com/google/go-cmp v0.6.0 diff --git a/go.sum b/go.sum index edb5b650b..e03a64918 100644 --- a/go.sum +++ b/go.sum @@ -238,8 +238,8 @@ github.com/go-sql-driver/mysql v1.4.0/go.mod h1:zAC/RDZ24gD3HViQzih4MyKcchzm+sOG github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI= github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8= -github.com/go-zookeeper/zk v1.0.3 h1:7M2kwOsc//9VeeFiPtf+uSJlVpU66x9Ba5+8XK7/TDg= -github.com/go-zookeeper/zk v1.0.3/go.mod h1:nOB03cncLtlp4t+UAkGSV+9beXP/akpekBwL+UX1Qcw= +github.com/go-zookeeper/zk v1.0.4 h1:DPzxraQx7OrPyXq2phlGlNSIyWEsAox0RJmjTseMV6I= +github.com/go-zookeeper/zk v1.0.4/go.mod h1:nOB03cncLtlp4t+UAkGSV+9beXP/akpekBwL+UX1Qcw= github.com/godbus/dbus/v5 v5.0.4 h1:9349emZab16e7zQvpmsbtjc18ykshndd8y2PG3sgJbA= github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= github.com/gogo/googleapis v1.1.0/go.mod h1:gf4bu3Q80BeJ6H1S1vYPm8/ELATdvryBaNFGgqEef3s= From 98528550842e38be419c2d8fbbc437a9da2b39bf Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Wed, 4 Sep 2024 16:20:47 +1000 Subject: [PATCH 014/339] Implement unary negation for native histograms Signed-off-by: Charles Korn --- promql/engine.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/promql/engine.go b/promql/engine.go index b54ce2d6d..1fa51ef48 100644 --- a/promql/engine.go +++ b/promql/engine.go @@ -1799,6 
+1799,9 @@ func (ev *evaluator) eval(expr parser.Expr) (parser.Value, annotations.Annotatio for j := range mat[i].Floats { mat[i].Floats[j].F = -mat[i].Floats[j].F } + for j := range mat[i].Histograms { + mat[i].Histograms[j].H = mat[i].Histograms[j].H.Copy().Mul(-1) + } } if !ev.enableDelayedNameRemoval && mat.ContainsSameLabelset() { ev.errorf("vector cannot contain metrics with the same labelset") From 2bdb3452d1181e40283c2ae141adf4f0e952d48a Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Mon, 9 Sep 2024 14:29:29 +1000 Subject: [PATCH 015/339] Modify parser for native histograms to allow negative values where supported Signed-off-by: Charles Korn --- promql/parser/generated_parser.y | 8 +- promql/parser/generated_parser.y.go | 156 ++++++++++++++-------------- promql/parser/lex.go | 3 + promql/parser/parse_test.go | 24 ++++- 4 files changed, 105 insertions(+), 86 deletions(-) diff --git a/promql/parser/generated_parser.y b/promql/parser/generated_parser.y index da24be0c4..befb9bdf3 100644 --- a/promql/parser/generated_parser.y +++ b/promql/parser/generated_parser.y @@ -818,12 +818,12 @@ histogram_desc_item $$ = yylex.(*parser).newMap() $$["sum"] = $3 } - | COUNT_DESC COLON number + | COUNT_DESC COLON signed_or_unsigned_number { $$ = yylex.(*parser).newMap() $$["count"] = $3 } - | ZERO_BUCKET_DESC COLON number + | ZERO_BUCKET_DESC COLON signed_or_unsigned_number { $$ = yylex.(*parser).newMap() $$["z_bucket"] = $3 @@ -875,11 +875,11 @@ bucket_set : LEFT_BRACKET bucket_set_list SPACE RIGHT_BRACKET } ; -bucket_set_list : bucket_set_list SPACE number +bucket_set_list : bucket_set_list SPACE signed_or_unsigned_number { $$ = append($1, $3) } - | number + | signed_or_unsigned_number { $$ = []float64{$1} } diff --git a/promql/parser/generated_parser.y.go b/promql/parser/generated_parser.y.go index 22231f73e..ad58a5297 100644 --- a/promql/parser/generated_parser.y.go +++ b/promql/parser/generated_parser.y.go @@ -410,55 +410,55 @@ const yyPrivate = 57344 const yyLast = 
799 var yyAct = [...]int16{ - 155, 334, 332, 276, 339, 152, 226, 39, 192, 44, - 291, 290, 156, 118, 82, 178, 229, 107, 106, 346, - 347, 348, 349, 109, 108, 198, 239, 199, 133, 110, - 105, 60, 245, 121, 6, 329, 325, 111, 328, 228, - 200, 201, 160, 119, 304, 267, 293, 128, 260, 160, - 151, 261, 159, 302, 358, 311, 122, 55, 89, 159, - 196, 241, 242, 259, 113, 243, 114, 54, 98, 99, - 302, 112, 101, 256, 104, 88, 230, 232, 234, 235, + 152, 334, 332, 155, 339, 226, 39, 192, 276, 44, + 291, 290, 118, 82, 178, 229, 107, 106, 346, 347, + 348, 349, 109, 108, 198, 239, 199, 156, 110, 105, + 6, 245, 200, 201, 133, 325, 111, 329, 228, 60, + 357, 293, 328, 304, 267, 160, 266, 128, 55, 151, + 302, 311, 302, 196, 340, 159, 55, 89, 54, 356, + 241, 242, 355, 113, 243, 114, 54, 98, 99, 265, + 112, 101, 256, 104, 88, 230, 232, 234, 235, 236, + 244, 246, 249, 250, 251, 252, 253, 257, 258, 105, + 333, 231, 233, 237, 238, 240, 247, 248, 103, 115, + 109, 254, 255, 324, 150, 218, 110, 264, 111, 270, + 77, 35, 7, 149, 188, 163, 322, 321, 173, 320, + 167, 170, 323, 165, 271, 166, 2, 3, 4, 5, + 263, 101, 194, 104, 180, 184, 197, 187, 186, 319, + 272, 202, 203, 204, 205, 206, 207, 208, 209, 210, + 211, 212, 213, 214, 215, 216, 195, 299, 103, 318, + 217, 36, 298, 1, 190, 219, 220, 317, 160, 160, + 316, 193, 160, 154, 182, 196, 229, 297, 159, 159, + 160, 358, 159, 268, 181, 183, 239, 260, 296, 262, + 159, 315, 245, 129, 314, 55, 225, 313, 161, 228, + 161, 161, 259, 312, 161, 54, 86, 295, 310, 288, + 289, 8, 161, 292, 162, 37, 162, 162, 49, 269, + 162, 241, 242, 309, 179, 243, 180, 127, 162, 126, + 308, 223, 294, 256, 48, 222, 230, 232, 234, 235, 236, 244, 246, 249, 250, 251, 252, 253, 257, 258, - 160, 115, 231, 233, 237, 238, 240, 247, 248, 103, - 159, 109, 254, 255, 324, 150, 357, 110, 333, 218, - 111, 340, 310, 149, 77, 163, 7, 105, 35, 173, - 167, 170, 161, 323, 165, 356, 166, 309, 355, 194, - 2, 3, 4, 5, 308, 322, 184, 197, 162, 186, - 321, 195, 202, 203, 204, 205, 206, 207, 208, 209, - 210, 
211, 212, 213, 214, 215, 216, 229, 129, 101, - 217, 104, 219, 220, 190, 266, 270, 239, 160, 121, - 268, 193, 264, 245, 55, 196, 154, 225, 159, 119, - 228, 271, 188, 160, 54, 161, 103, 117, 265, 84, - 262, 299, 122, 159, 320, 263, 298, 272, 10, 83, - 161, 162, 241, 242, 269, 187, 243, 185, 79, 288, - 289, 297, 319, 292, 256, 161, 162, 230, 232, 234, - 235, 236, 244, 246, 249, 250, 251, 252, 253, 257, - 258, 162, 294, 231, 233, 237, 238, 240, 247, 248, - 318, 317, 316, 254, 255, 180, 315, 134, 135, 136, - 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, - 147, 148, 157, 158, 169, 105, 314, 296, 300, 301, - 303, 223, 305, 313, 55, 222, 179, 168, 180, 84, - 306, 307, 177, 125, 54, 182, 295, 176, 124, 83, - 221, 312, 87, 89, 8, 181, 183, 81, 37, 86, - 175, 123, 36, 98, 99, 326, 327, 101, 102, 104, - 88, 127, 331, 126, 50, 336, 337, 338, 182, 335, - 78, 1, 342, 341, 344, 343, 49, 48, 181, 183, - 350, 351, 47, 55, 103, 352, 53, 77, 164, 56, - 46, 354, 22, 54, 59, 55, 172, 9, 9, 57, - 132, 45, 43, 130, 171, 54, 359, 42, 131, 41, - 40, 51, 191, 353, 273, 75, 85, 189, 224, 80, - 345, 18, 19, 120, 153, 20, 58, 227, 52, 116, + 221, 169, 231, 233, 237, 238, 240, 247, 248, 157, + 158, 164, 254, 255, 168, 10, 182, 300, 55, 301, + 303, 47, 305, 46, 132, 79, 181, 183, 54, 306, + 307, 45, 134, 135, 136, 137, 138, 139, 140, 141, + 142, 143, 144, 145, 146, 147, 148, 43, 59, 50, + 84, 9, 9, 121, 326, 78, 327, 130, 171, 121, + 83, 42, 131, 119, 335, 336, 337, 331, 185, 119, + 338, 261, 342, 341, 344, 343, 122, 117, 41, 177, + 350, 351, 122, 55, 176, 352, 53, 77, 40, 56, + 125, 354, 22, 54, 84, 124, 172, 175, 51, 57, + 191, 353, 273, 85, 83, 189, 359, 224, 123, 80, + 345, 120, 81, 153, 58, 75, 227, 52, 116, 0, + 0, 18, 19, 0, 0, 20, 0, 0, 0, 0, 0, 76, 0, 0, 0, 0, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 0, 0, 0, 13, 0, 0, 0, 24, 0, 30, - 0, 0, 31, 32, 55, 38, 0, 53, 77, 0, + 0, 0, 31, 32, 55, 38, 105, 53, 77, 0, 56, 275, 0, 22, 54, 0, 0, 0, 274, 0, 57, 0, 278, 279, 277, 
284, 286, 283, 285, 280, - 281, 282, 287, 0, 0, 0, 75, 0, 0, 0, - 0, 0, 18, 19, 0, 0, 20, 0, 0, 0, - 0, 0, 76, 0, 0, 0, 0, 61, 62, 63, + 281, 282, 287, 87, 89, 0, 75, 0, 0, 0, + 0, 0, 18, 19, 98, 99, 20, 0, 101, 102, + 104, 88, 76, 0, 0, 0, 0, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, - 74, 0, 0, 0, 13, 0, 0, 0, 24, 0, + 74, 0, 0, 0, 13, 103, 0, 0, 24, 0, 30, 0, 55, 31, 32, 53, 77, 0, 56, 330, 0, 22, 54, 0, 0, 0, 0, 0, 57, 0, 278, 279, 277, 284, 286, 283, 285, 280, 281, 282, @@ -493,51 +493,51 @@ var yyAct = [...]int16{ } var yyPact = [...]int16{ - 32, 106, 569, 569, 405, 526, -1000, -1000, -1000, 105, + 28, 102, 569, 569, 405, 526, -1000, -1000, -1000, 98, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, - -1000, -1000, -1000, -1000, -1000, 277, -1000, 297, -1000, 650, + -1000, -1000, -1000, -1000, -1000, 342, -1000, 204, -1000, 650, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, - -1000, -1000, 22, 95, -1000, -1000, 483, -1000, 483, 101, + -1000, -1000, 21, 93, -1000, -1000, 483, -1000, 483, 97, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, - -1000, -1000, -1000, -1000, -1000, -1000, -1000, 167, -1000, -1000, - 281, -1000, -1000, 309, -1000, 23, -1000, -50, -50, -50, - -50, -50, -50, -50, -50, -50, -50, -50, -50, -50, - -50, -50, -50, 48, 174, 336, 95, -56, -1000, 262, - 262, 324, -1000, 631, 103, -1000, 280, -1000, -1000, 274, - 241, -1000, -1000, -1000, 187, -1000, 180, -1000, 159, 483, - -1000, -57, -40, -1000, 483, 483, 483, 483, 483, 483, + -1000, -1000, -1000, -1000, -1000, -1000, -1000, 307, -1000, -1000, + 338, -1000, -1000, 225, -1000, 23, -1000, -44, -44, -44, + -44, -44, -44, -44, -44, -44, -44, -44, -44, -44, + -44, -44, -44, 47, 171, 259, 93, -57, -1000, 249, + 249, 324, -1000, 631, 75, -1000, 327, -1000, -1000, 222, + 130, -1000, -1000, -1000, 298, -1000, 112, -1000, 159, 483, + -1000, -58, -48, -1000, 
483, 483, 483, 483, 483, 483, 483, 483, 483, 483, 483, 483, 483, 483, 483, -1000, - 165, -1000, -1000, 94, -1000, -1000, -1000, -1000, -1000, -1000, - -1000, 40, 40, 269, -1000, -1000, -1000, -1000, 155, -1000, - -1000, 41, -1000, 650, -1000, -1000, 31, -1000, 170, -1000, - -1000, -1000, -1000, -1000, 163, -1000, -1000, -1000, -1000, -1000, - 19, 144, 140, -1000, -1000, -1000, 404, 16, 262, 262, - 262, 262, 103, 103, 251, 251, 251, 715, 696, 251, - 251, 715, 103, 103, 251, 103, 16, -1000, 24, -1000, - -1000, -1000, 265, -1000, 189, -1000, -1000, -1000, -1000, -1000, + 39, -1000, -1000, 90, -1000, -1000, -1000, -1000, -1000, -1000, + -1000, 36, 36, 229, -1000, -1000, -1000, -1000, 174, -1000, + -1000, 180, -1000, 650, -1000, -1000, 301, -1000, 105, -1000, + -1000, -1000, -1000, -1000, 44, -1000, -1000, -1000, -1000, -1000, + 18, 157, 83, -1000, -1000, -1000, 404, 15, 249, 249, + 249, 249, 75, 75, 402, 402, 402, 715, 696, 402, + 402, 715, 75, 75, 402, 75, 15, -1000, 19, -1000, + -1000, -1000, 186, -1000, 155, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, - 483, -1000, -1000, -1000, -1000, -1000, -1000, 34, 34, 18, - 34, 44, 44, 110, 38, -1000, -1000, 285, 267, 260, - 240, 236, 235, 234, 206, 188, 134, 129, -1000, -1000, - -1000, -1000, -1000, -1000, 102, -1000, -1000, -1000, 14, -1000, - 650, -1000, -1000, -1000, 34, -1000, 12, 9, 482, -1000, - -1000, -1000, 51, 81, 40, 40, 40, 97, 97, 51, - 97, 51, -73, -1000, -1000, -1000, -1000, -1000, 34, 34, - -1000, -1000, -1000, 34, -1000, -1000, -1000, -1000, -1000, -1000, - 40, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, - -1000, -1000, -1000, 104, -1000, 33, -1000, -1000, -1000, -1000, + 483, -1000, -1000, -1000, -1000, -1000, -1000, 31, 31, 17, + 31, 37, 37, 206, 34, -1000, -1000, 197, 191, 188, + 185, 164, 
161, 153, 133, 113, 111, 110, -1000, -1000, + -1000, -1000, -1000, -1000, 101, -1000, -1000, -1000, 13, -1000, + 650, -1000, -1000, -1000, 31, -1000, 16, 11, 482, -1000, + -1000, -1000, 33, 163, 163, 163, 36, 40, 40, 33, + 40, 33, -74, -1000, -1000, -1000, -1000, -1000, 31, 31, + -1000, -1000, -1000, 31, -1000, -1000, -1000, -1000, -1000, -1000, + 163, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, + -1000, -1000, -1000, 38, -1000, 160, -1000, -1000, -1000, -1000, } var yyPgo = [...]int16{ - 0, 379, 13, 378, 6, 15, 377, 344, 376, 374, - 373, 370, 198, 294, 369, 14, 368, 10, 11, 367, - 366, 8, 364, 3, 4, 363, 2, 1, 0, 362, - 12, 5, 361, 360, 18, 158, 359, 358, 7, 357, - 354, 17, 353, 31, 352, 9, 351, 350, 340, 332, - 327, 326, 314, 321, 302, + 0, 368, 12, 367, 5, 14, 366, 298, 364, 363, + 361, 360, 265, 211, 359, 13, 357, 10, 11, 355, + 353, 7, 352, 8, 4, 351, 2, 1, 3, 350, + 27, 0, 348, 338, 17, 193, 328, 312, 6, 311, + 308, 16, 307, 39, 297, 9, 281, 274, 273, 271, + 234, 218, 299, 163, 161, } var yyR1 = [...]int8{ @@ -630,9 +630,9 @@ var yyChk = [...]int16{ -38, -27, 19, -27, 26, -27, -21, -21, 24, 17, 2, 17, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 21, 2, 22, -4, -27, 26, 26, - 17, -23, -26, 57, -27, -31, -28, -28, -28, -24, + 17, -23, -26, 57, -27, -31, -31, -31, -28, -24, 14, -24, -26, -24, -26, -11, 92, 93, 94, 95, - -27, -27, -27, -25, -28, 24, 21, 2, 21, -28, + -27, -27, -27, -25, -31, 24, 21, 2, 21, -31, } var yyDef = [...]int16{ diff --git a/promql/parser/lex.go b/promql/parser/lex.go index d031e8330..82bf0367b 100644 --- a/promql/parser/lex.go +++ b/promql/parser/lex.go @@ -610,6 +610,9 @@ func lexBuckets(l *Lexer) stateFn { case isSpace(r): l.emit(SPACE) return lexSpace + case r == '-': + l.emit(SUB) + return lexNumber case isDigit(r): l.backup() return lexNumber diff --git a/promql/parser/parse_test.go b/promql/parser/parse_test.go index 37748323c..d9956e745 100644 --- a/promql/parser/parse_test.go +++ b/promql/parser/parse_test.go @@ -4084,17 
+4084,17 @@ func TestParseHistogramSeries(t *testing.T) { }, { name: "all properties used", - input: `{} {{schema:1 sum:-0.3 count:3.1 z_bucket:7.1 z_bucket_w:0.05 buckets:[5.1 10 7] offset:-3 n_buckets:[4.1 5] n_offset:-5 counter_reset_hint:gauge}}`, + input: `{} {{schema:1 sum:0.3 count:3.1 z_bucket:7.1 z_bucket_w:0.05 buckets:[5.1 10 7] offset:3 n_buckets:[4.1 5] n_offset:5 counter_reset_hint:gauge}}`, expected: []histogram.FloatHistogram{{ Schema: 1, - Sum: -0.3, + Sum: 0.3, Count: 3.1, ZeroCount: 7.1, ZeroThreshold: 0.05, PositiveBuckets: []float64{5.1, 10, 7}, - PositiveSpans: []histogram.Span{{Offset: -3, Length: 3}}, + PositiveSpans: []histogram.Span{{Offset: 3, Length: 3}}, NegativeBuckets: []float64{4.1, 5}, - NegativeSpans: []histogram.Span{{Offset: -5, Length: 2}}, + NegativeSpans: []histogram.Span{{Offset: 5, Length: 2}}, CounterResetHint: histogram.GaugeType, }}, }, @@ -4114,6 +4114,22 @@ func TestParseHistogramSeries(t *testing.T) { CounterResetHint: histogram.GaugeType, }}, }, + { + name: "all properties used, with negative values where supported", + input: `{} {{schema:1 sum:-0.3 count:-3.1 z_bucket:-7.1 z_bucket_w:0.05 buckets:[-5.1 -10 -7] offset:-3 n_buckets:[-4.1 -5] n_offset:-5 counter_reset_hint:gauge}}`, + expected: []histogram.FloatHistogram{{ + Schema: 1, + Sum: -0.3, + Count: -3.1, + ZeroCount: -7.1, + ZeroThreshold: 0.05, + PositiveBuckets: []float64{-5.1, -10, -7}, + PositiveSpans: []histogram.Span{{Offset: -3, Length: 3}}, + NegativeBuckets: []float64{-4.1, -5}, + NegativeSpans: []histogram.Span{{Offset: -5, Length: 2}}, + CounterResetHint: histogram.GaugeType, + }}, + }, { name: "static series", input: `{} {{buckets:[5 10 7] schema:1}}x2`, From e8c74821375131e33dfa9c2d14ddc896fc776b5d Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Wed, 4 Sep 2024 16:21:17 +1000 Subject: [PATCH 016/339] Return negative counts when multiplied or divided by a negative value Signed-off-by: Charles Korn --- model/histogram/float_histogram_test.go | 96 
+++++++++++++++++++ .../testdata/native_histograms.test | 8 +- 2 files changed, 100 insertions(+), 4 deletions(-) diff --git a/model/histogram/float_histogram_test.go b/model/histogram/float_histogram_test.go index 1558a6d67..cf370a313 100644 --- a/model/histogram/float_histogram_test.go +++ b/model/histogram/float_histogram_test.go @@ -131,6 +131,54 @@ func TestFloatHistogramMul(t *testing.T) { NegativeBuckets: []float64{9, 3, 15, 18}, }, }, + { + "negation", + &FloatHistogram{ + ZeroThreshold: 0.01, + ZeroCount: 11, + Count: 30, + Sum: 23, + PositiveSpans: []Span{{-2, 2}, {1, 3}}, + PositiveBuckets: []float64{1, 0, 3, 4, 7}, + NegativeSpans: []Span{{3, 2}, {3, 2}}, + NegativeBuckets: []float64{3, 1, 5, 6}, + }, + -1, + &FloatHistogram{ + ZeroThreshold: 0.01, + ZeroCount: -11, + Count: -30, + Sum: -23, + PositiveSpans: []Span{{-2, 2}, {1, 3}}, + PositiveBuckets: []float64{-1, 0, -3, -4, -7}, + NegativeSpans: []Span{{3, 2}, {3, 2}}, + NegativeBuckets: []float64{-3, -1, -5, -6}, + }, + }, + { + "negative multiplier", + &FloatHistogram{ + ZeroThreshold: 0.01, + ZeroCount: 11, + Count: 30, + Sum: 23, + PositiveSpans: []Span{{-2, 2}, {1, 3}}, + PositiveBuckets: []float64{1, 0, 3, 4, 7}, + NegativeSpans: []Span{{3, 2}, {3, 2}}, + NegativeBuckets: []float64{3, 1, 5, 6}, + }, + -2, + &FloatHistogram{ + ZeroThreshold: 0.01, + ZeroCount: -22, + Count: -60, + Sum: -46, + PositiveSpans: []Span{{-2, 2}, {1, 3}}, + PositiveBuckets: []float64{-2, 0, -6, -8, -14}, + NegativeSpans: []Span{{3, 2}, {3, 2}}, + NegativeBuckets: []float64{-6, -2, -10, -12}, + }, + }, { "no-op with custom buckets", &FloatHistogram{ @@ -409,6 +457,54 @@ func TestFloatHistogramDiv(t *testing.T) { NegativeBuckets: []float64{1.5, 0.5, 2.5, 3}, }, }, + { + "negation", + &FloatHistogram{ + ZeroThreshold: 0.01, + ZeroCount: 5.5, + Count: 3493.3, + Sum: 2349209.324, + PositiveSpans: []Span{{-2, 1}, {2, 3}}, + PositiveBuckets: []float64{1, 3.3, 4.2, 0.1}, + NegativeSpans: []Span{{3, 2}, {3, 2}}, + 
NegativeBuckets: []float64{3.1, 3, 1.234e5, 1000}, + }, + -1, + &FloatHistogram{ + ZeroThreshold: 0.01, + ZeroCount: -5.5, + Count: -3493.3, + Sum: -2349209.324, + PositiveSpans: []Span{{-2, 1}, {2, 3}}, + PositiveBuckets: []float64{-1, -3.3, -4.2, -0.1}, + NegativeSpans: []Span{{3, 2}, {3, 2}}, + NegativeBuckets: []float64{-3.1, -3, -1.234e5, -1000}, + }, + }, + { + "negative half", + &FloatHistogram{ + ZeroThreshold: 0.01, + ZeroCount: 11, + Count: 30, + Sum: 23, + PositiveSpans: []Span{{-2, 2}, {1, 3}}, + PositiveBuckets: []float64{1, 0, 3, 4, 7}, + NegativeSpans: []Span{{3, 2}, {3, 2}}, + NegativeBuckets: []float64{3, 1, 5, 6}, + }, + -2, + &FloatHistogram{ + ZeroThreshold: 0.01, + ZeroCount: -5.5, + Count: -15, + Sum: -11.5, + PositiveSpans: []Span{{-2, 2}, {1, 3}}, + PositiveBuckets: []float64{-0.5, 0, -1.5, -2, -3.5}, + NegativeSpans: []Span{{3, 2}, {3, 2}}, + NegativeBuckets: []float64{-1.5, -0.5, -2.5, -3}, + }, + }, { "no-op with custom buckets", &FloatHistogram{ diff --git a/promql/promqltest/testdata/native_histograms.test b/promql/promqltest/testdata/native_histograms.test index ee2ae7759..7d2eec32c 100644 --- a/promql/promqltest/testdata/native_histograms.test +++ b/promql/promqltest/testdata/native_histograms.test @@ -728,13 +728,13 @@ eval instant at 10m histogram_mul_div*3 {} {{schema:0 count:90 sum:99 z_bucket:9 z_bucket_w:0.001 buckets:[9 9 9] n_buckets:[18 18 18]}} eval instant at 10m histogram_mul_div*-1 - {} {{schema:0 count:30 sum:-33 z_bucket:3 z_bucket_w:0.001 buckets:[6 6 6] n_buckets:[3 3 3]}} + {} {{schema:0 count:-30 sum:-33 z_bucket:-3 z_bucket_w:0.001 buckets:[-3 -3 -3] n_buckets:[-6 -6 -6]}} eval instant at 10m -histogram_mul_div - {} {{schema:0 count:30 sum:-33 z_bucket:3 z_bucket_w:0.001 buckets:[6 6 6] n_buckets:[3 3 3]}} + {} {{schema:0 count:-30 sum:-33 z_bucket:-3 z_bucket_w:0.001 buckets:[-3 -3 -3] n_buckets:[-6 -6 -6]}} eval instant at 10m histogram_mul_div*-3 - {} {{schema:0 count:90 sum:-99 z_bucket:9 z_bucket_w:0.001 
buckets:[18 18 18] n_buckets:[9 9 9]}} + {} {{schema:0 count:-90 sum:-99 z_bucket:-9 z_bucket_w:0.001 buckets:[-9 -9 -9] n_buckets:[-18 -18 -18]}} eval instant at 10m 3*histogram_mul_div {} {{schema:0 count:90 sum:99 z_bucket:9 z_bucket_w:0.001 buckets:[9 9 9] n_buckets:[18 18 18]}} @@ -749,7 +749,7 @@ eval instant at 10m histogram_mul_div/3 {} {{schema:0 count:10 sum:11 z_bucket:1 z_bucket_w:0.001 buckets:[1 1 1] n_buckets:[2 2 2]}} eval instant at 10m histogram_mul_div/-3 - {} {{schema:0 count:10 sum:-11 z_bucket:1 z_bucket_w:0.001 buckets:[2 2 2] n_buckets:[1 1 1]}} + {} {{schema:0 count:-10 sum:-11 z_bucket:-1 z_bucket_w:0.001 buckets:[-1 -1 -1] n_buckets:[-2 -2 -2]}} eval instant at 10m histogram_mul_div/float_series_3 {} {{schema:0 count:10 sum:11 z_bucket:1 z_bucket_w:0.001 buckets:[1 1 1] n_buckets:[2 2 2]}} From 0a88943594b7c95d1a6ae4a9b06011690730b866 Mon Sep 17 00:00:00 2001 From: Julien Date: Fri, 6 Sep 2024 14:02:44 +0200 Subject: [PATCH 017/339] Scrape: test for q-value compliance with RFC 9110 in Accept header Signed-off-by: Julien --- scrape/scrape_test.go | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/scrape/scrape_test.go b/scrape/scrape_test.go index a69a19d7f..637d5a79c 100644 --- a/scrape/scrape_test.go +++ b/scrape/scrape_test.go @@ -34,6 +34,7 @@ import ( "github.com/go-kit/log" "github.com/gogo/protobuf/proto" "github.com/google/go-cmp/cmp" + "github.com/grafana/regexp" "github.com/prometheus/client_golang/prometheus" prom_testutil "github.com/prometheus/client_golang/prometheus/testutil" dto "github.com/prometheus/client_model/go" @@ -2379,8 +2380,11 @@ func TestTargetScraperScrapeOK(t *testing.T) { expectedTimeout = "1.5" ) - var protobufParsing bool - var allowUTF8 bool + var ( + protobufParsing bool + allowUTF8 bool + qValuePattern = regexp.MustCompile(`q=([0-9]+(\.\d+)?)`) + ) server := httptest.NewServer( http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { @@ -2393,6 +2397,17 @@ func 
TestTargetScraperScrapeOK(t *testing.T) { "Expected Accept header to prefer application/vnd.google.protobuf.") } + contentTypes := strings.Split(accept, ",") + for _, ct := range contentTypes { + match := qValuePattern.FindStringSubmatch(ct) + require.Len(t, match, 3) + qValue, err := strconv.ParseFloat(match[1], 64) + require.NoError(t, err, "Error parsing q value") + require.GreaterOrEqual(t, qValue, float64(0)) + require.LessOrEqual(t, qValue, float64(1)) + require.LessOrEqual(t, len(strings.Split(match[1], ".")[1]), 3, "q value should have at most 3 decimal places") + } + timeout := r.Header.Get("X-Prometheus-Scrape-Timeout-Seconds") require.Equal(t, expectedTimeout, timeout, "Expected scrape timeout header.") From 11f344e4d3c1afbba9fc4237b14c6f84f8cf0d01 Mon Sep 17 00:00:00 2001 From: Julius Volz Date: Mon, 9 Sep 2024 14:32:27 +0200 Subject: [PATCH 018/339] Move AM discovery page from "Monitoring status" -> "Server status" Seems to at least make more sense to me like that. Signed-off-by: Julius Volz --- web/ui/mantine-ui/src/App.tsx | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/web/ui/mantine-ui/src/App.tsx b/web/ui/mantine-ui/src/App.tsx index dd69cff38..9cba77702 100644 --- a/web/ui/mantine-ui/src/App.tsx +++ b/web/ui/mantine-ui/src/App.tsx @@ -109,13 +109,6 @@ const monitoringStatusPages = [ element: , inAgentMode: true, }, - { - title: "Alertmanager discovery", - path: "/discovered-alertmanagers", - icon: , - element: , - inAgentMode: false, - }, ]; const serverStatusPages = [ @@ -147,6 +140,13 @@ const serverStatusPages = [ element: , inAgentMode: true, }, + { + title: "Alertmanager discovery", + path: "/discovered-alertmanagers", + icon: , + element: , + inAgentMode: false, + }, ]; const allStatusPages = [...monitoringStatusPages, ...serverStatusPages]; From 35ef7d41ce6a3dbb996a2afe922421ae0683800b Mon Sep 17 00:00:00 2001 From: Robert Fratto Date: Mon, 9 Sep 2024 11:22:20 -0400 Subject: [PATCH 019/339] remove rfratto 
as a tsdb/agent maintainer Signed-off-by: Robert Fratto --- MAINTAINERS.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/MAINTAINERS.md b/MAINTAINERS.md index 3661ddaa0..7f4153abc 100644 --- a/MAINTAINERS.md +++ b/MAINTAINERS.md @@ -13,13 +13,12 @@ Maintainers for specific parts of the codebase: * `k8s`: Frederic Branczyk ( / @brancz) * `documentation` * `prometheus-mixin`: Matthias Loibl ( / @metalmatze) -* `model/histogram` and other code related to native histograms: Björn Rabenstein ( / @beorn7), +* `model/histogram` and other code related to native histograms: Björn Rabenstein ( / @beorn7), George Krajcsovits ( / @krajorama) * `storage` * `remote`: Callum Styan ( / @cstyan), Bartłomiej Płotka ( / @bwplotka), Tom Wilkie (tom.wilkie@gmail.com / @tomwilkie), Nicolás Pazos ( / @npazosmendez), Alex Greenbank ( / @alexgreenbank) * `otlptranslator`: Arve Knudsen ( / @aknuds1), Jesús Vázquez ( / @jesusvazquez) * `tsdb`: Ganesh Vernekar ( / @codesome), Bartłomiej Płotka ( / @bwplotka), Jesús Vázquez ( / @jesusvazquez) - * `agent`: Robert Fratto ( / @rfratto) * `web` * `ui`: Julius Volz ( / @juliusv) * `module`: Augustin Husson ( @nexucis) From c1080990ac14602c9389c50f2110c411d24af182 Mon Sep 17 00:00:00 2001 From: Julius Volz Date: Mon, 9 Sep 2024 18:55:32 +0200 Subject: [PATCH 020/339] Bring back documentation link in the form of an action button IMO this looks nicer than adding it as a normal page nav link as in https://github.com/prometheus/prometheus/pull/14878 Signed-off-by: Julius Volz --- web/ui/mantine-ui/src/App.tsx | 29 ++++++++++++------- .../src/components/SettingsMenu.tsx | 7 ++++- 2 files changed, 24 insertions(+), 12 deletions(-) diff --git a/web/ui/mantine-ui/src/App.tsx b/web/ui/mantine-ui/src/App.tsx index 9cba77702..99ed2316d 100644 --- a/web/ui/mantine-ui/src/App.tsx +++ b/web/ui/mantine-ui/src/App.tsx @@ -6,6 +6,7 @@ import classes from "./App.module.css"; import PrometheusLogo from "./images/prometheus-logo.svg"; import { + 
ActionIcon, AppShell, Box, Burger, @@ -22,6 +23,7 @@ import { useDisclosure } from "@mantine/hooks"; import { IconBell, IconBellFilled, + IconBook, IconChevronDown, IconChevronRight, IconCloudDataConnection, @@ -306,17 +308,24 @@ function App() { ))} + + ); - {/* */} + + ); @@ -359,9 +368,8 @@ function App() { {navLinks} - - - + + {navActionIcons} {navLinks} - - + {navActionIcons} diff --git a/web/ui/mantine-ui/src/components/SettingsMenu.tsx b/web/ui/mantine-ui/src/components/SettingsMenu.tsx index 0d004bd4b..c0631b1b4 100644 --- a/web/ui/mantine-ui/src/components/SettingsMenu.tsx +++ b/web/ui/mantine-ui/src/components/SettingsMenu.tsx @@ -18,7 +18,12 @@ const SettingsMenu: FC = () => { return ( - + From 569b6abfa3f876419879989d9e17d9a4f3cfc01b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=A1s=20Pazos?= Date: Mon, 9 Sep 2024 11:46:31 -0300 Subject: [PATCH 021/339] fix(utf8): propagate validationScheme config to scraping options MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Nicolás Pazos --- scrape/scrape.go | 1 + scrape/scrape_test.go | 85 +++++++++++++++++++++++++++++++------------ 2 files changed, 63 insertions(+), 23 deletions(-) diff --git a/scrape/scrape.go b/scrape/scrape.go index ea98432be..26969fe28 100644 --- a/scrape/scrape.go +++ b/scrape/scrape.go @@ -524,6 +524,7 @@ func (sp *scrapePool) sync(targets []*Target) { interval: interval, timeout: timeout, scrapeClassicHistograms: scrapeClassicHistograms, + validationScheme: validationScheme, }) if err != nil { l.setForcedError(err) diff --git a/scrape/scrape_test.go b/scrape/scrape_test.go index a69a19d7f..608dd7bb7 100644 --- a/scrape/scrape_test.go +++ b/scrape/scrape_test.go @@ -3114,18 +3114,7 @@ func TestScrapeReportLimit(t *testing.T) { ScrapeTimeout: model.Duration(100 * time.Millisecond), } - var ( - scrapes int - scrapedTwice = make(chan bool) - ) - - ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r 
*http.Request) { - fmt.Fprint(w, "metric_a 44\nmetric_b 44\nmetric_c 44\nmetric_d 44\n") - scrapes++ - if scrapes == 2 { - close(scrapedTwice) - } - })) + ts, scrapedTwice := newScrapableServer("metric_a 44\nmetric_b 44\nmetric_c 44\nmetric_d 44\n") defer ts.Close() sp, err := newScrapePool(cfg, s, 0, nil, nil, &Options{}, newTestScrapeMetrics(t)) @@ -3168,6 +3157,52 @@ func TestScrapeReportLimit(t *testing.T) { require.True(t, found) } +func TestScrapeUTF8(t *testing.T) { + s := teststorage.New(t) + defer s.Close() + model.NameValidationScheme = model.UTF8Validation + t.Cleanup(func() { model.NameValidationScheme = model.LegacyValidation }) + + cfg := &config.ScrapeConfig{ + JobName: "test", + Scheme: "http", + ScrapeInterval: model.Duration(100 * time.Millisecond), + ScrapeTimeout: model.Duration(100 * time.Millisecond), + MetricNameValidationScheme: config.UTF8ValidationConfig, + } + ts, scrapedTwice := newScrapableServer("{\"with.dots\"} 42\n") + defer ts.Close() + + sp, err := newScrapePool(cfg, s, 0, nil, nil, &Options{}, newTestScrapeMetrics(t)) + require.NoError(t, err) + defer sp.stop() + + testURL, err := url.Parse(ts.URL) + require.NoError(t, err) + sp.Sync([]*targetgroup.Group{ + { + Targets: []model.LabelSet{{model.AddressLabel: model.LabelValue(testURL.Host)}}, + }, + }) + + select { + case <-time.After(5 * time.Second): + t.Fatalf("target was not scraped twice") + case <-scrapedTwice: + // If the target has been scraped twice, report samples from the first + // scrape have been inserted in the database. 
+ } + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + q, err := s.Querier(time.Time{}.UnixNano(), time.Now().UnixNano()) + require.NoError(t, err) + defer q.Close() + series := q.Select(ctx, false, nil, labels.MustNewMatcher(labels.MatchRegexp, "__name__", "with.dots")) + + require.True(t, series.Next(), "series not found in tsdb") +} + func TestScrapeLoopLabelLimit(t *testing.T) { tests := []struct { title string @@ -3364,16 +3399,7 @@ test_summary_count 199 // The expected "quantile" values do not have the trailing ".0". expectedQuantileValues := []string{"0.5", "0.9", "0.95", "0.99", "1"} - scrapeCount := 0 - scraped := make(chan bool) - - ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - fmt.Fprint(w, metricsText) - scrapeCount++ - if scrapeCount > 2 { - close(scraped) - } - })) + ts, scrapedTwice := newScrapableServer(metricsText) defer ts.Close() sp, err := newScrapePool(config, simpleStorage, 0, nil, nil, &Options{}, newTestScrapeMetrics(t)) @@ -3392,7 +3418,7 @@ test_summary_count 199 select { case <-time.After(5 * time.Second): t.Fatalf("target was not scraped") - case <-scraped: + case <-scrapedTwice: } ctx, cancel := context.WithCancel(context.Background()) @@ -3841,3 +3867,16 @@ scrape_configs: require.Equal(t, expectedSchema, h.Schema) } } + +func newScrapableServer(scrapeText string) (s *httptest.Server, scrapedTwice chan bool) { + var scrapes int + scrapedTwice = make(chan bool) + + return httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + fmt.Fprint(w, scrapeText) + scrapes++ + if scrapes == 2 { + close(scrapedTwice) + } + })), scrapedTwice +} From 928f093eb04240ee6a9548b4a61e41083493b524 Mon Sep 17 00:00:00 2001 From: SuperQ Date: Tue, 10 Sep 2024 08:31:10 +0200 Subject: [PATCH 022/339] Update promci action Pick up new promci action to automatically configure `GOMEMLIMIT`. 
Signed-off-by: SuperQ --- .github/workflows/ci.yml | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3a181bb7c..1bb7d9a3e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -12,13 +12,9 @@ jobs: # Whenever the Go version is updated here, .promu.yml # should also be updated. image: quay.io/prometheus/golang-builder:1.23-base - env: - # Preliminary fix to make Go tests with race detector not use too much memory, - # see https://github.com/prometheus/prometheus/issues/14858. - GOMEMLIMIT: 10GiB steps: - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 - - uses: prometheus/promci@45166329da36d74895901808f1c8c97efafc7f84 # v0.3.0 + - uses: prometheus/promci@1a7aaf104b2dcbe64ffd9f98073ca3f33ca616eb # v0.4.2 - uses: ./.github/promci/actions/setup_environment - run: make GOOPTS=--tags=stringlabels GO_ONLY=1 SKIP_GOLANGCI_LINT=1 - run: go test --tags=stringlabels ./tsdb/ -test.tsdb-isolation=false @@ -32,7 +28,7 @@ jobs: image: quay.io/prometheus/golang-builder:1.23-base steps: - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 - - uses: prometheus/promci@45166329da36d74895901808f1c8c97efafc7f84 # v0.3.0 + - uses: prometheus/promci@1a7aaf104b2dcbe64ffd9f98073ca3f33ca616eb # v0.4.2 - uses: ./.github/promci/actions/setup_environment - run: go test --tags=dedupelabels ./... 
- run: GOARCH=386 go test ./cmd/prometheus @@ -65,7 +61,7 @@ jobs: steps: - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 - - uses: prometheus/promci@45166329da36d74895901808f1c8c97efafc7f84 # v0.3.0 + - uses: prometheus/promci@1a7aaf104b2dcbe64ffd9f98073ca3f33ca616eb # v0.4.2 - uses: ./.github/promci/actions/setup_environment with: enable_go: false @@ -122,7 +118,7 @@ jobs: thread: [ 0, 1, 2 ] steps: - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 - - uses: prometheus/promci@45166329da36d74895901808f1c8c97efafc7f84 # v0.3.0 + - uses: prometheus/promci@1a7aaf104b2dcbe64ffd9f98073ca3f33ca616eb # v0.4.2 - uses: ./.github/promci/actions/build with: promu_opts: "-p linux/amd64 -p windows/amd64 -p linux/arm64 -p darwin/amd64 -p darwin/arm64 -p linux/386" @@ -145,7 +141,7 @@ jobs: # should also be updated. steps: - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 - - uses: prometheus/promci@45166329da36d74895901808f1c8c97efafc7f84 # v0.3.0 + - uses: prometheus/promci@1a7aaf104b2dcbe64ffd9f98073ca3f33ca616eb # v0.4.2 - uses: ./.github/promci/actions/build with: parallelism: 12 @@ -207,7 +203,7 @@ jobs: if: github.event_name == 'push' && github.event.ref == 'refs/heads/main' steps: - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 - - uses: prometheus/promci@45166329da36d74895901808f1c8c97efafc7f84 # v0.3.0 + - uses: prometheus/promci@1a7aaf104b2dcbe64ffd9f98073ca3f33ca616eb # v0.4.2 - uses: ./.github/promci/actions/publish_main with: docker_hub_login: ${{ secrets.docker_hub_login }} @@ -221,7 +217,7 @@ jobs: if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v2.') steps: - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 - - uses: prometheus/promci@45166329da36d74895901808f1c8c97efafc7f84 # v0.3.0 + - uses: prometheus/promci@1a7aaf104b2dcbe64ffd9f98073ca3f33ca616eb # v0.4.2 - uses: ./.github/promci/actions/publish_release with: 
docker_hub_login: ${{ secrets.docker_hub_login }} @@ -236,7 +232,7 @@ jobs: steps: - name: Checkout uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 - - uses: prometheus/promci@45166329da36d74895901808f1c8c97efafc7f84 # v0.3.0 + - uses: prometheus/promci@1a7aaf104b2dcbe64ffd9f98073ca3f33ca616eb # v0.4.2 - name: Install nodejs uses: actions/setup-node@1e60f620b9541d16bece96c5465dc8ee9832be0b # v4.0.3 with: From 1b281f074e8736dc2b3f8c34e4c94370c62ec1d0 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 10 Sep 2024 09:05:21 +0200 Subject: [PATCH 023/339] Bump @types/node from 22.5.2 to 22.5.4 in /web/ui (#14873) Bumps [@types/node](https://github.com/DefinitelyTyped/DefinitelyTyped/tree/HEAD/types/node) from 22.5.2 to 22.5.4. - [Release notes](https://github.com/DefinitelyTyped/DefinitelyTyped/releases) - [Commits](https://github.com/DefinitelyTyped/DefinitelyTyped/commits/HEAD/types/node) --- updated-dependencies: - dependency-name: "@types/node" dependency-type: indirect update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- web/ui/package-lock.json | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/web/ui/package-lock.json b/web/ui/package-lock.json index b3b85e6a6..92bcf3dd3 100644 --- a/web/ui/package-lock.json +++ b/web/ui/package-lock.json @@ -2971,11 +2971,10 @@ "license": "MIT" }, "node_modules/@types/node": { - "version": "22.5.2", - "resolved": "https://registry.npmjs.org/@types/node/-/node-22.5.2.tgz", - "integrity": "sha512-acJsPTEqYqulZS/Yp/S3GgeE6GZ0qYODUR8aVr/DkhHQ8l9nd4j5x1/ZJy9/gHrRlFMqkO6i0I3E27Alu4jjPg==", + "version": "22.5.4", + "resolved": "https://registry.npmjs.org/@types/node/-/node-22.5.4.tgz", + "integrity": "sha512-FDuKUJQm/ju9fT/SeX/6+gBzoPzlVCzfzmGkwKvRHQVxi4BntVbyIwf6a4Xn62mrvndLiml6z/UBXIdEVjQLXg==", "dev": true, - "license": "MIT", "dependencies": { "undici-types": "~6.19.2" } From be0c0bd8476dbf6d4a008f0861b528f112cde1d7 Mon Sep 17 00:00:00 2001 From: Julius Volz Date: Tue, 10 Sep 2024 09:06:51 +0200 Subject: [PATCH 024/339] Fix error flood by downgrading OTel dependencies (#14884) Fixes #14859, although we'll have to see about a long-term fix. Hopefully it'll be fixed upstream with a follow-up version. 
Signed-off-by: Julius Volz --- go.mod | 4 ++-- go.sum | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/go.mod b/go.mod index 845e3277b..4a2dd1c77 100644 --- a/go.mod +++ b/go.mod @@ -64,7 +64,7 @@ require ( github.com/vultr/govultr/v2 v2.17.2 go.opentelemetry.io/collector/pdata v1.14.1 go.opentelemetry.io/collector/semconv v0.108.1 - go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.54.0 + go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.53.0 go.opentelemetry.io/otel v1.29.0 go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.29.0 go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.29.0 @@ -81,7 +81,7 @@ require ( golang.org/x/text v0.18.0 golang.org/x/time v0.6.0 golang.org/x/tools v0.24.0 - google.golang.org/api v0.196.0 + google.golang.org/api v0.195.0 google.golang.org/genproto/googleapis/api v0.0.0-20240827150818-7e3bb234dfed google.golang.org/grpc v1.66.0 google.golang.org/protobuf v1.34.2 diff --git a/go.sum b/go.sum index edb5b650b..4fc4f93bd 100644 --- a/go.sum +++ b/go.sum @@ -736,8 +736,8 @@ go.opentelemetry.io/collector/pdata v1.14.1 h1:wXZjtQA7Vy5HFqco+yA95ENyMQU5heBB1 go.opentelemetry.io/collector/pdata v1.14.1/go.mod h1:z1dTjwwtcoXxZx2/nkHysjxMeaxe9pEmYTEr4SMNIx8= go.opentelemetry.io/collector/semconv v0.108.1 h1:Txk9tauUnamZaxS5vlf1O0uZ4VD6nioRBR0nX8L/fU4= go.opentelemetry.io/collector/semconv v0.108.1/go.mod h1:zCJ5njhWpejR+A40kiEoeFm1xq1uzyZwMnRNX6/D82A= -go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.54.0 h1:TT4fX+nBOA/+LUkobKGW1ydGcn+G3vRw9+g5HwCphpk= -go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.54.0/go.mod h1:L7UH0GbB0p47T4Rri3uHjbpCFYrVrwc1I25QhNPiGK8= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.53.0 h1:4K4tsIXefpVJtvA/8srF4V4y0akAoPHkIslgAkjixJA= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.53.0/go.mod h1:jjdQuTGVsXV4vSs+CJ2qYDeDPf9yIJV23qlIzBm73Vg= go.opentelemetry.io/otel v1.29.0 
h1:PdomN/Al4q/lN6iBJEN3AwPvUiHPMlt93c8bqTG5Llw= go.opentelemetry.io/otel v1.29.0/go.mod h1:N/WtXPs1CNCUEx+Agz5uouwCba+i+bJGFicT8SR4NP8= go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.29.0 h1:dIIDULZJpgdiHz5tXrTgKIMLkus6jEFa7x5SOKcyR7E= @@ -1056,8 +1056,8 @@ google.golang.org/api v0.20.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/ google.golang.org/api v0.22.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE= google.golang.org/api v0.24.0/go.mod h1:lIXQywCXRcnZPGlsd8NbLnOjtAoL6em04bJ9+z0MncE= google.golang.org/api v0.28.0/go.mod h1:lIXQywCXRcnZPGlsd8NbLnOjtAoL6em04bJ9+z0MncE= -google.golang.org/api v0.196.0 h1:k/RafYqebaIJBO3+SMnfEGtFVlvp5vSgqTUF54UN/zg= -google.golang.org/api v0.196.0/go.mod h1:g9IL21uGkYgvQ5BZg6BAtoGJQIm8r6EgaAbpNey5wBE= +google.golang.org/api v0.195.0 h1:Ude4N8FvTKnnQJHU48RFI40jOBgIrL8Zqr3/QeST6yU= +google.golang.org/api v0.195.0/go.mod h1:DOGRWuv3P8TU8Lnz7uQc4hyNqrBpMtD9ppW3wBJurgc= google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= google.golang.org/appengine v1.2.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= From 8aab6458b8ac1e519f9ecee32ec33294166b28c9 Mon Sep 17 00:00:00 2001 From: Julien Date: Tue, 10 Sep 2024 12:12:00 +0200 Subject: [PATCH 025/339] Mantine UI: Move /discovered-alertmanagers to /alertmanager-discovery Signed-off-by: Julien --- web/ui/mantine-ui/src/App.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/web/ui/mantine-ui/src/App.tsx b/web/ui/mantine-ui/src/App.tsx index 9cba77702..789c4b97a 100644 --- a/web/ui/mantine-ui/src/App.tsx +++ b/web/ui/mantine-ui/src/App.tsx @@ -142,7 +142,7 @@ const serverStatusPages = [ }, { title: "Alertmanager discovery", - path: "/discovered-alertmanagers", + path: "/alertmanager-discovery", icon: , element: , inAgentMode: false, From be6d443947ee65ae0c67222804495177ce90e2bc Mon Sep 17 00:00:00 2001 From: Julien Date: 
Tue, 10 Sep 2024 12:39:27 +0200 Subject: [PATCH 026/339] Mantine UI: Use actual lookback delta in explain Signed-off-by: Julien --- web/ui/mantine-ui/index.html | 8 +++++--- .../mantine-ui/src/pages/query/ExplainViews/Selector.tsx | 6 ++++-- web/ui/mantine-ui/src/state/settingsSlice.ts | 7 +++++++ web/web.go | 1 + 4 files changed, 17 insertions(+), 5 deletions(-) diff --git a/web/ui/mantine-ui/index.html b/web/ui/mantine-ui/index.html index deb5f7f56..d2723488a 100644 --- a/web/ui/mantine-ui/index.html +++ b/web/ui/mantine-ui/index.html @@ -7,19 +7,21 @@ -```yaml - metric_relabel_configs: - - source_labels: - - quantile - target_label: quantile - regex: (\d+)\.0+ - - source_labels: - - le - - __name__ - target_label: le - regex: (\d+)\.0+;.*_bucket -``` - ## Experimental PromQL functions `--enable-feature=promql-experimental-functions` diff --git a/model/textparse/interface.go b/model/textparse/interface.go index 3b0e9a96e..1a8f3dc48 100644 --- a/model/textparse/interface.go +++ b/model/textparse/interface.go @@ -23,8 +23,7 @@ import ( "github.com/prometheus/prometheus/model/labels" ) -// Parser parses samples from a byte slice of samples in the official -// Prometheus and OpenMetrics text exposition formats. +// Parser parses samples from a byte slice of samples in different exposition formats. type Parser interface { // Series returns the bytes of a series with a simple float64 as a // value, the timestamp if set, and the value of the current sample. @@ -58,6 +57,8 @@ type Parser interface { // Metric writes the labels of the current sample into the passed labels. // It returns the string from which the metric was parsed. + // The values of the "le" labels of classic histograms and "quantile" labels + // of summaries should follow the OpenMetrics formatting rules. 
Metric(l *labels.Labels) string // Exemplar writes the exemplar of the current sample into the passed diff --git a/model/textparse/openmetricsparse.go b/model/textparse/openmetricsparse.go index 13629e66d..8d3ad75c1 100644 --- a/model/textparse/openmetricsparse.go +++ b/model/textparse/openmetricsparse.go @@ -22,6 +22,7 @@ import ( "fmt" "io" "math" + "strconv" "strings" "unicode/utf8" @@ -210,7 +211,7 @@ func (p *OpenMetricsParser) Metric(l *labels.Labels) string { label := unreplace(s[a:b]) c := p.offsets[i+2] - p.start d := p.offsets[i+3] - p.start - value := unreplace(s[c:d]) + value := normalizeFloatsInLabelValues(p.mtype, label, unreplace(s[c:d])) p.builder.Add(label, value) } @@ -724,3 +725,15 @@ func (p *OpenMetricsParser) getFloatValue(t token, after string) (float64, error } return val, nil } + +// normalizeFloatsInLabelValues ensures that values of the "le" labels of classic histograms and "quantile" labels +// of summaries follow OpenMetrics formatting rules. +func normalizeFloatsInLabelValues(t model.MetricType, l, v string) string { + if (t == model.MetricTypeSummary && l == model.QuantileLabel) || (t == model.MetricTypeHistogram && l == model.BucketLabel) { + f, err := strconv.ParseFloat(v, 64) + if err == nil { + return formatOpenMetricsFloat(f) + } + } + return v +} diff --git a/model/textparse/openmetricsparse_test.go b/model/textparse/openmetricsparse_test.go index 467a23771..9c3c679ab 100644 --- a/model/textparse/openmetricsparse_test.go +++ b/model/textparse/openmetricsparse_test.go @@ -74,6 +74,7 @@ foo_total{a="b"} 17.0 1520879607.789 # {id="counter-test"} 5 foo_created{a="b"} 1520872607.123 foo_total{le="c"} 21.0 foo_created{le="c"} 1520872621.123 +foo_total{le="1"} 10.0 # HELP bar Summary with CT at the end, making sure we find CT even if it's multiple lines a far # TYPE bar summary bar_count 17.0 @@ -97,6 +98,7 @@ something_count 18 something_sum 324789.4 something_created 1520430001 something_bucket{le="0.0"} 1 +something_bucket{le="1"} 2 
something_bucket{le="+Inf"} 18 # HELP yum Summary with _created between sum and quantiles # TYPE yum summary @@ -130,7 +132,7 @@ foobar{quantile="0.99"} 150.1` }, { m: `go_gc_duration_seconds{quantile="0"}`, v: 4.9351e-05, - lset: labels.FromStrings("__name__", "go_gc_duration_seconds", "quantile", "0"), + lset: labels.FromStrings("__name__", "go_gc_duration_seconds", "quantile", "0.0"), }, { m: `go_gc_duration_seconds{quantile="0.25"}`, v: 7.424100000000001e-05, @@ -302,6 +304,10 @@ foobar{quantile="0.99"} 150.1` v: 21.0, lset: labels.FromStrings("__name__", "foo_total", "le", "c"), ct: int64p(1520872621123), + }, { + m: `foo_total{le="1"}`, + v: 10.0, + lset: labels.FromStrings("__name__", "foo_total", "le", "1"), }, { m: "bar", help: "Summary with CT at the end, making sure we find CT even if it's multiple lines a far", @@ -385,6 +391,11 @@ foobar{quantile="0.99"} 150.1` v: 1, lset: labels.FromStrings("__name__", "something_bucket", "le", "0.0"), ct: int64p(1520430001000), + }, { + m: `something_bucket{le="1"}`, + v: 2, + lset: labels.FromStrings("__name__", "something_bucket", "le", "1.0"), + ct: int64p(1520430001000), }, { m: `something_bucket{le="+Inf"}`, v: 18, @@ -492,7 +503,7 @@ func TestUTF8OpenMetricsParse(t *testing.T) { }, { m: `{"go.gc_duration_seconds",quantile="0"}`, v: 4.9351e-05, - lset: labels.FromStrings("__name__", "go.gc_duration_seconds", "quantile", "0"), + lset: labels.FromStrings("__name__", "go.gc_duration_seconds", "quantile", "0.0"), ct: int64p(1520872607123), }, { m: `{"go.gc_duration_seconds",quantile="0.25"}`, diff --git a/model/textparse/promparse.go b/model/textparse/promparse.go index 575976927..0ab932c66 100644 --- a/model/textparse/promparse.go +++ b/model/textparse/promparse.go @@ -239,7 +239,8 @@ func (p *PromParser) Metric(l *labels.Labels) string { label := unreplace(s[a:b]) c := p.offsets[i+2] - p.start d := p.offsets[i+3] - p.start - value := unreplace(s[c:d]) + value := normalizeFloatsInLabelValues(p.mtype, label, 
unreplace(s[c:d])) + p.builder.Add(label, value) } diff --git a/model/textparse/promparse_test.go b/model/textparse/promparse_test.go index b726d8847..e8cf66f53 100644 --- a/model/textparse/promparse_test.go +++ b/model/textparse/promparse_test.go @@ -31,6 +31,13 @@ go_gc_duration_seconds{quantile="0.25",} 7.424100000000001e-05 go_gc_duration_seconds{quantile="0.5",a="b"} 8.3835e-05 go_gc_duration_seconds{quantile="0.8", a="b"} 8.3835e-05 go_gc_duration_seconds{ quantile="0.9", a="b"} 8.3835e-05 +# HELP prometheus_http_request_duration_seconds Histogram of latencies for HTTP requests. +# TYPE prometheus_http_request_duration_seconds histogram +prometheus_http_request_duration_seconds_bucket{handler="/",le="1"} 423 +prometheus_http_request_duration_seconds_bucket{handler="/",le="2"} 1423 +prometheus_http_request_duration_seconds_bucket{handler="/",le="+Inf"} 1423 +prometheus_http_request_duration_seconds_sum{handler="/"} 2000 +prometheus_http_request_duration_seconds_count{handler="/"} 1423 # Hrandom comment starting with prefix of HELP # wind_speed{A="2",c="3"} 12345 @@ -50,7 +57,8 @@ some:aggregate:rate5m{a_b="c"} 1 go_goroutines 33 123123 _metric_starting_with_underscore 1 testmetric{_label_starting_with_underscore="foo"} 1 -testmetric{label="\"bar\""} 1` +testmetric{label="\"bar\""} 1 +testmetric{le="10"} 1` input += "\n# HELP metric foo\x00bar" input += "\nnull_byte_metric{a=\"abc\x00\"} 1" @@ -64,7 +72,7 @@ testmetric{label="\"bar\""} 1` }, { m: `go_gc_duration_seconds{quantile="0"}`, v: 4.9351e-05, - lset: labels.FromStrings("__name__", "go_gc_duration_seconds", "quantile", "0"), + lset: labels.FromStrings("__name__", "go_gc_duration_seconds", "quantile", "0.0"), }, { m: `go_gc_duration_seconds{quantile="0.25",}`, v: 7.424100000000001e-05, @@ -81,6 +89,32 @@ testmetric{label="\"bar\""} 1` m: `go_gc_duration_seconds{ quantile="0.9", a="b"}`, v: 8.3835e-05, lset: labels.FromStrings("__name__", "go_gc_duration_seconds", "quantile", "0.9", "a", "b"), + }, { + m: 
"prometheus_http_request_duration_seconds", + help: "Histogram of latencies for HTTP requests.", + }, { + m: "prometheus_http_request_duration_seconds", + typ: model.MetricTypeHistogram, + }, { + m: `prometheus_http_request_duration_seconds_bucket{handler="/",le="1"}`, + v: 423, + lset: labels.FromStrings("__name__", "prometheus_http_request_duration_seconds_bucket", "handler", "/", "le", "1.0"), + }, { + m: `prometheus_http_request_duration_seconds_bucket{handler="/",le="2"}`, + v: 1423, + lset: labels.FromStrings("__name__", "prometheus_http_request_duration_seconds_bucket", "handler", "/", "le", "2.0"), + }, { + m: `prometheus_http_request_duration_seconds_bucket{handler="/",le="+Inf"}`, + v: 1423, + lset: labels.FromStrings("__name__", "prometheus_http_request_duration_seconds_bucket", "handler", "/", "le", "+Inf"), + }, { + m: `prometheus_http_request_duration_seconds_sum{handler="/"}`, + v: 2000, + lset: labels.FromStrings("__name__", "prometheus_http_request_duration_seconds_sum", "handler", "/"), + }, { + m: `prometheus_http_request_duration_seconds_count{handler="/"}`, + v: 1423, + lset: labels.FromStrings("__name__", "prometheus_http_request_duration_seconds_count", "handler", "/"), }, { comment: "# Hrandom comment starting with prefix of HELP", }, { @@ -151,6 +185,10 @@ testmetric{label="\"bar\""} 1` m: "testmetric{label=\"\\\"bar\\\"\"}", v: 1, lset: labels.FromStrings("__name__", "testmetric", "label", `"bar"`), + }, { + m: `testmetric{le="10"}`, + v: 1, + lset: labels.FromStrings("__name__", "testmetric", "le", "10"), }, { m: "metric", help: "foo\x00bar", @@ -197,7 +235,7 @@ func TestUTF8PromParse(t *testing.T) { }, { m: `{"go.gc_duration_seconds",quantile="0"}`, v: 4.9351e-05, - lset: labels.FromStrings("__name__", "go.gc_duration_seconds", "quantile", "0"), + lset: labels.FromStrings("__name__", "go.gc_duration_seconds", "quantile", "0.0"), }, { m: `{"go.gc_duration_seconds",quantile="0.25",}`, v: 7.424100000000001e-05, From 
cf128a04727cc232d8d96a1be8e089c11b7e3c88 Mon Sep 17 00:00:00 2001 From: machine424 Date: Wed, 7 Aug 2024 19:14:59 +0200 Subject: [PATCH 279/339] test(cmd/prometheus): speed up test execution by t.Parallel() when possible turn some loops into subtests to make use of t.Parallel() requires Go 1.22 to make use of https://go.dev/blog/loopvar-preview Signed-off-by: machine424 --- cmd/prometheus/main_test.go | 171 ++++++++++++++++++++----------- cmd/prometheus/main_unix_test.go | 1 + cmd/prometheus/query_log_test.go | 2 + 3 files changed, 116 insertions(+), 58 deletions(-) diff --git a/cmd/prometheus/main_test.go b/cmd/prometheus/main_test.go index d0c2846be..4bd1c71b2 100644 --- a/cmd/prometheus/main_test.go +++ b/cmd/prometheus/main_test.go @@ -125,6 +125,7 @@ func TestFailedStartupExitCode(t *testing.T) { if testing.Short() { t.Skip("skipping test in short mode.") } + t.Parallel() fakeInputFile := "fake-input-file" expectedExitStatus := 2 @@ -211,83 +212,125 @@ func TestWALSegmentSizeBounds(t *testing.T) { if testing.Short() { t.Skip("skipping test in short mode.") } + t.Parallel() - for size, expectedExitStatus := range map[string]int{"9MB": 1, "257MB": 1, "10": 2, "1GB": 1, "12MB": 0} { - prom := exec.Command(promPath, "-test.main", "--storage.tsdb.wal-segment-size="+size, "--web.listen-address=0.0.0.0:0", "--config.file="+promConfig, "--storage.tsdb.path="+filepath.Join(t.TempDir(), "data")) + for _, tc := range []struct { + size string + exitCode int + }{ + { + size: "9MB", + exitCode: 1, + }, + { + size: "257MB", + exitCode: 1, + }, + { + size: "10", + exitCode: 2, + }, + { + size: "1GB", + exitCode: 1, + }, + { + size: "12MB", + exitCode: 0, + }, + } { + t.Run(tc.size, func(t *testing.T) { + t.Parallel() + prom := exec.Command(promPath, "-test.main", "--storage.tsdb.wal-segment-size="+tc.size, "--web.listen-address=0.0.0.0:0", "--config.file="+promConfig, "--storage.tsdb.path="+filepath.Join(t.TempDir(), "data")) - // Log stderr in case of failure. 
- stderr, err := prom.StderrPipe() - require.NoError(t, err) - go func() { - slurp, _ := io.ReadAll(stderr) - t.Log(string(slurp)) - }() + // Log stderr in case of failure. + stderr, err := prom.StderrPipe() + require.NoError(t, err) + go func() { + slurp, _ := io.ReadAll(stderr) + t.Log(string(slurp)) + }() - err = prom.Start() - require.NoError(t, err) + err = prom.Start() + require.NoError(t, err) - if expectedExitStatus == 0 { - done := make(chan error, 1) - go func() { done <- prom.Wait() }() - select { - case err := <-done: - require.Fail(t, "prometheus should be still running: %v", err) - case <-time.After(startupTime): - prom.Process.Kill() - <-done + if tc.exitCode == 0 { + done := make(chan error, 1) + go func() { done <- prom.Wait() }() + select { + case err := <-done: + require.Fail(t, "prometheus should be still running: %v", err) + case <-time.After(startupTime): + prom.Process.Kill() + <-done + } + return } - continue - } - err = prom.Wait() - require.Error(t, err) - var exitError *exec.ExitError - require.ErrorAs(t, err, &exitError) - status := exitError.Sys().(syscall.WaitStatus) - require.Equal(t, expectedExitStatus, status.ExitStatus()) + err = prom.Wait() + require.Error(t, err) + var exitError *exec.ExitError + require.ErrorAs(t, err, &exitError) + status := exitError.Sys().(syscall.WaitStatus) + require.Equal(t, tc.exitCode, status.ExitStatus()) + }) } } func TestMaxBlockChunkSegmentSizeBounds(t *testing.T) { - t.Parallel() - if testing.Short() { t.Skip("skipping test in short mode.") } + t.Parallel() - for size, expectedExitStatus := range map[string]int{"512KB": 1, "1MB": 0} { - prom := exec.Command(promPath, "-test.main", "--storage.tsdb.max-block-chunk-segment-size="+size, "--web.listen-address=0.0.0.0:0", "--config.file="+promConfig, "--storage.tsdb.path="+filepath.Join(t.TempDir(), "data")) + for _, tc := range []struct { + size string + exitCode int + }{ + { + size: "512KB", + exitCode: 1, + }, + { + size: "1MB", + exitCode: 0, + }, + } 
{ + t.Run(tc.size, func(t *testing.T) { + t.Parallel() + prom := exec.Command(promPath, "-test.main", "--storage.tsdb.max-block-chunk-segment-size="+tc.size, "--web.listen-address=0.0.0.0:0", "--config.file="+promConfig, "--storage.tsdb.path="+filepath.Join(t.TempDir(), "data")) - // Log stderr in case of failure. - stderr, err := prom.StderrPipe() - require.NoError(t, err) - go func() { - slurp, _ := io.ReadAll(stderr) - t.Log(string(slurp)) - }() + // Log stderr in case of failure. + stderr, err := prom.StderrPipe() + require.NoError(t, err) + go func() { + slurp, _ := io.ReadAll(stderr) + t.Log(string(slurp)) + }() - err = prom.Start() - require.NoError(t, err) + err = prom.Start() + require.NoError(t, err) - if expectedExitStatus == 0 { - done := make(chan error, 1) - go func() { done <- prom.Wait() }() - select { - case err := <-done: - require.Fail(t, "prometheus should be still running: %v", err) - case <-time.After(startupTime): - prom.Process.Kill() - <-done + if tc.exitCode == 0 { + done := make(chan error, 1) + go func() { done <- prom.Wait() }() + select { + case err := <-done: + require.Fail(t, "prometheus should be still running: %v", err) + case <-time.After(startupTime): + prom.Process.Kill() + <-done + } + return } - continue - } - err = prom.Wait() - require.Error(t, err) - var exitError *exec.ExitError - require.ErrorAs(t, err, &exitError) - status := exitError.Sys().(syscall.WaitStatus) - require.Equal(t, expectedExitStatus, status.ExitStatus()) + err = prom.Wait() + require.Error(t, err) + var exitError *exec.ExitError + require.ErrorAs(t, err, &exitError) + status := exitError.Sys().(syscall.WaitStatus) + require.Equal(t, tc.exitCode, status.ExitStatus()) + }) } } @@ -353,6 +396,8 @@ func getCurrentGaugeValuesFor(t *testing.T, reg prometheus.Gatherer, metricNames } func TestAgentSuccessfulStartup(t *testing.T) { + t.Parallel() + prom := exec.Command(promPath, "-test.main", "--agent", "--web.listen-address=0.0.0.0:0", 
"--config.file="+agentConfig) require.NoError(t, prom.Start()) @@ -371,6 +416,8 @@ func TestAgentSuccessfulStartup(t *testing.T) { } func TestAgentFailedStartupWithServerFlag(t *testing.T) { + t.Parallel() + prom := exec.Command(promPath, "-test.main", "--agent", "--storage.tsdb.path=.", "--web.listen-address=0.0.0.0:0", "--config.file="+promConfig) output := bytes.Buffer{} @@ -398,6 +445,8 @@ func TestAgentFailedStartupWithServerFlag(t *testing.T) { } func TestAgentFailedStartupWithInvalidConfig(t *testing.T) { + t.Parallel() + prom := exec.Command(promPath, "-test.main", "--agent", "--web.listen-address=0.0.0.0:0", "--config.file="+promConfig) require.NoError(t, prom.Start()) @@ -419,6 +468,7 @@ func TestModeSpecificFlags(t *testing.T) { if testing.Short() { t.Skip("skipping test in short mode.") } + t.Parallel() testcases := []struct { mode string @@ -433,6 +483,7 @@ func TestModeSpecificFlags(t *testing.T) { for _, tc := range testcases { t.Run(fmt.Sprintf("%s mode with option %s", tc.mode, tc.arg), func(t *testing.T) { + t.Parallel() args := []string{"-test.main", tc.arg, t.TempDir(), "--web.listen-address=0.0.0.0:0"} if tc.mode == "agent" { @@ -484,6 +535,8 @@ func TestDocumentation(t *testing.T) { if runtime.GOOS == "windows" { t.SkipNow() } + t.Parallel() + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) defer cancel() @@ -508,6 +561,8 @@ func TestDocumentation(t *testing.T) { } func TestRwProtoMsgFlagParser(t *testing.T) { + t.Parallel() + defaultOpts := config.RemoteWriteProtoMsgs{ config.RemoteWriteProtoMsgV1, config.RemoteWriteProtoMsgV2, } diff --git a/cmd/prometheus/main_unix_test.go b/cmd/prometheus/main_unix_test.go index 2011fb123..94eec27e7 100644 --- a/cmd/prometheus/main_unix_test.go +++ b/cmd/prometheus/main_unix_test.go @@ -34,6 +34,7 @@ func TestStartupInterrupt(t *testing.T) { if testing.Short() { t.Skip("skipping test in short mode.") } + t.Parallel() port := fmt.Sprintf(":%d", testutil.RandomUnprivilegedPort(t)) 
diff --git a/cmd/prometheus/query_log_test.go b/cmd/prometheus/query_log_test.go index f05ad9df2..25abf5e96 100644 --- a/cmd/prometheus/query_log_test.go +++ b/cmd/prometheus/query_log_test.go @@ -456,6 +456,7 @@ func TestQueryLog(t *testing.T) { if testing.Short() { t.Skip("skipping test in short mode.") } + t.Parallel() cwd, err := os.Getwd() require.NoError(t, err) @@ -474,6 +475,7 @@ func TestQueryLog(t *testing.T) { } t.Run(p.String(), func(t *testing.T) { + t.Parallel() p.run(t) }) } From 482bb453c6f61c8f4ed4f05c9a1cbdc60cc1f3c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gy=C3=B6rgy=20Krajcsovits?= Date: Mon, 21 Oct 2024 11:03:07 +0200 Subject: [PATCH 280/339] Followup to #15164 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Update test cases Signed-off-by: György Krajcsovits --- model/textparse/nhcbparse_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/model/textparse/nhcbparse_test.go b/model/textparse/nhcbparse_test.go index a52e8637e..7cff21712 100644 --- a/model/textparse/nhcbparse_test.go +++ b/model/textparse/nhcbparse_test.go @@ -131,7 +131,7 @@ foobar{quantile="0.99"} 150.1` }, { m: `go_gc_duration_seconds{quantile="0"}`, v: 4.9351e-05, - lset: labels.FromStrings("__name__", "go_gc_duration_seconds", "quantile", "0"), + lset: labels.FromStrings("__name__", "go_gc_duration_seconds", "quantile", "0.0"), }, { m: `go_gc_duration_seconds{quantile="0.25"}`, v: 7.424100000000001e-05, From 70742a64aa97762a062e3595e0c0cf50788bee6d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gy=C3=B6rgy=20Krajcsovits?= Date: Mon, 21 Oct 2024 11:03:47 +0200 Subject: [PATCH 281/339] Follow up #15178 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Renaming Signed-off-by: György Krajcsovits --- scrape/scrape_test.go | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/scrape/scrape_test.go b/scrape/scrape_test.go index 
fef4d0b7f..35d5f14ac 100644 --- a/scrape/scrape_test.go +++ b/scrape/scrape_test.go @@ -3844,24 +3844,24 @@ metric: < for metricsTextName, metricsText := range metricsTexts { for name, tc := range map[string]struct { - scrapeClassicHistograms bool - convertClassicHistograms bool + alwaysScrapeClassicHistograms bool + convertClassicHistograms bool }{ "convert with scrape": { - scrapeClassicHistograms: true, - convertClassicHistograms: true, + alwaysScrapeClassicHistograms: true, + convertClassicHistograms: true, }, "convert without scrape": { - scrapeClassicHistograms: false, - convertClassicHistograms: true, + alwaysScrapeClassicHistograms: false, + convertClassicHistograms: true, }, "scrape without convert": { - scrapeClassicHistograms: true, - convertClassicHistograms: false, + alwaysScrapeClassicHistograms: true, + convertClassicHistograms: false, }, "neither scrape nor convert": { - scrapeClassicHistograms: false, - convertClassicHistograms: false, + alwaysScrapeClassicHistograms: false, + convertClassicHistograms: false, }, } { var expectedClassicHistCount, expectedNativeHistCount int @@ -3870,16 +3870,16 @@ metric: < expectedNativeHistCount = 1 expectCustomBuckets = false expectedClassicHistCount = 0 - if metricsText.hasClassic && tc.scrapeClassicHistograms { + if metricsText.hasClassic && tc.alwaysScrapeClassicHistograms { expectedClassicHistCount = 1 } } else if metricsText.hasClassic { switch { - case tc.scrapeClassicHistograms && tc.convertClassicHistograms: + case tc.alwaysScrapeClassicHistograms && tc.convertClassicHistograms: expectedClassicHistCount = 1 expectedNativeHistCount = 1 expectCustomBuckets = true - case !tc.scrapeClassicHistograms && tc.convertClassicHistograms: + case !tc.alwaysScrapeClassicHistograms && tc.convertClassicHistograms: expectedClassicHistCount = 0 expectedNativeHistCount = 1 expectCustomBuckets = true @@ -3894,13 +3894,13 @@ metric: < defer simpleStorage.Close() config := &config.ScrapeConfig{ - JobName: "test", - 
SampleLimit: 100, - Scheme: "http", - ScrapeInterval: model.Duration(100 * time.Millisecond), - ScrapeTimeout: model.Duration(100 * time.Millisecond), - ScrapeClassicHistograms: tc.scrapeClassicHistograms, - ConvertClassicHistograms: tc.convertClassicHistograms, + JobName: "test", + SampleLimit: 100, + Scheme: "http", + ScrapeInterval: model.Duration(100 * time.Millisecond), + ScrapeTimeout: model.Duration(100 * time.Millisecond), + AlwaysScrapeClassicHistograms: tc.alwaysScrapeClassicHistograms, + ConvertClassicHistograms: tc.convertClassicHistograms, } scrapeCount := 0 From a23aed5634169ad400947a7ff91955d6a64cb5b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gy=C3=B6rgy=20Krajcsovits?= Date: Mon, 21 Oct 2024 11:10:50 +0200 Subject: [PATCH 282/339] More followup to #15164 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Scrape test for NHCB modified. Signed-off-by: György Krajcsovits --- scrape/scrape_test.go | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/scrape/scrape_test.go b/scrape/scrape_test.go index 35d5f14ac..6187119bf 100644 --- a/scrape/scrape_test.go +++ b/scrape/scrape_test.go @@ -3758,17 +3758,11 @@ metric: < }, } - checkBucketValues := func(expectedCount int, contentType string, series storage.SeriesSet) { + checkBucketValues := func(expectedCount int, series storage.SeriesSet) { labelName := "le" var expectedValues []string if expectedCount > 0 { - if contentType == "application/vnd.google.protobuf" { - // The expected "le" values have the trailing ".0". - expectedValues = []string{"0.005", "0.01", "0.025", "0.05", "0.1", "0.25", "0.5", "1.0", "2.5", "5.0", "10.0", "+Inf"} - } else { - // The expected "le" values do not have the trailing ".0". 
- expectedValues = []string{"0.005", "0.01", "0.025", "0.05", "0.1", "0.25", "0.5", "1", "2.5", "5", "10", "+Inf"} - } + expectedValues = []string{"0.005", "0.01", "0.025", "0.05", "0.1", "0.25", "0.5", "1.0", "2.5", "5.0", "10.0", "+Inf"} } foundLeValues := map[string]bool{} @@ -3984,7 +3978,7 @@ metric: < checkFloatSeries(series, expectedClassicHistCount, 10.) series = q.Select(ctx, false, nil, labels.MustNewMatcher(labels.MatchRegexp, "__name__", fmt.Sprintf("test_histogram_%d_bucket", i))) - checkBucketValues(expectedClassicHistCount, metricsText.contentType, series) + checkBucketValues(expectedClassicHistCount, series) series = q.Select(ctx, false, nil, labels.MustNewMatcher(labels.MatchRegexp, "__name__", fmt.Sprintf("test_histogram_%d", i))) From 4283ae73dcc3439d851c227dc770310f20f24e40 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gy=C3=B6rgy=20Krajcsovits?= Date: Mon, 21 Oct 2024 13:22:58 +0200 Subject: [PATCH 283/339] Rename convert_classic_histograms to convert_classic_histograms_to_nhcb MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On reviewer request. Signed-off-by: György Krajcsovits --- config/config.go | 2 +- scrape/manager.go | 2 +- scrape/scrape.go | 19 +++++++++++-------- scrape/scrape_test.go | 32 ++++++++++++++++---------------- 4 files changed, 29 insertions(+), 26 deletions(-) diff --git a/config/config.go b/config/config.go index 962a0f4a7..657c4fc75 100644 --- a/config/config.go +++ b/config/config.go @@ -656,7 +656,7 @@ type ScrapeConfig struct { // Whether to scrape a classic histogram, even if it is also exposed as a native histogram. AlwaysScrapeClassicHistograms bool `yaml:"always_scrape_classic_histograms,omitempty"` // Whether to convert all scraped classic histograms into a native histogram with custom buckets. 
- ConvertClassicHistograms bool `yaml:"convert_classic_histograms,omitempty"` + ConvertClassicHistogramsToNHCB bool `yaml:"convert_classic_histograms_to_nhcb,omitempty"` // File to which scrape failures are logged. ScrapeFailureLogFile string `yaml:"scrape_failure_log_file,omitempty"` // The HTTP resource path on which to fetch metrics from targets. diff --git a/scrape/manager.go b/scrape/manager.go index 9791db0e8..f3dad2a04 100644 --- a/scrape/manager.go +++ b/scrape/manager.go @@ -178,7 +178,7 @@ func (m *Manager) reload() { m.logger.Error("error reloading target set", "err", "invalid config id:"+setName) continue } - if scrapeConfig.ConvertClassicHistograms && m.opts.EnableCreatedTimestampZeroIngestion { + if scrapeConfig.ConvertClassicHistogramsToNHCB && m.opts.EnableCreatedTimestampZeroIngestion { // TODO(krajorama): fix https://github.com/prometheus/prometheus/issues/15137 m.logger.Error("error reloading target set", "err", "cannot convert classic histograms to native histograms with custom buckets and ingest created timestamp zero samples at the same time due to https://github.com/prometheus/prometheus/issues/15137") continue diff --git a/scrape/scrape.go b/scrape/scrape.go index 290855b3a..c252d57f6 100644 --- a/scrape/scrape.go +++ b/scrape/scrape.go @@ -113,7 +113,7 @@ type scrapeLoopOptions struct { interval time.Duration timeout time.Duration alwaysScrapeClassicHist bool - convertClassicHistograms bool + convertClassicHistToNHCB bool validationScheme model.ValidationScheme fallbackScrapeProtocol string @@ -182,7 +182,7 @@ func newScrapePool(cfg *config.ScrapeConfig, app storage.Appendable, offsetSeed opts.interval, opts.timeout, opts.alwaysScrapeClassicHist, - opts.convertClassicHistograms, + opts.convertClassicHistToNHCB, options.EnableNativeHistogramsIngestion, options.EnableCreatedTimestampZeroIngestion, options.ExtraMetrics, @@ -488,7 +488,7 @@ func (sp *scrapePool) sync(targets []*Target) { mrc = sp.config.MetricRelabelConfigs 
fallbackScrapeProtocol = sp.config.ScrapeFallbackProtocol.HeaderMediaType() alwaysScrapeClassicHist = sp.config.AlwaysScrapeClassicHistograms - convertClassicHistograms = sp.config.ConvertClassicHistograms + convertClassicHistToNHCB = sp.config.ConvertClassicHistogramsToNHCB ) validationScheme := model.UTF8Validation @@ -530,7 +530,7 @@ func (sp *scrapePool) sync(targets []*Target) { interval: interval, timeout: timeout, alwaysScrapeClassicHist: alwaysScrapeClassicHist, - convertClassicHistograms: convertClassicHistograms, + convertClassicHistToNHCB: convertClassicHistToNHCB, validationScheme: validationScheme, fallbackScrapeProtocol: fallbackScrapeProtocol, }) @@ -894,7 +894,7 @@ type scrapeLoop struct { interval time.Duration timeout time.Duration alwaysScrapeClassicHist bool - convertClassicHistograms bool + convertClassicHistToNHCB bool validationScheme model.ValidationScheme fallbackScrapeProtocol string @@ -1196,7 +1196,7 @@ func newScrapeLoop(ctx context.Context, interval time.Duration, timeout time.Duration, alwaysScrapeClassicHist bool, - convertClassicHistograms bool, + convertClassicHistToNHCB bool, enableNativeHistogramIngestion bool, enableCTZeroIngestion bool, reportExtraMetrics bool, @@ -1252,7 +1252,7 @@ func newScrapeLoop(ctx context.Context, interval: interval, timeout: timeout, alwaysScrapeClassicHist: alwaysScrapeClassicHist, - convertClassicHistograms: convertClassicHistograms, + convertClassicHistToNHCB: convertClassicHistToNHCB, enableNativeHistogramIngestion: enableNativeHistogramIngestion, enableCTZeroIngestion: enableCTZeroIngestion, reportExtraMetrics: reportExtraMetrics, @@ -1563,7 +1563,7 @@ func (sl *scrapeLoop) append(app storage.Appender, b []byte, contentType string, ) return } - if sl.convertClassicHistograms { + if sl.convertClassicHistToNHCB { p = textparse.NewNHCBParser(p, sl.symbolTable, sl.alwaysScrapeClassicHist) } if err != nil { @@ -1751,6 +1751,9 @@ loop: } else { ref, err = app.AppendHistogram(ref, lset, t, nil, fh) } + 
if err != nil { + fmt.Printf("Error when appending histogram in scrape loop: %s\n", err) + } } else { ref, err = app.Append(ref, lset, t, val) } diff --git a/scrape/scrape_test.go b/scrape/scrape_test.go index 6187119bf..9a70d7411 100644 --- a/scrape/scrape_test.go +++ b/scrape/scrape_test.go @@ -3478,7 +3478,7 @@ test_summary_count 199 } // Testing whether we can automatically convert scraped classic histograms into native histograms with custom buckets. -func TestConvertClassicHistograms(t *testing.T) { +func TestConvertClassicHistogramsToNHCB(t *testing.T) { genTestCounterText := func(name string, value int, withMetadata bool) string { if withMetadata { return fmt.Sprintf(` @@ -3839,23 +3839,23 @@ metric: < for metricsTextName, metricsText := range metricsTexts { for name, tc := range map[string]struct { alwaysScrapeClassicHistograms bool - convertClassicHistograms bool + convertClassicHistToNHCB bool }{ "convert with scrape": { alwaysScrapeClassicHistograms: true, - convertClassicHistograms: true, + convertClassicHistToNHCB: true, }, "convert without scrape": { alwaysScrapeClassicHistograms: false, - convertClassicHistograms: true, + convertClassicHistToNHCB: true, }, "scrape without convert": { alwaysScrapeClassicHistograms: true, - convertClassicHistograms: false, + convertClassicHistToNHCB: false, }, "neither scrape nor convert": { alwaysScrapeClassicHistograms: false, - convertClassicHistograms: false, + convertClassicHistToNHCB: false, }, } { var expectedClassicHistCount, expectedNativeHistCount int @@ -3869,15 +3869,15 @@ metric: < } } else if metricsText.hasClassic { switch { - case tc.alwaysScrapeClassicHistograms && tc.convertClassicHistograms: + case tc.alwaysScrapeClassicHistograms && tc.convertClassicHistToNHCB: expectedClassicHistCount = 1 expectedNativeHistCount = 1 expectCustomBuckets = true - case !tc.alwaysScrapeClassicHistograms && tc.convertClassicHistograms: + case !tc.alwaysScrapeClassicHistograms && tc.convertClassicHistToNHCB: 
expectedClassicHistCount = 0 expectedNativeHistCount = 1 expectCustomBuckets = true - case !tc.convertClassicHistograms: + case !tc.convertClassicHistToNHCB: expectedClassicHistCount = 1 expectedNativeHistCount = 0 } @@ -3888,13 +3888,13 @@ metric: < defer simpleStorage.Close() config := &config.ScrapeConfig{ - JobName: "test", - SampleLimit: 100, - Scheme: "http", - ScrapeInterval: model.Duration(100 * time.Millisecond), - ScrapeTimeout: model.Duration(100 * time.Millisecond), - AlwaysScrapeClassicHistograms: tc.alwaysScrapeClassicHistograms, - ConvertClassicHistograms: tc.convertClassicHistograms, + JobName: "test", + SampleLimit: 100, + Scheme: "http", + ScrapeInterval: model.Duration(100 * time.Millisecond), + ScrapeTimeout: model.Duration(100 * time.Millisecond), + AlwaysScrapeClassicHistograms: tc.alwaysScrapeClassicHistograms, + ConvertClassicHistogramsToNHCB: tc.convertClassicHistToNHCB, } scrapeCount := 0 From 5ee0980cd1d50881f9693a85c486f25bca2976da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gy=C3=B6rgy=20Krajcsovits?= Date: Mon, 21 Oct 2024 13:35:33 +0200 Subject: [PATCH 284/339] Add unit test to show that current wrapper is sub-optimal MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit https://github.com/prometheus/prometheus/pull/14978#discussion_r1800755481 Signed-off-by: György Krajcsovits --- model/textparse/nhcbparse_test.go | 175 +++++++++++++++++++++++++++++- 1 file changed, 174 insertions(+), 1 deletion(-) diff --git a/model/textparse/nhcbparse_test.go b/model/textparse/nhcbparse_test.go index 7cff21712..37fcccb9d 100644 --- a/model/textparse/nhcbparse_test.go +++ b/model/textparse/nhcbparse_test.go @@ -14,13 +14,18 @@ package textparse import ( + "bytes" + "encoding/binary" "testing" - "github.com/prometheus/common/model" + "github.com/gogo/protobuf/proto" + "github.com/stretchr/testify/require" + "github.com/prometheus/common/model" "github.com/prometheus/prometheus/model/exemplar" 
"github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" + dto "github.com/prometheus/prometheus/prompb/io/prometheus/client" ) func TestNHCBParserOnOMParser(t *testing.T) { @@ -513,3 +518,171 @@ something_bucket{a="b",le="+Inf"} 9 # {id="something-test"} 2e100 123.000 got := testParse(t, p) requireEntries(t, exp, got) } + +// Verify that the NHCBParser does not parse the NHCB when the exponential is present. +func TestNHCBParserProtoBufParser_NoNHCBWhenExponential(t *testing.T) { + inputBuf := createTestProtoBufHistogram(t) + // Initialize the protobuf parser so that it returns classic histograms as + // well when there's both classic and exponential histograms. + p := NewProtobufParser(inputBuf.Bytes(), true, labels.NewSymbolTable()) + + // Initialize the NHCBParser so that it returns classic histograms as well + // when there's both classic and exponential histograms. + p = NewNHCBParser(p, labels.NewSymbolTable(), true) + + exp := []parsedEntry{ + { + m: "test_histogram", + help: "Test histogram with classic and exponential buckets.", + }, + { + m: "test_histogram", + typ: model.MetricTypeHistogram, + }, + { + m: "test_histogram", + shs: &histogram.Histogram{ + Schema: 3, + Count: 175, + Sum: 0.0008280461746287094, + ZeroThreshold: 2.938735877055719e-39, + ZeroCount: 2, + PositiveSpans: []histogram.Span{{Offset: -161, Length: 1}, {Offset: 8, Length: 3}}, + NegativeSpans: []histogram.Span{{Offset: -162, Length: 1}, {Offset: 23, Length: 4}}, + PositiveBuckets: []int64{1, 2, -1, -1}, + NegativeBuckets: []int64{1, 3, -2, -1, 1}, + }, + lset: labels.FromStrings("__name__", "test_histogram"), + t: int64p(1234568), + }, + { + m: "test_histogram_count", + v: 175, + lset: labels.FromStrings("__name__", "test_histogram_count"), + t: int64p(1234568), + }, + { + m: "test_histogram_sum", + v: 0.0008280461746287094, + lset: labels.FromStrings("__name__", "test_histogram_sum"), + t: int64p(1234568), + }, + { + m: 
"test_histogram_bucket\xffle\xff-0.0004899999999999998", + v: 2, + lset: labels.FromStrings("__name__", "test_histogram_bucket", "le", "-0.0004899999999999998"), + t: int64p(1234568), + }, + { + m: "test_histogram_bucket\xffle\xff-0.0003899999999999998", + v: 4, + lset: labels.FromStrings("__name__", "test_histogram_bucket", "le", "-0.0003899999999999998"), + t: int64p(1234568), + }, + { + m: "test_histogram_bucket\xffle\xff-0.0002899999999999998", + v: 16, + lset: labels.FromStrings("__name__", "test_histogram_bucket", "le", "-0.0002899999999999998"), + t: int64p(1234568), + }, + { + m: "test_histogram_bucket\xffle\xff+Inf", + v: 175, + lset: labels.FromStrings("__name__", "test_histogram_bucket", "le", "+Inf"), + t: int64p(1234568), + }, + { + // TODO(krajorama): optimize: this should not be here. In case there's + // an exponential histogram we should not scrape the classic histogram. + // TSDB will throw this away with storage.errDuplicateSampleForTimestamp + // at Commit(), but it needs to be parsed here after the exponential + // histogram. + m: "test_histogram{}", + shs: &histogram.Histogram{ + Schema: histogram.CustomBucketsSchema, + Count: 175, + Sum: 0.0008280461746287094, + PositiveSpans: []histogram.Span{{Length: 4}}, + PositiveBuckets: []int64{2, 0, 10, 147}, + CustomValues: []float64{-0.0004899999999999998, -0.0003899999999999998, -0.0002899999999999998}, + }, + lset: labels.FromStrings("__name__", "test_histogram"), + t: int64p(1234568), + }, + } + got := testParse(t, p) + requireEntries(t, exp, got) +} + +func createTestProtoBufHistogram(t *testing.T) *bytes.Buffer { + testMetricFamilies := []string{`name: "test_histogram" +help: "Test histogram with classic and exponential buckets." 
+type: HISTOGRAM +metric: < + histogram: < + sample_count: 175 + sample_sum: 0.0008280461746287094 + bucket: < + cumulative_count: 2 + upper_bound: -0.0004899999999999998 + > + bucket: < + cumulative_count: 4 + upper_bound: -0.0003899999999999998 + > + bucket: < + cumulative_count: 16 + upper_bound: -0.0002899999999999998 + > + schema: 3 + zero_threshold: 2.938735877055719e-39 + zero_count: 2 + negative_span: < + offset: -162 + length: 1 + > + negative_span: < + offset: 23 + length: 4 + > + negative_delta: 1 + negative_delta: 3 + negative_delta: -2 + negative_delta: -1 + negative_delta: 1 + positive_span: < + offset: -161 + length: 1 + > + positive_span: < + offset: 8 + length: 3 + > + positive_delta: 1 + positive_delta: 2 + positive_delta: -1 + positive_delta: -1 + > + timestamp_ms: 1234568 +> +`} + + varintBuf := make([]byte, binary.MaxVarintLen32) + buf := &bytes.Buffer{} + + for _, tmf := range testMetricFamilies { + pb := &dto.MetricFamily{} + // From text to proto message. + require.NoError(t, proto.UnmarshalText(tmf, pb)) + // From proto message to binary protobuf. + protoBuf, err := proto.Marshal(pb) + require.NoError(t, err) + + // Write first length, then binary protobuf. 
+ varintLength := binary.PutUvarint(varintBuf, uint64(len(protoBuf))) + buf.Write(varintBuf[:varintLength]) + buf.Write(protoBuf) + } + + return buf +} From eaee6bacc7960e3f00fd057458228fce28dddd21 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gy=C3=B6rgy=20Krajcsovits?= Date: Mon, 21 Oct 2024 13:40:16 +0200 Subject: [PATCH 285/339] Fix failing benchmarks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: György Krajcsovits --- model/textparse/benchmark_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/model/textparse/benchmark_test.go b/model/textparse/benchmark_test.go index 98aadb0ed..bc9c2d1db 100644 --- a/model/textparse/benchmark_test.go +++ b/model/textparse/benchmark_test.go @@ -40,7 +40,7 @@ var newTestParserFns = map[string]newParser{ "omtext": func(b []byte, st *labels.SymbolTable) Parser { return NewOpenMetricsParser(b, st, WithOMParserCTSeriesSkipped()) }, - "omtext_with_nhcb": func(b []byte, st *labels.SymbolTable) Parser { + "nhcb_over_omtext": func(b []byte, st *labels.SymbolTable) Parser { p := NewOpenMetricsParser(b, st, WithOMParserCTSeriesSkipped()) return NewNHCBParser(p, st, false) }, From a6947e1e6da848c2b1f83d5144eb6ac5a8083e5d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gy=C3=B6rgy=20Krajcsovits?= Date: Mon, 21 Oct 2024 13:45:33 +0200 Subject: [PATCH 286/339] Remove omcounterdata.txt as redundant MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: György Krajcsovits --- model/textparse/benchmark_test.go | 2 -- model/textparse/testdata/omcounterdata.txt | 9 --------- 2 files changed, 11 deletions(-) delete mode 100644 model/textparse/testdata/omcounterdata.txt diff --git a/model/textparse/benchmark_test.go b/model/textparse/benchmark_test.go index bc9c2d1db..f6d3a9559 100644 --- a/model/textparse/benchmark_test.go +++ b/model/textparse/benchmark_test.go @@ -86,8 +86,6 @@ func BenchmarkParse(b *testing.B) { // NHCB. 
{dataFile: "omhistogramdata.txt", parser: "omtext"}, // Measure OM parser baseline for histograms. {dataFile: "omhistogramdata.txt", parser: "nhcb_over_omtext"}, // Measure NHCB over OM parser. - {dataFile: "omcounterdata.txt", parser: "omtext"}, // Measure OM parser baseline for counters. - {dataFile: "omcounterdata.txt", parser: "nhcb_over_omtext"}, // Measure NHCB over OM parser. } { var buf []byte dataCase := bcase.dataFile diff --git a/model/textparse/testdata/omcounterdata.txt b/model/textparse/testdata/omcounterdata.txt deleted file mode 100644 index 15459c018..000000000 --- a/model/textparse/testdata/omcounterdata.txt +++ /dev/null @@ -1,9 +0,0 @@ -# HELP rpc_requests Total number of RPC requests received. -# TYPE rpc_requests counter -rpc_requests_total{service="exponential"} 22.0 -rpc_requests_created{service="exponential"} 1.726839813016893e+09 -rpc_requests_total{service="normal"} 15.0 -rpc_requests_created{service="normal"} 1.726839813016717e+09 -rpc_requests_total{service="uniform"} 11.0 -rpc_requests_created{service="uniform"} 1.7268398130168471e+09 -# EOF From 555bd6292a1be32aa546731c6913a7ff19fe8311 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gy=C3=B6rgy=20Krajcsovits?= Date: Mon, 21 Oct 2024 13:48:21 +0200 Subject: [PATCH 287/339] Better docstring on test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: György Krajcsovits --- model/textparse/nhcbparse_test.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/model/textparse/nhcbparse_test.go b/model/textparse/nhcbparse_test.go index 37fcccb9d..80d846646 100644 --- a/model/textparse/nhcbparse_test.go +++ b/model/textparse/nhcbparse_test.go @@ -593,10 +593,10 @@ func TestNHCBParserProtoBufParser_NoNHCBWhenExponential(t *testing.T) { }, { // TODO(krajorama): optimize: this should not be here. In case there's - // an exponential histogram we should not scrape the classic histogram. 
- // TSDB will throw this away with storage.errDuplicateSampleForTimestamp - // at Commit(), but it needs to be parsed here after the exponential - // histogram. + // an exponential histogram we should not convert the classic histogram + // to NHCB. In the end TSDB will throw this away with + // storage.errDuplicateSampleForTimestamp error at Commit(), but it + // is better to avoid this conversion in the first place. m: "test_histogram{}", shs: &histogram.Histogram{ Schema: histogram.CustomBucketsSchema, From bee1eb77206f9973b0b9d2528b6c2b7f0506223f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gy=C3=B6rgy=20Krajcsovits?= Date: Mon, 21 Oct 2024 14:02:32 +0200 Subject: [PATCH 288/339] goimports run MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: György Krajcsovits --- model/textparse/nhcbparse_test.go | 1 + 1 file changed, 1 insertion(+) diff --git a/model/textparse/nhcbparse_test.go b/model/textparse/nhcbparse_test.go index 80d846646..80b65fd22 100644 --- a/model/textparse/nhcbparse_test.go +++ b/model/textparse/nhcbparse_test.go @@ -22,6 +22,7 @@ import ( "github.com/stretchr/testify/require" "github.com/prometheus/common/model" + "github.com/prometheus/prometheus/model/exemplar" "github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" From 25ef4d34839c7cca87fec09d4c616e1ada9dce78 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gy=C3=B6rgy=20Krajcsovits?= Date: Mon, 21 Oct 2024 15:40:48 +0200 Subject: [PATCH 289/339] benchmark, rename parser omtext_with_nhcb MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: György Krajcsovits --- model/textparse/benchmark_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/model/textparse/benchmark_test.go b/model/textparse/benchmark_test.go index f6d3a9559..bd0d5089a 100644 --- a/model/textparse/benchmark_test.go +++ b/model/textparse/benchmark_test.go @@ -40,7 
+40,7 @@ var newTestParserFns = map[string]newParser{ "omtext": func(b []byte, st *labels.SymbolTable) Parser { return NewOpenMetricsParser(b, st, WithOMParserCTSeriesSkipped()) }, - "nhcb_over_omtext": func(b []byte, st *labels.SymbolTable) Parser { + "omtext_with_nhcb": func(b []byte, st *labels.SymbolTable) Parser { p := NewOpenMetricsParser(b, st, WithOMParserCTSeriesSkipped()) return NewNHCBParser(p, st, false) }, @@ -85,7 +85,7 @@ func BenchmarkParse(b *testing.B) { // NHCB. {dataFile: "omhistogramdata.txt", parser: "omtext"}, // Measure OM parser baseline for histograms. - {dataFile: "omhistogramdata.txt", parser: "nhcb_over_omtext"}, // Measure NHCB over OM parser. + {dataFile: "omhistogramdata.txt", parser: "omtext_with_nhcb"}, // Measure NHCB over OM parser. } { var buf []byte dataCase := bcase.dataFile From 877fd2a60e027732c54490049a3a849657d5e08c Mon Sep 17 00:00:00 2001 From: George Krajcsovits Date: Mon, 21 Oct 2024 16:01:34 +0200 Subject: [PATCH 290/339] Update scrape/scrape.go Signed-off-by: George Krajcsovits --- scrape/scrape.go | 3 --- 1 file changed, 3 deletions(-) diff --git a/scrape/scrape.go b/scrape/scrape.go index c252d57f6..f5f02d245 100644 --- a/scrape/scrape.go +++ b/scrape/scrape.go @@ -1751,9 +1751,6 @@ loop: } else { ref, err = app.AppendHistogram(ref, lset, t, nil, fh) } - if err != nil { - fmt.Printf("Error when appending histogram in scrape loop: %s\n", err) - } } else { ref, err = app.Append(ref, lset, t, val) } From d2802c6facf16dee253135f839aff071836b1665 Mon Sep 17 00:00:00 2001 From: Yijie Qin Date: Mon, 21 Oct 2024 19:04:40 -0400 Subject: [PATCH 291/339] api: Add rule group pagination to list rules api (#14017) * Add paginated feature to list rules api Signed-off-by: Yijie Qin * Refactor to simplify code: * Reduce number of variables * Reduce type conversion Signed-off-by: Raphael Silva * Simplify paginated implementation * Remove maxAlerts parameter.
* Reuse existing API responses by using omitempty in some fields Signed-off-by: Raphael Silva * Simplify pagination implementation * Eliminate the need to sort the rule groups. Signed-off-by: Raphael Silva * Fix linting error Signed-off-by: Raphael Silva * Add more unit tests Signed-off-by: Raphael Silva * Update pagination parameters to be consistent with existing parameters Signed-off-by: Raphael Silva * Rename max_rule_groups to max_groups Signed-off-by: Raphael Silva * Refactor to simplify code Signed-off-by: Raphael Silva * Refactor to simplify the calculation of next token Signed-off-by: Raphael Silva * Handle corner case in pagination request Signed-off-by: Raphael Silva * Handle corner cases for pagination of list rules Signed-off-by: Raphael Silva * Update documentation for list rules parameters Signed-off-by: Raphael Silva * Refactor comments Signed-off-by: Raphael Silva * Simplify pagination implementation * Eliminate need for extra structs to store pagination parameters Signed-off-by: Raphael Silva * Update docs/querying/api.md Co-authored-by: Julius Volz Signed-off-by: Raphael Philipe Mendes da Silva * Update web/api/v1/api.go Co-authored-by: Bartlomiej Plotka Signed-off-by: Raphael Philipe Mendes da Silva * Update comment describing the need for next token Signed-off-by: Raphael Silva --------- Signed-off-by: Yijie Qin Signed-off-by: Raphael Silva Signed-off-by: Raphael Philipe Mendes da Silva Co-authored-by: Raphael Silva Co-authored-by: Julius Volz Co-authored-by: Bartlomiej Plotka --- docs/querying/api.md | 2 + web/api/v1/api.go | 71 ++++++++++++- web/api/v1/api_test.go | 220 ++++++++++++++++++++++++++++++++++++++++- 3 files changed, 291 insertions(+), 2 deletions(-) diff --git a/docs/querying/api.md b/docs/querying/api.md index 1095171b2..6b7ae0524 100644 --- a/docs/querying/api.md +++ b/docs/querying/api.md @@ -764,6 +764,8 @@ URL query parameters: - `file[]=`: only return rules with the given filepath. 
If the parameter is repeated, rules with any of the provided filepaths are returned. When the parameter is absent or empty, no filtering is done. - `exclude_alerts=`: only return rules, do not return active alerts. - `match[]=`: only return rules that have configured labels that satisfy the label selectors. If the parameter is repeated, rules that match any of the sets of label selectors are returned. Note that matching is on the labels in the definition of each rule, not on the values after template expansion (for alerting rules). Optional. +- `group_limit=`: The `group_limit` parameter allows you to specify a limit for the number of rule groups that is returned in a single response. If the total number of rule groups exceeds the specified `group_limit` value, the response will include a `groupNextToken` property. You can use the value of this `groupNextToken` property in subsequent requests in the `group_next_token` parameter to paginate over the remaining rule groups. The `groupNextToken` property will not be present in the final response, indicating that you have retrieved all the available rule groups. Please note that there are no guarantees regarding the consistency of the response if the rule groups are being modified during the pagination process. +- `group_next_token`: the pagination token that was returned in the previous request when the `group_limit` property is set. The pagination token is used to iteratively paginate over a large number of rule groups. To use the `group_next_token` parameter, the `group_limit` parameter also needs to be present. If a rule group that coincides with the next token is removed while you are paginating over the rule groups, a response with status code 400 will be returned.
```json $ curl http://localhost:9090/api/v1/rules diff --git a/web/api/v1/api.go b/web/api/v1/api.go index 9fb01f576..b37605f5d 100644 --- a/web/api/v1/api.go +++ b/web/api/v1/api.go @@ -15,6 +15,8 @@ package v1 import ( "context" + "crypto/sha1" + "encoding/hex" "encoding/json" "errors" "fmt" @@ -1371,7 +1373,8 @@ func (api *API) metricMetadata(r *http.Request) apiFuncResult { // RuleDiscovery has info for all rules. type RuleDiscovery struct { - RuleGroups []*RuleGroup `json:"groups"` + RuleGroups []*RuleGroup `json:"groups"` + GroupNextToken string `json:"groupNextToken:omitempty"` } // RuleGroup has info for rules which are part of a group. @@ -1458,8 +1461,23 @@ func (api *API) rules(r *http.Request) apiFuncResult { return invalidParamError(err, "exclude_alerts") } + maxGroups, nextToken, parseErr := parseListRulesPaginationRequest(r) + if parseErr != nil { + return *parseErr + } + rgs := make([]*RuleGroup, 0, len(ruleGroups)) + + foundToken := false + for _, grp := range ruleGroups { + if maxGroups > 0 && nextToken != "" && !foundToken { + if nextToken != getRuleGroupNextToken(grp.File(), grp.Name()) { + continue + } + foundToken = true + } + if len(rgSet) > 0 { if _, ok := rgSet[grp.Name()]; !ok { continue @@ -1504,6 +1522,7 @@ func (api *API) rules(r *http.Request) apiFuncResult { if !excludeAlerts { activeAlerts = rulesAlertsToAPIAlerts(rule.ActiveAlerts()) } + enrichedRule = AlertingRule{ State: rule.State().String(), Name: rule.Name(), @@ -1519,6 +1538,7 @@ func (api *API) rules(r *http.Request) apiFuncResult { LastEvaluation: rule.GetEvaluationTimestamp(), Type: "alerting", } + case *rules.RecordingRule: if !returnRecording { break @@ -1545,9 +1565,20 @@ func (api *API) rules(r *http.Request) apiFuncResult { // If the rule group response has no rules, skip it - this means we filtered all the rules of this group. if len(apiRuleGroup.Rules) > 0 { + if maxGroups > 0 && len(rgs) == int(maxGroups) { + // We've reached the capacity of our page plus one. 
That means that for sure there will be at least one + // rule group in a subsequent request. Therefore a next token is required. + res.GroupNextToken = getRuleGroupNextToken(grp.File(), grp.Name()) + break + } rgs = append(rgs, apiRuleGroup) } } + + if maxGroups > 0 && nextToken != "" && !foundToken { + return invalidParamError(fmt.Errorf("invalid group_next_token '%v'. were rule groups changed?", nextToken), "group_next_token") + } + res.RuleGroups = rgs return apiFuncResult{res, nil, nil, nil} } @@ -1566,6 +1597,44 @@ func parseExcludeAlerts(r *http.Request) (bool, error) { return excludeAlerts, nil } +func parseListRulesPaginationRequest(r *http.Request) (int64, string, *apiFuncResult) { + var ( + parsedMaxGroups int64 = -1 + err error + ) + maxGroups := r.URL.Query().Get("group_limit") + nextToken := r.URL.Query().Get("group_next_token") + + if nextToken != "" && maxGroups == "" { + errResult := invalidParamError(fmt.Errorf("group_limit needs to be present in order to paginate over the groups"), "group_next_token") + return -1, "", &errResult + } + + if maxGroups != "" { + parsedMaxGroups, err = strconv.ParseInt(maxGroups, 10, 32) + if err != nil { + errResult := invalidParamError(fmt.Errorf("group_limit needs to be a valid number: %w", err), "group_limit") + return -1, "", &errResult + } + if parsedMaxGroups <= 0 { + errResult := invalidParamError(fmt.Errorf("group_limit needs to be greater than 0"), "group_limit") + return -1, "", &errResult + } + } + + if parsedMaxGroups > 0 { + return parsedMaxGroups, nextToken, nil + } + + return -1, "", nil +} + +func getRuleGroupNextToken(file, group string) string { + h := sha1.New() + h.Write([]byte(file + ";" + group)) + return hex.EncodeToString(h.Sum(nil)) +} + type prometheusConfig struct { YAML string `json:"yaml"` } diff --git a/web/api/v1/api_test.go b/web/api/v1/api_test.go index 7ac2fe569..35ad4a9ad 100644 --- a/web/api/v1/api_test.go +++ b/web/api/v1/api_test.go @@ -338,7 +338,15 @@ func (m 
*rulesRetrieverMock) CreateRuleGroups() { ShouldRestore: false, Opts: opts, }) - m.ruleGroups = []*rules.Group{group} + group2 := rules.NewGroup(rules.GroupOptions{ + Name: "grp2", + File: "/path/to/file", + Interval: time.Second, + Rules: []rules.Rule{r[0]}, + ShouldRestore: false, + Opts: opts, + }) + m.ruleGroups = []*rules.Group{group, group2} } func (m *rulesRetrieverMock) AlertingRules() []*rules.AlertingRule { @@ -2241,6 +2249,25 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E }, }, }, + { + Name: "grp2", + File: "/path/to/file", + Interval: 1, + Limit: 0, + Rules: []Rule{ + AlertingRule{ + State: "inactive", + Name: "test_metric3", + Query: "absent(test_metric3) != 1", + Duration: 1, + Labels: labels.Labels{}, + Annotations: labels.Labels{}, + Alerts: []*Alert{}, + Health: "ok", + Type: "alerting", + }, + }, + }, }, }, zeroFunc: rulesZeroFunc, @@ -2329,6 +2356,25 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E }, }, }, + { + Name: "grp2", + File: "/path/to/file", + Interval: 1, + Limit: 0, + Rules: []Rule{ + AlertingRule{ + State: "inactive", + Name: "test_metric3", + Query: "absent(test_metric3) != 1", + Duration: 1, + Labels: labels.Labels{}, + Annotations: labels.Labels{}, + Alerts: nil, + Health: "ok", + Type: "alerting", + }, + }, + }, }, }, zeroFunc: rulesZeroFunc, @@ -2410,6 +2456,25 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E }, }, }, + { + Name: "grp2", + File: "/path/to/file", + Interval: 1, + Limit: 0, + Rules: []Rule{ + AlertingRule{ + State: "inactive", + Name: "test_metric3", + Query: "absent(test_metric3) != 1", + Duration: 1, + Labels: labels.Labels{}, + Annotations: labels.Labels{}, + Alerts: []*Alert{}, + Health: "ok", + Type: "alerting", + }, + }, + }, }, }, zeroFunc: rulesZeroFunc, @@ -2681,6 +2746,159 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E }, zeroFunc: rulesZeroFunc, }, + { + 
endpoint: api.rules, + query: url.Values{ + "group_limit": []string{"1"}, + }, + response: &RuleDiscovery{ + GroupNextToken: getRuleGroupNextToken("/path/to/file", "grp2"), + RuleGroups: []*RuleGroup{ + { + Name: "grp", + File: "/path/to/file", + Interval: 1, + Limit: 0, + Rules: []Rule{ + AlertingRule{ + State: "inactive", + Name: "test_metric3", + Query: "absent(test_metric3) != 1", + Duration: 1, + Labels: labels.Labels{}, + Annotations: labels.Labels{}, + Alerts: []*Alert{}, + Health: "ok", + Type: "alerting", + }, + AlertingRule{ + State: "inactive", + Name: "test_metric4", + Query: "up == 1", + Duration: 1, + Labels: labels.Labels{}, + Annotations: labels.Labels{}, + Alerts: []*Alert{}, + Health: "ok", + Type: "alerting", + }, + AlertingRule{ + State: "pending", + Name: "test_metric5", + Query: "vector(1)", + Duration: 1, + Labels: labels.FromStrings("name", "tm5"), + Annotations: labels.Labels{}, + Alerts: []*Alert{ + { + Labels: labels.FromStrings("alertname", "test_metric5", "name", "tm5"), + Annotations: labels.Labels{}, + State: "pending", + Value: "1e+00", + }, + }, + Health: "ok", + Type: "alerting", + }, + AlertingRule{ + State: "inactive", + Name: "test_metric6", + Query: "up == 1", + Duration: 1, + Labels: labels.FromStrings("testlabel", "rule"), + Annotations: labels.Labels{}, + Alerts: []*Alert{}, + Health: "ok", + Type: "alerting", + }, + AlertingRule{ + State: "inactive", + Name: "test_metric7", + Query: "up == 1", + Duration: 1, + Labels: labels.FromStrings("templatedlabel", "{{ $externalURL }}"), + Annotations: labels.Labels{}, + Alerts: []*Alert{}, + Health: "ok", + Type: "alerting", + }, + RecordingRule{ + Name: "recording-rule-1", + Query: "vector(1)", + Labels: labels.Labels{}, + Health: "ok", + Type: "recording", + }, + RecordingRule{ + Name: "recording-rule-2", + Query: "vector(1)", + Labels: labels.FromStrings("testlabel", "rule"), + Health: "ok", + Type: "recording", + }, + }, + }, + }, + }, + zeroFunc: rulesZeroFunc, + }, + { + 
endpoint: api.rules, + query: url.Values{ + "group_limit": []string{"1"}, + "group_next_token": []string{getRuleGroupNextToken("/path/to/file", "grp2")}, + }, + response: &RuleDiscovery{ + RuleGroups: []*RuleGroup{ + { + Name: "grp2", + File: "/path/to/file", + Interval: 1, + Limit: 0, + Rules: []Rule{ + AlertingRule{ + State: "inactive", + Name: "test_metric3", + Query: "absent(test_metric3) != 1", + Duration: 1, + Labels: labels.Labels{}, + Annotations: labels.Labels{}, + Alerts: []*Alert{}, + Health: "ok", + Type: "alerting", + }, + }, + }, + }, + }, + zeroFunc: rulesZeroFunc, + }, + { // invalid pagination request + endpoint: api.rules, + query: url.Values{ + "group_next_token": []string{getRuleGroupNextToken("/path/to/file", "grp2")}, + }, + errType: errorBadData, + zeroFunc: rulesZeroFunc, + }, + { // invalid group_limit + endpoint: api.rules, + query: url.Values{ + "group_limit": []string{"0"}, + "group_next_token": []string{getRuleGroupNextToken("/path/to/file", "grp2")}, + }, + errType: errorBadData, + zeroFunc: rulesZeroFunc, + }, + { // Pagination token is invalid due to changes in the rule groups + endpoint: api.rules, + query: url.Values{ + "group_limit": []string{"1"}, + "group_next_token": []string{getRuleGroupNextToken("/removed/file", "notfound")}, + }, + errType: errorBadData, + zeroFunc: rulesZeroFunc, + }, { endpoint: api.queryExemplars, query: url.Values{ From bb27c6b8966efbf3213f7fad787e4efed3b1c53d Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Tue, 22 Oct 2024 09:31:02 +0100 Subject: [PATCH 292/339] Create release 2.55.0 Signed-off-by: Bryan Boreham --- CHANGELOG.md | 7 ++----- VERSION | 2 +- web/ui/module/codemirror-promql/package.json | 4 ++-- web/ui/module/lezer-promql/package.json | 2 +- web/ui/package-lock.json | 14 +++++++------- web/ui/package.json | 2 +- web/ui/react-app/package.json | 4 ++-- 7 files changed, 16 insertions(+), 19 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0a2b7bf9f..dd5d4bd21 100644 --- 
a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,13 +2,9 @@ ## unreleased -## 2.55.0-rc.1 / 2024-10-16 +## 2.55.0 / 2024-10-22 * [FEATURE] PromQL: Add experimental `info` function. #14495 -* [BUGFIX] PromQL: make sort_by_label stable. #14985 - -## 2.55.0-rc.0 / 2024-09-20 - * [FEATURE] Support UTF-8 characters in label names - feature flag `utf8-names`. #14482, #14880, #14736, #14727 * [FEATURE] Scraping: Add the ability to set custom `http_headers` in config. #14817 * [FEATURE] Scraping: Support feature flag `created-timestamp-zero-ingestion` in OpenMetrics. #14356, #14815 @@ -31,6 +27,7 @@ * [ENHANCEMENT] TSDB: Backward compatibility with upcoming index v3. #14934 * [PERF] TSDB: Query in-order and out-of-order series together. #14354, #14693, #14714, #14831, #14874, #14948, #15120 * [PERF] TSDB: Streamline reading of overlapping out-of-order head chunks. #14729 +* [BUGFIX] PromQL: make sort_by_label stable. #14985 * [BUGFIX] SD: Fix dropping targets (with feature flag `new-service-discovery-manager`). #13147 * [BUGFIX] SD: Stop storing stale targets (with feature flag `new-service-discovery-manager`). #13622 * [BUGFIX] Scraping: exemplars could be dropped in protobuf scraping. 
#14810 diff --git a/VERSION b/VERSION index 2e8119e64..c2576f162 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.55.0-rc.1 +2.55.0 diff --git a/web/ui/module/codemirror-promql/package.json b/web/ui/module/codemirror-promql/package.json index c3ec6a343..888a4c5e5 100644 --- a/web/ui/module/codemirror-promql/package.json +++ b/web/ui/module/codemirror-promql/package.json @@ -1,6 +1,6 @@ { "name": "@prometheus-io/codemirror-promql", - "version": "0.55.0-rc.1", + "version": "0.55.0", "description": "a CodeMirror mode for the PromQL language", "types": "dist/esm/index.d.ts", "module": "dist/esm/index.js", @@ -29,7 +29,7 @@ }, "homepage": "https://github.com/prometheus/prometheus/blob/main/web/ui/module/codemirror-promql/README.md", "dependencies": { - "@prometheus-io/lezer-promql": "0.55.0-rc.1", + "@prometheus-io/lezer-promql": "0.55.0", "lru-cache": "^7.18.3" }, "devDependencies": { diff --git a/web/ui/module/lezer-promql/package.json b/web/ui/module/lezer-promql/package.json index f9306adcb..b234426dd 100644 --- a/web/ui/module/lezer-promql/package.json +++ b/web/ui/module/lezer-promql/package.json @@ -1,6 +1,6 @@ { "name": "@prometheus-io/lezer-promql", - "version": "0.55.0-rc.1", + "version": "0.55.0", "description": "lezer-based PromQL grammar", "main": "dist/index.cjs", "type": "module", diff --git a/web/ui/package-lock.json b/web/ui/package-lock.json index 381458f1b..c62896bc3 100644 --- a/web/ui/package-lock.json +++ b/web/ui/package-lock.json @@ -1,12 +1,12 @@ { "name": "prometheus-io", - "version": "0.55.0-rc.1", + "version": "0.55.0", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "prometheus-io", - "version": "0.55.0-rc.1", + "version": "0.55.0", "workspaces": [ "react-app", "module/*" @@ -30,10 +30,10 @@ }, "module/codemirror-promql": { "name": "@prometheus-io/codemirror-promql", - "version": "0.55.0-rc.1", + "version": "0.55.0", "license": "Apache-2.0", "dependencies": { - "@prometheus-io/lezer-promql": "0.55.0-rc.1", + 
"@prometheus-io/lezer-promql": "0.55.0", "lru-cache": "^7.18.3" }, "devDependencies": { @@ -69,7 +69,7 @@ }, "module/lezer-promql": { "name": "@prometheus-io/lezer-promql", - "version": "0.55.0-rc.1", + "version": "0.55.0", "license": "Apache-2.0", "devDependencies": { "@lezer/generator": "^1.7.1", @@ -19352,7 +19352,7 @@ }, "react-app": { "name": "@prometheus-io/app", - "version": "0.55.0-rc.1", + "version": "0.55.0", "dependencies": { "@codemirror/autocomplete": "^6.17.0", "@codemirror/commands": "^6.6.0", @@ -19370,7 +19370,7 @@ "@lezer/lr": "^1.4.2", "@nexucis/fuzzy": "^0.4.1", "@nexucis/kvsearch": "^0.8.1", - "@prometheus-io/codemirror-promql": "0.55.0-rc.1", + "@prometheus-io/codemirror-promql": "0.55.0", "bootstrap": "^4.6.2", "css.escape": "^1.5.1", "downshift": "^9.0.6", diff --git a/web/ui/package.json b/web/ui/package.json index 9371d795e..135b793ba 100644 --- a/web/ui/package.json +++ b/web/ui/package.json @@ -28,5 +28,5 @@ "ts-jest": "^29.2.2", "typescript": "^4.9.5" }, - "version": "0.55.0-rc.1" + "version": "0.55.0" } diff --git a/web/ui/react-app/package.json b/web/ui/react-app/package.json index d91adb7ed..79cb86b31 100644 --- a/web/ui/react-app/package.json +++ b/web/ui/react-app/package.json @@ -1,6 +1,6 @@ { "name": "@prometheus-io/app", - "version": "0.55.0-rc.1", + "version": "0.55.0", "private": true, "dependencies": { "@codemirror/autocomplete": "^6.17.0", @@ -19,7 +19,7 @@ "@lezer/lr": "^1.4.2", "@nexucis/fuzzy": "^0.4.1", "@nexucis/kvsearch": "^0.8.1", - "@prometheus-io/codemirror-promql": "0.55.0-rc.1", + "@prometheus-io/codemirror-promql": "0.55.0", "bootstrap": "^4.6.2", "css.escape": "^1.5.1", "downshift": "^9.0.6", From eb523a6b29469d2753f56e9a92611a19161d1bc2 Mon Sep 17 00:00:00 2001 From: machine424 Date: Wed, 25 Sep 2024 20:02:52 +0200 Subject: [PATCH 293/339] fix(storage/mergeQuerier): add a reproducer for data race that occurs when one of the queriers alters the passed matchers and propose a fix Signed-off-by: machine424 --- 
storage/merge.go | 8 ++++++++ tsdb/querier_test.go | 26 ++++++++++++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/storage/merge.go b/storage/merge.go index 2424b26ab..b6980fb2f 100644 --- a/storage/merge.go +++ b/storage/merge.go @@ -153,13 +153,21 @@ func (q *mergeGenericQuerier) Select(ctx context.Context, sortSeries bool, hints ) // Schedule all Selects for all queriers we know about. for _, querier := range q.queriers { + // copy the matchers as some queriers may alter the slice. + // See https://github.com/prometheus/prometheus/issues/14723 + // matchersCopy := make([]*labels.Matcher, len(matchers)) + // copy(matchersCopy, matchers) + wg.Add(1) go func(qr genericQuerier) { + // go func(qr genericQuerier, m []*labels.Matcher) { defer wg.Done() // We need to sort for NewMergeSeriesSet to work. + // seriesSetChan <- qr.Select(ctx, true, hints, m...) seriesSetChan <- qr.Select(ctx, true, hints, matchers...) }(querier) + // }(querier, matchersCopy) } go func() { wg.Wait() diff --git a/tsdb/querier_test.go b/tsdb/querier_test.go index 77772937a..c52d6fed9 100644 --- a/tsdb/querier_test.go +++ b/tsdb/querier_test.go @@ -3787,3 +3787,29 @@ func (m mockReaderOfLabels) Series(storage.SeriesRef, *labels.ScratchBuilder, *[ func (m mockReaderOfLabels) Symbols() index.StringIter { panic("Series called") } + +// TestMergeQuerierConcurrentSelectMatchers reproduces the data race bug from +// https://github.com/prometheus/prometheus/issues/14723, when one of the queriers (blockQuerier in this case) +// alters the passed matchers. +func TestMergeQuerierConcurrentSelectMatchers(t *testing.T) { + block, err := OpenBlock(nil, createBlock(t, t.TempDir(), genSeries(1, 1, 0, 1)), nil) + require.NoError(t, err) + p, err := NewBlockQuerier(block, 0, 1) + require.NoError(t, err) + + // A secondary querier is required to enable concurrent select; a blockQuerier is used for simplicity. 
+ s, err := NewBlockQuerier(block, 0, 1) + require.NoError(t, err) + + originalMatchers := []*labels.Matcher{ + labels.MustNewMatcher(labels.MatchRegexp, "baz", ".*"), + labels.MustNewMatcher(labels.MatchEqual, "foo", "bar"), + } + matchers := append([]*labels.Matcher{}, originalMatchers...) + + mergedQuerier := storage.NewMergeQuerier([]storage.Querier{p}, []storage.Querier{s}, storage.ChainedSeriesMerge) + defer mergedQuerier.Close() + mergedQuerier.Select(context.Background(), false, nil, matchers...) + + require.Equal(t, originalMatchers, matchers) +} From cebcdce78a7412c8821e9b1e794f0c2b5e714043 Mon Sep 17 00:00:00 2001 From: machine424 Date: Fri, 27 Sep 2024 16:03:50 +0200 Subject: [PATCH 294/339] fix(storage/mergeQuerier): copy the matchers slice before passing it to queriers as some of them may alter it. Signed-off-by: machine424 --- storage/merge.go | 13 +++++-------- tsdb/querier_test.go | 8 +++++++- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/storage/merge.go b/storage/merge.go index b6980fb2f..a4d0934b1 100644 --- a/storage/merge.go +++ b/storage/merge.go @@ -155,19 +155,16 @@ func (q *mergeGenericQuerier) Select(ctx context.Context, sortSeries bool, hints for _, querier := range q.queriers { // copy the matchers as some queriers may alter the slice. // See https://github.com/prometheus/prometheus/issues/14723 - // matchersCopy := make([]*labels.Matcher, len(matchers)) - // copy(matchersCopy, matchers) + matchersCopy := make([]*labels.Matcher, len(matchers)) + copy(matchersCopy, matchers) wg.Add(1) - go func(qr genericQuerier) { - // go func(qr genericQuerier, m []*labels.Matcher) { + go func(qr genericQuerier, m []*labels.Matcher) { defer wg.Done() // We need to sort for NewMergeSeriesSet to work. - // seriesSetChan <- qr.Select(ctx, true, hints, m...) - seriesSetChan <- qr.Select(ctx, true, hints, matchers...) - }(querier) - // }(querier, matchersCopy) + seriesSetChan <- qr.Select(ctx, true, hints, m...) 
+ }(querier, matchersCopy) } go func() { wg.Wait() diff --git a/tsdb/querier_test.go b/tsdb/querier_test.go index c52d6fed9..aca6c845b 100644 --- a/tsdb/querier_test.go +++ b/tsdb/querier_test.go @@ -3794,6 +3794,9 @@ func (m mockReaderOfLabels) Symbols() index.StringIter { func TestMergeQuerierConcurrentSelectMatchers(t *testing.T) { block, err := OpenBlock(nil, createBlock(t, t.TempDir(), genSeries(1, 1, 0, 1)), nil) require.NoError(t, err) + defer func() { + require.NoError(t, block.Close()) + }() p, err := NewBlockQuerier(block, 0, 1) require.NoError(t, err) @@ -3808,7 +3811,10 @@ func TestMergeQuerierConcurrentSelectMatchers(t *testing.T) { matchers := append([]*labels.Matcher{}, originalMatchers...) mergedQuerier := storage.NewMergeQuerier([]storage.Querier{p}, []storage.Querier{s}, storage.ChainedSeriesMerge) - defer mergedQuerier.Close() + defer func() { + require.NoError(t, mergedQuerier.Close()) + }() + mergedQuerier.Select(context.Background(), false, nil, matchers...) require.Equal(t, originalMatchers, matchers) From 3afcda82befc41b2a7834069593c2a6dc24a6e2c Mon Sep 17 00:00:00 2001 From: alexgreenbank Date: Tue, 22 Oct 2024 14:19:01 +0100 Subject: [PATCH 295/339] docs: add keep_firing_for in alerting rules Signed-off-by: alexgreenbank --- docs/configuration/alerting_rules.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/docs/configuration/alerting_rules.md b/docs/configuration/alerting_rules.md index 4d7c75e49..cd33dba8e 100644 --- a/docs/configuration/alerting_rules.md +++ b/docs/configuration/alerting_rules.md @@ -27,6 +27,7 @@ groups: - alert: HighRequestLatency expr: job:request_latency_seconds:mean5m{job="myjob"} > 0.5 for: 10m + keep_firing_for: 5m labels: severity: page annotations: @@ -40,6 +41,13 @@ the alert continues to be active during each evaluation for 10 minutes before firing the alert. Elements that are active, but not firing yet, are in the pending state. 
Alerting rules without the `for` clause will become active on the first evaluation. +There is also an optional `keep_firing_for` clause that tells Prometheus to keep +this alert firing for the specified duration after the firing condition was last met. +This can be used to prevent situations such as flapping alerts, false resolutions +due to lack of data, etc. Alerting rules without the `keep_firing_for` clause +will deactivate on the first evaluation where the condition is not met (assuming +any optional `for` duration described above has been satisfied). + The `labels` clause allows specifying a set of additional labels to be attached to the alert. Any existing conflicting labels will be overwritten. The label values can be templated. From 1b4e7f74e6e14832090edf1cce9690bcfbc7b95c Mon Sep 17 00:00:00 2001 From: George Krajcsovits Date: Tue, 22 Oct 2024 15:24:36 +0200 Subject: [PATCH 296/339] feat(tools): add debug printouts to rules unit testing (#15196) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * promtool: Add debug flag for rule tests This makes it print out the tsdb state (both input_series and rules that are run) at the end of a test, making reasoning about tests much easier. 
Signed-off-by: David Leadbeater * Reuse generated test name from junit testing Signed-off-by: György Krajcsovits --------- Signed-off-by: David Leadbeater Signed-off-by: György Krajcsovits Co-authored-by: David Leadbeater --- cmd/promtool/main.go | 2 ++ cmd/promtool/unittest.go | 47 +++++++++++++++++++++++++++++------ cmd/promtool/unittest_test.go | 6 ++--- docs/command-line/promtool.md | 1 + 4 files changed, 46 insertions(+), 10 deletions(-) diff --git a/cmd/promtool/main.go b/cmd/promtool/main.go index 26618855c..49676ee5c 100644 --- a/cmd/promtool/main.go +++ b/cmd/promtool/main.go @@ -217,6 +217,7 @@ func main() { "test-rule-file", "The unit test file.", ).Required().ExistingFiles() + testRulesDebug := testRulesCmd.Flag("debug", "Enable unit test debugging.").Default("false").Bool() testRulesDiff := testRulesCmd.Flag("diff", "[Experimental] Print colored differential output between expected & received output.").Default("false").Bool() defaultDBPath := "data/" @@ -392,6 +393,7 @@ func main() { }, *testRulesRun, *testRulesDiff, + *testRulesDebug, *testRulesFiles...), ) diff --git a/cmd/promtool/unittest.go b/cmd/promtool/unittest.go index 667e74806..78dacdc56 100644 --- a/cmd/promtool/unittest.go +++ b/cmd/promtool/unittest.go @@ -46,11 +46,11 @@ import ( // RulesUnitTest does unit testing of rules based on the unit testing files provided. // More info about the file format can be found in the docs. -func RulesUnitTest(queryOpts promqltest.LazyLoaderOpts, runStrings []string, diffFlag bool, files ...string) int { - return RulesUnitTestResult(io.Discard, queryOpts, runStrings, diffFlag, files...) +func RulesUnitTest(queryOpts promqltest.LazyLoaderOpts, runStrings []string, diffFlag, debug bool, files ...string) int { + return RulesUnitTestResult(io.Discard, queryOpts, runStrings, diffFlag, debug, files...) 
} -func RulesUnitTestResult(results io.Writer, queryOpts promqltest.LazyLoaderOpts, runStrings []string, diffFlag bool, files ...string) int { +func RulesUnitTestResult(results io.Writer, queryOpts promqltest.LazyLoaderOpts, runStrings []string, diffFlag, debug bool, files ...string) int { failed := false junit := &junitxml.JUnitXML{} @@ -60,7 +60,7 @@ func RulesUnitTestResult(results io.Writer, queryOpts promqltest.LazyLoaderOpts, } for _, f := range files { - if errs := ruleUnitTest(f, queryOpts, run, diffFlag, junit.Suite(f)); errs != nil { + if errs := ruleUnitTest(f, queryOpts, run, diffFlag, debug, junit.Suite(f)); errs != nil { fmt.Fprintln(os.Stderr, " FAILED:") for _, e := range errs { fmt.Fprintln(os.Stderr, e.Error()) @@ -82,7 +82,7 @@ func RulesUnitTestResult(results io.Writer, queryOpts promqltest.LazyLoaderOpts, return successExitCode } -func ruleUnitTest(filename string, queryOpts promqltest.LazyLoaderOpts, run *regexp.Regexp, diffFlag bool, ts *junitxml.TestSuite) []error { +func ruleUnitTest(filename string, queryOpts promqltest.LazyLoaderOpts, run *regexp.Regexp, diffFlag, debug bool, ts *junitxml.TestSuite) []error { b, err := os.ReadFile(filename) if err != nil { ts.Abort(err) @@ -131,7 +131,7 @@ func ruleUnitTest(filename string, queryOpts promqltest.LazyLoaderOpts, run *reg if t.Interval == 0 { t.Interval = unitTestInp.EvaluationInterval } - ers := t.test(evalInterval, groupOrderMap, queryOpts, diffFlag, unitTestInp.RuleFiles...) + ers := t.test(testname, evalInterval, groupOrderMap, queryOpts, diffFlag, debug, unitTestInp.RuleFiles...) if ers != nil { for _, e := range ers { tc.Fail(e.Error()) @@ -198,7 +198,14 @@ type testGroup struct { } // test performs the unit tests. 
-func (tg *testGroup) test(evalInterval time.Duration, groupOrderMap map[string]int, queryOpts promqltest.LazyLoaderOpts, diffFlag bool, ruleFiles ...string) (outErr []error) { +func (tg *testGroup) test(testname string, evalInterval time.Duration, groupOrderMap map[string]int, queryOpts promqltest.LazyLoaderOpts, diffFlag, debug bool, ruleFiles ...string) (outErr []error) { + if debug { + testStart := time.Now() + fmt.Printf("DEBUG: Starting test %s\n", testname) + defer func() { + fmt.Printf("DEBUG: Test %s finished, took %v\n", testname, time.Since(testStart)) + }() + } // Setup testing suite. suite, err := promqltest.NewLazyLoader(tg.seriesLoadingString(), queryOpts) if err != nil { @@ -482,6 +489,32 @@ Outer: } } + if debug { + ts := tg.maxEvalTime() + // Potentially a test can be specified at a time with fractional seconds, + // which PromQL cannot represent, so round up to the next whole second. + ts = (ts + time.Second).Truncate(time.Second) + expr := fmt.Sprintf(`{__name__=~".+"}[%v]`, ts) + q, err := suite.QueryEngine().NewInstantQuery(context.Background(), suite.Queryable(), nil, expr, mint.Add(ts)) + if err != nil { + fmt.Printf("DEBUG: Failed querying, expr: %q, err: %v\n", expr, err) + return errs + } + res := q.Exec(suite.Context()) + if res.Err != nil { + fmt.Printf("DEBUG: Failed query exec, expr: %q, err: %v\n", expr, res.Err) + return errs + } + switch v := res.Value.(type) { + case promql.Matrix: + fmt.Printf("DEBUG: Dump of all data (input_series and rules) at %v:\n", ts) + fmt.Println(v.String()) + default: + fmt.Printf("DEBUG: Got unexpected type %T\n", v) + return errs + } + } + if len(errs) > 0 { return errs } diff --git a/cmd/promtool/unittest_test.go b/cmd/promtool/unittest_test.go index 9bbac28e9..9b73dcdc1 100644 --- a/cmd/promtool/unittest_test.go +++ b/cmd/promtool/unittest_test.go @@ -141,14 +141,14 @@ func TestRulesUnitTest(t *testing.T) { reuseCount[tt.want] += len(tt.args.files) } t.Run(tt.name, func(t *testing.T) { - if got := 
RulesUnitTest(tt.queryOpts, nil, false, tt.args.files...); got != tt.want { + if got := RulesUnitTest(tt.queryOpts, nil, false, false, tt.args.files...); got != tt.want { t.Errorf("RulesUnitTest() = %v, want %v", got, tt.want) } }) } t.Run("Junit xml output ", func(t *testing.T) { var buf bytes.Buffer - if got := RulesUnitTestResult(&buf, promqltest.LazyLoaderOpts{}, nil, false, reuseFiles...); got != 1 { + if got := RulesUnitTestResult(&buf, promqltest.LazyLoaderOpts{}, nil, false, false, reuseFiles...); got != 1 { t.Errorf("RulesUnitTestResults() = %v, want 1", got) } var test junitxml.JUnitXML @@ -230,7 +230,7 @@ func TestRulesUnitTestRun(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - got := RulesUnitTest(tt.queryOpts, tt.args.run, false, tt.args.files...) + got := RulesUnitTest(tt.queryOpts, tt.args.run, false, false, tt.args.files...) require.Equal(t, tt.want, got) }) } diff --git a/docs/command-line/promtool.md b/docs/command-line/promtool.md index 996a99655..5e2a8f6bb 100644 --- a/docs/command-line/promtool.md +++ b/docs/command-line/promtool.md @@ -462,6 +462,7 @@ Unit tests for rules. | Flag | Description | Default | | --- | --- | --- | | --run ... | If set, will only run test groups whose names match the regular expression. Can be specified multiple times. | | +| --debug | Enable unit test debugging. | `false` | | --diff | [Experimental] Print colored differential output between expected & received output. 
| `false` | From aa81210c8b90cff2c3e72d1e9a3128115889b276 Mon Sep 17 00:00:00 2001 From: George Krajcsovits Date: Tue, 22 Oct 2024 18:49:25 +0200 Subject: [PATCH 297/339] NHCB scrape: refactor state handling and speed up scrape test (#15193) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * NHCB: scrape use state field and not booleans From comment https://github.com/prometheus/prometheus/pull/14978#discussion_r1800898724 Also make compareLabels read only and move storeLabels to the first processed classic histogram series. Signed-off-by: György Krajcsovits * Speed up TestConvertClassicHistogramsToNHCB 3x Reduce the startup time and timeouts Signed-off-by: György Krajcsovits * lint fix Signed-off-by: György Krajcsovits --------- Signed-off-by: György Krajcsovits --- model/textparse/nhcbparse.go | 69 ++++++++++++++++++------------------ scrape/scrape_test.go | 6 ++-- 2 files changed, 37 insertions(+), 38 deletions(-) diff --git a/model/textparse/nhcbparse.go b/model/textparse/nhcbparse.go index 7c2db6990..22384f1ec 100644 --- a/model/textparse/nhcbparse.go +++ b/model/textparse/nhcbparse.go @@ -28,6 +28,14 @@ import ( "github.com/prometheus/prometheus/util/convertnhcb" ) +type collectionState int + +const ( + stateStart collectionState = iota + stateCollecting + stateEmitting +) + // The NHCBParser wraps a Parser and converts classic histograms to native // histograms with custom buckets. // @@ -48,6 +56,9 @@ type NHCBParser struct { // Labels builder. builder labels.ScratchBuilder + // State of the parser. + state collectionState + // Caches the values from the underlying parser. // For Series and Histogram. bytes []byte @@ -64,9 +75,9 @@ type NHCBParser struct { // Caches the entry itself if we are inserting a converted NHCB // halfway through. - entry Entry - err error - justInsertedNHCB bool + entry Entry + err error + // Caches the values and metric for the inserted converted NHCB. 
bytesNHCB []byte hNHCB *histogram.Histogram @@ -77,11 +88,10 @@ type NHCBParser struct { // Collates values from the classic histogram series to build // the converted histogram later. - tempLsetNHCB labels.Labels - tempNHCB convertnhcb.TempHistogram - tempExemplars []exemplar.Exemplar - tempExemplarCount int - isCollationInProgress bool + tempLsetNHCB labels.Labels + tempNHCB convertnhcb.TempHistogram + tempExemplars []exemplar.Exemplar + tempExemplarCount int // Remembers the last base histogram metric name (assuming it's // a classic histogram) so we can tell if the next float series @@ -105,7 +115,7 @@ func (p *NHCBParser) Series() ([]byte, *int64, float64) { } func (p *NHCBParser) Histogram() ([]byte, *int64, *histogram.Histogram, *histogram.FloatHistogram) { - if p.justInsertedNHCB { + if p.state == stateEmitting { return p.bytesNHCB, p.ts, p.hNHCB, p.fhNHCB } return p.bytes, p.ts, p.h, p.fh @@ -128,7 +138,7 @@ func (p *NHCBParser) Comment() []byte { } func (p *NHCBParser) Metric(l *labels.Labels) string { - if p.justInsertedNHCB { + if p.state == stateEmitting { *l = p.lsetNHCB return p.metricStringNHCB } @@ -137,7 +147,7 @@ func (p *NHCBParser) Metric(l *labels.Labels) string { } func (p *NHCBParser) Exemplar(ex *exemplar.Exemplar) bool { - if p.justInsertedNHCB { + if p.state == stateEmitting { if len(p.exemplars) == 0 { return false } @@ -153,8 +163,8 @@ func (p *NHCBParser) CreatedTimestamp() *int64 { } func (p *NHCBParser) Next() (Entry, error) { - if p.justInsertedNHCB { - p.justInsertedNHCB = false + if p.state == stateEmitting { + p.state = stateStart if p.entry == EntrySeries { isNHCB := p.handleClassicHistogramSeries(p.lset) if isNHCB && !p.keepClassicHistograms { @@ -202,34 +212,21 @@ func (p *NHCBParser) Next() (Entry, error) { } // Return true if labels have changed and we should emit the NHCB. -// Update the stored labels if the labels have changed. func (p *NHCBParser) compareLabels() bool { - // Collection not in progress. 
- if p.lastHistogramName == "" { - if p.typ == model.MetricTypeHistogram { - p.storeBaseLabels() - } + if p.state != stateCollecting { return false } if p.typ != model.MetricTypeHistogram { - // Different metric type, emit the NHCB. - p.lastHistogramName = "" + // Different metric type. return true } - if p.lastHistogramName != convertnhcb.GetHistogramMetricBaseName(p.lset.Get(labels.MetricName)) { // Different metric name. - p.storeBaseLabels() return true } nextHash, _ := p.lset.HashWithoutLabels(p.hBuffer, labels.BucketLabel) - if p.lastHistogramLabelsHash != nextHash { - // Different label values. - p.storeBaseLabels() - return true - } - - return false + // Different label values. + return p.lastHistogramLabelsHash != nextHash } // Save the label set of the classic histogram without suffix and bucket `le` label. @@ -275,7 +272,10 @@ func (p *NHCBParser) handleClassicHistogramSeries(lset labels.Labels) bool { } func (p *NHCBParser) processClassicHistogramSeries(lset labels.Labels, suffix string, updateHist func(*convertnhcb.TempHistogram)) { - p.isCollationInProgress = true + if p.state != stateCollecting { + p.storeBaseLabels() + } + p.state = stateCollecting p.tempLsetNHCB = convertnhcb.GetHistogramMetricBase(lset, suffix) p.storeExemplars() updateHist(&p.tempNHCB) @@ -308,9 +308,9 @@ func (p *NHCBParser) swapExemplars() { } // processNHCB converts the collated classic histogram series to NHCB and caches the info -// to be returned to callers. +// to be returned to callers. Returns true if the conversion was successful. 
func (p *NHCBParser) processNHCB() bool { - if !p.isCollationInProgress { + if p.state != stateCollecting { return false } ub := make([]float64, 0, len(p.tempNHCB.BucketCounts)) @@ -338,7 +338,6 @@ func (p *NHCBParser) processNHCB() bool { p.lsetNHCB = p.tempLsetNHCB p.swapExemplars() p.tempNHCB = convertnhcb.NewTempHistogram() - p.isCollationInProgress = false - p.justInsertedNHCB = true + p.state = stateEmitting return true } diff --git a/scrape/scrape_test.go b/scrape/scrape_test.go index 9a70d7411..da964a230 100644 --- a/scrape/scrape_test.go +++ b/scrape/scrape_test.go @@ -3891,8 +3891,8 @@ metric: < JobName: "test", SampleLimit: 100, Scheme: "http", - ScrapeInterval: model.Duration(100 * time.Millisecond), - ScrapeTimeout: model.Duration(100 * time.Millisecond), + ScrapeInterval: model.Duration(50 * time.Millisecond), + ScrapeTimeout: model.Duration(25 * time.Millisecond), AlwaysScrapeClassicHistograms: tc.alwaysScrapeClassicHistograms, ConvertClassicHistogramsToNHCB: tc.convertClassicHistToNHCB, } @@ -3931,7 +3931,7 @@ metric: < })) defer ts.Close() - sp, err := newScrapePool(config, simpleStorage, 0, nil, nil, &Options{EnableNativeHistogramsIngestion: true}, newTestScrapeMetrics(t)) + sp, err := newScrapePool(config, simpleStorage, 0, nil, nil, &Options{DiscoveryReloadInterval: model.Duration(10 * time.Millisecond), EnableNativeHistogramsIngestion: true}, newTestScrapeMetrics(t)) require.NoError(t, err) defer sp.stop() From cccbe72514e4f5a86b490f9a45edecb401f091fe Mon Sep 17 00:00:00 2001 From: Vanshika <102902652+Vanshikav123@users.noreply.github.com> Date: Wed, 23 Oct 2024 21:04:28 +0530 Subject: [PATCH 298/339] TSDB: Fix some edge cases when OOO is enabled (#14710) Fix some edge cases when OOO is enabled Signed-off-by: Vanshikav123 Signed-off-by: Vanshika <102902652+Vanshikav123@users.noreply.github.com> Signed-off-by: Jesus Vazquez Co-authored-by: Jesus Vazquez --- CHANGELOG.md | 92 +++++++++++++- cmd/prometheus/main.go | 3 + rules/fixtures/rules1.yaml 
| 5 + rules/group.go | 4 + rules/manager_test.go | 47 ++++++++ scrape/helpers_test.go | 4 + scrape/scrape.go | 4 +- scrape/scrape_test.go | 173 ++++++++++++++++++++++++++- storage/fanout.go | 10 ++ storage/interface.go | 8 ++ storage/remote/write.go | 5 + storage/remote/write_handler_test.go | 4 + tsdb/agent/db.go | 5 + tsdb/head_append.go | 20 +++- util/teststorage/storage.go | 14 ++- 15 files changed, 388 insertions(+), 10 deletions(-) create mode 100644 rules/fixtures/rules1.yaml diff --git a/CHANGELOG.md b/CHANGELOG.md index f1321829e..72d9f7a11 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,8 +3,9 @@ ## unreleased * [CHANGE] Scraping: Remove implicit fallback to the Prometheus text format in case of invalid/missing Content-Type and fail the scrape instead. Add ability to specify a `fallback_scrape_protocol` in the scrape config. #15136 -* [BUGFIX] PromQL: Fix stddev+stdvar aggregations to always ignore native histograms. #14941 -* [BUGFIX] PromQL: Fix stddev+stdvar aggregations to treat Infinity consistently. #14941 +* [ENHANCEMENT] Scraping, rules: handle targets reappearing, or rules moving group, when out-of-order is enabled. #14710 +- [BUGFIX] PromQL: Fix stddev+stdvar aggregations to always ignore native histograms. #14941 +- [BUGFIX] PromQL: Fix stddev+stdvar aggregations to treat Infinity consistently. #14941 ## 3.0.0-beta.1 / 2024-10-09 @@ -20,7 +21,6 @@ * [ENHANCEMENT] PromQL: Introduce exponential interpolation for native histograms. #14677 * [ENHANCEMENT] TSDB: Add support for ingestion of out-of-order native histogram samples. #14850, #14546 * [ENHANCEMENT] Alerts: remove metrics for removed Alertmanagers. #13909 -* [ENHANCEMENT] Scraping: support Created-Timestamp feature on native histograms. #14694 * [ENHANCEMENT] Kubernetes SD: Support sidecar containers in endpoint discovery. #14929 * [ENHANCEMENT] Consul SD: Support catalog filters. #11224 * [PERF] TSDB: Parallelize deletion of postings after head compaction. 
#14975 @@ -41,6 +41,10 @@ Release 3.0.0-beta.0 includes new features such as a brand new UI and UTF-8 supp As is traditional with a beta release, we do **not** recommend users install 3.0.0-beta on critical production systems, but we do want everyone to test it out and find bugs. +<<<<<<< HEAD +<<<<<<< HEAD +======= +>>>>>>> b10c3696c (Revert "updated changelog") * [CHANGE] UI: The old web UI has been replaced by a completely new one that is less cluttered and adds a few new features (PromLens-style tree view, better metrics explorer, "Explain" tab). However, it is still missing some features of the old UI (notably, exemplar display and heatmaps). To switch back to the old UI, you can use the feature flag `--enable-feature=old-ui` for the time being. #14872 * [CHANGE] PromQL: Range selectors and the lookback delta are now left-open, i.e. a sample coinciding with the lower time limit is excluded rather than included. #13904 * [CHANGE] Kubernetes SD: Remove support for `discovery.k8s.io/v1beta1` API version of EndpointSlice. This version is no longer served as of Kubernetes v1.25. #14365 @@ -52,6 +56,7 @@ As is traditional with a beta release, we do **not** recommend users install 3.0 * [CHANGE] Remove deprecated `remote-write-receiver`,`promql-at-modifier`, and `promql-negative-offset` feature flags. #13456, #14526 * [CHANGE] Remove deprecated `storage.tsdb.allow-overlapping-blocks`, `alertmanager.timeout`, and `storage.tsdb.retention` flags. #14640, #14643 * [ENHANCEMENT] Move AM discovery page from "Monitoring status" to "Server status". #14875 +<<<<<<< HEAD * [FEATURE] Support config reload automatically - feature flag `auto-reload-config`. #14769 * [BUGFIX] Scrape: Do not override target parameter labels with config params. #11029 @@ -85,6 +90,87 @@ As is traditional with a beta release, we do **not** recommend users install 3.0 * [BUGFIX] Remote-Write: Return 4xx not 5xx when timeseries has duplicate label. 
#14716 * [BUGFIX] Experimental Native Histograms: many fixes for incorrect results, panics, warnings. #14513, #14575, #14598, #14609, #14611, #14771, #14821 * [BUGFIX] TSDB: Only count unknown record types in `record_decode_failures_total` metric. #14042 +======= +- [CHANGE] UI: The old web UI has been replaced by a completely new one that is less cluttered and adds a few new features (PromLens-style tree view, better metrics explorer, "Explain" tab). However, it is still missing some features of the old UI (notably, exemplar display and heatmaps). To switch back to the old UI, you can use the feature flag `--enable-feature=old-ui` for the time being. #14872 +- [CHANGE] PromQL: Range selectors and the lookback delta are now left-open, i.e. a sample coinciding with the lower time limit is excluded rather than included. #13904 +- [CHANGE] Kubernetes SD: Remove support for `discovery.k8s.io/v1beta1` API version of EndpointSlice. This version is no longer served as of Kubernetes v1.25. #14365 +- [CHANGE] Kubernetes SD: Remove support for `networking.k8s.io/v1beta1` API version of Ingress. This version is no longer served as of Kubernetes v1.22. #14365 +- [CHANGE] UTF-8: Enable UTF-8 support by default. Prometheus now allows all UTF-8 characters in metric and label names. The corresponding `utf8-name` feature flag has been removed. #14705 +- [CHANGE] Console: Remove example files for the console feature. Users can continue using the console feature by supplying their own JavaScript and templates. #14807 +- [CHANGE] SD: Enable the new service discovery manager by default. This SD manager does not restart unchanged discoveries upon reloading. This makes reloads faster and reduces pressure on service discoveries' sources. The corresponding `new-service-discovery-manager` feature flag has been removed. #14770 +- [CHANGE] Agent mode has been promoted to stable. The feature flag `agent` has been removed. 
To run Prometheus in Agent mode, use the new `--agent` cmdline arg instead. #14747 +- [CHANGE] Remove deprecated `remote-write-receiver`,`promql-at-modifier`, and `promql-negative-offset` feature flags. #13456, #14526 +- [CHANGE] Remove deprecated `storage.tsdb.allow-overlapping-blocks`, `alertmanager.timeout`, and `storage.tsdb.retention` flags. #14640, #14643 +- [ENHANCEMENT] Move AM discovery page from "Monitoring status" to "Server status". #14875 +- [BUGFIX] Scrape: Do not override target parameter labels with config params. #11029 + +## 2.55.0-rc.0 / 2024-09-20 + +- [FEATURE] Support UTF-8 characters in label names - feature flag `utf8-names`. #14482, #14880, #14736, #14727 +- [FEATURE] Support config reload automatically - feature flag `auto-reload-config`. #14769 +- [FEATURE] Scraping: Add the ability to set custom `http_headers` in config. #14817 +- [FEATURE] Scraping: Support feature flag `created-timestamp-zero-ingestion` in OpenMetrics. #14356, #14815 +- [FEATURE] Scraping: `scrape_failure_log_file` option to log failures to a file. #14734 +- [FEATURE] OTLP receiver: Optional promotion of resource attributes to series labels. #14200 +- [FEATURE] Remote-Write: Support Google Cloud Monitoring authorization. #14346 +- [FEATURE] Promtool: `tsdb create-blocks` new option to add labels. #14403 +- [FEATURE] Promtool: `promtool test` adds `--junit` flag to format results. #14506 +- [ENHANCEMENT] OTLP receiver: Warn on exponential histograms with zero count and non-zero sum. #14706 +- [ENHANCEMENT] OTLP receiver: Interrupt translation on context cancellation/timeout. #14612 +- [ENHANCEMENT] Remote Read client: Enable streaming remote read if the server supports it. #11379 +- [ENHANCEMENT] Remote-Write: Don't reshard if we haven't successfully sent a sample since last update. #14450 +- [ENHANCEMENT] PromQL: Delay deletion of `__name__` label to the end of the query evaluation. 
This is **experimental** and enabled under the feature-flag `promql-delayed-name-removal`. #14477 +- [ENHANCEMENT] PromQL: Experimental `sort_by_label` and `sort_by_label_desc` sort by all labels when label is equal. #14655 +- [ENHANCEMENT] PromQL: Clarify error message logged when Go runtime panic occurs during query evaluation. #14621 +- [ENHANCEMENT] PromQL: Use Kahan summation for better accuracy in `avg` and `avg_over_time`. #14413 +- [ENHANCEMENT] Tracing: Improve PromQL tracing, including showing the operation performed for aggregates, operators, and calls. #14816 +- [ENHANCEMENT] API: Support multiple listening addresses. #14665 +- [ENHANCEMENT] TSDB: Backward compatibility with upcoming index v3. #14934 +- [PERF] TSDB: Query in-order and out-of-order series together. #14354, #14693, #14714, #14831, #14874, #14948 +- [PERF] TSDB: Streamline reading of overlapping out-of-order head chunks. #14729 +- [BUGFIX] SD: Fix dropping targets (with feature flag `new-service-discovery-manager`). #13147 +- [BUGFIX] SD: Stop storing stale targets (with feature flag `new-service-discovery-manager`). #13622 +- [BUGFIX] Scraping: exemplars could be dropped in protobuf scraping. #14810 +- [BUGFIX] Remote-Write: fix metadata sending for experimental Remote-Write V2. #14766 +- [BUGFIX] Remote-Write: Return 4xx not 5xx when timeseries has duplicate label. #14716 +- [BUGFIX] Experimental Native Histograms: many fixes for incorrect results, panics, warnings. #14513, #14575, #14598, #14609, #14611, #14771, #14821 +- [BUGFIX] TSDB: Only count unknown record types in `record_decode_failures_total` metric. #14042 +>>>>>>> 58173ab1e (updated changelog) +======= +* [BUGFIX] Scrape: Do not override target parameter labels with config params. #11029 + +## 2.55.0-rc.0 / 2024-09-20 + +* [FEATURE] Support UTF-8 characters in label names - feature flag `utf8-names`. #14482, #14880, #14736, #14727 +* [FEATURE] Support config reload automatically - feature flag `auto-reload-config`. 
#14769 +* [FEATURE] Scraping: Add the ability to set custom `http_headers` in config. #14817 +* [FEATURE] Scraping: Support feature flag `created-timestamp-zero-ingestion` in OpenMetrics. #14356, #14815 +* [FEATURE] Scraping: `scrape_failure_log_file` option to log failures to a file. #14734 +* [FEATURE] OTLP receiver: Optional promotion of resource attributes to series labels. #14200 +* [FEATURE] Remote-Write: Support Google Cloud Monitoring authorization. #14346 +* [FEATURE] Promtool: `tsdb create-blocks` new option to add labels. #14403 +* [FEATURE] Promtool: `promtool test` adds `--junit` flag to format results. #14506 +* [ENHANCEMENT] OTLP receiver: Warn on exponential histograms with zero count and non-zero sum. #14706 +* [ENHANCEMENT] OTLP receiver: Interrupt translation on context cancellation/timeout. #14612 +* [ENHANCEMENT] Remote Read client: Enable streaming remote read if the server supports it. #11379 +* [ENHANCEMENT] Remote-Write: Don't reshard if we haven't successfully sent a sample since last update. #14450 +* [ENHANCEMENT] PromQL: Delay deletion of `__name__` label to the end of the query evaluation. This is **experimental** and enabled under the feature-flag `promql-delayed-name-removal`. #14477 +* [ENHANCEMENT] PromQL: Experimental `sort_by_label` and `sort_by_label_desc` sort by all labels when label is equal. #14655 +* [ENHANCEMENT] PromQL: Clarify error message logged when Go runtime panic occurs during query evaluation. #14621 +* [ENHANCEMENT] PromQL: Use Kahan summation for better accuracy in `avg` and `avg_over_time`. #14413 +* [ENHANCEMENT] Tracing: Improve PromQL tracing, including showing the operation performed for aggregates, operators, and calls. #14816 +* [ENHANCEMENT] API: Support multiple listening addresses. #14665 +* [ENHANCEMENT] TSDB: Backward compatibility with upcoming index v3. #14934 +* [PERF] TSDB: Query in-order and out-of-order series together. 
#14354, #14693, #14714, #14831, #14874, #14948 +* [PERF] TSDB: Streamline reading of overlapping out-of-order head chunks. #14729 +* [BUGFIX] SD: Fix dropping targets (with feature flag `new-service-discovery-manager`). #13147 +* [BUGFIX] SD: Stop storing stale targets (with feature flag `new-service-discovery-manager`). #13622 +* [BUGFIX] Scraping: exemplars could be dropped in protobuf scraping. #14810 +* [BUGFIX] Remote-Write: fix metadata sending for experimental Remote-Write V2. #14766 +* [BUGFIX] Remote-Write: Return 4xx not 5xx when timeseries has duplicate label. #14716 +* [BUGFIX] Experimental Native Histograms: many fixes for incorrect results, panics, warnings. #14513, #14575, #14598, #14609, #14611, #14771, #14821 +* [BUGFIX] TSDB: Only count unknown record types in `record_decode_failures_total` metric. #14042 +>>>>>>> b10c3696c (Revert "updated changelog") ## 2.54.1 / 2024-08-27 diff --git a/cmd/prometheus/main.go b/cmd/prometheus/main.go index 4a70d63bf..045389770 100644 --- a/cmd/prometheus/main.go +++ b/cmd/prometheus/main.go @@ -1639,6 +1639,9 @@ func (s *readyStorage) Appender(ctx context.Context) storage.Appender { type notReadyAppender struct{} +// SetOptions does nothing in this appender implementation. +func (n notReadyAppender) SetOptions(opts *storage.AppendOptions) {} + func (n notReadyAppender) Append(ref storage.SeriesRef, l labels.Labels, t int64, v float64) (storage.SeriesRef, error) { return 0, tsdb.ErrNotReady } diff --git a/rules/fixtures/rules1.yaml b/rules/fixtures/rules1.yaml new file mode 100644 index 000000000..76fbf71f3 --- /dev/null +++ b/rules/fixtures/rules1.yaml @@ -0,0 +1,5 @@ +groups: + - name: test_1 + rules: + - record: test_2 + expr: vector(2) diff --git a/rules/group.go b/rules/group.go index e9ef2be3a..7dd046b57 100644 --- a/rules/group.go +++ b/rules/group.go @@ -75,6 +75,7 @@ type Group struct { // concurrencyController controls the rules evaluation concurrency. 
concurrencyController RuleConcurrencyController + appOpts *storage.AppendOptions } // GroupEvalIterationFunc is used to implement and extend rule group @@ -145,6 +146,7 @@ func NewGroup(o GroupOptions) *Group { metrics: metrics, evalIterationFunc: evalIterationFunc, concurrencyController: concurrencyController, + appOpts: &storage.AppendOptions{DiscardOutOfOrder: true}, } } @@ -564,6 +566,7 @@ func (g *Group) Eval(ctx context.Context, ts time.Time) { if s.H != nil { _, err = app.AppendHistogram(0, s.Metric, s.T, nil, s.H) } else { + app.SetOptions(g.appOpts) _, err = app.Append(0, s.Metric, s.T, s.F) } @@ -660,6 +663,7 @@ func (g *Group) cleanupStaleSeries(ctx context.Context, ts time.Time) { return } app := g.opts.Appendable.Appender(ctx) + app.SetOptions(g.appOpts) queryOffset := g.QueryOffset() for _, s := range g.staleSeries { // Rule that produced series no longer configured, mark it stale. diff --git a/rules/manager_test.go b/rules/manager_test.go index 198d6bd07..6afac993d 100644 --- a/rules/manager_test.go +++ b/rules/manager_test.go @@ -1195,6 +1195,53 @@ func countStaleNaN(t *testing.T, st storage.Storage) int { return c } +func TestRuleMovedBetweenGroups(t *testing.T) { + if testing.Short() { + t.Skip("skipping test in short mode.") + } + + storage := teststorage.New(t, 600000) + defer storage.Close() + opts := promql.EngineOpts{ + Logger: nil, + Reg: nil, + MaxSamples: 10, + Timeout: 10 * time.Second, + } + engine := promql.NewEngine(opts) + ruleManager := NewManager(&ManagerOptions{ + Appendable: storage, + Queryable: storage, + QueryFunc: EngineQueryFunc(engine, storage), + Context: context.Background(), + Logger: promslog.NewNopLogger(), + }) + var stopped bool + ruleManager.start() + defer func() { + if !stopped { + ruleManager.Stop() + } + }() + + rule2 := "fixtures/rules2.yaml" + rule1 := "fixtures/rules1.yaml" + + // Load initial configuration of rules2 + require.NoError(t, ruleManager.Update(1*time.Second, []string{rule2}, labels.EmptyLabels(), 
"", nil)) + + // Wait for rule to be evaluated + time.Sleep(3 * time.Second) + + // Reload configuration of rules1 + require.NoError(t, ruleManager.Update(1*time.Second, []string{rule1}, labels.EmptyLabels(), "", nil)) + + // Wait for rule to be evaluated in new location and potential staleness marker + time.Sleep(3 * time.Second) + + require.Equal(t, 0, countStaleNaN(t, storage)) // Not expecting any stale markers. +} + func TestGroupHasAlertingRules(t *testing.T) { tests := []struct { group *Group diff --git a/scrape/helpers_test.go b/scrape/helpers_test.go index 4f7918f79..12a56d707 100644 --- a/scrape/helpers_test.go +++ b/scrape/helpers_test.go @@ -43,6 +43,8 @@ func (a nopAppendable) Appender(_ context.Context) storage.Appender { type nopAppender struct{} +func (a nopAppender) SetOptions(opts *storage.AppendOptions) {} + func (a nopAppender) Append(storage.SeriesRef, labels.Labels, int64, float64) (storage.SeriesRef, error) { return 0, nil } @@ -114,6 +116,8 @@ type collectResultAppender struct { pendingMetadata []metadata.Metadata } +func (a *collectResultAppender) SetOptions(opts *storage.AppendOptions) {} + func (a *collectResultAppender) Append(ref storage.SeriesRef, lset labels.Labels, t int64, v float64) (storage.SeriesRef, error) { a.mtx.Lock() defer a.mtx.Unlock() diff --git a/scrape/scrape.go b/scrape/scrape.go index f5f02d245..7e270bb3a 100644 --- a/scrape/scrape.go +++ b/scrape/scrape.go @@ -1864,7 +1864,9 @@ loop: if err == nil { sl.cache.forEachStale(func(lset labels.Labels) bool { // Series no longer exposed, mark it stale. 
+ app.SetOptions(&storage.AppendOptions{DiscardOutOfOrder: true}) _, err = app.Append(0, lset, defTime, math.Float64frombits(value.StaleNaN)) + app.SetOptions(nil) switch { case errors.Is(err, storage.ErrOutOfOrderSample), errors.Is(err, storage.ErrDuplicateSampleForTimestamp): // Do not count these in logging, as this is expected if a target @@ -1970,7 +1972,7 @@ func (sl *scrapeLoop) report(app storage.Appender, start time.Time, duration tim func (sl *scrapeLoop) reportStale(app storage.Appender, start time.Time) (err error) { ts := timestamp.FromTime(start) - + app.SetOptions(&storage.AppendOptions{DiscardOutOfOrder: true}) stale := math.Float64frombits(value.StaleNaN) b := labels.NewBuilder(labels.EmptyLabels()) diff --git a/scrape/scrape_test.go b/scrape/scrape_test.go index da964a230..f75e1db89 100644 --- a/scrape/scrape_test.go +++ b/scrape/scrape_test.go @@ -86,6 +86,97 @@ func TestNewScrapePool(t *testing.T) { require.NotNil(t, sp.newLoop, "newLoop function not initialized.") } +func TestStorageHandlesOutOfOrderTimestamps(t *testing.T) { + // Test with default OutOfOrderTimeWindow (0) + t.Run("Out-Of-Order Sample Disabled", func(t *testing.T) { + s := teststorage.New(t) + defer s.Close() + + runScrapeLoopTest(t, s, false) + }) + + // Test with specific OutOfOrderTimeWindow (600000) + t.Run("Out-Of-Order Sample Enabled", func(t *testing.T) { + s := teststorage.New(t, 600000) + defer s.Close() + + runScrapeLoopTest(t, s, true) + }) +} + +func runScrapeLoopTest(t *testing.T, s *teststorage.TestStorage, expectOutOfOrder bool) { + // Create an appender for adding samples to the storage. + app := s.Appender(context.Background()) + capp := &collectResultAppender{next: app} + sl := newBasicScrapeLoop(t, context.Background(), nil, func(ctx context.Context) storage.Appender { return capp }, 0) + + // Current time for generating timestamps. + now := time.Now() + + // Calculate timestamps for the samples based on the current time. 
+ now = now.Truncate(time.Minute) // round down the now timestamp to the nearest minute + timestampInorder1 := now + timestampOutOfOrder := now.Add(-5 * time.Minute) + timestampInorder2 := now.Add(5 * time.Minute) + + slApp := sl.appender(context.Background()) + _, _, _, err := sl.append(slApp, []byte(`metric_a{a="1",b="1"} 1`), "", timestampInorder1) + require.NoError(t, err) + + _, _, _, err = sl.append(slApp, []byte(`metric_a{a="1",b="1"} 2`), "", timestampOutOfOrder) + require.NoError(t, err) + + _, _, _, err = sl.append(slApp, []byte(`metric_a{a="1",b="1"} 3`), "", timestampInorder2) + require.NoError(t, err) + + require.NoError(t, slApp.Commit()) + + // Query the samples back from the storage. + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + q, err := s.Querier(time.Time{}.UnixNano(), time.Now().UnixNano()) + require.NoError(t, err) + defer q.Close() + + // Use a matcher to filter the metric name. + series := q.Select(ctx, false, nil, labels.MustNewMatcher(labels.MatchRegexp, "__name__", "metric_a")) + + var results []floatSample + for series.Next() { + it := series.At().Iterator(nil) + for it.Next() == chunkenc.ValFloat { + t, v := it.At() + results = append(results, floatSample{ + metric: series.At().Labels(), + t: t, + f: v, + }) + } + require.NoError(t, it.Err()) + } + require.NoError(t, series.Err()) + + // Define the expected results + want := []floatSample{ + { + metric: labels.FromStrings("__name__", "metric_a", "a", "1", "b", "1"), + t: timestamp.FromTime(timestampInorder1), + f: 1, + }, + { + metric: labels.FromStrings("__name__", "metric_a", "a", "1", "b", "1"), + t: timestamp.FromTime(timestampInorder2), + f: 3, + }, + } + + if expectOutOfOrder { + require.NotEqual(t, want, results, "Expected results to include out-of-order sample:\n%s", results) + } else { + require.Equal(t, want, results, "Appended samples not as expected:\n%s", results) + } +} + func TestDroppedTargetsList(t *testing.T) { var ( app = &nopAppendable{} 
@@ -1157,6 +1248,87 @@ func BenchmarkScrapeLoopAppendOM(b *testing.B) { } } +func TestSetOptionsHandlingStaleness(t *testing.T) { + s := teststorage.New(t, 600000) + defer s.Close() + + signal := make(chan struct{}, 1) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + // Function to run the scrape loop + runScrapeLoop := func(ctx context.Context, t *testing.T, cue int, action func(*scrapeLoop)) { + var ( + scraper = &testScraper{} + app = func(ctx context.Context) storage.Appender { + return s.Appender(ctx) + } + ) + sl := newBasicScrapeLoop(t, ctx, scraper, app, 10*time.Millisecond) + numScrapes := 0 + scraper.scrapeFunc = func(ctx context.Context, w io.Writer) error { + numScrapes++ + if numScrapes == cue { + action(sl) + } + w.Write([]byte(fmt.Sprintf("metric_a{a=\"1\",b=\"1\"} %d\n", 42+numScrapes))) + return nil + } + sl.run(nil) + } + go func() { + runScrapeLoop(ctx, t, 2, func(sl *scrapeLoop) { + go sl.stop() + // Wait a bit then start a new target. + time.Sleep(100 * time.Millisecond) + go func() { + runScrapeLoop(ctx, t, 4, func(_ *scrapeLoop) { + cancel() + }) + signal <- struct{}{} + }() + }) + }() + + select { + case <-signal: + case <-time.After(10 * time.Second): + t.Fatalf("Scrape wasn't stopped.") + } + + ctx1, cancel := context.WithCancel(context.Background()) + defer cancel() + + q, err := s.Querier(0, time.Now().UnixNano()) + + require.NoError(t, err) + defer q.Close() + + series := q.Select(ctx1, false, nil, labels.MustNewMatcher(labels.MatchRegexp, "__name__", "metric_a")) + + var results []floatSample + for series.Next() { + it := series.At().Iterator(nil) + for it.Next() == chunkenc.ValFloat { + t, v := it.At() + results = append(results, floatSample{ + metric: series.At().Labels(), + t: t, + f: v, + }) + } + require.NoError(t, it.Err()) + } + require.NoError(t, series.Err()) + var c int + for _, s := range results { + if value.IsStaleNaN(s.f) { + c++ + } + } + require.Equal(t, 0, c, "invalid count of staleness 
markers after stopping the engine") +} + func TestScrapeLoopRunCreatesStaleMarkersOnFailedScrape(t *testing.T) { appender := &collectResultAppender{} var ( @@ -4032,7 +4204,6 @@ func TestScrapeLoopRunCreatesStaleMarkersOnFailedScrapeForTimestampedMetrics(t * case <-time.After(5 * time.Second): t.Fatalf("Scrape wasn't stopped.") } - // 1 successfully scraped sample, 1 stale marker after first fail, 5 report samples for // each scrape successful or not. require.Len(t, appender.resultFloats, 27, "Appended samples not as expected:\n%s", appender) diff --git a/storage/fanout.go b/storage/fanout.go index 6ff517895..4d076788a 100644 --- a/storage/fanout.go +++ b/storage/fanout.go @@ -147,6 +147,16 @@ type fanoutAppender struct { secondaries []Appender } +// SetOptions propagates the hints to both primary and secondary appenders. +func (f *fanoutAppender) SetOptions(opts *AppendOptions) { + if f.primary != nil { + f.primary.SetOptions(opts) + } + for _, appender := range f.secondaries { + appender.SetOptions(opts) + } +} + func (f *fanoutAppender) Append(ref SeriesRef, l labels.Labels, t int64, v float64) (SeriesRef, error) { ref, err := f.primary.Append(ref, l, t, v) if err != nil { diff --git a/storage/interface.go b/storage/interface.go index b7ef14ce9..56bb53dfe 100644 --- a/storage/interface.go +++ b/storage/interface.go @@ -243,6 +243,10 @@ func (f QueryableFunc) Querier(mint, maxt int64) (Querier, error) { return f(mint, maxt) } +type AppendOptions struct { + DiscardOutOfOrder bool +} + // Appender provides batched appends against a storage. // It must be completed with a call to Commit or Rollback and must not be reused afterwards. // @@ -271,6 +275,10 @@ type Appender interface { // Appender has to be discarded after rollback. Rollback() error + // SetOptions configures the appender with specific append options such as + // discarding out-of-order samples even if out-of-order is enabled in the TSDB. 
+ SetOptions(opts *AppendOptions) + ExemplarAppender HistogramAppender MetadataUpdater diff --git a/storage/remote/write.go b/storage/remote/write.go index 20e4ed10d..00e4fa3a0 100644 --- a/storage/remote/write.go +++ b/storage/remote/write.go @@ -278,6 +278,7 @@ func (rws *WriteStorage) Close() error { type timestampTracker struct { writeStorage *WriteStorage + appendOptions *storage.AppendOptions samples int64 exemplars int64 histograms int64 @@ -285,6 +286,10 @@ type timestampTracker struct { highestRecvTimestamp *maxTimestamp } +func (t *timestampTracker) SetOptions(opts *storage.AppendOptions) { + t.appendOptions = opts +} + // Append implements storage.Appender. func (t *timestampTracker) Append(_ storage.SeriesRef, _ labels.Labels, ts int64, _ float64) (storage.SeriesRef, error) { t.samples++ diff --git a/storage/remote/write_handler_test.go b/storage/remote/write_handler_test.go index d91949131..580c7c143 100644 --- a/storage/remote/write_handler_test.go +++ b/storage/remote/write_handler_test.go @@ -833,6 +833,10 @@ func (m *mockAppendable) Appender(_ context.Context) storage.Appender { return m } +func (m *mockAppendable) SetOptions(opts *storage.AppendOptions) { + panic("unimplemented") +} + func (m *mockAppendable) Append(_ storage.SeriesRef, l labels.Labels, t int64, v float64) (storage.SeriesRef, error) { if m.appendSampleErr != nil { return 0, m.appendSampleErr diff --git a/tsdb/agent/db.go b/tsdb/agent/db.go index b2c40b201..5de84c93a 100644 --- a/tsdb/agent/db.go +++ b/tsdb/agent/db.go @@ -763,6 +763,7 @@ func (db *DB) Close() error { type appender struct { *DB + hints *storage.AppendOptions pendingSeries []record.RefSeries pendingSamples []record.RefSample @@ -783,6 +784,10 @@ type appender struct { floatHistogramSeries []*memSeries } +func (a *appender) SetOptions(opts *storage.AppendOptions) { + a.hints = opts +} + func (a *appender) Append(ref storage.SeriesRef, l labels.Labels, t int64, v float64) (storage.SeriesRef, error) { // series 
references and chunk references are identical for agent mode. headRef := chunks.HeadSeriesRef(ref) diff --git a/tsdb/head_append.go b/tsdb/head_append.go index adfd5d4bf..170e74044 100644 --- a/tsdb/head_append.go +++ b/tsdb/head_append.go @@ -40,6 +40,12 @@ type initAppender struct { var _ storage.GetRef = &initAppender{} +func (a *initAppender) SetOptions(opts *storage.AppendOptions) { + if a.app != nil { + a.app.SetOptions(opts) + } +} + func (a *initAppender) Append(ref storage.SeriesRef, lset labels.Labels, t int64, v float64) (storage.SeriesRef, error) { if a.app != nil { return a.app.Append(ref, lset, t, v) @@ -326,6 +332,11 @@ type headAppender struct { appendID, cleanupAppendIDsBelow uint64 closed bool + hints *storage.AppendOptions +} + +func (a *headAppender) SetOptions(opts *storage.AppendOptions) { + a.hints = opts } func (a *headAppender) Append(ref storage.SeriesRef, lset labels.Labels, t int64, v float64) (storage.SeriesRef, error) { @@ -359,13 +370,18 @@ func (a *headAppender) Append(ref storage.SeriesRef, lset labels.Labels, t int64 } s.Lock() + + defer s.Unlock() // TODO(codesome): If we definitely know at this point that the sample is ooo, then optimise // to skip that sample from the WAL and write only in the WBL. 
- _, delta, err := s.appendable(t, v, a.headMaxt, a.minValidTime, a.oooTimeWindow) + isOOO, delta, err := s.appendable(t, v, a.headMaxt, a.minValidTime, a.oooTimeWindow) if err == nil { + if isOOO && a.hints != nil && a.hints.DiscardOutOfOrder { + a.head.metrics.outOfOrderSamples.WithLabelValues(sampleMetricTypeFloat).Inc() + return 0, storage.ErrOutOfOrderSample + } s.pendingCommit = true } - s.Unlock() if delta > 0 { a.head.metrics.oooHistogram.Observe(float64(delta) / 1000) } diff --git a/util/teststorage/storage.go b/util/teststorage/storage.go index 7d1f9dda2..e15d591e0 100644 --- a/util/teststorage/storage.go +++ b/util/teststorage/storage.go @@ -30,15 +30,15 @@ import ( // New returns a new TestStorage for testing purposes // that removes all associated files on closing. -func New(t testutil.T) *TestStorage { - stor, err := NewWithError() +func New(t testutil.T, outOfOrderTimeWindow ...int64) *TestStorage { + stor, err := NewWithError(outOfOrderTimeWindow...) require.NoError(t, err) return stor } // NewWithError returns a new TestStorage for user facing tests, which reports // errors directly. 
+func NewWithError(outOfOrderTimeWindow ...int64) (*TestStorage, error) { dir, err := os.MkdirTemp("", "test_storage") if err != nil { return nil, fmt.Errorf("opening test directory: %w", err) @@ -51,6 +51,14 @@ func NewWithError() (*TestStorage, error) { opts.MaxBlockDuration = int64(24 * time.Hour / time.Millisecond) opts.RetentionDuration = 0 opts.EnableNativeHistograms = true + + // Set OutOfOrderTimeWindow if provided, otherwise use default (0) + if len(outOfOrderTimeWindow) > 0 { + opts.OutOfOrderTimeWindow = outOfOrderTimeWindow[0] + } else { + opts.OutOfOrderTimeWindow = 0 // Default value is zero + } + db, err := tsdb.Open(dir, nil, nil, opts, tsdb.NewDBStats()) if err != nil { return nil, fmt.Errorf("opening test storage: %w", err) From 2182b832711586f8d8a4c34f5820ea9265d818b6 Mon Sep 17 00:00:00 2001 From: George Krajcsovits Date: Thu, 24 Oct 2024 07:38:58 +0200 Subject: [PATCH 299/339] feat(nhcb): implement created timestamp handling (#15198) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Call through to the underlying parser if we are not in a histogram and the entry is a series or exponential native histogram. Otherwise store and retrieve CT for NHCB. * fix(omparser): losing exemplars when CT is parsed Fixes: #15137 Ignore exemplars while peeking ahead during CT parsing. Simplify state reset with defer(). 
Signed-off-by: György Krajcsovits --- model/textparse/interface_test.go | 6 ++-- model/textparse/nhcbparse.go | 36 ++++++++++++++-------- model/textparse/nhcbparse_test.go | 46 ++++++++++++++++++----------- model/textparse/openmetricsparse.go | 44 +++++++++++++++++++-------- 4 files changed, 88 insertions(+), 44 deletions(-) diff --git a/model/textparse/interface_test.go b/model/textparse/interface_test.go index 6136fbc91..72c8284f2 100644 --- a/model/textparse/interface_test.go +++ b/model/textparse/interface_test.go @@ -239,13 +239,13 @@ func testParse(t *testing.T, p Parser) (ret []parsedEntry) { } p.Metric(&got.lset) - for e := (exemplar.Exemplar{}); p.Exemplar(&e); { - got.es = append(got.es, e) - } // Parser reuses int pointer. if ct := p.CreatedTimestamp(); ct != nil { got.ct = int64p(*ct) } + for e := (exemplar.Exemplar{}); p.Exemplar(&e); { + got.es = append(got.es, e) + } case EntryType: m, got.typ = p.Type() got.m = string(m) diff --git a/model/textparse/nhcbparse.go b/model/textparse/nhcbparse.go index 22384f1ec..eab9fa7e6 100644 --- a/model/textparse/nhcbparse.go +++ b/model/textparse/nhcbparse.go @@ -84,6 +84,7 @@ type NHCBParser struct { fhNHCB *histogram.FloatHistogram lsetNHCB labels.Labels exemplars []exemplar.Exemplar + ctNHCB *int64 metricStringNHCB string // Collates values from the classic histogram series to build @@ -92,6 +93,7 @@ type NHCBParser struct { tempNHCB convertnhcb.TempHistogram tempExemplars []exemplar.Exemplar tempExemplarCount int + tempCT *int64 // Remembers the last base histogram metric name (assuming it's // a classic histogram) so we can tell if the next float series @@ -159,6 +161,16 @@ func (p *NHCBParser) Exemplar(ex *exemplar.Exemplar) bool { } func (p *NHCBParser) CreatedTimestamp() *int64 { + switch p.state { + case stateStart: + if p.entry == EntrySeries || p.entry == EntryHistogram { + return p.parser.CreatedTimestamp() + } + case stateCollecting: + return p.parser.CreatedTimestamp() + case stateEmitting: + return 
p.ctNHCB + } return nil } @@ -174,22 +186,20 @@ func (p *NHCBParser) Next() (Entry, error) { } return p.entry, p.err } - et, err := p.parser.Next() - if err != nil { - if errors.Is(err, io.EOF) && p.processNHCB() { - p.entry = et - p.err = err + + p.entry, p.err = p.parser.Next() + if p.err != nil { + if errors.Is(p.err, io.EOF) && p.processNHCB() { return EntryHistogram, nil } - return EntryInvalid, err + return EntryInvalid, p.err } - switch et { + switch p.entry { case EntrySeries: p.bytes, p.ts, p.value = p.parser.Series() p.metricString = p.parser.Metric(&p.lset) // Check the label set to see if we can continue or need to emit the NHCB. if p.compareLabels() && p.processNHCB() { - p.entry = et return EntryHistogram, nil } isNHCB := p.handleClassicHistogramSeries(p.lset) @@ -197,7 +207,7 @@ func (p *NHCBParser) Next() (Entry, error) { // Do not return the classic histogram series if it was converted to NHCB and we are not keeping classic histograms. return p.Next() } - return et, err + return p.entry, p.err case EntryHistogram: p.bytes, p.ts, p.h, p.fh = p.parser.Histogram() p.metricString = p.parser.Metric(&p.lset) @@ -205,10 +215,9 @@ func (p *NHCBParser) Next() (Entry, error) { p.bName, p.typ = p.parser.Type() } if p.processNHCB() { - p.entry = et return EntryHistogram, nil } - return et, err + return p.entry, p.err } // Return true if labels have changed and we should emit the NHCB. 
@@ -274,8 +283,9 @@ func (p *NHCBParser) handleClassicHistogramSeries(lset labels.Labels) bool { func (p *NHCBParser) processClassicHistogramSeries(lset labels.Labels, suffix string, updateHist func(*convertnhcb.TempHistogram)) { if p.state != stateCollecting { p.storeBaseLabels() + p.tempCT = p.parser.CreatedTimestamp() + p.state = stateCollecting } - p.state = stateCollecting p.tempLsetNHCB = convertnhcb.GetHistogramMetricBase(lset, suffix) p.storeExemplars() updateHist(&p.tempNHCB) @@ -337,7 +347,9 @@ func (p *NHCBParser) processNHCB() bool { p.bytesNHCB = []byte(p.metricStringNHCB) p.lsetNHCB = p.tempLsetNHCB p.swapExemplars() + p.ctNHCB = p.tempCT p.tempNHCB = convertnhcb.NewTempHistogram() p.state = stateEmitting + p.tempCT = nil return true } diff --git a/model/textparse/nhcbparse_test.go b/model/textparse/nhcbparse_test.go index 80b65fd22..1ead2e30e 100644 --- a/model/textparse/nhcbparse_test.go +++ b/model/textparse/nhcbparse_test.go @@ -292,14 +292,14 @@ foobar{quantile="0.99"} 150.1` lset: labels.FromStrings("__name__", "foo_total"), t: int64p(1520879607789), es: []exemplar.Exemplar{{Labels: labels.FromStrings("id", "counter-test"), Value: 5}}, - // TODO(krajorama): ct: int64p(1520872607123), + ct: int64p(1520872607123), }, { m: `foo_total{a="b"}`, v: 17.0, lset: labels.FromStrings("__name__", "foo_total", "a", "b"), t: int64p(1520879607789), es: []exemplar.Exemplar{{Labels: labels.FromStrings("id", "counter-test"), Value: 5}}, - // TODO(krajorama): ct: int64p(1520872607123), + ct: int64p(1520872607123), }, { m: "bar", help: "Summary with CT at the end, making sure we find CT even if it's multiple lines a far", @@ -310,22 +310,22 @@ foobar{quantile="0.99"} 150.1` m: "bar_count", v: 17.0, lset: labels.FromStrings("__name__", "bar_count"), - // TODO(krajorama): ct: int64p(1520872608124), + ct: int64p(1520872608124), }, { m: "bar_sum", v: 324789.3, lset: labels.FromStrings("__name__", "bar_sum"), - // TODO(krajorama): ct: int64p(1520872608124), + ct: 
int64p(1520872608124), }, { m: `bar{quantile="0.95"}`, v: 123.7, lset: labels.FromStrings("__name__", "bar", "quantile", "0.95"), - // TODO(krajorama): ct: int64p(1520872608124), + ct: int64p(1520872608124), }, { m: `bar{quantile="0.99"}`, v: 150.0, lset: labels.FromStrings("__name__", "bar", "quantile", "0.99"), - // TODO(krajorama): ct: int64p(1520872608124), + ct: int64p(1520872608124), }, { m: "baz", help: "Histogram with the same objective as above's summary", @@ -343,7 +343,7 @@ foobar{quantile="0.99"} 150.1` CustomValues: []float64{0.0}, // We do not store the +Inf boundary. }, lset: labels.FromStrings("__name__", "baz"), - // TODO(krajorama): ct: int64p(1520872609125), + ct: int64p(1520872609125), }, { m: "fizz_created", help: "Gauge which shouldn't be parsed as CT", @@ -371,7 +371,7 @@ foobar{quantile="0.99"} 150.1` CustomValues: []float64{0.0}, // We do not store the +Inf boundary. }, lset: labels.FromStrings("__name__", "something"), - // TODO(krajorama): ct: int64p(1520430001000), + ct: int64p(1520430001000), }, { m: `something{a="b"}`, shs: &histogram.Histogram{ @@ -383,7 +383,7 @@ foobar{quantile="0.99"} 150.1` CustomValues: []float64{0.0}, // We do not store the +Inf boundary. 
}, lset: labels.FromStrings("__name__", "something", "a", "b"), - // TODO(krajorama): ct: int64p(1520430002000), + ct: int64p(1520430002000), }, { m: "yum", help: "Summary with _created between sum and quantiles", @@ -394,22 +394,22 @@ foobar{quantile="0.99"} 150.1` m: `yum_count`, v: 20, lset: labels.FromStrings("__name__", "yum_count"), - // TODO(krajorama): ct: int64p(1520430003000), + ct: int64p(1520430003000), }, { m: `yum_sum`, v: 324789.5, lset: labels.FromStrings("__name__", "yum_sum"), - // TODO(krajorama): ct: int64p(1520430003000), + ct: int64p(1520430003000), }, { m: `yum{quantile="0.95"}`, v: 123.7, lset: labels.FromStrings("__name__", "yum", "quantile", "0.95"), - // TODO(krajorama): ct: int64p(1520430003000), + ct: int64p(1520430003000), }, { m: `yum{quantile="0.99"}`, v: 150.0, lset: labels.FromStrings("__name__", "yum", "quantile", "0.99"), - // TODO(krajorama): ct: int64p(1520430003000), + ct: int64p(1520430003000), }, { m: "foobar", help: "Summary with _created as the first line", @@ -420,22 +420,22 @@ foobar{quantile="0.99"} 150.1` m: `foobar_count`, v: 21, lset: labels.FromStrings("__name__", "foobar_count"), - // TODO(krajorama): ct: int64p(1520430004000), + ct: int64p(1520430004000), }, { m: `foobar_sum`, v: 324789.6, lset: labels.FromStrings("__name__", "foobar_sum"), - // TODO(krajorama): ct: int64p(1520430004000), + ct: int64p(1520430004000), }, { m: `foobar{quantile="0.95"}`, v: 123.8, lset: labels.FromStrings("__name__", "foobar", "quantile", "0.95"), - // TODO(krajorama): ct: int64p(1520430004000), + ct: int64p(1520430004000), }, { m: `foobar{quantile="0.99"}`, v: 150.1, lset: labels.FromStrings("__name__", "foobar", "quantile", "0.99"), - // TODO(krajorama): ct: int64p(1520430004000), + ct: int64p(1520430004000), }, { m: "metric", help: "foo\x00bar", @@ -555,42 +555,49 @@ func TestNHCBParserProtoBufParser_NoNHCBWhenExponential(t *testing.T) { }, lset: labels.FromStrings("__name__", "test_histogram"), t: int64p(1234568), + ct: 
int64p(1000), }, { m: "test_histogram_count", v: 175, lset: labels.FromStrings("__name__", "test_histogram_count"), t: int64p(1234568), + ct: int64p(1000), }, { m: "test_histogram_sum", v: 0.0008280461746287094, lset: labels.FromStrings("__name__", "test_histogram_sum"), t: int64p(1234568), + ct: int64p(1000), }, { m: "test_histogram_bucket\xffle\xff-0.0004899999999999998", v: 2, lset: labels.FromStrings("__name__", "test_histogram_bucket", "le", "-0.0004899999999999998"), t: int64p(1234568), + ct: int64p(1000), }, { m: "test_histogram_bucket\xffle\xff-0.0003899999999999998", v: 4, lset: labels.FromStrings("__name__", "test_histogram_bucket", "le", "-0.0003899999999999998"), t: int64p(1234568), + ct: int64p(1000), }, { m: "test_histogram_bucket\xffle\xff-0.0002899999999999998", v: 16, lset: labels.FromStrings("__name__", "test_histogram_bucket", "le", "-0.0002899999999999998"), t: int64p(1234568), + ct: int64p(1000), }, { m: "test_histogram_bucket\xffle\xff+Inf", v: 175, lset: labels.FromStrings("__name__", "test_histogram_bucket", "le", "+Inf"), t: int64p(1234568), + ct: int64p(1000), }, { // TODO(krajorama): optimize: this should not be here. In case there's @@ -609,6 +616,7 @@ func TestNHCBParserProtoBufParser_NoNHCBWhenExponential(t *testing.T) { }, lset: labels.FromStrings("__name__", "test_histogram"), t: int64p(1234568), + ct: int64p(1000), }, } got := testParse(t, p) @@ -621,6 +629,10 @@ help: "Test histogram with classic and exponential buckets." type: HISTOGRAM metric: < histogram: < + created_timestamp: < + seconds: 1 + nanos: 1 + > sample_count: 175 sample_sum: 0.0008280461746287094 bucket: < diff --git a/model/textparse/openmetricsparse.go b/model/textparse/openmetricsparse.go index 70c24d9ec..3ae9c7ddf 100644 --- a/model/textparse/openmetricsparse.go +++ b/model/textparse/openmetricsparse.go @@ -102,6 +102,8 @@ type OpenMetricsParser struct { // Created timestamp parsing state. 
ct int64 ctHashSet uint64 + // ignoreExemplar instructs the parser to not overwrite exemplars (to keep them while peeking ahead). + ignoreExemplar bool // visitedMFName is the metric family name of the last visited metric when peeking ahead // for _created series during the execution of the CreatedTimestamp method. visitedMFName []byte @@ -296,6 +298,14 @@ func (p *OpenMetricsParser) CreatedTimestamp() *int64 { p.skipCTSeries = false + p.ignoreExemplar = true + savedStart := p.start + defer func() { + p.ignoreExemplar = false + p.start = savedStart + p.l = resetLexer + }() + for { eType, err := p.Next() if err != nil { @@ -303,12 +313,12 @@ func (p *OpenMetricsParser) CreatedTimestamp() *int64 { // This might result in partial scrape with wrong/missing CT, but only // spec improvement would help. // TODO: Make sure OM 1.1/2.0 pass CT via metadata or exemplar-like to avoid this. - p.resetCTParseValues(resetLexer) + p.resetCTParseValues() return nil } if eType != EntrySeries { // Assume we hit different family, no CT line found. - p.resetCTParseValues(resetLexer) + p.resetCTParseValues() return nil } @@ -322,14 +332,14 @@ func (p *OpenMetricsParser) CreatedTimestamp() *int64 { peekedHash := p.seriesHash(&buf, peekedName[:len(peekedName)-8]) if peekedHash != currHash { // Found CT line for a different series, for our series no CT. - p.resetCTParseValues(resetLexer) + p.resetCTParseValues() return nil } // All timestamps in OpenMetrics are Unix Epoch in seconds. Convert to milliseconds. // https://github.com/OpenObservability/OpenMetrics/blob/main/specification/OpenMetrics.md#timestamps ct := int64(p.val * 1000.0) - p.setCTParseValues(ct, currHash, currName, true, resetLexer) + p.setCTParseValues(ct, currHash, currName, true) return &ct } } @@ -371,17 +381,15 @@ func (p *OpenMetricsParser) seriesHash(offsetsArr *[]byte, metricFamilyName []by // setCTParseValues sets the parser to the state after CreatedTimestamp method was called and CT was found. 
// This is useful to prevent re-parsing the same series again and early return the CT value. -func (p *OpenMetricsParser) setCTParseValues(ct int64, ctHashSet uint64, mfName []byte, skipCTSeries bool, resetLexer *openMetricsLexer) { +func (p *OpenMetricsParser) setCTParseValues(ct int64, ctHashSet uint64, mfName []byte, skipCTSeries bool) { p.ct = ct - p.l = resetLexer p.ctHashSet = ctHashSet p.visitedMFName = mfName p.skipCTSeries = skipCTSeries // Do we need to set it? } // resetCtParseValues resets the parser to the state before CreatedTimestamp method was called. -func (p *OpenMetricsParser) resetCTParseValues(resetLexer *openMetricsLexer) { - p.l = resetLexer +func (p *OpenMetricsParser) resetCTParseValues() { p.ctHashSet = 0 p.skipCTSeries = true } @@ -417,10 +425,12 @@ func (p *OpenMetricsParser) Next() (Entry, error) { p.start = p.l.i p.offsets = p.offsets[:0] - p.eOffsets = p.eOffsets[:0] - p.exemplar = p.exemplar[:0] - p.exemplarVal = 0 - p.hasExemplarTs = false + if !p.ignoreExemplar { + p.eOffsets = p.eOffsets[:0] + p.exemplar = p.exemplar[:0] + p.exemplarVal = 0 + p.hasExemplarTs = false + } switch t := p.nextToken(); t { case tEOFWord: @@ -545,6 +555,16 @@ func (p *OpenMetricsParser) Next() (Entry, error) { func (p *OpenMetricsParser) parseComment() error { var err error + + if p.ignoreExemplar { + for t := p.nextToken(); t != tLinebreak; t = p.nextToken() { + if t == tEOF { + return errors.New("data does not end with # EOF") + } + } + return nil + } + // Parse the labels. 
p.eOffsets, err = p.parseLVals(p.eOffsets, true) if err != nil { From 7ca90e5729d7602a95afa4537b72229a5cbaf674 Mon Sep 17 00:00:00 2001 From: Jonathan Ballet Date: Thu, 24 Oct 2024 08:53:36 +0200 Subject: [PATCH 300/339] doc: fix formatting Signed-off-by: Jonathan Ballet --- docs/querying/api.md | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/docs/querying/api.md b/docs/querying/api.md index 6b7ae0524..0352496f1 100644 --- a/docs/querying/api.md +++ b/docs/querying/api.md @@ -568,7 +568,7 @@ Instant vectors are returned as result type `vector`. The corresponding Each series could have the `"value"` key, or the `"histogram"` key, but not both. Series are not guaranteed to be returned in any particular order unless a function -such as [`sort`](functions.md#sort) or [`sort_by_label`](functions.md#sort_by_label)` +such as [`sort`](functions.md#sort) or [`sort_by_label`](functions.md#sort_by_label) is used. ### Scalars @@ -905,7 +905,7 @@ curl -G http://localhost:9091/api/v1/targets/metadata \ ``` The following example returns metadata for all metrics for all targets with -label `instance="127.0.0.1:9090`. +label `instance="127.0.0.1:9090"`. ```json curl -G http://localhost:9091/api/v1/targets/metadata \ @@ -1190,9 +1190,11 @@ The following endpoint returns various cardinality statistics about the Promethe GET /api/v1/status/tsdb ``` URL query parameters: + - `limit=`: Limit the number of returned items to a given number for each set of statistics. By default, 10 items are returned. -The `data` section of the query result consists of +The `data` section of the query result consists of: + - **headStats**: This provides the following data about the head block of the TSDB: - **numSeries**: The number of series. - **chunkCount**: The number of chunks. @@ -1268,13 +1270,13 @@ The following endpoint returns information about the WAL replay: GET /api/v1/status/walreplay ``` -**read**: The number of segments replayed so far. 
-**total**: The total number segments needed to be replayed. -**progress**: The progress of the replay (0 - 100%). -**state**: The state of the replay. Possible states: -- **waiting**: Waiting for the replay to start. -- **in progress**: The replay is in progress. -- **done**: The replay has finished. +- **read**: The number of segments replayed so far. +- **total**: The total number segments needed to be replayed. +- **progress**: The progress of the replay (0 - 100%). +- **state**: The state of the replay. Possible states: + - **waiting**: Waiting for the replay to start. + - **in progress**: The replay is in progress. + - **done**: The replay has finished. ```json $ curl http://localhost:9090/api/v1/status/walreplay From 469573b13b728a0d5a96b7dc55a205d06c712abf Mon Sep 17 00:00:00 2001 From: George Krajcsovits Date: Thu, 24 Oct 2024 18:14:05 +0200 Subject: [PATCH 301/339] fix(nhcb): do not return nhcb from parse if exponential is present (#15209) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit From: https://github.com/prometheus/prometheus/pull/14978#discussion_r1800755481 Also encode the requirement table set in #13532 Signed-off-by: György Krajcsovits --- model/textparse/nhcbparse.go | 37 +++- model/textparse/nhcbparse_test.go | 353 ++++++++++++++++++++++-------- 2 files changed, 286 insertions(+), 104 deletions(-) diff --git a/model/textparse/nhcbparse.go b/model/textparse/nhcbparse.go index eab9fa7e6..79f5c892a 100644 --- a/model/textparse/nhcbparse.go +++ b/model/textparse/nhcbparse.go @@ -98,9 +98,11 @@ type NHCBParser struct { // Remembers the last base histogram metric name (assuming it's // a classic histogram) so we can tell if the next float series // is part of the same classic histogram. - lastHistogramName string - lastHistogramLabelsHash uint64 - hBuffer []byte + lastHistogramName string + lastHistogramLabelsHash uint64 + lastHistogramExponential bool + // Reused buffer for hashing labels. 
+ hBuffer []byte } func NewNHCBParser(p Parser, st *labels.SymbolTable, keepClassicHistograms bool) Parser { @@ -199,10 +201,21 @@ func (p *NHCBParser) Next() (Entry, error) { p.bytes, p.ts, p.value = p.parser.Series() p.metricString = p.parser.Metric(&p.lset) // Check the label set to see if we can continue or need to emit the NHCB. - if p.compareLabels() && p.processNHCB() { - return EntryHistogram, nil + var isNHCB bool + if p.compareLabels() { + // Labels differ. Check if we can emit the NHCB. + if p.processNHCB() { + return EntryHistogram, nil + } + isNHCB = p.handleClassicHistogramSeries(p.lset) + } else { + // Labels are the same. Check if after an exponential histogram. + if p.lastHistogramExponential { + isNHCB = false + } else { + isNHCB = p.handleClassicHistogramSeries(p.lset) + } } - isNHCB := p.handleClassicHistogramSeries(p.lset) if isNHCB && !p.keepClassicHistograms { // Do not return the classic histogram series if it was converted to NHCB and we are not keeping classic histograms. return p.Next() @@ -211,6 +224,7 @@ func (p *NHCBParser) Next() (Entry, error) { case EntryHistogram: p.bytes, p.ts, p.h, p.fh = p.parser.Histogram() p.metricString = p.parser.Metric(&p.lset) + p.storeExponentialLabels() case EntryType: p.bName, p.typ = p.parser.Type() } @@ -239,9 +253,16 @@ func (p *NHCBParser) compareLabels() bool { } // Save the label set of the classic histogram without suffix and bucket `le` label. 
-func (p *NHCBParser) storeBaseLabels() { +func (p *NHCBParser) storeClassicLabels() { p.lastHistogramName = convertnhcb.GetHistogramMetricBaseName(p.lset.Get(labels.MetricName)) p.lastHistogramLabelsHash, _ = p.lset.HashWithoutLabels(p.hBuffer, labels.BucketLabel) + p.lastHistogramExponential = false +} + +func (p *NHCBParser) storeExponentialLabels() { + p.lastHistogramName = p.lset.Get(labels.MetricName) + p.lastHistogramLabelsHash, _ = p.lset.HashWithoutLabels(p.hBuffer) + p.lastHistogramExponential = true } // handleClassicHistogramSeries collates the classic histogram series to be converted to NHCB @@ -282,7 +303,7 @@ func (p *NHCBParser) handleClassicHistogramSeries(lset labels.Labels) bool { func (p *NHCBParser) processClassicHistogramSeries(lset labels.Labels, suffix string, updateHist func(*convertnhcb.TempHistogram)) { if p.state != stateCollecting { - p.storeBaseLabels() + p.storeClassicLabels() p.tempCT = p.parser.CreatedTimestamp() p.state = stateCollecting } diff --git a/model/textparse/nhcbparse_test.go b/model/textparse/nhcbparse_test.go index 1ead2e30e..b97de0f7e 100644 --- a/model/textparse/nhcbparse_test.go +++ b/model/textparse/nhcbparse_test.go @@ -16,6 +16,7 @@ package textparse import ( "bytes" "encoding/binary" + "strconv" "testing" "github.com/gogo/protobuf/proto" @@ -493,7 +494,6 @@ something_bucket{a="b",le="+Inf"} 9 # {id="something-test"} 2e100 123.000 {Labels: labels.FromStrings("id", "something-test"), Value: 0.5}, {Labels: labels.FromStrings("id", "something-test"), Value: 8.0}, }, - // TODO(krajorama): ct: int64p(1520430001000), }, { m: `something{a="b"}`, shs: &histogram.Histogram{ @@ -509,7 +509,6 @@ something_bucket{a="b",le="+Inf"} 9 # {id="something-test"} 2e100 123.000 {Labels: labels.FromStrings("id", "something-test"), Value: 0.0, HasTs: true, Ts: 123321}, {Labels: labels.FromStrings("id", "something-test"), Value: 2e100, HasTs: true, Ts: 123000}, }, - // TODO(krajorama): ct: int64p(1520430002000), }, } @@ -520,112 +519,208 
@@ something_bucket{a="b",le="+Inf"} 9 # {id="something-test"} 2e100 123.000 requireEntries(t, exp, got) } -// Verify that the NHCBParser does not parse the NHCB when the exponential is present. +// Verify the requirement tables from +// https://github.com/prometheus/prometheus/issues/13532 . +// "classic" means the option "always_scrape_classic_histograms". +// "nhcb" means the option "convert_classic_histograms_to_nhcb". +// +// Currently only with the ProtoBuf parser that supports exponential +// histograms. +// +// Case 1. Only classic histogram is exposed. +// +// | Scrape Config | Expect classic | Expect exponential | Expect NHCB |. +// | classic=false, nhcb=false | YES | NO | NO |. +// | classic=true, nhcb=false | YES | NO | NO |. +// | classic=false, nhcb=true | NO | NO | YES |. +// | classic=true, nhcb=true | YES | NO | YES |. +// +// Case 2. Both classic and exponential histograms are exposed. +// +// | Scrape Config | Expect classic | Expect exponential | Expect NHCB |. +// | classic=false, nhcb=false | NO | YES | NO |. +// | classic=true, nhcb=false | YES | YES | NO |. +// | classic=false, nhcb=true | NO | YES | NO |. +// | classic=true, nhcb=true | YES | YES | NO |. +// +// Case 3. Only exponential histogram is exposed. +// +// | Scrape Config | Expect classic | Expect exponential | Expect NHCB |. +// | classic=false, nhcb=false | NO | YES | NO |. +// | classic=true, nhcb=false | NO | YES | NO |. +// | classic=false, nhcb=true | NO | YES | NO |. +// | classic=true, nhcb=true | NO | YES | NO |. func TestNHCBParserProtoBufParser_NoNHCBWhenExponential(t *testing.T) { - inputBuf := createTestProtoBufHistogram(t) - // Initialize the protobuf parser so that it returns classic histograms as - // well when there's both classic and exponential histograms. 
- p := NewProtobufParser(inputBuf.Bytes(), true, labels.NewSymbolTable()) + type requirement struct { + expectClassic bool + expectExponential bool + expectNHCB bool + } - // Initialize the NHCBParser so that it returns classic histograms as well - // when there's both classic and exponential histograms. - p = NewNHCBParser(p, labels.NewSymbolTable(), true) - - exp := []parsedEntry{ + cases := []map[string]requirement{ + // Case 1. { - m: "test_histogram", - help: "Test histogram with classic and exponential buckets.", + "classic=false, nhcb=false": {expectClassic: true, expectExponential: false, expectNHCB: false}, + "classic=true, nhcb=false": {expectClassic: true, expectExponential: false, expectNHCB: false}, + "classic=false, nhcb=true": {expectClassic: false, expectExponential: false, expectNHCB: true}, + "classic=true, nhcb=true": {expectClassic: true, expectExponential: false, expectNHCB: true}, }, + // Case 2. { - m: "test_histogram", - typ: model.MetricTypeHistogram, + "classic=false, nhcb=false": {expectClassic: false, expectExponential: true, expectNHCB: false}, + "classic=true, nhcb=false": {expectClassic: true, expectExponential: true, expectNHCB: false}, + "classic=false, nhcb=true": {expectClassic: false, expectExponential: true, expectNHCB: false}, + "classic=true, nhcb=true": {expectClassic: true, expectExponential: true, expectNHCB: false}, }, + // Case 3. 
{ - m: "test_histogram", - shs: &histogram.Histogram{ - Schema: 3, - Count: 175, - Sum: 0.0008280461746287094, - ZeroThreshold: 2.938735877055719e-39, - ZeroCount: 2, - PositiveSpans: []histogram.Span{{Offset: -161, Length: 1}, {Offset: 8, Length: 3}}, - NegativeSpans: []histogram.Span{{Offset: -162, Length: 1}, {Offset: 23, Length: 4}}, - PositiveBuckets: []int64{1, 2, -1, -1}, - NegativeBuckets: []int64{1, 3, -2, -1, 1}, - }, - lset: labels.FromStrings("__name__", "test_histogram"), - t: int64p(1234568), - ct: int64p(1000), - }, - { - m: "test_histogram_count", - v: 175, - lset: labels.FromStrings("__name__", "test_histogram_count"), - t: int64p(1234568), - ct: int64p(1000), - }, - { - m: "test_histogram_sum", - v: 0.0008280461746287094, - lset: labels.FromStrings("__name__", "test_histogram_sum"), - t: int64p(1234568), - ct: int64p(1000), - }, - { - m: "test_histogram_bucket\xffle\xff-0.0004899999999999998", - v: 2, - lset: labels.FromStrings("__name__", "test_histogram_bucket", "le", "-0.0004899999999999998"), - t: int64p(1234568), - ct: int64p(1000), - }, - { - m: "test_histogram_bucket\xffle\xff-0.0003899999999999998", - v: 4, - lset: labels.FromStrings("__name__", "test_histogram_bucket", "le", "-0.0003899999999999998"), - t: int64p(1234568), - ct: int64p(1000), - }, - { - m: "test_histogram_bucket\xffle\xff-0.0002899999999999998", - v: 16, - lset: labels.FromStrings("__name__", "test_histogram_bucket", "le", "-0.0002899999999999998"), - t: int64p(1234568), - ct: int64p(1000), - }, - { - m: "test_histogram_bucket\xffle\xff+Inf", - v: 175, - lset: labels.FromStrings("__name__", "test_histogram_bucket", "le", "+Inf"), - t: int64p(1234568), - ct: int64p(1000), - }, - { - // TODO(krajorama): optimize: this should not be here. In case there's - // an exponential histogram we should not convert the classic histogram - // to NHCB. 
In the end TSDB will throw this away with - // storage.errDuplicateSampleForTimestamp error at Commit(), but it - // is better to avoid this conversion in the first place. - m: "test_histogram{}", - shs: &histogram.Histogram{ - Schema: histogram.CustomBucketsSchema, - Count: 175, - Sum: 0.0008280461746287094, - PositiveSpans: []histogram.Span{{Length: 4}}, - PositiveBuckets: []int64{2, 0, 10, 147}, - CustomValues: []float64{-0.0004899999999999998, -0.0003899999999999998, -0.0002899999999999998}, - }, - lset: labels.FromStrings("__name__", "test_histogram"), - t: int64p(1234568), - ct: int64p(1000), + "classic=false, nhcb=false": {expectClassic: false, expectExponential: true, expectNHCB: false}, + "classic=true, nhcb=false": {expectClassic: false, expectExponential: true, expectNHCB: false}, + "classic=false, nhcb=true": {expectClassic: false, expectExponential: true, expectNHCB: false}, + "classic=true, nhcb=true": {expectClassic: false, expectExponential: true, expectNHCB: false}, }, } - got := testParse(t, p) - requireEntries(t, exp, got) + + type testCase struct { + name string + classic bool + nhcb bool + exp []parsedEntry + } + + testCases := []testCase{} + for _, classic := range []bool{false, true} { + for _, nhcb := range []bool{false, true} { + tc := testCase{ + name: "classic=" + strconv.FormatBool(classic) + ", nhcb=" + strconv.FormatBool(nhcb), + classic: classic, + nhcb: nhcb, + exp: []parsedEntry{}, + } + for i, caseI := range cases { + req := caseI[tc.name] + metric := "test_histogram" + strconv.Itoa(i+1) + tc.exp = append(tc.exp, parsedEntry{ + m: metric, + help: "Test histogram " + strconv.Itoa(i+1), + }) + tc.exp = append(tc.exp, parsedEntry{ + m: metric, + typ: model.MetricTypeHistogram, + }) + if req.expectExponential { + // Always expect exponential histogram first. 
+ exponentialSeries := []parsedEntry{ + { + m: metric, + shs: &histogram.Histogram{ + Schema: 3, + Count: 175, + Sum: 0.0008280461746287094, + ZeroThreshold: 2.938735877055719e-39, + ZeroCount: 2, + PositiveSpans: []histogram.Span{{Offset: -161, Length: 1}, {Offset: 8, Length: 3}}, + NegativeSpans: []histogram.Span{{Offset: -162, Length: 1}, {Offset: 23, Length: 4}}, + PositiveBuckets: []int64{1, 2, -1, -1}, + NegativeBuckets: []int64{1, 3, -2, -1, 1}, + }, + lset: labels.FromStrings("__name__", metric), + t: int64p(1234568), + ct: int64p(1000), + }, + } + tc.exp = append(tc.exp, exponentialSeries...) + } + if req.expectClassic { + // Always expect classic histogram series after exponential. + classicSeries := []parsedEntry{ + { + m: metric + "_count", + v: 175, + lset: labels.FromStrings("__name__", metric+"_count"), + t: int64p(1234568), + ct: int64p(1000), + }, + { + m: metric + "_sum", + v: 0.0008280461746287094, + lset: labels.FromStrings("__name__", metric+"_sum"), + t: int64p(1234568), + ct: int64p(1000), + }, + { + m: metric + "_bucket\xffle\xff-0.0004899999999999998", + v: 2, + lset: labels.FromStrings("__name__", metric+"_bucket", "le", "-0.0004899999999999998"), + t: int64p(1234568), + ct: int64p(1000), + }, + { + m: metric + "_bucket\xffle\xff-0.0003899999999999998", + v: 4, + lset: labels.FromStrings("__name__", metric+"_bucket", "le", "-0.0003899999999999998"), + t: int64p(1234568), + ct: int64p(1000), + }, + { + m: metric + "_bucket\xffle\xff-0.0002899999999999998", + v: 16, + lset: labels.FromStrings("__name__", metric+"_bucket", "le", "-0.0002899999999999998"), + t: int64p(1234568), + ct: int64p(1000), + }, + { + m: metric + "_bucket\xffle\xff+Inf", + v: 175, + lset: labels.FromStrings("__name__", metric+"_bucket", "le", "+Inf"), + t: int64p(1234568), + ct: int64p(1000), + }, + } + tc.exp = append(tc.exp, classicSeries...) + } + if req.expectNHCB { + // Always expect NHCB series after classic. 
+ nhcbSeries := []parsedEntry{ + { + m: metric + "{}", + shs: &histogram.Histogram{ + Schema: histogram.CustomBucketsSchema, + Count: 175, + Sum: 0.0008280461746287094, + PositiveSpans: []histogram.Span{{Length: 4}}, + PositiveBuckets: []int64{2, 0, 10, 147}, + CustomValues: []float64{-0.0004899999999999998, -0.0003899999999999998, -0.0002899999999999998}, + }, + lset: labels.FromStrings("__name__", metric), + t: int64p(1234568), + ct: int64p(1000), + }, + } + tc.exp = append(tc.exp, nhcbSeries...) + } + } + testCases = append(testCases, tc) + } + } + + inputBuf := createTestProtoBufHistogram(t) + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + p := NewProtobufParser(inputBuf.Bytes(), tc.classic, labels.NewSymbolTable()) + if tc.nhcb { + p = NewNHCBParser(p, labels.NewSymbolTable(), tc.classic) + } + got := testParse(t, p) + requireEntries(t, tc.exp, got) + }) + } } func createTestProtoBufHistogram(t *testing.T) *bytes.Buffer { - testMetricFamilies := []string{`name: "test_histogram" -help: "Test histogram with classic and exponential buckets." 
+ testMetricFamilies := []string{`name: "test_histogram1" +help: "Test histogram 1" type: HISTOGRAM metric: < histogram: < @@ -647,6 +742,72 @@ metric: < cumulative_count: 16 upper_bound: -0.0002899999999999998 > + > + timestamp_ms: 1234568 +>`, `name: "test_histogram2" +help: "Test histogram 2" +type: HISTOGRAM +metric: < + histogram: < + created_timestamp: < + seconds: 1 + nanos: 1 + > + sample_count: 175 + sample_sum: 0.0008280461746287094 + bucket: < + cumulative_count: 2 + upper_bound: -0.0004899999999999998 + > + bucket: < + cumulative_count: 4 + upper_bound: -0.0003899999999999998 + > + bucket: < + cumulative_count: 16 + upper_bound: -0.0002899999999999998 + > + schema: 3 + zero_threshold: 2.938735877055719e-39 + zero_count: 2 + negative_span: < + offset: -162 + length: 1 + > + negative_span: < + offset: 23 + length: 4 + > + negative_delta: 1 + negative_delta: 3 + negative_delta: -2 + negative_delta: -1 + negative_delta: 1 + positive_span: < + offset: -161 + length: 1 + > + positive_span: < + offset: 8 + length: 3 + > + positive_delta: 1 + positive_delta: 2 + positive_delta: -1 + positive_delta: -1 + > + timestamp_ms: 1234568 +>`, `name: "test_histogram3" +help: "Test histogram 3" +type: HISTOGRAM +metric: < + histogram: < + created_timestamp: < + seconds: 1 + nanos: 1 + > + sample_count: 175 + sample_sum: 0.0008280461746287094 schema: 3 zero_threshold: 2.938735877055719e-39 zero_count: 2 From 3cb09acb218189de660703a2823a24dc53f3a978 Mon Sep 17 00:00:00 2001 From: Jesus Vazquez Date: Thu, 24 Oct 2024 18:18:21 +0200 Subject: [PATCH 302/339] Docs: Remove experimental note on out of order feature (#15215) Signed-off-by: Jesus Vazquez --- docs/configuration/configuration.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/docs/configuration/configuration.md b/docs/configuration/configuration.md index 31ceac734..104f7754f 100644 --- a/docs/configuration/configuration.md +++ b/docs/configuration/configuration.md @@ -2940,8 +2940,6 @@ with this feature. 
`tsdb` lets you configure the runtime-reloadable configuration settings of the TSDB. -NOTE: Out-of-order ingestion is an experimental feature, but you do not need any additional flag to enable it. Setting `out_of_order_time_window` to a positive duration enables it. - ```yaml # Configures how old an out-of-order/out-of-bounds sample can be w.r.t. the TSDB max time. # An out-of-order/out-of-bounds sample is ingested into the TSDB as long as the timestamp From 99882eec3ba32d45178fa4651be5f68048bae8e4 Mon Sep 17 00:00:00 2001 From: Ben Ye Date: Thu, 24 Oct 2024 09:27:15 -0700 Subject: [PATCH 303/339] log last series labelset when hitting OOO series labels during compaction Signed-off-by: Ben Ye --- tsdb/index/index.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tsdb/index/index.go b/tsdb/index/index.go index 3cd00729a..8c0f698ea 100644 --- a/tsdb/index/index.go +++ b/tsdb/index/index.go @@ -438,7 +438,7 @@ func (w *Writer) AddSeries(ref storage.SeriesRef, lset labels.Labels, chunks ... return err } if labels.Compare(lset, w.lastSeries) <= 0 { - return fmt.Errorf("out-of-order series added with label set %q", lset) + return fmt.Errorf("out-of-order series added with label set %q, last label set %q", lset, w.lastSeries) } if ref < w.lastSeriesRef && !w.lastSeries.IsEmpty() { From 20fdc8f541274aa117dafe974c2118c07f05d8a6 Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Thu, 24 Oct 2024 14:07:54 +0100 Subject: [PATCH 304/339] [CHANGE] Remote-write: default enable_http2 to false Remote-write creates several shards to parallelise sending, each with its own http connection. We do not want them all combined onto one socket by http2. 
Signed-off-by: Bryan Boreham --- CHANGELOG.md | 1 + config/config.go | 7 ++++++- docs/configuration/configuration.md | 1 + 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 72d9f7a11..084b88d6f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ ## unreleased * [CHANGE] Scraping: Remove implicit fallback to the Prometheus text format in case of invalid/missing Content-Type and fail the scrape instead. Add ability to specify a `fallback_scrape_protocol` in the scrape config. #15136 +* [CHANGE] Remote-write: default enable_http2 to false. * [ENHANCEMENT] Scraping, rules: handle targets reappearing, or rules moving group, when out-of-order is enabled. #14710 - [BUGFIX] PromQL: Fix stddev+stdvar aggregations to always ignore native histograms. #14941 - [BUGFIX] PromQL: Fix stddev+stdvar aggregations to treat Infinity consistently. #14941 diff --git a/config/config.go b/config/config.go index 657c4fc75..30a74e040 100644 --- a/config/config.go +++ b/config/config.go @@ -181,13 +181,18 @@ var ( HTTPClientConfig: config.DefaultHTTPClientConfig, } + DefaultRemoteWriteHTTPClientConfig = config.HTTPClientConfig{ + FollowRedirects: true, + EnableHTTP2: false, + } + // DefaultRemoteWriteConfig is the default remote write configuration. DefaultRemoteWriteConfig = RemoteWriteConfig{ RemoteTimeout: model.Duration(30 * time.Second), ProtobufMessage: RemoteWriteProtoMsgV1, QueueConfig: DefaultQueueConfig, MetadataConfig: DefaultMetadataConfig, - HTTPClientConfig: config.DefaultHTTPClientConfig, + HTTPClientConfig: DefaultRemoteWriteHTTPClientConfig, } // DefaultQueueConfig is the default remote queue configuration. 
diff --git a/docs/configuration/configuration.md b/docs/configuration/configuration.md index 104f7754f..2093ed883 100644 --- a/docs/configuration/configuration.md +++ b/docs/configuration/configuration.md @@ -2889,6 +2889,7 @@ metadata_config: # HTTP client settings, including authentication methods (such as basic auth and # authorization), proxy configurations, TLS options, custom HTTP headers, etc. +# enable_http2 defaults to false for remote-write. [ ] ``` From 7939eab77ae11cc064c114c7b5e2df7190ff4777 Mon Sep 17 00:00:00 2001 From: Jan Fajerski Date: Thu, 24 Oct 2024 22:32:08 +0200 Subject: [PATCH 305/339] remote-write: change test default expected to http2 disabled Signed-off-by: Jan Fajerski --- config/config_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/config/config_test.go b/config/config_test.go index 8bf664c1f..c3148f93a 100644 --- a/config/config_test.go +++ b/config/config_test.go @@ -142,7 +142,7 @@ var expectedConf = &Config{ }, }, FollowRedirects: true, - EnableHTTP2: true, + EnableHTTP2: false, }, }, { @@ -158,7 +158,7 @@ var expectedConf = &Config{ KeyFile: filepath.FromSlash("testdata/valid_key_file"), }, FollowRedirects: true, - EnableHTTP2: true, + EnableHTTP2: false, }, Headers: map[string]string{"name": "value"}, }, From b602393473ac8ddceb9c3de308643414f8d2b531 Mon Sep 17 00:00:00 2001 From: TJ Hoplock Date: Thu, 24 Oct 2024 01:01:25 -0400 Subject: [PATCH 306/339] fix: avoid data race in log deduper This change should have been included in the initial prometheus slog conversion, but I must've lost track of it in all the rebases involved in that PR. This changes the dedupe logger so that the only method that needs to use the lock is the `Handle()` method that actually interacts with the deduplication map. 
Ex: ``` ================== WARNING: DATA RACE Write at 0x00c000518bc0 by goroutine 29481: github.com/prometheus/prometheus/util/logging.(*Deduper).WithAttrs() /home/tjhop/go/src/github.com/prometheus/prometheus/util/logging/dedupe.go:89 +0xef log/slog.(*Logger).With() /home/tjhop/.asdf/installs/golang/1.23.1/go/src/log/slog/logger.go:132 +0x106 github.com/prometheus/prometheus/storage/remote.NewQueueManager() /home/tjhop/go/src/github.com/prometheus/prometheus/storage/remote/queue_manager.go:483 +0x7a9 github.com/prometheus/prometheus/storage/remote.(*WriteStorage).ApplyConfig() /home/tjhop/go/src/github.com/prometheus/prometheus/storage/remote/write.go:201 +0x102c github.com/prometheus/prometheus/storage/remote.(*Storage).ApplyConfig() /home/tjhop/go/src/github.com/prometheus/prometheus/storage/remote/storage.go:92 +0xfd github.com/prometheus/prometheus/storage/remote.TestWriteStorageApplyConfigsDuringCommit.func1() /home/tjhop/go/src/github.com/prometheus/prometheus/storage/remote/storage_test.go:172 +0x3e4 github.com/prometheus/prometheus/storage/remote.TestWriteStorageApplyConfigsDuringCommit.gowrap1() /home/tjhop/go/src/github.com/prometheus/prometheus/storage/remote/storage_test.go:174 +0x41 Previous read at 0x00c000518bc0 by goroutine 31261: github.com/prometheus/prometheus/util/logging.(*Deduper).Handle() /home/tjhop/go/src/github.com/prometheus/prometheus/util/logging/dedupe.go:82 +0x2b1 log/slog.(*Logger).log() /home/tjhop/.asdf/installs/golang/1.23.1/go/src/log/slog/logger.go:257 +0x228 log/slog.(*Logger).Error() /home/tjhop/.asdf/installs/golang/1.23.1/go/src/log/slog/logger.go:230 +0x3d4 github.com/prometheus/prometheus/tsdb/wlog.(*Watcher).loop() /home/tjhop/go/src/github.com/prometheus/prometheus/tsdb/wlog/watcher.go:254 +0x2db github.com/prometheus/prometheus/tsdb/wlog.(*Watcher).Start.gowrap1() /home/tjhop/go/src/github.com/prometheus/prometheus/tsdb/wlog/watcher.go:227 +0x33 Goroutine 29481 (running) created at: 
github.com/prometheus/prometheus/storage/remote.TestWriteStorageApplyConfigsDuringCommit() /home/tjhop/go/src/github.com/prometheus/prometheus/storage/remote/storage_test.go:164 +0xe4 testing.tRunner() /home/tjhop/.asdf/installs/golang/1.23.1/go/src/testing/testing.go:1690 +0x226 testing.(*T).Run.gowrap1() /home/tjhop/.asdf/installs/golang/1.23.1/go/src/testing/testing.go:1743 +0x44 Goroutine 31261 (running) created at: github.com/prometheus/prometheus/tsdb/wlog.(*Watcher).Start() /home/tjhop/go/src/github.com/prometheus/prometheus/tsdb/wlog/watcher.go:227 +0x177 github.com/prometheus/prometheus/storage/remote.(*QueueManager).Start() /home/tjhop/go/src/github.com/prometheus/prometheus/storage/remote/queue_manager.go:934 +0x304 github.com/prometheus/prometheus/storage/remote.(*WriteStorage).ApplyConfig() /home/tjhop/go/src/github.com/prometheus/prometheus/storage/remote/write.go:232 +0x151b github.com/prometheus/prometheus/storage/remote.(*Storage).ApplyConfig() /home/tjhop/go/src/github.com/prometheus/prometheus/storage/remote/storage.go:92 +0xfd github.com/prometheus/prometheus/storage/remote.TestWriteStorageApplyConfigsDuringCommit.func1() /home/tjhop/go/src/github.com/prometheus/prometheus/storage/remote/storage_test.go:172 +0x3e4 github.com/prometheus/prometheus/storage/remote.TestWriteStorageApplyConfigsDuringCommit.gowrap1() /home/tjhop/go/src/github.com/prometheus/prometheus/storage/remote/storage_test.go:174 +0x41 ================== --- FAIL: TestWriteStorageApplyConfigsDuringCommit (2.26s) testing.go:1399: race detected during execution of test FAIL FAIL github.com/prometheus/prometheus/storage/remote 68.321s ``` Signed-off-by: TJ Hoplock --- util/logging/dedupe.go | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/util/logging/dedupe.go b/util/logging/dedupe.go index 37b345b39..b08b80a85 100644 --- a/util/logging/dedupe.go +++ b/util/logging/dedupe.go @@ -51,11 +51,7 @@ func Dedupe(next *slog.Logger, repeat 
time.Duration) *Deduper { // provided context and log level, and returns false otherwise. It implements // slog.Handler. func (d *Deduper) Enabled(ctx context.Context, level slog.Level) bool { - d.mtx.RLock() - enabled := d.next.Enabled(ctx, level) - d.mtx.RUnlock() - - return enabled + return d.next.Enabled(ctx, level) } // Handle uses the provided context and slog.Record to deduplicate messages @@ -85,19 +81,27 @@ func (d *Deduper) Handle(ctx context.Context, r slog.Record) error { // WithAttrs adds the provided attributes to the Deduper's internal // slog.Logger. It implements slog.Handler. func (d *Deduper) WithAttrs(attrs []slog.Attr) slog.Handler { - d.mtx.Lock() - d.next = slog.New(d.next.Handler().WithAttrs(attrs)) - d.mtx.Unlock() - return d + return &Deduper{ + next: slog.New(d.next.Handler().WithAttrs(attrs)), + repeat: d.repeat, + quit: d.quit, + seen: d.seen, + } } // WithGroup adds the provided group name to the Deduper's internal // slog.Logger. It implements slog.Handler. func (d *Deduper) WithGroup(name string) slog.Handler { - d.mtx.Lock() - d.next = slog.New(d.next.Handler().WithGroup(name)) - d.mtx.Unlock() - return d + if name == "" { + return d + } + + return &Deduper{ + next: slog.New(d.next.Handler().WithGroup(name)), + repeat: d.repeat, + quit: d.quit, + seen: d.seen, + } } // Info logs the provided message and key-value arguments using the Deduper's From 4f9e4dc0165e3a7d818f2933c80aaa9c2097b3c5 Mon Sep 17 00:00:00 2001 From: TJ Hoplock Date: Thu, 24 Oct 2024 01:31:21 -0400 Subject: [PATCH 307/339] ref: remove unused deduper log wrapper methods I used these wrapper methods during initial development of the custom handler that the deduper now implements. Since the deduper implements slog.Handler and can be used directly as a logger, these wrapper methods are no longer needed. 
Signed-off-by: TJ Hoplock --- util/logging/dedupe.go | 24 ------------------------ 1 file changed, 24 deletions(-) diff --git a/util/logging/dedupe.go b/util/logging/dedupe.go index b08b80a85..d5aee5c09 100644 --- a/util/logging/dedupe.go +++ b/util/logging/dedupe.go @@ -104,30 +104,6 @@ func (d *Deduper) WithGroup(name string) slog.Handler { } } -// Info logs the provided message and key-value arguments using the Deduper's -// internal slog.Logger. It is simply a wrapper around slog.Logger.Info(). -func (d *Deduper) Info(msg string, args ...any) { - d.next.Info(msg, args...) -} - -// Warn logs the provided message and key-value arguments using the Deduper's -// internal slog.Logger. It is simply a wrapper around slog.Logger.Warn(). -func (d *Deduper) Warn(msg string, args ...any) { - d.next.Warn(msg, args...) -} - -// Error logs the provided message and key-value arguments using the Deduper's -// internal slog.Logger. It is simply a wrapper around slog.Logger.Error(). -func (d *Deduper) Error(msg string, args ...any) { - d.next.Error(msg, args...) -} - -// Debug logs the provided message and key-value arguments using the Deduper's -// internal slog.Logger. It is simply a wrapper around slog.Logger.Debug(). -func (d *Deduper) Debug(msg string, args ...any) { - d.next.Debug(msg, args...) -} - // Stop the Deduper. 
func (d *Deduper) Stop() { close(d.quit) From f131cdd4c5471deeda4db376d2f2b804e386dd96 Mon Sep 17 00:00:00 2001 From: Jan Fajerski Date: Fri, 25 Oct 2024 12:30:13 +0200 Subject: [PATCH 308/339] 3.0 migration guide (#15099) * docs: 2 to 3 migration guide Signed-off-by: Jan Fajerski * docs/stability: add 3.0 section Signed-off-by: Jan Fajerski * docs/migration: details on enabling legacy name validation Signed-off-by: Owen Williams \ * migration: add log format and `le` normalization Signed-off-by: Jan Fajerski * migration: add new enable_http2 default for remote write Signed-off-by: Jan Fajerski --------- Signed-off-by: Jan Fajerski Signed-off-by: Owen Williams Co-authored-by: Owen Williams --- docs/migration.md | 348 +++++++++++++++++++++++----------------------- docs/stability.md | 16 ++- 2 files changed, 184 insertions(+), 180 deletions(-) diff --git a/docs/migration.md b/docs/migration.md index cb88bbfd6..43fc43df2 100644 --- a/docs/migration.md +++ b/docs/migration.md @@ -3,198 +3,198 @@ title: Migration sort_rank: 10 --- -# Prometheus 2.0 migration guide +# Prometheus 3.0 migration guide -In line with our [stability promise](https://prometheus.io/blog/2016/07/18/prometheus-1-0-released/#fine-print), -the Prometheus 2.0 release contains a number of backwards incompatible changes. -This document offers guidance on migrating from Prometheus 1.8 to Prometheus 2.0 and newer versions. +In line with our [stability promise](https://prometheus.io/docs/prometheus/latest/stability/), +the Prometheus 3.0 release contains a number of backwards incompatible changes. +This document offers guidance on migrating from Prometheus 2.x to Prometheus 3.0 and newer versions. ## Flags -The format of Prometheus command line flags has changed. Instead of a -single dash, all flags now use a double dash. Common flags (`--config.file`, -`--web.listen-address` and `--web.external-url`) remain but -almost all storage-related flags have been removed. 
+- The following feature flags have been removed and they have been added to the + default behavior of Prometheus v3: + - `promql-at-modifier` + - `promql-negative-offset` + - `remote-write-receiver` + - `new-service-discovery-manager` + - `expand-external-labels` + Environment variable references `${var}` or `$var` in external label values + are replaced according to the values of the current environment variables. + References to undefined variables are replaced by the empty string. + The `$` character can be escaped by using `$$`. + - `no-default-scrape-port` + Prometheus v3 will no longer add ports to scrape targets according to the + specified scheme. Target will now appear in labels as configured. + If you rely on scrape targets like + `https://example.com/metrics` or `http://example.com/metrics` to be + represented as `https://example.com/metrics:443` and + `http://example.com/metrics:80` respectively, add them to your target URLs + - `agent` + Instead use the dedicated `--agent` cli flag. -Some notable flags which have been removed: + Prometheus v3 will log a warning if you continue to pass these to + `--enable-feature`. -- `-alertmanager.url` In Prometheus 2.0, the command line flags for configuring - a static Alertmanager URL have been removed. Alertmanager must now be - discovered via service discovery, see [Alertmanager service discovery](#alertmanager-service-discovery). +## Configuration -- `-log.format` In Prometheus 2.0 logs can only be streamed to standard error. - -- `-query.staleness-delta` has been renamed to `--query.lookback-delta`; Prometheus - 2.0 introduces a new mechanism for handling staleness, see [staleness](querying/basics.md#staleness). - -- `-storage.local.*` Prometheus 2.0 introduces a new storage engine; as such all - flags relating to the old engine have been removed. For information on the - new engine, see [Storage](#storage). 
- -- `-storage.remote.*` Prometheus 2.0 has removed the deprecated remote - storage flags, and will fail to start if they are supplied. To write to - InfluxDB, Graphite, or OpenTSDB use the relevant storage adapter. - -## Alertmanager service discovery - -Alertmanager service discovery was introduced in Prometheus 1.4, allowing Prometheus -to dynamically discover Alertmanager replicas using the same mechanism as scrape -targets. In Prometheus 2.0, the command line flags for static Alertmanager config -have been removed, so the following command line flag: - -``` -./prometheus -alertmanager.url=http://alertmanager:9093/ -``` - -Would be replaced with the following in the `prometheus.yml` config file: - -```yaml -alerting: - alertmanagers: - - static_configs: - - targets: - - alertmanager:9093 -``` - -You can also use all the usual Prometheus service discovery integrations and -relabeling in your Alertmanager configuration. This snippet instructs -Prometheus to search for Kubernetes pods, in the `default` namespace, with the -label `name: alertmanager` and with a non-empty port. - -```yaml -alerting: - alertmanagers: - - kubernetes_sd_configs: - - role: pod - tls_config: - ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt - bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token - relabel_configs: - - source_labels: [__meta_kubernetes_pod_label_name] - regex: alertmanager - action: keep - - source_labels: [__meta_kubernetes_namespace] - regex: default - action: keep - - source_labels: [__meta_kubernetes_pod_container_port_number] - regex: - action: drop -``` - -## Recording rules and alerts - -The format for configuring alerting and recording rules has been changed to YAML. 
-An example of a recording rule and alert in the old format: - -``` -job:request_duration_seconds:histogram_quantile99 = - histogram_quantile(0.99, sum by (le, job) (rate(request_duration_seconds_bucket[1m]))) - -ALERT FrontendRequestLatency - IF job:request_duration_seconds:histogram_quantile99{job="frontend"} > 0.1 - FOR 5m - ANNOTATIONS { - summary = "High frontend request latency", - } -``` - -Would look like this: - -```yaml -groups: -- name: example.rules - rules: - - record: job:request_duration_seconds:histogram_quantile99 - expr: histogram_quantile(0.99, sum by (le, job) (rate(request_duration_seconds_bucket[1m]))) - - alert: FrontendRequestLatency - expr: job:request_duration_seconds:histogram_quantile99{job="frontend"} > 0.1 - for: 5m - annotations: - summary: High frontend request latency -``` - -To help with the change, the `promtool` tool has a mode to automate the rules conversion. Given a `.rules` file, it will output a `.rules.yml` file in the -new format. For example: - -``` -$ promtool update rules example.rules -``` - -You will need to use `promtool` from [Prometheus 2.5](https://github.com/prometheus/prometheus/releases/tag/v2.5.0) as later versions no longer contain the above subcommand. - -## Storage - -The data format in Prometheus 2.0 has completely changed and is not backwards -compatible with 1.8 and older versions. To retain access to your historic monitoring data we -recommend you run a non-scraping Prometheus instance running at least version -1.8.1 in parallel with your Prometheus 2.0 instance, and have the new server -read existing data from the old one via the remote read protocol. 
- -Your Prometheus 1.8 instance should be started with the following flags and an -config file containing only the `external_labels` setting (if any): - -``` -$ ./prometheus-1.8.1.linux-amd64/prometheus -web.listen-address ":9094" -config.file old.yml -``` - -Prometheus 2.0 can then be started (on the same machine) with the following flags: - -``` -$ ./prometheus-2.0.0.linux-amd64/prometheus --config.file prometheus.yml -``` - -Where `prometheus.yml` contains in addition to your full existing configuration, the stanza: - -```yaml -remote_read: - - url: "http://localhost:9094/api/v1/read" -``` +- The scrape job level configuration option `scrape_classic_histograms` has been + renamed to `always_scrape_classic_histograms`. If you use the + `--enable-feature=native-histograms` feature flag to ingest native histograms + and you also want to ingest classic histograms that an endpoint might expose + along with native histograms, be sure to add this configuration or change your + configuration from the old name. +- The `http_config.enable_http2` in `remote_write` items default has been + changed to `false`. In Prometheus v2 the remote write http client would + default to use http2. In order to parallelize multiple remote write queues + across multiple sockets it's preferable to not default to http2. + If you prefer to use http2 for remote write you must now set + `http_config.enable_http2: true` in your `remote_write` configuration section. ## PromQL -The following features have been removed from PromQL: +- The `.` pattern in regular expressions in PromQL matches newline characters. + With this change a regular expression like `.*` matches strings that include + `\n`. This applies to matchers in queries and relabel configs. For example the + following regular expressions now match the accompanying strings, whereas in + Prometheus v2 these combinations didn't match. -- `drop_common_labels` function - the `without` aggregation modifier should be used - instead. 
-- `keep_common` aggregation modifier - the `by` modifier should be used instead. -- `count_scalar` function - use cases are better handled by `absent()` or correct - propagation of labels in operations. +| Regex | Additional matches | +| ----- | ------ | +| ".*" | "foo\n", "Foo\nBar" | +| "foo.?bar" | "foo\nbar" | +| "foo.+bar" | "foo\nbar" | -See [issue #3060](https://github.com/prometheus/prometheus/issues/3060) for more -details. + If you want Prometheus v3 to behave like v2 did, you will have to change your + regular expressions by replacing all `.` patterns with `[^\n]`, e.g. + `foo[^\n]*`. +- Lookback and range selectors are left open and right closed (previously left + closed and right closed). This change affects queries when the evaluation time + perfectly aligns with the sample timestamps. For example assume querying a + timeseries with evenly spaced samples exactly 1 minute apart. Before Prometheus + 3.x, a range query with `5m` would mostly return 5 samples. But if the query + evaluation aligns perfectly with a scrape, it would return 6 samples. In + Prometheus 3.x queries like this will always return 5 samples. + This change has likely few effects for everyday use, except for some sub query + use cases. + Query front-ends that align queries usually align sub-queries to multiples of + the step size. These sub queries will likely be affected. + Tests are more likely to be affected. To fix those either adjust the expected + number of samples or extend the range by less than one sample interval. +- The `holt_winters` function has been renamed to `double_exponential_smoothing` + and is now guarded by the `promql-experimental-functions` feature flag. + If you want to keep using holt_winters, you have to do both of these things: + - Rename holt_winters to double_exponential_smoothing in your queries. + - Pass `--enable-feature=promql-experimental-functions` in your Prometheus + cli invocation. 
+ +## Scrape protocols +Prometheus v3 is more strict concerning the Content-Type header received when +scraping. Prometheus v2 would default to the standard Prometheus text protocol +if the target being scraped did not specify a Content-Type header or if the +header was unparsable or unrecognised. This could lead to incorrect data being +parsed in the scrape. Prometheus v3 will now fail the scrape in such cases. + +If a scrape target is not providing the correct Content-Type header the +fallback protocol can be specified using the fallback_scrape_protocol +parameter. See [Prometheus scrape_config documentation.](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#scrape_config) + +This is a breaking change as scrapes that may have succeeded with Prometheus v2 +may now fail if this fallback protocol is not specified. ## Miscellaneous -### Prometheus non-root user +### TSDB format and downgrade +The TSDB format has been changed in Prometheus v2.55 in preparation for changes +to the index format. Consequently a Prometheus v3 tsdb can only be read by a +Prometheus v2.55 or newer. +Before upgrading to Prometheus v3 please upgrade to v2.55 first and confirm +Prometheus works as expected. Only then continue with the upgrade to v3. -The Prometheus Docker image is now built to [run Prometheus -as a non-root user](https://github.com/prometheus/prometheus/pull/2859). If you -want the Prometheus UI/API to listen on a low port number (say, port 80), you'll -need to override it. For Kubernetes, you would use the following YAML: +### TSDB Storage contract +TSDB compatible storage is now expected to return results matching the specified +selectors. This might impact some third party implementations, most likely +implementing `remote_read`. +This contract is not explicitly enforced, but can cause undefined behavior. + +### UTF-8 names +Prometheus v3 supports UTF-8 in metric and label names. 
This means metric and +label names can change after upgrading according to what is exposed by +endpoints. Furthermore, metric and label names that would have previously been +flagged as invalid no longer will be. + +Users wishing to preserve the original validation behavior can update their +prometheus yaml configuration to specify the legacy validation scheme: + +``` +global: + metric_name_validation_scheme: legacy +``` + +Or on a per-scrape basis: + +``` +scrape_configs: + - job_name: job1 + metric_name_validation_scheme: utf8 + - job_name: job2 + metric_name_validation_scheme: legacy +``` + +### Log message format +Prometheus v3 has adopted `log/slog` over the previous `go-kit/log`. This +results in a change of log message format. An example of the old log format is: +``` +ts=2024-10-23T22:01:06.074Z caller=main.go:627 level=info msg="No time or size retention was set so using the default time retention" duration=15d +ts=2024-10-23T22:01:06.074Z caller=main.go:671 level=info msg="Starting Prometheus Server" mode=server version="(version=, branch=, revision=91d80252c3e528728b0f88d254dd720f6be07cb8-modified)" +ts=2024-10-23T22:01:06.074Z caller=main.go:676 level=info build_context="(go=go1.23.0, platform=linux/amd64, user=, date=, tags=unknown)" +ts=2024-10-23T22:01:06.074Z caller=main.go:677 level=info host_details="(Linux 5.15.0-124-generic #134-Ubuntu SMP Fri Sep 27 20:20:17 UTC 2024 x86_64 gigafips (none))" +``` + +a similar sequence in the new log format looks like this: +``` +time=2024-10-24T00:03:07.542+02:00 level=INFO source=/home/user/go/src/github.com/prometheus/prometheus/cmd/prometheus/main.go:640 msg="No time or size retention was set so using the default time retention" duration=15d +time=2024-10-24T00:03:07.542+02:00 level=INFO source=/home/user/go/src/github.com/prometheus/prometheus/cmd/prometheus/main.go:681 msg="Starting Prometheus Server" mode=server version="(version=, branch=, revision=7c7116fea8343795cae6da42960cacd0207a2af8)" 
+time=2024-10-24T00:03:07.542+02:00 level=INFO source=/home/user/go/src/github.com/prometheus/prometheus/cmd/prometheus/main.go:686 msg="operational information" build_context="(go=go1.23.0, platform=linux/amd64, user=, date=, tags=unknown)" host_details="(Linux 5.15.0-124-generic #134-Ubuntu SMP Fri Sep 27 20:20:17 UTC 2024 x86_64 gigafips (none))" fd_limits="(soft=1048576, hard=1048576)" vm_limits="(soft=unlimited, hard=unlimited)" +``` + +### `le` and `quantile` label values +In Prometheus v3, the values of the `le` label of classic histograms and the +`quantile` label of summaries are normalized upon ingestion. In Prometheus v2 +the value of these labels depended on the scrape protocol (protobuf vs text +format) in some situations. This led to label values changing based on the +scrape protocol. E.g. a metric exposed as `my_classic_hist{le="1"}` would be +ingested as `my_classic_hist{le="1"}` via the text format, but as +`my_classic_hist{le="1.0"}` via protobuf. This changed the identity of the +metric and caused problems when querying the metric. +In Prometheus v3 these label values will always be normalized to a float-like +representation. I.e. the above example will always result in +`my_classic_hist{le="1.0"}` being ingested into Prometheus, no matter via which +protocol. The effect of this change is that alerts, recording rules and +dashboards that directly reference label values as whole numbers such as +`le="1"` will stop working. + +Ways to deal with this change either globally or on a per metric basis: + +- Fix references to integer `le`, `quantile` label values, but otherwise do +nothing and accept that some queries that span the transition time will produce +inaccurate or unexpected results. +_This is the recommended solution._ +- Use `metric_relabel_configs` to retain the old labels when scraping targets. +This should **only** be applied to metrics that currently produce such labels. 
```yaml -apiVersion: v1 -kind: Pod -metadata: - name: security-context-demo-2 -spec: - securityContext: - runAsUser: 0 -... + metric_relabel_configs: + - source_labels: + - quantile + target_label: quantile + regex: (\d+)\.0+ + - source_labels: + - le + - __name__ + target_label: le + regex: (\d+)\.0+;.*_bucket ``` -See [Configure a Security Context for a Pod or Container](https://kubernetes.io/docs/tasks/configure-pod-container/security-context/) -for more details. +# Prometheus 2.0 migration guide -If you're using Docker, then the following snippet would be used: - -``` -docker run -p 9090:9090 prom/prometheus:latest -``` - -### Prometheus lifecycle - -If you use the Prometheus `/-/reload` HTTP endpoint to [automatically reload your -Prometheus config when it changes](configuration/configuration.md), -these endpoints are disabled by default for security reasons in Prometheus 2.0. -To enable them, set the `--web.enable-lifecycle` flag. +For the Prometheus 1.8 to 2.0 please refer to the [Prometheus v2.55 documentation](https://prometheus.io/docs/prometheus/2.55/migration/). diff --git a/docs/stability.md b/docs/stability.md index 1fd2e51e0..cb30b8ad9 100644 --- a/docs/stability.md +++ b/docs/stability.md @@ -9,7 +9,7 @@ Prometheus promises API stability within a major version, and strives to avoid breaking changes for key features. Some features, which are cosmetic, still under development, or depend on 3rd party services, are not covered by this. -Things considered stable for 2.x: +Things considered stable for 3.x: * The query language and data model * Alerting and recording rules @@ -18,21 +18,25 @@ Things considered stable for 2.x: * Configuration file format (minus the service discovery remote read/write, see below) * Rule/alert file format * Console template syntax and semantics -* Remote write sending, per the [1.0 specification](https://prometheus.io/docs/concepts/remote_write_spec/). 
+* Remote write sending, per the [1.0 specification](https://prometheus.io/docs/concepts/remote_write_spec/) and receiving +* Agent mode +* OTLP receiver endpoint -Things considered unstable for 2.x: +Things considered unstable for 3.x: * Any feature listed as experimental or subject to change, including: - * The [`holt_winters` PromQL function](https://github.com/prometheus/prometheus/issues/2458) - * Remote write receiving, remote read and the remote read endpoint + * The [`double_exponential_smoothing` PromQL function](https://github.com/prometheus/prometheus/issues/2458) + * Remote read and the remote read endpoint * Server-side HTTPS and basic authentication -* Service discovery integrations, with the exception of `static_configs` and `file_sd_configs` +* Service discovery integrations, with the exception of `static_configs`, `file_sd_configs` and `http_sd_config` * Go APIs of packages that are part of the server * HTML generated by the web UI * The metrics in the /metrics endpoint of Prometheus itself * Exact on-disk format. Potential changes however, will be forward compatible and transparently handled by Prometheus * The format of the logs +Prometheus 2.x stability guarantees can be found [in the 2.x documentation](https://prometheus.io/docs/prometheus/2.55/stability/). + As long as you are not using any features marked as experimental/unstable, an upgrade within a major version can usually be performed without any operational adjustments and very little risk that anything will break. Any breaking changes From d87f7440ca5da009ea885cb23d390ce412ddc681 Mon Sep 17 00:00:00 2001 From: Charlie Le Date: Thu, 24 Oct 2024 11:51:41 -0700 Subject: [PATCH 309/339] support int exemplar value type When the exemplar type is an int, it incorrectly gets converted to a 0 when DoubleValue() is called on the exemplar. This adds a check to ensure that the value is converted properly based on the type. 
Signed-off-by: Charlie Le --- .../prometheusremotewrite/helper.go | 10 +++++- .../prometheusremotewrite/helper_test.go | 35 +++++++++++++++++++ 2 files changed, 44 insertions(+), 1 deletion(-) diff --git a/storage/remote/otlptranslator/prometheusremotewrite/helper.go b/storage/remote/otlptranslator/prometheusremotewrite/helper.go index fd7f58f07..f7fede258 100644 --- a/storage/remote/otlptranslator/prometheusremotewrite/helper.go +++ b/storage/remote/otlptranslator/prometheusremotewrite/helper.go @@ -351,9 +351,17 @@ func getPromExemplars[T exemplarType](ctx context.Context, everyN *everyNTimes, exemplarRunes := 0 promExemplar := prompb.Exemplar{ - Value: exemplar.DoubleValue(), Timestamp: timestamp.FromTime(exemplar.Timestamp().AsTime()), } + switch exemplar.ValueType() { + case pmetric.ExemplarValueTypeInt: + promExemplar.Value = float64(exemplar.IntValue()) + case pmetric.ExemplarValueTypeDouble: + promExemplar.Value = exemplar.DoubleValue() + default: + return nil, fmt.Errorf("unsupported exemplar value type: %v", exemplar.ValueType()) + } + if traceID := exemplar.TraceID(); !traceID.IsEmpty() { val := hex.EncodeToString(traceID[:]) exemplarRunes += utf8.RuneCountInString(traceIDKey) + utf8.RuneCountInString(val) diff --git a/storage/remote/otlptranslator/prometheusremotewrite/helper_test.go b/storage/remote/otlptranslator/prometheusremotewrite/helper_test.go index a48a57b06..9a994c5a4 100644 --- a/storage/remote/otlptranslator/prometheusremotewrite/helper_test.go +++ b/storage/remote/otlptranslator/prometheusremotewrite/helper_test.go @@ -406,3 +406,38 @@ func TestPrometheusConverter_AddHistogramDataPoints(t *testing.T) { }) } } + +func TestGetPromExemplars(t *testing.T) { + ctx := context.Background() + everyN := &everyNTimes{n: 1} + + t.Run("Exemplars with int value", func(t *testing.T) { + pt := pmetric.NewNumberDataPoint() + exemplar := pt.Exemplars().AppendEmpty() + exemplar.SetTimestamp(pcommon.Timestamp(time.Now().UnixNano())) + exemplar.SetIntValue(42) 
+ exemplars, err := getPromExemplars(ctx, everyN, pt) + assert.NoError(t, err) + assert.Len(t, exemplars, 1) + assert.Equal(t, float64(42), exemplars[0].Value) + }) + + t.Run("Exemplars with double value", func(t *testing.T) { + pt := pmetric.NewNumberDataPoint() + exemplar := pt.Exemplars().AppendEmpty() + exemplar.SetTimestamp(pcommon.Timestamp(time.Now().UnixNano())) + exemplar.SetDoubleValue(69.420) + exemplars, err := getPromExemplars(ctx, everyN, pt) + assert.NoError(t, err) + assert.Len(t, exemplars, 1) + assert.Equal(t, 69.420, exemplars[0].Value) + }) + + t.Run("Exemplars with unsupported value type", func(t *testing.T) { + pt := pmetric.NewNumberDataPoint() + exemplar := pt.Exemplars().AppendEmpty() + exemplar.SetTimestamp(pcommon.Timestamp(time.Now().UnixNano())) + _, err := getPromExemplars(ctx, everyN, pt) + assert.Error(t, err) + }) +} From 372b83d7b8601247355531a91487cd98ef2970fe Mon Sep 17 00:00:00 2001 From: gopi Date: Sat, 26 Oct 2024 01:10:15 +0530 Subject: [PATCH 310/339] Documented that WAL can still be written after memory-snapshot-on-shutdown (#15179) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Documented that WAL can still be written after memory-snapshot-on-shutdown - #10824 Co-authored-by: Björn Rabenstein Signed-off-by: gopi --------- Signed-off-by: Gopi-eng2202 Signed-off-by: gopi Co-authored-by: Björn Rabenstein --- docs/feature_flags.md | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/docs/feature_flags.md b/docs/feature_flags.md index c2de68dec..4be11ed47 100644 --- a/docs/feature_flags.md +++ b/docs/feature_flags.md @@ -23,9 +23,8 @@ Exemplar storage is implemented as a fixed size circular buffer that stores exem `--enable-feature=memory-snapshot-on-shutdown` -This takes the snapshot of the chunks that are in memory along with the series information when shutting down and stores -it on disk. 
This will reduce the startup time since the memory state can be restored with this snapshot and m-mapped -chunks without the need of WAL replay. +This takes a snapshot of the chunks that are in memory along with the series information when shutting down and stores it on disk. This will reduce the startup time since the memory state can now be restored with this snapshot +and m-mapped chunks, while a WAL replay from disk is only needed for the parts of the WAL that are not part of the snapshot. ## Extra scrape metrics From 3acb3144fe9c735169833c20bb384f1cb28fa23b Mon Sep 17 00:00:00 2001 From: Jan Fajerski Date: Sat, 26 Oct 2024 09:03:10 +0200 Subject: [PATCH 311/339] update CHANGELOG Signed-off-by: Jan Fajerski --- CHANGELOG.md | 44 +++++++++----------------------------------- 1 file changed, 9 insertions(+), 35 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a9ed60331..ab454a9fc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,8 +3,14 @@ ## unreleased * [CHANGE] Scraping: Remove implicit fallback to the Prometheus text format in case of invalid/missing Content-Type and fail the scrape instead. Add ability to specify a `fallback_scrape_protocol` in the scrape config. #15136 -* [CHANGE] Remote-write: default enable_http2 to false. -* [ENHANCEMENT] Scraping, rules: handle targets reappearing, or rules moving group, when out-of-order is enabled. #14710 +* [CHANGE] Remote-write: default enable_http2 to false. #15219 +* [CHANGE] Scraping: normalize "le" and "quantile" label values upon ingestion. #15164 +* [CHANGE] Scraping: config `scrape_classic_histograms` was renamed to `always_scrape_classic_histograms`. #15178 +* [CHANGE] Config: remove expand-external-labels flag, expand external labels env vars by default. #14657 +* [CHANGE] Adopt log/slog and remove go-kit/log. #14906 +* [CHANGE] Disallow configuring AM with the v1 api. #13883 +* [ENHANCEMENT] Scraping, rules: handle targets reappearing, or rules moving group, when out-of-order is enabled. 
#14710 +* [ENHANCEMENT] Tools: add debug printouts to promtool rules unit testing #15196 - [BUGFIX] PromQL: Fix stddev+stdvar aggregations to always ignore native histograms. #14941 - [BUGFIX] PromQL: Fix stddev+stdvar aggregations to treat Infinity consistently. #14941 @@ -54,6 +60,7 @@ As is traditional with a beta release, we do **not** recommend users install 3.0 * [CHANGE] Remove deprecated `storage.tsdb.allow-overlapping-blocks`, `alertmanager.timeout`, and `storage.tsdb.retention` flags. #14640, #14643 * [ENHANCEMENT] Move AM discovery page from "Monitoring status" to "Server status". #14875 * [FEATURE] Support config reload automatically - feature flag `auto-reload-config`. #14769 +* [BUGFIX] Scrape: Do not override target parameter labels with config params. #11029 ## 2.55.0 / 2024-10-22 @@ -88,39 +95,6 @@ As is traditional with a beta release, we do **not** recommend users install 3.0 * [BUGFIX] Remote-Write: Return 4xx not 5xx when timeseries has duplicate label. #14716 * [BUGFIX] Experimental Native Histograms: many fixes for incorrect results, panics, warnings. #14513, #14575, #14598, #14609, #14611, #14771, #14821 * [BUGFIX] TSDB: Only count unknown record types in `record_decode_failures_total` metric. #14042 -* [BUGFIX] Scrape: Do not override target parameter labels with config params. #11029 - -## 2.55.0-rc.0 / 2024-09-20 - -* [FEATURE] Support UTF-8 characters in label names - feature flag `utf8-names`. #14482, #14880, #14736, #14727 -* [FEATURE] Support config reload automatically - feature flag `auto-reload-config`. #14769 -* [FEATURE] Scraping: Add the ability to set custom `http_headers` in config. #14817 -* [FEATURE] Scraping: Support feature flag `created-timestamp-zero-ingestion` in OpenMetrics. #14356, #14815 -* [FEATURE] Scraping: `scrape_failure_log_file` option to log failures to a file. #14734 -* [FEATURE] OTLP receiver: Optional promotion of resource attributes to series labels. 
#14200 -* [FEATURE] Remote-Write: Support Google Cloud Monitoring authorization. #14346 -* [FEATURE] Promtool: `tsdb create-blocks` new option to add labels. #14403 -* [FEATURE] Promtool: `promtool test` adds `--junit` flag to format results. #14506 -* [ENHANCEMENT] OTLP receiver: Warn on exponential histograms with zero count and non-zero sum. #14706 -* [ENHANCEMENT] OTLP receiver: Interrupt translation on context cancellation/timeout. #14612 -* [ENHANCEMENT] Remote Read client: Enable streaming remote read if the server supports it. #11379 -* [ENHANCEMENT] Remote-Write: Don't reshard if we haven't successfully sent a sample since last update. #14450 -* [ENHANCEMENT] PromQL: Delay deletion of `__name__` label to the end of the query evaluation. This is **experimental** and enabled under the feature-flag `promql-delayed-name-removal`. #14477 -* [ENHANCEMENT] PromQL: Experimental `sort_by_label` and `sort_by_label_desc` sort by all labels when label is equal. #14655 -* [ENHANCEMENT] PromQL: Clarify error message logged when Go runtime panic occurs during query evaluation. #14621 -* [ENHANCEMENT] PromQL: Use Kahan summation for better accuracy in `avg` and `avg_over_time`. #14413 -* [ENHANCEMENT] Tracing: Improve PromQL tracing, including showing the operation performed for aggregates, operators, and calls. #14816 -* [ENHANCEMENT] API: Support multiple listening addresses. #14665 -* [ENHANCEMENT] TSDB: Backward compatibility with upcoming index v3. #14934 -* [PERF] TSDB: Query in-order and out-of-order series together. #14354, #14693, #14714, #14831, #14874, #14948 -* [PERF] TSDB: Streamline reading of overlapping out-of-order head chunks. #14729 -* [BUGFIX] SD: Fix dropping targets (with feature flag `new-service-discovery-manager`). #13147 -* [BUGFIX] SD: Stop storing stale targets (with feature flag `new-service-discovery-manager`). #13622 -* [BUGFIX] Scraping: exemplars could be dropped in protobuf scraping. 
#14810 -* [BUGFIX] Remote-Write: fix metadata sending for experimental Remote-Write V2. #14766 -* [BUGFIX] Remote-Write: Return 4xx not 5xx when timeseries has duplicate label. #14716 -* [BUGFIX] Experimental Native Histograms: many fixes for incorrect results, panics, warnings. #14513, #14575, #14598, #14609, #14611, #14771, #14821 -* [BUGFIX] TSDB: Only count unknown record types in `record_decode_failures_total` metric. #14042 ## 2.54.1 / 2024-08-27 From bab587b9dca16274e38babbbf56efba50956dbd2 Mon Sep 17 00:00:00 2001 From: Pedro Tanaka Date: Sun, 27 Oct 2024 02:06:34 +0200 Subject: [PATCH 312/339] Agent: allow for ingestion of CT samples (#15124) * Remove unused option from HeadOptions Signed-off-by: Pedro Tanaka * Improve docs for appendable() method in head appender Signed-off-by: Pedro Tanaka * Ingest CT (float) samples in Agent DB Signed-off-by: Pedro Tanaka * allow for ingestion of CT native histogram Signed-off-by: Pedro Tanaka * adding some verification for ct ts Signed-off-by: Pedro Tanaka * Validating CT histogram before append and add newly created series to pending series Signed-off-by: Pedro Tanaka * checking the wal for written samples Signed-off-by: Pedro Tanaka * Checking for samples in test Signed-off-by: Pedro Tanaka * adding case for validations Signed-off-by: Pedro Tanaka * fixing comparison when dedupelabels is enabled Signed-off-by: Pedro Tanaka * unite tests, use table testing Signed-off-by: Pedro Tanaka * Implement CT related methods in timestampTracker for write storage Signed-off-by: Pedro Tanaka * adding error case to test Signed-off-by: Pedro Tanaka * removing unused fields Signed-off-by: Pedro Tanaka * Updating lastTs for series when adding CT to invalidate duplicates Signed-off-by: Pedro Tanaka * making sure that updating the lastTS wont cause OOO later on in Commit(); Signed-off-by: Pedro Tanaka --------- Signed-off-by: Pedro Tanaka --- storage/remote/write.go | 24 ++-- tsdb/agent/db.go | 136 ++++++++++++++++++++-- 
tsdb/agent/db_test.go | 246 ++++++++++++++++++++++++++++++++++++++++ tsdb/head.go | 4 - tsdb/head_append.go | 7 +- 5 files changed, 395 insertions(+), 22 deletions(-) diff --git a/storage/remote/write.go b/storage/remote/write.go index 00e4fa3a0..639f34452 100644 --- a/storage/remote/write.go +++ b/storage/remote/write.go @@ -312,8 +312,23 @@ func (t *timestampTracker) AppendHistogram(_ storage.SeriesRef, _ labels.Labels, return 0, nil } -func (t *timestampTracker) AppendHistogramCTZeroSample(_ storage.SeriesRef, _ labels.Labels, _, _ int64, _ *histogram.Histogram, _ *histogram.FloatHistogram) (storage.SeriesRef, error) { - // TODO: Implement +func (t *timestampTracker) AppendCTZeroSample(_ storage.SeriesRef, _ labels.Labels, _, ct int64) (storage.SeriesRef, error) { + t.samples++ + if ct > t.highestTimestamp { + // Theoretically, we should never see a CT zero sample with a timestamp higher than the highest timestamp we've seen so far. + // However, we're not going to enforce that here, as it is not the responsibility of the tracker to enforce this. + t.highestTimestamp = ct + } + return 0, nil +} + +func (t *timestampTracker) AppendHistogramCTZeroSample(_ storage.SeriesRef, _ labels.Labels, _, ct int64, _ *histogram.Histogram, _ *histogram.FloatHistogram) (storage.SeriesRef, error) { + t.histograms++ + if ct > t.highestTimestamp { + // Theoretically, we should never see a CT zero sample with a timestamp higher than the highest timestamp we've seen so far. + // However, we're not going to enforce that here, as it is not the responsibility of the tracker to enforce this. + t.highestTimestamp = ct + } return 0, nil } @@ -323,11 +338,6 @@ func (t *timestampTracker) UpdateMetadata(_ storage.SeriesRef, _ labels.Labels, return 0, nil } -func (t *timestampTracker) AppendCTZeroSample(_ storage.SeriesRef, _ labels.Labels, _, _ int64) (storage.SeriesRef, error) { - // AppendCTZeroSample is no-op for remote-write for now. 
- return 0, nil -} - // Commit implements storage.Appender. func (t *timestampTracker) Commit() error { t.writeStorage.samplesIn.incr(t.samples + t.exemplars + t.histograms) diff --git a/tsdb/agent/db.go b/tsdb/agent/db.go index 5de84c93a..3863e6cd9 100644 --- a/tsdb/agent/db.go +++ b/tsdb/agent/db.go @@ -976,19 +976,139 @@ func (a *appender) AppendHistogram(ref storage.SeriesRef, l labels.Labels, t int return storage.SeriesRef(series.ref), nil } -func (a *appender) AppendHistogramCTZeroSample(ref storage.SeriesRef, l labels.Labels, t, ct int64, h *histogram.Histogram, fh *histogram.FloatHistogram) (storage.SeriesRef, error) { - // TODO(bwplotka/arthursens): Wire metadata in the Agent's appender. - return 0, nil -} - func (a *appender) UpdateMetadata(storage.SeriesRef, labels.Labels, metadata.Metadata) (storage.SeriesRef, error) { // TODO: Wire metadata in the Agent's appender. return 0, nil } -func (a *appender) AppendCTZeroSample(storage.SeriesRef, labels.Labels, int64, int64) (storage.SeriesRef, error) { - // TODO(bwplotka): Wire metadata in the Agent's appender. - return 0, nil +func (a *appender) AppendHistogramCTZeroSample(ref storage.SeriesRef, l labels.Labels, t, ct int64, h *histogram.Histogram, fh *histogram.FloatHistogram) (storage.SeriesRef, error) { + if h != nil { + if err := h.Validate(); err != nil { + return 0, err + } + } + if fh != nil { + if err := fh.Validate(); err != nil { + return 0, err + } + } + if ct >= t { + return 0, storage.ErrCTNewerThanSample + } + + series := a.series.GetByID(chunks.HeadSeriesRef(ref)) + if series == nil { + // Ensure no empty labels have gotten through. 
+ l = l.WithoutEmpty() + if l.IsEmpty() { + return 0, fmt.Errorf("empty labelset: %w", tsdb.ErrInvalidSample) + } + + if lbl, dup := l.HasDuplicateLabelNames(); dup { + return 0, fmt.Errorf(`label name "%s" is not unique: %w`, lbl, tsdb.ErrInvalidSample) + } + + var created bool + series, created = a.getOrCreate(l) + if created { + a.pendingSeries = append(a.pendingSeries, record.RefSeries{ + Ref: series.ref, + Labels: l, + }) + a.metrics.numActiveSeries.Inc() + } + } + + series.Lock() + defer series.Unlock() + + if ct <= a.minValidTime(series.lastTs) { + return 0, storage.ErrOutOfOrderCT + } + + if ct > series.lastTs { + series.lastTs = ct + } else { + // discard the sample if it's out of order. + return 0, storage.ErrOutOfOrderCT + } + + switch { + case h != nil: + zeroHistogram := &histogram.Histogram{} + a.pendingHistograms = append(a.pendingHistograms, record.RefHistogramSample{ + Ref: series.ref, + T: ct, + H: zeroHistogram, + }) + a.histogramSeries = append(a.histogramSeries, series) + case fh != nil: + a.pendingFloatHistograms = append(a.pendingFloatHistograms, record.RefFloatHistogramSample{ + Ref: series.ref, + T: ct, + FH: &histogram.FloatHistogram{}, + }) + a.floatHistogramSeries = append(a.floatHistogramSeries, series) + } + + a.metrics.totalAppendedSamples.WithLabelValues(sampleMetricTypeHistogram).Inc() + return storage.SeriesRef(series.ref), nil +} + +func (a *appender) AppendCTZeroSample(ref storage.SeriesRef, l labels.Labels, t, ct int64) (storage.SeriesRef, error) { + if ct >= t { + return 0, storage.ErrCTNewerThanSample + } + + series := a.series.GetByID(chunks.HeadSeriesRef(ref)) + if series == nil { + l = l.WithoutEmpty() + if l.IsEmpty() { + return 0, fmt.Errorf("empty labelset: %w", tsdb.ErrInvalidSample) + } + + if lbl, dup := l.HasDuplicateLabelNames(); dup { + return 0, fmt.Errorf(`label name "%s" is not unique: %w`, lbl, tsdb.ErrInvalidSample) + } + + newSeries, created := a.getOrCreate(l) + if created { + a.pendingSeries = 
append(a.pendingSeries, record.RefSeries{ + Ref: newSeries.ref, + Labels: l, + }) + a.metrics.numActiveSeries.Inc() + } + + series = newSeries + } + + series.Lock() + defer series.Unlock() + + if t <= a.minValidTime(series.lastTs) { + a.metrics.totalOutOfOrderSamples.Inc() + return 0, storage.ErrOutOfOrderSample + } + + if ct > series.lastTs { + series.lastTs = ct + } else { + // discard the sample if it's out of order. + return 0, storage.ErrOutOfOrderCT + } + + // NOTE: always modify pendingSamples and sampleSeries together. + a.pendingSamples = append(a.pendingSamples, record.RefSample{ + Ref: series.ref, + T: ct, + V: 0, + }) + a.sampleSeries = append(a.sampleSeries, series) + + a.metrics.totalAppendedSamples.WithLabelValues(sampleMetricTypeFloat).Inc() + + return storage.SeriesRef(series.ref), nil } // Commit submits the collected samples and purges the batch. diff --git a/tsdb/agent/db_test.go b/tsdb/agent/db_test.go index 4d5fda25d..b28c29095 100644 --- a/tsdb/agent/db_test.go +++ b/tsdb/agent/db_test.go @@ -15,7 +15,9 @@ package agent import ( "context" + "errors" "fmt" + "io" "math" "path/filepath" "strconv" @@ -29,6 +31,7 @@ import ( "github.com/stretchr/testify/require" "github.com/prometheus/prometheus/model/exemplar" + "github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/storage" "github.com/prometheus/prometheus/storage/remote" @@ -933,6 +936,249 @@ func TestDBOutOfOrderTimeWindow(t *testing.T) { } } +type walSample struct { + t int64 + f float64 + h *histogram.Histogram + lbls labels.Labels + ref storage.SeriesRef +} + +func TestDBCreatedTimestampSamplesIngestion(t *testing.T) { + t.Parallel() + + type appendableSample struct { + t int64 + ct int64 + v float64 + lbls labels.Labels + h *histogram.Histogram + expectsError bool + } + + testHistogram := tsdbutil.GenerateTestHistograms(1)[0] + zeroHistogram := &histogram.Histogram{} + + lbls := labelsForTest(t.Name(), 1) + 
defLbls := labels.New(lbls[0]...) + + testCases := []struct { + name string + inputSamples []appendableSample + expectedSamples []*walSample + expectedSeriesCount int + }{ + { + name: "in order ct+normal sample/floatSamples", + inputSamples: []appendableSample{ + {t: 100, ct: 1, v: 10, lbls: defLbls}, + {t: 101, ct: 1, v: 10, lbls: defLbls}, + }, + expectedSamples: []*walSample{ + {t: 1, f: 0, lbls: defLbls}, + {t: 100, f: 10, lbls: defLbls}, + {t: 101, f: 10, lbls: defLbls}, + }, + }, + { + name: "CT+float && CT+histogram samples", + inputSamples: []appendableSample{ + { + t: 100, + ct: 30, + v: 20, + lbls: defLbls, + }, + { + t: 300, + ct: 230, + h: testHistogram, + lbls: defLbls, + }, + }, + expectedSamples: []*walSample{ + {t: 30, f: 0, lbls: defLbls}, + {t: 100, f: 20, lbls: defLbls}, + {t: 230, h: zeroHistogram, lbls: defLbls}, + {t: 300, h: testHistogram, lbls: defLbls}, + }, + expectedSeriesCount: 1, + }, + { + name: "CT+float && CT+histogram samples with error", + inputSamples: []appendableSample{ + { + // invalid CT + t: 100, + ct: 100, + v: 10, + lbls: defLbls, + expectsError: true, + }, + { + // invalid CT histogram + t: 300, + ct: 300, + h: testHistogram, + lbls: defLbls, + expectsError: true, + }, + }, + expectedSamples: []*walSample{ + {t: 100, f: 10, lbls: defLbls}, + {t: 300, h: testHistogram, lbls: defLbls}, + }, + expectedSeriesCount: 0, + }, + { + name: "In order ct+normal sample/histogram", + inputSamples: []appendableSample{ + {t: 100, h: testHistogram, ct: 1, lbls: defLbls}, + {t: 101, h: testHistogram, ct: 1, lbls: defLbls}, + }, + expectedSamples: []*walSample{ + {t: 1, h: &histogram.Histogram{}}, + {t: 100, h: testHistogram}, + {t: 101, h: &histogram.Histogram{CounterResetHint: histogram.NotCounterReset}}, + }, + }, + { + name: "ct+normal then OOO sample/float", + inputSamples: []appendableSample{ + {t: 60_000, ct: 40_000, v: 10, lbls: defLbls}, + {t: 120_000, ct: 40_000, v: 10, lbls: defLbls}, + {t: 180_000, ct: 40_000, v: 10, lbls: 
defLbls}, + {t: 50_000, ct: 40_000, v: 10, lbls: defLbls}, + }, + expectedSamples: []*walSample{ + {t: 40_000, f: 0, lbls: defLbls}, + {t: 50_000, f: 10, lbls: defLbls}, + {t: 60_000, f: 10, lbls: defLbls}, + {t: 120_000, f: 10, lbls: defLbls}, + {t: 180_000, f: 10, lbls: defLbls}, + }, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + reg := prometheus.NewRegistry() + opts := DefaultOptions() + opts.OutOfOrderTimeWindow = 360_000 + s := createTestAgentDB(t, reg, opts) + app := s.Appender(context.TODO()) + + for _, sample := range tc.inputSamples { + // We supposed to write a Histogram to the WAL + if sample.h != nil { + _, err := app.AppendHistogramCTZeroSample(0, sample.lbls, sample.t, sample.ct, zeroHistogram, nil) + if !errors.Is(err, storage.ErrOutOfOrderCT) { + require.Equal(t, sample.expectsError, err != nil, "expected error: %v, got: %v", sample.expectsError, err) + } + + _, err = app.AppendHistogram(0, sample.lbls, sample.t, sample.h, nil) + require.NoError(t, err) + } else { + // We supposed to write a float sample to the WAL + _, err := app.AppendCTZeroSample(0, sample.lbls, sample.t, sample.ct) + if !errors.Is(err, storage.ErrOutOfOrderCT) { + require.Equal(t, sample.expectsError, err != nil, "expected error: %v, got: %v", sample.expectsError, err) + } + + _, err = app.Append(0, sample.lbls, sample.t, sample.v) + require.NoError(t, err) + } + } + + require.NoError(t, app.Commit()) + // Close the DB to ensure all data is flushed to the WAL + require.NoError(t, s.Close()) + + // Check that we dont have any OOO samples in the WAL by checking metrics + families, err := reg.Gather() + require.NoError(t, err, "failed to gather metrics") + for _, f := range families { + if f.GetName() == "prometheus_agent_out_of_order_samples_total" { + t.Fatalf("unexpected metric %s", f.GetName()) + } + } + + outputSamples := readWALSamples(t, s.wal.Dir()) + + require.Equal(t, len(tc.expectedSamples), len(outputSamples), 
"Expected %d samples", len(tc.expectedSamples)) + + for i, expectedSample := range tc.expectedSamples { + for _, sample := range outputSamples { + if sample.t == expectedSample.t && sample.lbls.String() == expectedSample.lbls.String() { + if expectedSample.h != nil { + require.Equal(t, expectedSample.h, sample.h, "histogram value mismatch (sample index %d)", i) + } else { + require.Equal(t, expectedSample.f, sample.f, "value mismatch (sample index %d)", i) + } + } + } + } + }) + } +} + +func readWALSamples(t *testing.T, walDir string) []*walSample { + t.Helper() + sr, err := wlog.NewSegmentsReader(walDir) + require.NoError(t, err) + defer func(sr io.ReadCloser) { + err := sr.Close() + require.NoError(t, err) + }(sr) + + r := wlog.NewReader(sr) + dec := record.NewDecoder(labels.NewSymbolTable()) + + var ( + samples []record.RefSample + histograms []record.RefHistogramSample + + lastSeries record.RefSeries + outputSamples = make([]*walSample, 0) + ) + + for r.Next() { + rec := r.Record() + switch dec.Type(rec) { + case record.Series: + series, err := dec.Series(rec, nil) + require.NoError(t, err) + lastSeries = series[0] + case record.Samples: + samples, err = dec.Samples(rec, samples[:0]) + require.NoError(t, err) + for _, s := range samples { + outputSamples = append(outputSamples, &walSample{ + t: s.T, + f: s.V, + lbls: lastSeries.Labels.Copy(), + ref: storage.SeriesRef(lastSeries.Ref), + }) + } + case record.HistogramSamples: + histograms, err = dec.HistogramSamples(rec, histograms[:0]) + require.NoError(t, err) + for _, h := range histograms { + outputSamples = append(outputSamples, &walSample{ + t: h.T, + h: h.H, + lbls: lastSeries.Labels.Copy(), + ref: storage.SeriesRef(lastSeries.Ref), + }) + } + } + } + + return outputSamples +} + func BenchmarkCreateSeries(b *testing.B) { s := createTestAgentDB(b, nil, DefaultOptions()) defer s.Close() diff --git a/tsdb/head.go b/tsdb/head.go index 2963d781d..c67c438e5 100644 --- a/tsdb/head.go +++ b/tsdb/head.go @@ -155,10 
+155,6 @@ type HeadOptions struct { // OutOfOrderTimeWindow is > 0 EnableOOONativeHistograms atomic.Bool - // EnableCreatedTimestampZeroIngestion enables the ingestion of the created timestamp as a synthetic zero sample. - // See: https://github.com/prometheus/proposals/blob/main/proposals/2023-06-13_created-timestamp.md - EnableCreatedTimestampZeroIngestion bool - ChunkRange int64 // ChunkDirRoot is the parent directory of the chunks directory. ChunkDirRoot string diff --git a/tsdb/head_append.go b/tsdb/head_append.go index 170e74044..9c732990b 100644 --- a/tsdb/head_append.go +++ b/tsdb/head_append.go @@ -474,9 +474,10 @@ func (a *headAppender) getOrCreate(lset labels.Labels) (s *memSeries, created bo return s, created, nil } -// appendable checks whether the given sample is valid for appending to the series. (if we return false and no error) -// The sample belongs to the out of order chunk if we return true and no error. -// An error signifies the sample cannot be handled. +// appendable checks whether the given sample is valid for appending to the series. +// If the sample is valid and in-order, it returns false with no error. +// If the sample belongs to the out-of-order chunk, it returns true with no error. +// If the sample cannot be handled, it returns an error. func (s *memSeries) appendable(t int64, v float64, headMaxt, minValidTime, oooTimeWindow int64) (isOOO bool, oooDelta int64, err error) { // Check if we can append in the in-order chunk. if t >= minValidTime { From eb3b349024de77bb57499fddf3a7f55b449f95cf Mon Sep 17 00:00:00 2001 From: George Krajcsovits Date: Mon, 28 Oct 2024 08:31:43 +0100 Subject: [PATCH 313/339] fix(nhcb): created timestamp fails when keeping classic histograms (#15218) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The wrong source was used to return the created timestamp, leading to index out of bound panic. One line fix. 
Refactor the requirement test to be generic and be able to test OpenMetrics and Prom parsers as well. There are some differences in what the parsers support: the Prom parser doesn't have created timestamp. The protobuf parser uses different formatting to identify the metric for the scrape loop. Each parser represents the sample timestamp differently. Signed-off-by: György Krajcsovits --- model/textparse/nhcbparse.go | 2 +- model/textparse/nhcbparse_test.go | 303 +++++++++++++++++++----------- 2 files changed, 191 insertions(+), 114 deletions(-) diff --git a/model/textparse/nhcbparse.go b/model/textparse/nhcbparse.go index 79f5c892a..d019c327c 100644 --- a/model/textparse/nhcbparse.go +++ b/model/textparse/nhcbparse.go @@ -169,7 +169,7 @@ func (p *NHCBParser) CreatedTimestamp() *int64 { return p.parser.CreatedTimestamp() } case stateCollecting: - return p.parser.CreatedTimestamp() + return p.tempCT case stateEmitting: return p.ctNHCB } diff --git a/model/textparse/nhcbparse_test.go b/model/textparse/nhcbparse_test.go index b97de0f7e..6152a8503 100644 --- a/model/textparse/nhcbparse_test.go +++ b/model/textparse/nhcbparse_test.go @@ -524,9 +524,6 @@ something_bucket{a="b",le="+Inf"} 9 # {id="something-test"} 2e100 123.000 // "classic" means the option "always_scrape_classic_histograms". // "nhcb" means the option "convert_classic_histograms_to_nhcb". // -// Currently only with the ProtoBuf parser that supports exponential -// histograms. -// // Case 1. Only classic histogram is exposed. // // | Scrape Config | Expect classic | Expect exponential | Expect NHCB |. @@ -550,7 +547,7 @@ something_bucket{a="b",le="+Inf"} 9 # {id="something-test"} 2e100 123.000 // | classic=true, nhcb=false | NO | YES | NO |. // | classic=false, nhcb=true | NO | YES | NO |. // | classic=true, nhcb=true | NO | YES | NO |.
-func TestNHCBParserProtoBufParser_NoNHCBWhenExponential(t *testing.T) { +func TestNHCBParser_NoNHCBWhenExponential(t *testing.T) { type requirement struct { expectClassic bool expectExponential bool @@ -581,134 +578,190 @@ func TestNHCBParserProtoBufParser_NoNHCBWhenExponential(t *testing.T) { }, } + // Create parser from keep classic option. + type parserFactory func(bool) Parser + type testCase struct { name string + parser parserFactory classic bool nhcb bool exp []parsedEntry } + type parserOptions struct { + useUTF8sep bool + hasCreatedTimeStamp bool + } + // Defines the parser name, the Parser factory and the test cases + // supported by the parser and parser options. + parsers := []func() (string, parserFactory, []int, parserOptions){ + func() (string, parserFactory, []int, parserOptions) { + factory := func(keepClassic bool) Parser { + inputBuf := createTestProtoBufHistogram(t) + return NewProtobufParser(inputBuf.Bytes(), keepClassic, labels.NewSymbolTable()) + } + return "ProtoBuf", factory, []int{1, 2, 3}, parserOptions{useUTF8sep: true, hasCreatedTimeStamp: true} + }, + func() (string, parserFactory, []int, parserOptions) { + factory := func(keepClassic bool) Parser { + input := createTestOpenMetricsHistogram() + return NewOpenMetricsParser([]byte(input), labels.NewSymbolTable(), WithOMParserCTSeriesSkipped()) + } + return "OpenMetrics", factory, []int{1}, parserOptions{hasCreatedTimeStamp: true} + }, + func() (string, parserFactory, []int, parserOptions) { + factory := func(keepClassic bool) Parser { + input := createTestPromHistogram() + return NewPromParser([]byte(input), labels.NewSymbolTable()) + } + return "Prometheus", factory, []int{1}, parserOptions{} + }, + } + testCases := []testCase{} - for _, classic := range []bool{false, true} { - for _, nhcb := range []bool{false, true} { - tc := testCase{ - name: "classic=" + strconv.FormatBool(classic) + ", nhcb=" + strconv.FormatBool(nhcb), - classic: classic, - nhcb: nhcb, - exp: []parsedEntry{}, - } 
- for i, caseI := range cases { - req := caseI[tc.name] - metric := "test_histogram" + strconv.Itoa(i+1) - tc.exp = append(tc.exp, parsedEntry{ - m: metric, - help: "Test histogram " + strconv.Itoa(i+1), - }) - tc.exp = append(tc.exp, parsedEntry{ - m: metric, - typ: model.MetricTypeHistogram, - }) - if req.expectExponential { - // Always expect exponential histogram first. - exponentialSeries := []parsedEntry{ - { - m: metric, - shs: &histogram.Histogram{ - Schema: 3, - Count: 175, - Sum: 0.0008280461746287094, - ZeroThreshold: 2.938735877055719e-39, - ZeroCount: 2, - PositiveSpans: []histogram.Span{{Offset: -161, Length: 1}, {Offset: 8, Length: 3}}, - NegativeSpans: []histogram.Span{{Offset: -162, Length: 1}, {Offset: 23, Length: 4}}, - PositiveBuckets: []int64{1, 2, -1, -1}, - NegativeBuckets: []int64{1, 3, -2, -1, 1}, + for _, parser := range parsers { + for _, classic := range []bool{false, true} { + for _, nhcb := range []bool{false, true} { + parserName, parser, supportedCases, options := parser() + requirementName := "classic=" + strconv.FormatBool(classic) + ", nhcb=" + strconv.FormatBool(nhcb) + tc := testCase{ + name: "parser=" + parserName + ", " + requirementName, + parser: parser, + classic: classic, + nhcb: nhcb, + exp: []parsedEntry{}, + } + for _, caseNumber := range supportedCases { + caseI := cases[caseNumber-1] + req, ok := caseI[requirementName] + require.True(t, ok, "Case %d does not have requirement %s", caseNumber, requirementName) + metric := "test_histogram" + strconv.Itoa(caseNumber) + tc.exp = append(tc.exp, parsedEntry{ + m: metric, + help: "Test histogram " + strconv.Itoa(caseNumber), + }) + tc.exp = append(tc.exp, parsedEntry{ + m: metric, + typ: model.MetricTypeHistogram, + }) + + var ct *int64 + if options.hasCreatedTimeStamp { + ct = int64p(1000) + } + + var bucketForMetric func(string) string + if options.useUTF8sep { + bucketForMetric = func(s string) string { + return "_bucket\xffle\xff" + s + } + } else { + bucketForMetric = 
func(s string) string { + return "_bucket{le=\"" + s + "\"}" + } + } + + if req.expectExponential { + // Always expect exponential histogram first. + exponentialSeries := []parsedEntry{ + { + m: metric, + shs: &histogram.Histogram{ + Schema: 3, + Count: 175, + Sum: 0.0008280461746287094, + ZeroThreshold: 2.938735877055719e-39, + ZeroCount: 2, + PositiveSpans: []histogram.Span{{Offset: -161, Length: 1}, {Offset: 8, Length: 3}}, + NegativeSpans: []histogram.Span{{Offset: -162, Length: 1}, {Offset: 23, Length: 4}}, + PositiveBuckets: []int64{1, 2, -1, -1}, + NegativeBuckets: []int64{1, 3, -2, -1, 1}, + }, + lset: labels.FromStrings("__name__", metric), + t: int64p(1234568), + ct: ct, }, - lset: labels.FromStrings("__name__", metric), - t: int64p(1234568), - ct: int64p(1000), - }, + } + tc.exp = append(tc.exp, exponentialSeries...) } - tc.exp = append(tc.exp, exponentialSeries...) - } - if req.expectClassic { - // Always expect classic histogram series after exponential. - classicSeries := []parsedEntry{ - { - m: metric + "_count", - v: 175, - lset: labels.FromStrings("__name__", metric+"_count"), - t: int64p(1234568), - ct: int64p(1000), - }, - { - m: metric + "_sum", - v: 0.0008280461746287094, - lset: labels.FromStrings("__name__", metric+"_sum"), - t: int64p(1234568), - ct: int64p(1000), - }, - { - m: metric + "_bucket\xffle\xff-0.0004899999999999998", - v: 2, - lset: labels.FromStrings("__name__", metric+"_bucket", "le", "-0.0004899999999999998"), - t: int64p(1234568), - ct: int64p(1000), - }, - { - m: metric + "_bucket\xffle\xff-0.0003899999999999998", - v: 4, - lset: labels.FromStrings("__name__", metric+"_bucket", "le", "-0.0003899999999999998"), - t: int64p(1234568), - ct: int64p(1000), - }, - { - m: metric + "_bucket\xffle\xff-0.0002899999999999998", - v: 16, - lset: labels.FromStrings("__name__", metric+"_bucket", "le", "-0.0002899999999999998"), - t: int64p(1234568), - ct: int64p(1000), - }, - { - m: metric + "_bucket\xffle\xff+Inf", - v: 175, - lset: 
labels.FromStrings("__name__", metric+"_bucket", "le", "+Inf"), - t: int64p(1234568), - ct: int64p(1000), - }, - } - tc.exp = append(tc.exp, classicSeries...) - } - if req.expectNHCB { - // Always expect NHCB series after classic. - nhcbSeries := []parsedEntry{ - { - m: metric + "{}", - shs: &histogram.Histogram{ - Schema: histogram.CustomBucketsSchema, - Count: 175, - Sum: 0.0008280461746287094, - PositiveSpans: []histogram.Span{{Length: 4}}, - PositiveBuckets: []int64{2, 0, 10, 147}, - CustomValues: []float64{-0.0004899999999999998, -0.0003899999999999998, -0.0002899999999999998}, + if req.expectClassic { + // Always expect classic histogram series after exponential. + classicSeries := []parsedEntry{ + { + m: metric + "_count", + v: 175, + lset: labels.FromStrings("__name__", metric+"_count"), + t: int64p(1234568), + ct: ct, }, - lset: labels.FromStrings("__name__", metric), - t: int64p(1234568), - ct: int64p(1000), - }, + { + m: metric + "_sum", + v: 0.0008280461746287094, + lset: labels.FromStrings("__name__", metric+"_sum"), + t: int64p(1234568), + ct: ct, + }, + { + m: metric + bucketForMetric("-0.0004899999999999998"), + v: 2, + lset: labels.FromStrings("__name__", metric+"_bucket", "le", "-0.0004899999999999998"), + t: int64p(1234568), + ct: ct, + }, + { + m: metric + bucketForMetric("-0.0003899999999999998"), + v: 4, + lset: labels.FromStrings("__name__", metric+"_bucket", "le", "-0.0003899999999999998"), + t: int64p(1234568), + ct: ct, + }, + { + m: metric + bucketForMetric("-0.0002899999999999998"), + v: 16, + lset: labels.FromStrings("__name__", metric+"_bucket", "le", "-0.0002899999999999998"), + t: int64p(1234568), + ct: ct, + }, + { + m: metric + bucketForMetric("+Inf"), + v: 175, + lset: labels.FromStrings("__name__", metric+"_bucket", "le", "+Inf"), + t: int64p(1234568), + ct: ct, + }, + } + tc.exp = append(tc.exp, classicSeries...) + } + if req.expectNHCB { + // Always expect NHCB series after classic. 
+ nhcbSeries := []parsedEntry{ + { + m: metric + "{}", + shs: &histogram.Histogram{ + Schema: histogram.CustomBucketsSchema, + Count: 175, + Sum: 0.0008280461746287094, + PositiveSpans: []histogram.Span{{Length: 4}}, + PositiveBuckets: []int64{2, 0, 10, 147}, + CustomValues: []float64{-0.0004899999999999998, -0.0003899999999999998, -0.0002899999999999998}, + }, + lset: labels.FromStrings("__name__", metric), + t: int64p(1234568), + ct: ct, + }, + } + tc.exp = append(tc.exp, nhcbSeries...) } - tc.exp = append(tc.exp, nhcbSeries...) } + testCases = append(testCases, tc) } - testCases = append(testCases, tc) } } - inputBuf := createTestProtoBufHistogram(t) - for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { - p := NewProtobufParser(inputBuf.Bytes(), tc.classic, labels.NewSymbolTable()) + p := tc.parser(tc.classic) if tc.nhcb { p = NewNHCBParser(p, labels.NewSymbolTable(), tc.classic) } @@ -860,3 +913,27 @@ metric: < return buf } + +func createTestOpenMetricsHistogram() string { + return `# HELP test_histogram1 Test histogram 1 +# TYPE test_histogram1 histogram +test_histogram1_count 175 1234.568 +test_histogram1_sum 0.0008280461746287094 1234.568 +test_histogram1_bucket{le="-0.0004899999999999998"} 2 1234.568 +test_histogram1_bucket{le="-0.0003899999999999998"} 4 1234.568 +test_histogram1_bucket{le="-0.0002899999999999998"} 16 1234.568 +test_histogram1_bucket{le="+Inf"} 175 1234.568 +test_histogram1_created 1 +# EOF` +} + +func createTestPromHistogram() string { + return `# HELP test_histogram1 Test histogram 1 +# TYPE test_histogram1 histogram +test_histogram1_count 175 1234568 +test_histogram1_sum 0.0008280461746287094 1234568 +test_histogram1_bucket{le="-0.0004899999999999998"} 2 1234568 +test_histogram1_bucket{le="-0.0003899999999999998"} 4 1234568 +test_histogram1_bucket{le="-0.0002899999999999998"} 16 1234568 +test_histogram1_bucket{le="+Inf"} 175 1234568` +} From 685d6d169f3d3723f72a7d0d4481bded09a614ec Mon Sep 17 00:00:00 2001 From: 3Juhwan
<13selfesteem91@naver.com> Date: Sun, 27 Oct 2024 17:43:33 +0900 Subject: [PATCH 314/339] refactor: reorder fields in defaultSDConfig initialization for consistency Signed-off-by: 3Juhwan <13selfesteem91@naver.com> --- discovery/http/http.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/discovery/http/http.go b/discovery/http/http.go index 004a5b4ae..65404694c 100644 --- a/discovery/http/http.go +++ b/discovery/http/http.go @@ -41,8 +41,8 @@ import ( var ( // DefaultSDConfig is the default HTTP SD configuration. DefaultSDConfig = SDConfig{ - RefreshInterval: model.Duration(60 * time.Second), HTTPClientConfig: config.DefaultHTTPClientConfig, + RefreshInterval: model.Duration(60 * time.Second), } userAgent = fmt.Sprintf("Prometheus/%s", version.Version) matchContentType = regexp.MustCompile(`^(?i:application\/json(;\s*charset=("utf-8"|utf-8))?)$`) From e452308e3768825e2f22dfae1e9b9bb80b8c48f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Giedrius=20Statkevi=C4=8Dius?= Date: Mon, 28 Oct 2024 12:09:23 +0200 Subject: [PATCH 315/339] discovery/kubernetes: optimize resolvePodRef MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit resolvePodRef is in a hot path: ``` ROUTINE ======================== github.com/prometheus/prometheus/discovery/kubernetes.(*Endpoints).resolvePodRef in discovery/kubernetes/endpoints.go 2.50TB 2.66TB (flat, cum) 22.28% of Total . . 447:func (e *Endpoints) resolvePodRef(ref *apiv1.ObjectReference) *apiv1.Pod { . . 448: if ref == nil || ref.Kind != "Pod" { . . 449: return nil . . 450: } 2.50TB 2.50TB 451: p := &apiv1.Pod{} . . 452: p.Namespace = ref.Namespace . . 453: p.Name = ref.Name . . 454: . 156.31GB 455: obj, exists, err := e.podStore.Get(p) . . 456: if err != nil { . . 457: level.Error(e.logger).Log("msg", "resolving pod ref failed", "err", err) . . 458: return nil . . 459: } . . 460: if !exists { ``` This is some low hanging fruit that we can easily optimize. 
The key of an object has format "namespace/name" so generate that inside of Prometheus itself and use pooling. ``` goos: linux goarch: amd64 pkg: github.com/prometheus/prometheus/discovery/kubernetes cpu: Intel(R) Core(TM) i9-10885H CPU @ 2.40GHz │ olddisc │ newdisc │ │ sec/op │ sec/op vs base │ ResolvePodRef-16 516.3n ± 17% 289.5n ± 7% -43.92% (p=0.000 n=10) │ olddisc │ newdisc │ │ B/op │ B/op vs base │ ResolvePodRef-16 1168.00 ± 0% 24.00 ± 0% -97.95% (p=0.000 n=10) │ olddisc │ newdisc │ │ allocs/op │ allocs/op vs base │ ResolvePodRef-16 2.000 ± 0% 2.000 ± 0% ~ (p=1.000 n=10) ¹ ¹ all samples are equal ``` Signed-off-by: Giedrius Statkevičius --- discovery/kubernetes/endpoints.go | 32 +++++++++++++++++++++++++- discovery/kubernetes/endpoints_test.go | 21 +++++++++++++++++ 2 files changed, 52 insertions(+), 1 deletion(-) diff --git a/discovery/kubernetes/endpoints.go b/discovery/kubernetes/endpoints.go index 5ba9df627..934f37ee4 100644 --- a/discovery/kubernetes/endpoints.go +++ b/discovery/kubernetes/endpoints.go @@ -20,6 +20,8 @@ import ( "log/slog" "net" "strconv" + "strings" + "sync" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" @@ -453,15 +455,43 @@ func (e *Endpoints) buildEndpoints(eps *apiv1.Endpoints) *targetgroup.Group { return tg } +var objKeyPool = sync.Pool{} + +func generateObjKey(namespace, name string) (string, *strings.Builder) { + var sb *strings.Builder + + b := objKeyPool.Get() + if b == nil { + sb = &strings.Builder{} + } else { + sb = b.(*strings.Builder) + } + + sb.Reset() + if namespace == "" { + _, _ = sb.WriteString(name) + return sb.String(), sb + } + + _, _ = sb.WriteString(namespace) + _, _ = sb.WriteRune('/') + _, _ = sb.WriteString(name) + return sb.String(), sb +} + func (e *Endpoints) resolvePodRef(ref *apiv1.ObjectReference) *apiv1.Pod { if ref == nil || ref.Kind != "Pod" { return nil } + p := &apiv1.Pod{} p.Namespace = ref.Namespace p.Name = ref.Name - obj, exists, err := e.podStore.Get(p) + 
key, sb := generateObjKey(p.Namespace, p.Name) + defer objKeyPool.Put(sb) + + obj, exists, err := e.podStore.GetByKey(key) if err != nil { e.logger.Error("resolving pod ref failed", "err", err) return nil diff --git a/discovery/kubernetes/endpoints_test.go b/discovery/kubernetes/endpoints_test.go index 4af688960..a1ac6e5d4 100644 --- a/discovery/kubernetes/endpoints_test.go +++ b/discovery/kubernetes/endpoints_test.go @@ -18,10 +18,12 @@ import ( "testing" "github.com/prometheus/common/model" + "github.com/stretchr/testify/require" v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/tools/cache" "github.com/prometheus/prometheus/discovery/targetgroup" ) @@ -1257,3 +1259,22 @@ func TestEndpointsDiscoverySidecarContainer(t *testing.T) { }, }.Run(t) } + +func BenchmarkResolvePodRef(b *testing.B) { + indexer := cache.NewIndexer(cache.DeletionHandlingMetaNamespaceKeyFunc, nil) + e := &Endpoints{ + podStore: indexer, + } + + b.ReportAllocs() + b.ResetTimer() + + for i := 0; i < b.N; i++ { + p := e.resolvePodRef(&v1.ObjectReference{ + Kind: "Pod", + Name: "testpod", + Namespace: "foo", + }) + require.Nil(b, p) + } +} From 716fd5b11f6f6f414bbc602b9724b028140006f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Giedrius=20Statkevi=C4=8Dius?= Date: Mon, 28 Oct 2024 16:19:56 +0200 Subject: [PATCH 316/339] discovery/kubernetes: use namespacedName MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Giedrius Statkevičius --- discovery/kubernetes/endpoints.go | 48 +++---------------------------- 1 file changed, 4 insertions(+), 44 deletions(-) diff --git a/discovery/kubernetes/endpoints.go b/discovery/kubernetes/endpoints.go index 934f37ee4..14d3bc7a9 100644 --- a/discovery/kubernetes/endpoints.go +++ b/discovery/kubernetes/endpoints.go @@ -20,8 +20,6 @@ import ( "log/slog" "net" "strconv" - "strings" - "sync" 
"github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" @@ -104,10 +102,7 @@ func NewEndpoints(l *slog.Logger, eps cache.SharedIndexInformer, svc, pod, node return } - ep := &apiv1.Endpoints{} - ep.Namespace = svc.Namespace - ep.Name = svc.Name - obj, exists, err := e.endpointsStore.Get(ep) + obj, exists, err := e.endpointsStore.GetByKey(namespacedName(svc.Namespace, svc.Name)) if exists && err == nil { e.enqueue(obj.(*apiv1.Endpoints)) } @@ -455,43 +450,12 @@ func (e *Endpoints) buildEndpoints(eps *apiv1.Endpoints) *targetgroup.Group { return tg } -var objKeyPool = sync.Pool{} - -func generateObjKey(namespace, name string) (string, *strings.Builder) { - var sb *strings.Builder - - b := objKeyPool.Get() - if b == nil { - sb = &strings.Builder{} - } else { - sb = b.(*strings.Builder) - } - - sb.Reset() - if namespace == "" { - _, _ = sb.WriteString(name) - return sb.String(), sb - } - - _, _ = sb.WriteString(namespace) - _, _ = sb.WriteRune('/') - _, _ = sb.WriteString(name) - return sb.String(), sb -} - func (e *Endpoints) resolvePodRef(ref *apiv1.ObjectReference) *apiv1.Pod { if ref == nil || ref.Kind != "Pod" { return nil } - p := &apiv1.Pod{} - p.Namespace = ref.Namespace - p.Name = ref.Name - - key, sb := generateObjKey(p.Namespace, p.Name) - defer objKeyPool.Put(sb) - - obj, exists, err := e.podStore.GetByKey(key) + obj, exists, err := e.podStore.GetByKey(namespacedName(ref.Namespace, ref.Name)) if err != nil { e.logger.Error("resolving pod ref failed", "err", err) return nil @@ -503,11 +467,7 @@ func (e *Endpoints) resolvePodRef(ref *apiv1.ObjectReference) *apiv1.Pod { } func (e *Endpoints) addServiceLabels(ns, name string, tg *targetgroup.Group) { - svc := &apiv1.Service{} - svc.Namespace = ns - svc.Name = name - - obj, exists, err := e.serviceStore.Get(svc) + obj, exists, err := e.serviceStore.GetByKey(namespacedName(ns, name)) if err != nil { e.logger.Error("retrieving service failed", "err", err) return @@ -515,7 +475,7 @@ 
func (e *Endpoints) addServiceLabels(ns, name string, tg *targetgroup.Group) { if !exists { return } - svc = obj.(*apiv1.Service) + svc := obj.(*apiv1.Service) tg.Labels = tg.Labels.Merge(serviceLabels(svc)) } From 58fedb6b61b7a6918226d8c791be538aa4cc02ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Giedrius=20Statkevi=C4=8Dius?= Date: Mon, 28 Oct 2024 17:16:37 +0200 Subject: [PATCH 317/339] discovery/kubernetes: optimize more gets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Giedrius Statkevičius --- discovery/kubernetes/endpointslice.go | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/discovery/kubernetes/endpointslice.go b/discovery/kubernetes/endpointslice.go index 8f58ba353..45bc43eff 100644 --- a/discovery/kubernetes/endpointslice.go +++ b/discovery/kubernetes/endpointslice.go @@ -467,11 +467,8 @@ func (e *EndpointSlice) resolvePodRef(ref *apiv1.ObjectReference) *apiv1.Pod { if ref == nil || ref.Kind != "Pod" { return nil } - p := &apiv1.Pod{} - p.Namespace = ref.Namespace - p.Name = ref.Name - obj, exists, err := e.podStore.Get(p) + obj, exists, err := e.podStore.GetByKey(namespacedName(ref.Namespace, ref.Name)) if err != nil { e.logger.Error("resolving pod ref failed", "err", err) return nil @@ -484,19 +481,19 @@ func (e *EndpointSlice) resolvePodRef(ref *apiv1.ObjectReference) *apiv1.Pod { func (e *EndpointSlice) addServiceLabels(esa endpointSliceAdaptor, tg *targetgroup.Group) { var ( - svc = &apiv1.Service{} found bool + name string ) - svc.Namespace = esa.namespace() + ns := esa.namespace() // Every EndpointSlice object has the Service they belong to in the // kubernetes.io/service-name label. 
- svc.Name, found = esa.labels()[esa.labelServiceName()] + name, found = esa.labels()[esa.labelServiceName()] if !found { return } - obj, exists, err := e.serviceStore.Get(svc) + obj, exists, err := e.serviceStore.GetByKey(namespacedName(ns, name)) if err != nil { e.logger.Error("retrieving service failed", "err", err) return @@ -504,7 +501,7 @@ func (e *EndpointSlice) addServiceLabels(esa endpointSliceAdaptor, tg *targetgro if !exists { return } - svc = obj.(*apiv1.Service) + svc := obj.(*apiv1.Service) tg.Labels = tg.Labels.Merge(serviceLabels(svc)) } From 7cda23ba3270ad305f550d98d42e53b7ccf18907 Mon Sep 17 00:00:00 2001 From: Jack Westbrook Date: Mon, 28 Oct 2024 17:05:10 +0100 Subject: [PATCH 318/339] fix(lezer-promql): fix missing types export in package.json (#15161) Signed-off-by: Jack Westbrook --- web/ui/module/lezer-promql/package.json | 1 + 1 file changed, 1 insertion(+) diff --git a/web/ui/module/lezer-promql/package.json b/web/ui/module/lezer-promql/package.json index 3eadc3a53..0883552c8 100644 --- a/web/ui/module/lezer-promql/package.json +++ b/web/ui/module/lezer-promql/package.json @@ -5,6 +5,7 @@ "main": "dist/index.cjs", "type": "module", "exports": { + "types": "./dist/index.d.ts", "import": "./dist/index.es.js", "require": "./dist/index.cjs" }, From 5408184452b8e612bba51bd49d228e41e2962573 Mon Sep 17 00:00:00 2001 From: Jan Fajerski Date: Mon, 28 Oct 2024 17:10:34 +0100 Subject: [PATCH 319/339] react-app: bump version in lock file Signed-off-by: Jan Fajerski --- web/ui/react-app/package-lock.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/web/ui/react-app/package-lock.json b/web/ui/react-app/package-lock.json index 667eb0b37..f8d1cfb3e 100644 --- a/web/ui/react-app/package-lock.json +++ b/web/ui/react-app/package-lock.json @@ -1,12 +1,12 @@ { "name": "@prometheus-io/app", - "version": "0.55.0-rc.0", + "version": "0.55.0", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@prometheus-io/app", - 
"version": "0.55.0-rc.0", + "version": "0.55.0", "dependencies": { "@codemirror/autocomplete": "^6.17.0", "@codemirror/commands": "^6.6.0", @@ -24,7 +24,7 @@ "@lezer/lr": "^1.4.2", "@nexucis/fuzzy": "^0.4.1", "@nexucis/kvsearch": "^0.8.1", - "@prometheus-io/codemirror-promql": "0.55.0-rc.0", + "@prometheus-io/codemirror-promql": "0.55.0", "bootstrap": "^4.6.2", "css.escape": "^1.5.1", "downshift": "^9.0.6", From 706dcfeecff0d64dc3c4451d7cf59b0a5c5c82f7 Mon Sep 17 00:00:00 2001 From: Arve Knudsen Date: Tue, 29 Oct 2024 10:40:46 +0100 Subject: [PATCH 320/339] tsdb.CircularExemplarStorage: Avoid racing (#15231) * tsdb.CircularExemplarStorage: Avoid racing --------- Signed-off-by: Arve Knudsen --- tsdb/exemplar.go | 22 +++++++++++----------- tsdb/exemplar_test.go | 38 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+), 11 deletions(-) diff --git a/tsdb/exemplar.go b/tsdb/exemplar.go index d32870f70..31d461bed 100644 --- a/tsdb/exemplar.go +++ b/tsdb/exemplar.go @@ -152,13 +152,13 @@ func (ce *CircularExemplarStorage) Querier(_ context.Context) (storage.ExemplarQ func (ce *CircularExemplarStorage) Select(start, end int64, matchers ...[]*labels.Matcher) ([]exemplar.QueryResult, error) { ret := make([]exemplar.QueryResult, 0) + ce.lock.RLock() + defer ce.lock.RUnlock() + if len(ce.exemplars) == 0 { return ret, nil } - ce.lock.RLock() - defer ce.lock.RUnlock() - // Loop through each index entry, which will point us to first/last exemplar for each series. 
for _, idx := range ce.index { var se exemplar.QueryResult @@ -281,13 +281,13 @@ func (ce *CircularExemplarStorage) Resize(l int64) int { l = 0 } + ce.lock.Lock() + defer ce.lock.Unlock() + if l == int64(len(ce.exemplars)) { return 0 } - ce.lock.Lock() - defer ce.lock.Unlock() - oldBuffer := ce.exemplars oldNextIndex := int64(ce.nextIndex) @@ -349,6 +349,11 @@ func (ce *CircularExemplarStorage) migrate(entry *circularBufferEntry, buf []byt } func (ce *CircularExemplarStorage) AddExemplar(l labels.Labels, e exemplar.Exemplar) error { + // TODO(bwplotka): This lock can lock all scrapers, there might high contention on this on scale. + // Optimize by moving the lock to be per series (& benchmark it). + ce.lock.Lock() + defer ce.lock.Unlock() + if len(ce.exemplars) == 0 { return storage.ErrExemplarsDisabled } @@ -356,11 +361,6 @@ func (ce *CircularExemplarStorage) AddExemplar(l labels.Labels, e exemplar.Exemp var buf [1024]byte seriesLabels := l.Bytes(buf[:]) - // TODO(bwplotka): This lock can lock all scrapers, there might high contention on this on scale. - // Optimize by moving the lock to be per series (& benchmark it). - ce.lock.Lock() - defer ce.lock.Unlock() - idx, ok := ce.index[string(seriesLabels)] err := ce.validateExemplar(idx, e, true) if err != nil { diff --git a/tsdb/exemplar_test.go b/tsdb/exemplar_test.go index 7723ec389..dbd34cc48 100644 --- a/tsdb/exemplar_test.go +++ b/tsdb/exemplar_test.go @@ -20,6 +20,7 @@ import ( "reflect" "strconv" "strings" + "sync" "testing" "github.com/prometheus/client_golang/prometheus" @@ -499,3 +500,40 @@ func BenchmarkResizeExemplars(b *testing.B) { }) } } + +// TestCircularExemplarStorage_Concurrent_AddExemplar_Resize tries to provoke a data race between AddExemplar and Resize. +// Run with race detection enabled. 
+func TestCircularExemplarStorage_Concurrent_AddExemplar_Resize(t *testing.T) { + exs, err := NewCircularExemplarStorage(0, eMetrics) + require.NoError(t, err) + es := exs.(*CircularExemplarStorage) + + l := labels.FromStrings("service", "asdf") + e := exemplar.Exemplar{ + Labels: labels.FromStrings("trace_id", "qwerty"), + Value: 0.1, + Ts: 1, + } + + var wg sync.WaitGroup + wg.Add(1) + t.Cleanup(wg.Wait) + + started := make(chan struct{}) + + go func() { + defer wg.Done() + + <-started + for i := 0; i < 100; i++ { + require.NoError(t, es.AddExemplar(l, e)) + } + }() + + for i := 0; i < 100; i++ { + es.Resize(int64(i + 1)) + if i == 0 { + close(started) + } + } +} From a44db5f7842a7b3e82687f6fb8d87076a493d858 Mon Sep 17 00:00:00 2001 From: shenpengfeng Date: Tue, 29 Oct 2024 17:58:44 +0800 Subject: [PATCH 321/339] chore: fix function name in comment Signed-off-by: shenpengfeng --- storage/buffer.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/storage/buffer.go b/storage/buffer.go index ad504ad5d..e847c10e6 100644 --- a/storage/buffer.go +++ b/storage/buffer.go @@ -626,7 +626,7 @@ func addF(s fSample, buf []fSample, r *sampleRing) []fSample { return buf } -// addF adds an hSample to a (specialized) hSample buffer. +// addH adds an hSample to a (specialized) hSample buffer. func addH(s hSample, buf []hSample, r *sampleRing) []hSample { l := len(buf) // Grow the ring buffer if it fits no more elements. 
From b6c538972c4d738896c0ee34fd7d8efcc6e2c2f8 Mon Sep 17 00:00:00 2001 From: Nicolas Takashi Date: Tue, 29 Oct 2024 12:34:02 +0000 Subject: [PATCH 322/339] [REFACTORY] simplify appender commit (#15112) * [REFACTOR] simplify appender commit Signed-off-by: Nicolas Takashi Signed-off-by: Arthur Silva Sens Co-authored-by: George Krajcsovits Co-authored-by: Arthur Silva Sens --- tsdb/head_append.go | 852 +++++++++++++++++++++------------------- tsdb/head_bench_test.go | 87 ++++ 2 files changed, 538 insertions(+), 401 deletions(-) diff --git a/tsdb/head_append.go b/tsdb/head_append.go index 9c732990b..603b96cfc 100644 --- a/tsdb/head_append.go +++ b/tsdb/head_append.go @@ -984,23 +984,38 @@ func exemplarsForEncoding(es []exemplarWithSeriesRef) []record.RefExemplar { return ret } -// Commit writes to the WAL and adds the data to the Head. -// TODO(codesome): Refactor this method to reduce indentation and make it more readable. -func (a *headAppender) Commit() (err error) { - if a.closed { - return ErrAppenderClosed - } - defer func() { a.closed = true }() - - if err := a.log(); err != nil { - _ = a.Rollback() // Most likely the same error will happen again. - return fmt.Errorf("write to WAL: %w", err) - } - - if a.head.writeNotified != nil { - a.head.writeNotified.Notify() - } +type appenderCommitContext struct { + floatsAppended int + histogramsAppended int + // Number of samples out of order but accepted: with ooo enabled and within time window. + oooFloatsAccepted int + oooHistogramAccepted int + // Number of samples rejected due to: out of order but OOO support disabled. + floatOOORejected int + histoOOORejected int + // Number of samples rejected due to: out of order but too old (OOO support enabled, but outside time window). + floatTooOldRejected int + histoTooOldRejected int + // Number of samples rejected due to: out of bounds: with t < minValidTime (OOO support disabled). 
+ floatOOBRejected int + histoOOBRejected int + inOrderMint int64 + inOrderMaxt int64 + oooMinT int64 + oooMaxT int64 + wblSamples []record.RefSample + wblHistograms []record.RefHistogramSample + wblFloatHistograms []record.RefFloatHistogramSample + oooMmapMarkers map[chunks.HeadSeriesRef][]chunks.ChunkDiskMapperRef + oooMmapMarkersCount int + oooRecords [][]byte + oooCapMax int64 + appendChunkOpts chunkOpts + enc record.Encoder +} +// commitExemplars adds all exemplars from headAppender to the head's exemplar storage. +func (a *headAppender) commitExemplars() { // No errors logging to WAL, so pass the exemplars along to the in memory storage. for _, e := range a.exemplars { s := a.head.series.getByID(chunks.HeadSeriesRef(e.ref)) @@ -1018,6 +1033,396 @@ func (a *headAppender) Commit() (err error) { a.head.logger.Debug("Unknown error while adding exemplar", "err", err) } } +} + +func (acc *appenderCommitContext) collectOOORecords(a *headAppender) { + if a.head.wbl == nil { + // WBL is not enabled. So no need to collect. + acc.wblSamples = nil + acc.wblHistograms = nil + acc.wblFloatHistograms = nil + acc.oooMmapMarkers = nil + acc.oooMmapMarkersCount = 0 + return + } + + // The m-map happens before adding a new sample. So we collect + // the m-map markers first, and then samples. 
+ // WBL Graphically: + // WBL Before this Commit(): [old samples before this commit for chunk 1] + // WBL After this Commit(): [old samples before this commit for chunk 1][new samples in this commit for chunk 1]mmapmarker1[samples for chunk 2]mmapmarker2[samples for chunk 3] + if acc.oooMmapMarkers != nil { + markers := make([]record.RefMmapMarker, 0, acc.oooMmapMarkersCount) + for ref, mmapRefs := range acc.oooMmapMarkers { + for _, mmapRef := range mmapRefs { + markers = append(markers, record.RefMmapMarker{ + Ref: ref, + MmapRef: mmapRef, + }) + } + } + r := acc.enc.MmapMarkers(markers, a.head.getBytesBuffer()) + acc.oooRecords = append(acc.oooRecords, r) + } + + if len(acc.wblSamples) > 0 { + r := acc.enc.Samples(acc.wblSamples, a.head.getBytesBuffer()) + acc.oooRecords = append(acc.oooRecords, r) + } + if len(acc.wblHistograms) > 0 { + r := acc.enc.HistogramSamples(acc.wblHistograms, a.head.getBytesBuffer()) + acc.oooRecords = append(acc.oooRecords, r) + } + if len(acc.wblFloatHistograms) > 0 { + r := acc.enc.FloatHistogramSamples(acc.wblFloatHistograms, a.head.getBytesBuffer()) + acc.oooRecords = append(acc.oooRecords, r) + } + + acc.wblSamples = nil + acc.wblHistograms = nil + acc.wblFloatHistograms = nil + acc.oooMmapMarkers = nil +} + +// handleAppendableError processes errors encountered during sample appending and updates +// the provided counters accordingly. +// +// Parameters: +// - err: The error encountered during appending. +// - appended: Pointer to the counter tracking the number of successfully appended samples. +// - oooRejected: Pointer to the counter tracking the number of out-of-order samples rejected. +// - oobRejected: Pointer to the counter tracking the number of out-of-bounds samples rejected. +// - tooOldRejected: Pointer to the counter tracking the number of too-old samples rejected. 
+func handleAppendableError(err error, appended, oooRejected, oobRejected, tooOldRejected *int) { + switch { + case errors.Is(err, storage.ErrOutOfOrderSample): + *appended-- + *oooRejected++ + case errors.Is(err, storage.ErrOutOfBounds): + *appended-- + *oobRejected++ + case errors.Is(err, storage.ErrTooOldSample): + *appended-- + *tooOldRejected++ + default: + *appended-- + } +} + +// commitSamples processes and commits the samples in the headAppender to the series. +// It handles both in-order and out-of-order samples, updating the appenderCommitContext +// with the results of the append operations. +// +// The function iterates over the samples in the headAppender and attempts to append each sample +// to its corresponding series. It handles various error cases such as out-of-order samples, +// out-of-bounds samples, and too-old samples, updating the appenderCommitContext accordingly. +// +// For out-of-order samples, it checks if the sample can be inserted into the series and updates +// the out-of-order mmap markers if necessary. It also updates the write-ahead log (WBL) samples +// and the minimum and maximum timestamps for out-of-order samples. +// +// For in-order samples, it attempts to append the sample to the series and updates the minimum +// and maximum timestamps for in-order samples. +// +// The function also increments the chunk metrics if a new chunk is created and performs cleanup +// operations on the series after appending the samples. +// +// There are also specific functions to commit histograms and float histograms. 
+func (a *headAppender) commitSamples(acc *appenderCommitContext) { + var ok, chunkCreated bool + var series *memSeries + + for i, s := range a.samples { + series = a.sampleSeries[i] + series.Lock() + + oooSample, _, err := series.appendable(s.T, s.V, a.headMaxt, a.minValidTime, a.oooTimeWindow) + if err != nil { + handleAppendableError(err, &acc.floatsAppended, &acc.floatOOORejected, &acc.floatOOBRejected, &acc.floatTooOldRejected) + } + + switch { + case err != nil: + // Do nothing here. + case oooSample: + // Sample is OOO and OOO handling is enabled + // and the delta is within the OOO tolerance. + var mmapRefs []chunks.ChunkDiskMapperRef + ok, chunkCreated, mmapRefs = series.insert(s.T, s.V, nil, nil, a.head.chunkDiskMapper, acc.oooCapMax, a.head.logger) + if chunkCreated { + r, ok := acc.oooMmapMarkers[series.ref] + if !ok || r != nil { + // !ok means there are no markers collected for these samples yet. So we first flush the samples + // before setting this m-map marker. + + // r != nil means we have already m-mapped a chunk for this series in the same Commit(). + // Hence, before we m-map again, we should add the samples and m-map markers + // seen till now to the WBL records. + acc.collectOOORecords(a) + } + + if acc.oooMmapMarkers == nil { + acc.oooMmapMarkers = make(map[chunks.HeadSeriesRef][]chunks.ChunkDiskMapperRef) + } + if len(mmapRefs) > 0 { + acc.oooMmapMarkers[series.ref] = mmapRefs + acc.oooMmapMarkersCount += len(mmapRefs) + } else { + // No chunk was written to disk, so we need to set an initial marker for this series. + acc.oooMmapMarkers[series.ref] = []chunks.ChunkDiskMapperRef{0} + acc.oooMmapMarkersCount++ + } + } + if ok { + acc.wblSamples = append(acc.wblSamples, s) + if s.T < acc.oooMinT { + acc.oooMinT = s.T + } + if s.T > acc.oooMaxT { + acc.oooMaxT = s.T + } + acc.oooFloatsAccepted++ + } else { + // Sample is an exact duplicate of the last sample. 
+ // NOTE: We can only detect updates if they clash with a sample in the OOOHeadChunk, + // not with samples in already flushed OOO chunks. + // TODO(codesome): Add error reporting? It depends on addressing https://github.com/prometheus/prometheus/discussions/10305. + acc.floatsAppended-- + } + default: + ok, chunkCreated = series.append(s.T, s.V, a.appendID, acc.appendChunkOpts) + if ok { + if s.T < acc.inOrderMint { + acc.inOrderMint = s.T + } + if s.T > acc.inOrderMaxt { + acc.inOrderMaxt = s.T + } + } else { + // The sample is an exact duplicate, and should be silently dropped. + acc.floatsAppended-- + } + } + + if chunkCreated { + a.head.metrics.chunks.Inc() + a.head.metrics.chunksCreated.Inc() + } + + series.cleanupAppendIDsBelow(a.cleanupAppendIDsBelow) + series.pendingCommit = false + series.Unlock() + } +} + +// For details on the commitHistograms function, see the commitSamples docs. +func (a *headAppender) commitHistograms(acc *appenderCommitContext) { + var ok, chunkCreated bool + var series *memSeries + + for i, s := range a.histograms { + series = a.histogramSeries[i] + series.Lock() + + oooSample, _, err := series.appendableHistogram(s.T, s.H, a.headMaxt, a.minValidTime, a.oooTimeWindow, a.head.opts.EnableOOONativeHistograms.Load()) + if err != nil { + handleAppendableError(err, &acc.histogramsAppended, &acc.histoOOORejected, &acc.histoOOBRejected, &acc.histoTooOldRejected) + } + + switch { + case err != nil: + // Do nothing here. + case oooSample: + // Sample is OOO and OOO handling is enabled + // and the delta is within the OOO tolerance. + var mmapRefs []chunks.ChunkDiskMapperRef + ok, chunkCreated, mmapRefs = series.insert(s.T, 0, s.H, nil, a.head.chunkDiskMapper, acc.oooCapMax, a.head.logger) + if chunkCreated { + r, ok := acc.oooMmapMarkers[series.ref] + if !ok || r != nil { + // !ok means there are no markers collected for these samples yet. So we first flush the samples + // before setting this m-map marker. 
+ + // r != nil means we have already m-mapped a chunk for this series in the same Commit(). + // Hence, before we m-map again, we should add the samples and m-map markers + // seen till now to the WBL records. + acc.collectOOORecords(a) + } + + if acc.oooMmapMarkers == nil { + acc.oooMmapMarkers = make(map[chunks.HeadSeriesRef][]chunks.ChunkDiskMapperRef) + } + if len(mmapRefs) > 0 { + acc.oooMmapMarkers[series.ref] = mmapRefs + acc.oooMmapMarkersCount += len(mmapRefs) + } else { + // No chunk was written to disk, so we need to set an initial marker for this series. + acc.oooMmapMarkers[series.ref] = []chunks.ChunkDiskMapperRef{0} + acc.oooMmapMarkersCount++ + } + } + if ok { + acc.wblHistograms = append(acc.wblHistograms, s) + if s.T < acc.oooMinT { + acc.oooMinT = s.T + } + if s.T > acc.oooMaxT { + acc.oooMaxT = s.T + } + acc.oooHistogramAccepted++ + } else { + // Sample is an exact duplicate of the last sample. + // NOTE: We can only detect updates if they clash with a sample in the OOOHeadChunk, + // not with samples in already flushed OOO chunks. + // TODO(codesome): Add error reporting? It depends on addressing https://github.com/prometheus/prometheus/discussions/10305. + acc.histogramsAppended-- + } + default: + ok, chunkCreated = series.appendHistogram(s.T, s.H, a.appendID, acc.appendChunkOpts) + if ok { + if s.T < acc.inOrderMint { + acc.inOrderMint = s.T + } + if s.T > acc.inOrderMaxt { + acc.inOrderMaxt = s.T + } + } else { + acc.histogramsAppended-- + acc.histoOOORejected++ + } + } + + if chunkCreated { + a.head.metrics.chunks.Inc() + a.head.metrics.chunksCreated.Inc() + } + + series.cleanupAppendIDsBelow(a.cleanupAppendIDsBelow) + series.pendingCommit = false + series.Unlock() + } +} + +// For details on the commitFloatHistograms function, see the commitSamples docs.
+func (a *headAppender) commitFloatHistograms(acc *appenderCommitContext) { + var ok, chunkCreated bool + var series *memSeries + + for i, s := range a.floatHistograms { + series = a.floatHistogramSeries[i] + series.Lock() + + oooSample, _, err := series.appendableFloatHistogram(s.T, s.FH, a.headMaxt, a.minValidTime, a.oooTimeWindow, a.head.opts.EnableOOONativeHistograms.Load()) + if err != nil { + handleAppendableError(err, &acc.histogramsAppended, &acc.histoOOORejected, &acc.histoOOBRejected, &acc.histoTooOldRejected) + } + + switch { + case err != nil: + // Do nothing here. + case oooSample: + // Sample is OOO and OOO handling is enabled + // and the delta is within the OOO tolerance. + var mmapRefs []chunks.ChunkDiskMapperRef + ok, chunkCreated, mmapRefs = series.insert(s.T, 0, nil, s.FH, a.head.chunkDiskMapper, acc.oooCapMax, a.head.logger) + if chunkCreated { + r, ok := acc.oooMmapMarkers[series.ref] + if !ok || r != nil { + // !ok means there are no markers collected for these samples yet. So we first flush the samples + // before setting this m-map marker. + + // r != nil means we have already m-mapped a chunk for this series in the same Commit(). + // Hence, before we m-map again, we should add the samples and m-map markers + // seen till now to the WBL records. + acc.collectOOORecords(a) + } + + if acc.oooMmapMarkers == nil { + acc.oooMmapMarkers = make(map[chunks.HeadSeriesRef][]chunks.ChunkDiskMapperRef) + } + if len(mmapRefs) > 0 { + acc.oooMmapMarkers[series.ref] = mmapRefs + acc.oooMmapMarkersCount += len(mmapRefs) + } else { + // No chunk was written to disk, so we need to set an initial marker for this series.
+ acc.oooMmapMarkers[series.ref] = []chunks.ChunkDiskMapperRef{0} + acc.oooMmapMarkersCount++ + } + } + if ok { + acc.wblFloatHistograms = append(acc.wblFloatHistograms, s) + if s.T < acc.oooMinT { + acc.oooMinT = s.T + } + if s.T > acc.oooMaxT { + acc.oooMaxT = s.T + } + acc.oooHistogramAccepted++ + } else { + // Sample is an exact duplicate of the last sample. + // NOTE: We can only detect updates if they clash with a sample in the OOOHeadChunk, + // not with samples in already flushed OOO chunks. + // TODO(codesome): Add error reporting? It depends on addressing https://github.com/prometheus/prometheus/discussions/10305. + acc.histogramsAppended-- + } + default: + ok, chunkCreated = series.appendFloatHistogram(s.T, s.FH, a.appendID, acc.appendChunkOpts) + if ok { + if s.T < acc.inOrderMint { + acc.inOrderMint = s.T + } + if s.T > acc.inOrderMaxt { + acc.inOrderMaxt = s.T + } + } else { + acc.histogramsAppended-- + acc.histoOOORejected++ + } + } + + if chunkCreated { + a.head.metrics.chunks.Inc() + a.head.metrics.chunksCreated.Inc() + } + + series.cleanupAppendIDsBelow(a.cleanupAppendIDsBelow) + series.pendingCommit = false + series.Unlock() + } +} + +// commitMetadata commits the metadata for each series in the headAppender. +// It iterates over the metadata slice and updates the corresponding series +// with the new metadata information. The series is locked during the update +// to ensure thread safety. +func (a *headAppender) commitMetadata() { + var series *memSeries + for i, m := range a.metadata { + series = a.metadataSeries[i] + series.Lock() + series.meta = &metadata.Metadata{Type: record.ToMetricType(m.Type), Unit: m.Unit, Help: m.Help} + series.Unlock() + } +} + +// Commit writes to the WAL and adds the data to the Head. +// TODO(codesome): Refactor this method to reduce indentation and make it more readable. 
+func (a *headAppender) Commit() (err error) { + if a.closed { + return ErrAppenderClosed + } + defer func() { a.closed = true }() + + if err := a.log(); err != nil { + _ = a.Rollback() // Most likely the same error will happen again. + return fmt.Errorf("write to WAL: %w", err) + } + + if a.head.writeNotified != nil { + a.head.writeNotified.Notify() + } + + a.commitExemplars() defer a.head.metrics.activeAppenders.Dec() defer a.head.putAppendBuffer(a.samples) @@ -1028,401 +1433,46 @@ func (a *headAppender) Commit() (err error) { defer a.head.putMetadataBuffer(a.metadata) defer a.head.iso.closeAppend(a.appendID) - var ( - floatsAppended = len(a.samples) - histogramsAppended = len(a.histograms) + len(a.floatHistograms) - // number of samples out of order but accepted: with ooo enabled and within time window - oooFloatsAccepted int - oooHistogramAccepted int - // number of samples rejected due to: out of order but OOO support disabled. - floatOOORejected int - histoOOORejected int - // number of samples rejected due to: that are out of order but too old (OOO support enabled, but outside time window) - floatTooOldRejected int - histoTooOldRejected int - // number of samples rejected due to: out of bounds: with t < minValidTime (OOO support disabled) - floatOOBRejected int - histoOOBRejected int - inOrderMint int64 = math.MaxInt64 - inOrderMaxt int64 = math.MinInt64 - oooMinT int64 = math.MaxInt64 - oooMaxT int64 = math.MinInt64 - wblSamples []record.RefSample - wblHistograms []record.RefHistogramSample - wblFloatHistograms []record.RefFloatHistogramSample - oooMmapMarkers map[chunks.HeadSeriesRef][]chunks.ChunkDiskMapperRef - oooMmapMarkersCount int - oooRecords [][]byte - oooCapMax = a.head.opts.OutOfOrderCapMax.Load() - series *memSeries - appendChunkOpts = chunkOpts{ + acc := &appenderCommitContext{ + floatsAppended: len(a.samples), + histogramsAppended: len(a.histograms) + len(a.floatHistograms), + inOrderMint: math.MaxInt64, + inOrderMaxt: math.MinInt64, + 
oooMinT: math.MaxInt64, + oooMaxT: math.MinInt64, + oooCapMax: a.head.opts.OutOfOrderCapMax.Load(), + appendChunkOpts: chunkOpts{ chunkDiskMapper: a.head.chunkDiskMapper, chunkRange: a.head.chunkRange.Load(), samplesPerChunk: a.head.opts.SamplesPerChunk, - } - enc record.Encoder - ) + }, + } + defer func() { - for i := range oooRecords { - a.head.putBytesBuffer(oooRecords[i][:0]) + for i := range acc.oooRecords { + a.head.putBytesBuffer(acc.oooRecords[i][:0]) } }() - collectOOORecords := func() { - if a.head.wbl == nil { - // WBL is not enabled. So no need to collect. - wblSamples = nil - wblHistograms = nil - wblFloatHistograms = nil - oooMmapMarkers = nil - oooMmapMarkersCount = 0 - return - } - // The m-map happens before adding a new sample. So we collect - // the m-map markers first, and then samples. - // WBL Graphically: - // WBL Before this Commit(): [old samples before this commit for chunk 1] - // WBL After this Commit(): [old samples before this commit for chunk 1][new samples in this commit for chunk 1]mmapmarker1[samples for chunk 2]mmapmarker2[samples for chunk 3] - if oooMmapMarkers != nil { - markers := make([]record.RefMmapMarker, 0, oooMmapMarkersCount) - for ref, mmapRefs := range oooMmapMarkers { - for _, mmapRef := range mmapRefs { - markers = append(markers, record.RefMmapMarker{ - Ref: ref, - MmapRef: mmapRef, - }) - } - } - r := enc.MmapMarkers(markers, a.head.getBytesBuffer()) - oooRecords = append(oooRecords, r) - } - if len(wblSamples) > 0 { - r := enc.Samples(wblSamples, a.head.getBytesBuffer()) - oooRecords = append(oooRecords, r) - } - if len(wblHistograms) > 0 { - r := enc.HistogramSamples(wblHistograms, a.head.getBytesBuffer()) - oooRecords = append(oooRecords, r) - } - if len(wblFloatHistograms) > 0 { - r := enc.FloatHistogramSamples(wblFloatHistograms, a.head.getBytesBuffer()) - oooRecords = append(oooRecords, r) - } + a.commitSamples(acc) + a.commitHistograms(acc) + a.commitFloatHistograms(acc) + a.commitMetadata() - wblSamples = 
nil - wblHistograms = nil - wblFloatHistograms = nil - oooMmapMarkers = nil - } - for i, s := range a.samples { - series = a.sampleSeries[i] - series.Lock() + a.head.metrics.outOfOrderSamples.WithLabelValues(sampleMetricTypeFloat).Add(float64(acc.floatOOORejected)) + a.head.metrics.outOfOrderSamples.WithLabelValues(sampleMetricTypeHistogram).Add(float64(acc.histoOOORejected)) + a.head.metrics.outOfBoundSamples.WithLabelValues(sampleMetricTypeFloat).Add(float64(acc.floatOOBRejected)) + a.head.metrics.tooOldSamples.WithLabelValues(sampleMetricTypeFloat).Add(float64(acc.floatTooOldRejected)) + a.head.metrics.samplesAppended.WithLabelValues(sampleMetricTypeFloat).Add(float64(acc.floatsAppended)) + a.head.metrics.samplesAppended.WithLabelValues(sampleMetricTypeHistogram).Add(float64(acc.histogramsAppended)) + a.head.metrics.outOfOrderSamplesAppended.WithLabelValues(sampleMetricTypeFloat).Add(float64(acc.oooFloatsAccepted)) + a.head.metrics.outOfOrderSamplesAppended.WithLabelValues(sampleMetricTypeHistogram).Add(float64(acc.oooHistogramAccepted)) + a.head.updateMinMaxTime(acc.inOrderMint, acc.inOrderMaxt) + a.head.updateMinOOOMaxOOOTime(acc.oooMinT, acc.oooMaxT) - oooSample, _, err := series.appendable(s.T, s.V, a.headMaxt, a.minValidTime, a.oooTimeWindow) - switch { - case err == nil: - // Do nothing. - case errors.Is(err, storage.ErrOutOfOrderSample): - floatsAppended-- - floatOOORejected++ - case errors.Is(err, storage.ErrOutOfBounds): - floatsAppended-- - floatOOBRejected++ - case errors.Is(err, storage.ErrTooOldSample): - floatsAppended-- - floatTooOldRejected++ - default: - floatsAppended-- - } - - var ok, chunkCreated bool - - switch { - case err != nil: - // Do nothing here. - case oooSample: - // Sample is OOO and OOO handling is enabled - // and the delta is within the OOO tolerance. 
- var mmapRefs []chunks.ChunkDiskMapperRef - ok, chunkCreated, mmapRefs = series.insert(s.T, s.V, nil, nil, a.head.chunkDiskMapper, oooCapMax, a.head.logger) - if chunkCreated { - r, ok := oooMmapMarkers[series.ref] - if !ok || r != nil { - // !ok means there are no markers collected for these samples yet. So we first flush the samples - // before setting this m-map marker. - - // r != nil means we have already m-mapped a chunk for this series in the same Commit(). - // Hence, before we m-map again, we should add the samples and m-map markers - // seen till now to the WBL records. - collectOOORecords() - } - - if oooMmapMarkers == nil { - oooMmapMarkers = make(map[chunks.HeadSeriesRef][]chunks.ChunkDiskMapperRef) - } - if len(mmapRefs) > 0 { - oooMmapMarkers[series.ref] = mmapRefs - oooMmapMarkersCount += len(mmapRefs) - } else { - // No chunk was written to disk, so we need to set an initial marker for this series. - oooMmapMarkers[series.ref] = []chunks.ChunkDiskMapperRef{0} - oooMmapMarkersCount++ - } - } - if ok { - wblSamples = append(wblSamples, s) - if s.T < oooMinT { - oooMinT = s.T - } - if s.T > oooMaxT { - oooMaxT = s.T - } - oooFloatsAccepted++ - } else { - // Sample is an exact duplicate of the last sample. - // NOTE: We can only detect updates if they clash with a sample in the OOOHeadChunk, - // not with samples in already flushed OOO chunks. - // TODO(codesome): Add error reporting? It depends on addressing https://github.com/prometheus/prometheus/discussions/10305. - floatsAppended-- - } - default: - ok, chunkCreated = series.append(s.T, s.V, a.appendID, appendChunkOpts) - if ok { - if s.T < inOrderMint { - inOrderMint = s.T - } - if s.T > inOrderMaxt { - inOrderMaxt = s.T - } - } else { - // The sample is an exact duplicate, and should be silently dropped. 
- floatsAppended-- - } - } - - if chunkCreated { - a.head.metrics.chunks.Inc() - a.head.metrics.chunksCreated.Inc() - } - - series.cleanupAppendIDsBelow(a.cleanupAppendIDsBelow) - series.pendingCommit = false - series.Unlock() - } - - for i, s := range a.histograms { - series = a.histogramSeries[i] - series.Lock() - - oooSample, _, err := series.appendableHistogram(s.T, s.H, a.headMaxt, a.minValidTime, a.oooTimeWindow, a.head.opts.EnableOOONativeHistograms.Load()) - switch { - case err == nil: - // Do nothing. - case errors.Is(err, storage.ErrOutOfOrderSample): - histogramsAppended-- - histoOOORejected++ - case errors.Is(err, storage.ErrOutOfBounds): - histogramsAppended-- - histoOOBRejected++ - case errors.Is(err, storage.ErrTooOldSample): - histogramsAppended-- - histoTooOldRejected++ - default: - histogramsAppended-- - } - - var ok, chunkCreated bool - - switch { - case err != nil: - // Do nothing here. - case oooSample: - // Sample is OOO and OOO handling is enabled - // and the delta is within the OOO tolerance. - var mmapRefs []chunks.ChunkDiskMapperRef - ok, chunkCreated, mmapRefs = series.insert(s.T, 0, s.H, nil, a.head.chunkDiskMapper, oooCapMax, a.head.logger) - if chunkCreated { - r, ok := oooMmapMarkers[series.ref] - if !ok || r != nil { - // !ok means there are no markers collected for these samples yet. So we first flush the samples - // before setting this m-map marker. - - // r != 0 means we have already m-mapped a chunk for this series in the same Commit(). - // Hence, before we m-map again, we should add the samples and m-map markers - // seen till now to the WBL records. - collectOOORecords() - } - - if oooMmapMarkers == nil { - oooMmapMarkers = make(map[chunks.HeadSeriesRef][]chunks.ChunkDiskMapperRef) - } - if len(mmapRefs) > 0 { - oooMmapMarkers[series.ref] = mmapRefs - oooMmapMarkersCount += len(mmapRefs) - } else { - // No chunk was written to disk, so we need to set an initial marker for this series. 
- oooMmapMarkers[series.ref] = []chunks.ChunkDiskMapperRef{0} - oooMmapMarkersCount++ - } - } - if ok { - wblHistograms = append(wblHistograms, s) - if s.T < oooMinT { - oooMinT = s.T - } - if s.T > oooMaxT { - oooMaxT = s.T - } - oooHistogramAccepted++ - } else { - // Sample is an exact duplicate of the last sample. - // NOTE: We can only detect updates if they clash with a sample in the OOOHeadChunk, - // not with samples in already flushed OOO chunks. - // TODO(codesome): Add error reporting? It depends on addressing https://github.com/prometheus/prometheus/discussions/10305. - histogramsAppended-- - } - default: - ok, chunkCreated = series.appendHistogram(s.T, s.H, a.appendID, appendChunkOpts) - if ok { - if s.T < inOrderMint { - inOrderMint = s.T - } - if s.T > inOrderMaxt { - inOrderMaxt = s.T - } - } else { - histogramsAppended-- - histoOOORejected++ - } - } - - if chunkCreated { - a.head.metrics.chunks.Inc() - a.head.metrics.chunksCreated.Inc() - } - - series.cleanupAppendIDsBelow(a.cleanupAppendIDsBelow) - series.pendingCommit = false - series.Unlock() - } - - for i, s := range a.floatHistograms { - series = a.floatHistogramSeries[i] - series.Lock() - - oooSample, _, err := series.appendableFloatHistogram(s.T, s.FH, a.headMaxt, a.minValidTime, a.oooTimeWindow, a.head.opts.EnableOOONativeHistograms.Load()) - switch { - case err == nil: - // Do nothing. - case errors.Is(err, storage.ErrOutOfOrderSample): - histogramsAppended-- - histoOOORejected++ - case errors.Is(err, storage.ErrOutOfBounds): - histogramsAppended-- - histoOOBRejected++ - case errors.Is(err, storage.ErrTooOldSample): - histogramsAppended-- - histoTooOldRejected++ - default: - histogramsAppended-- - } - - var ok, chunkCreated bool - - switch { - case err != nil: - // Do nothing here. - case oooSample: - // Sample is OOO and OOO handling is enabled - // and the delta is within the OOO tolerance. 
- var mmapRefs []chunks.ChunkDiskMapperRef - ok, chunkCreated, mmapRefs = series.insert(s.T, 0, nil, s.FH, a.head.chunkDiskMapper, oooCapMax, a.head.logger) - if chunkCreated { - r, ok := oooMmapMarkers[series.ref] - if !ok || r != nil { - // !ok means there are no markers collected for these samples yet. So we first flush the samples - // before setting this m-map marker. - - // r != 0 means we have already m-mapped a chunk for this series in the same Commit(). - // Hence, before we m-map again, we should add the samples and m-map markers - // seen till now to the WBL records. - collectOOORecords() - } - - if oooMmapMarkers == nil { - oooMmapMarkers = make(map[chunks.HeadSeriesRef][]chunks.ChunkDiskMapperRef) - } - if len(mmapRefs) > 0 { - oooMmapMarkers[series.ref] = mmapRefs - oooMmapMarkersCount += len(mmapRefs) - } else { - // No chunk was written to disk, so we need to set an initial marker for this series. - oooMmapMarkers[series.ref] = []chunks.ChunkDiskMapperRef{0} - oooMmapMarkersCount++ - } - } - if ok { - wblFloatHistograms = append(wblFloatHistograms, s) - if s.T < oooMinT { - oooMinT = s.T - } - if s.T > oooMaxT { - oooMaxT = s.T - } - oooHistogramAccepted++ - } else { - // Sample is an exact duplicate of the last sample. - // NOTE: We can only detect updates if they clash with a sample in the OOOHeadChunk, - // not with samples in already flushed OOO chunks. - // TODO(codesome): Add error reporting? It depends on addressing https://github.com/prometheus/prometheus/discussions/10305. 
- histogramsAppended-- - } - default: - ok, chunkCreated = series.appendFloatHistogram(s.T, s.FH, a.appendID, appendChunkOpts) - if ok { - if s.T < inOrderMint { - inOrderMint = s.T - } - if s.T > inOrderMaxt { - inOrderMaxt = s.T - } - } else { - histogramsAppended-- - histoOOORejected++ - } - } - - if chunkCreated { - a.head.metrics.chunks.Inc() - a.head.metrics.chunksCreated.Inc() - } - - series.cleanupAppendIDsBelow(a.cleanupAppendIDsBelow) - series.pendingCommit = false - series.Unlock() - } - - for i, m := range a.metadata { - series = a.metadataSeries[i] - series.Lock() - series.meta = &metadata.Metadata{Type: record.ToMetricType(m.Type), Unit: m.Unit, Help: m.Help} - series.Unlock() - } - - a.head.metrics.outOfOrderSamples.WithLabelValues(sampleMetricTypeFloat).Add(float64(floatOOORejected)) - a.head.metrics.outOfOrderSamples.WithLabelValues(sampleMetricTypeHistogram).Add(float64(histoOOORejected)) - a.head.metrics.outOfBoundSamples.WithLabelValues(sampleMetricTypeFloat).Add(float64(floatOOBRejected)) - a.head.metrics.tooOldSamples.WithLabelValues(sampleMetricTypeFloat).Add(float64(floatTooOldRejected)) - a.head.metrics.samplesAppended.WithLabelValues(sampleMetricTypeFloat).Add(float64(floatsAppended)) - a.head.metrics.samplesAppended.WithLabelValues(sampleMetricTypeHistogram).Add(float64(histogramsAppended)) - a.head.metrics.outOfOrderSamplesAppended.WithLabelValues(sampleMetricTypeFloat).Add(float64(oooFloatsAccepted)) - a.head.metrics.outOfOrderSamplesAppended.WithLabelValues(sampleMetricTypeHistogram).Add(float64(oooHistogramAccepted)) - a.head.updateMinMaxTime(inOrderMint, inOrderMaxt) - a.head.updateMinOOOMaxOOOTime(oooMinT, oooMaxT) - - collectOOORecords() + acc.collectOOORecords(a) if a.head.wbl != nil { - if err := a.head.wbl.Log(oooRecords...); err != nil { + if err := a.head.wbl.Log(acc.oooRecords...); err != nil { // TODO(codesome): Currently WBL logging of ooo samples is best effort here since we cannot try logging // until we have found what 
samples become OOO. We can try having a metric for this failure. // Returning the error here is not correct because we have already put the samples into the memory, diff --git a/tsdb/head_bench_test.go b/tsdb/head_bench_test.go index 51de50ec2..aa2cf2214 100644 --- a/tsdb/head_bench_test.go +++ b/tsdb/head_bench_test.go @@ -14,15 +14,22 @@ package tsdb import ( + "context" "errors" + "fmt" + "math/rand" "strconv" "testing" "github.com/stretchr/testify/require" "go.uber.org/atomic" + "github.com/prometheus/prometheus/model/exemplar" + "github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" + "github.com/prometheus/prometheus/storage" "github.com/prometheus/prometheus/tsdb/chunks" + "github.com/prometheus/prometheus/tsdb/wlog" ) func BenchmarkHeadStripeSeriesCreate(b *testing.B) { @@ -79,6 +86,86 @@ func BenchmarkHeadStripeSeriesCreate_PreCreationFailure(b *testing.B) { } } +func BenchmarkHead_WalCommit(b *testing.B) { + seriesCounts := []int{100, 1000, 10000} + series := genSeries(10000, 10, 0, 0) // Only using the generated labels. + + appendSamples := func(b *testing.B, app storage.Appender, seriesCount int, ts int64) { + var err error + for i, s := range series[:seriesCount] { + var ref storage.SeriesRef + // if i is even, append a sample, else append a histogram. 
+ if i%2 == 0 { + ref, err = app.Append(ref, s.Labels(), ts, float64(ts)) + } else { + h := &histogram.Histogram{ + Count: 7 + uint64(ts*5), + ZeroCount: 2 + uint64(ts), + ZeroThreshold: 0.001, + Sum: 18.4 * rand.Float64(), + Schema: 1, + PositiveSpans: []histogram.Span{ + {Offset: 0, Length: 2}, + {Offset: 1, Length: 2}, + }, + PositiveBuckets: []int64{ts + 1, 1, -1, 0}, + } + ref, err = app.AppendHistogram(ref, s.Labels(), ts, h, nil) + } + require.NoError(b, err) + + _, err = app.AppendExemplar(ref, s.Labels(), exemplar.Exemplar{ + Labels: labels.FromStrings("trace_id", strconv.Itoa(rand.Int())), + Value: rand.Float64(), + Ts: ts, + }) + require.NoError(b, err) + } + } + + for _, seriesCount := range seriesCounts { + b.Run(fmt.Sprintf("%d series", seriesCount), func(b *testing.B) { + for _, commits := range []int64{1, 2} { // To test commits that create new series and when the series already exists. + b.Run(fmt.Sprintf("%d commits", commits), func(b *testing.B) { + b.ReportAllocs() + b.ResetTimer() + + for i := 0; i < b.N; i++ { + b.StopTimer() + h, w := newTestHead(b, 10000, wlog.CompressionNone, false) + b.Cleanup(func() { + if h != nil { + h.Close() + } + if w != nil { + w.Close() + } + }) + app := h.Appender(context.Background()) + + appendSamples(b, app, seriesCount, 0) + + b.StartTimer() + require.NoError(b, app.Commit()) + if commits == 2 { + b.StopTimer() + app = h.Appender(context.Background()) + appendSamples(b, app, seriesCount, 1) + b.StartTimer() + require.NoError(b, app.Commit()) + } + b.StopTimer() + h.Close() + h = nil + w.Close() + w = nil + } + }) + } + }) + } +} + type failingSeriesLifecycleCallback struct{} func (failingSeriesLifecycleCallback) PreCreation(labels.Labels) error { return errors.New("failed") } From e2f55c34c9d68347fa9a095f480290473c557c07 Mon Sep 17 00:00:00 2001 From: Jan Fajerski Date: Tue, 29 Oct 2024 14:41:50 +0100 Subject: [PATCH 323/339] fix CHANGELOG formatting and add entry for #14694 Signed-off-by: Jan Fajerski --- 
CHANGELOG.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ab454a9fc..f71b701ab 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,8 +11,9 @@ * [CHANGE] Disallow configuring AM with the v1 api. #13883 * [ENHANCEMENT] Scraping, rules: handle targets reappearing, or rules moving group, when out-of-order is enabled. #14710 * [ENHANCEMENT] Tools: add debug printouts to promtool rules unit testing #15196 -- [BUGFIX] PromQL: Fix stddev+stdvar aggregations to always ignore native histograms. #14941 -- [BUGFIX] PromQL: Fix stddev+stdvar aggregations to treat Infinity consistently. #14941 +* [ENHANCEMENT] Scraping: support Created-Timestamp feature on native histograms. #14694 +* [BUGFIX] PromQL: Fix stddev+stdvar aggregations to always ignore native histograms. #14941 +* [BUGFIX] PromQL: Fix stddev+stdvar aggregations to treat Infinity consistently. #14941 ## 3.0.0-beta.1 / 2024-10-09 From ba11a55df4a8fd4db4150444296cc411566cfd5a Mon Sep 17 00:00:00 2001 From: Oleg Zaytsev Date: Tue, 29 Oct 2024 17:00:29 +0100 Subject: [PATCH 324/339] Revert "Process `MemPostings.Delete()` with `GOMAXPROCS` workers" Signed-off-by: Oleg Zaytsev --- tsdb/index/postings.go | 83 ++++++++-------------------------- tsdb/index/postings_test.go | 88 ++++++++++++------------------------- 2 files changed, 46 insertions(+), 125 deletions(-) diff --git a/tsdb/index/postings.go b/tsdb/index/postings.go index 7bc5629ac..58f3473da 100644 --- a/tsdb/index/postings.go +++ b/tsdb/index/postings.go @@ -26,7 +26,6 @@ import ( "sync" "github.com/bboreham/go-loser" - "github.com/cespare/xxhash/v2" "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/storage" @@ -293,76 +292,30 @@ func (p *MemPostings) EnsureOrder(numberOfConcurrentProcesses int) { func (p *MemPostings) Delete(deleted map[storage.SeriesRef]struct{}, affected map[labels.Label]struct{}) { p.mtx.Lock() defer p.mtx.Unlock() - if len(p.m) == 0 || len(deleted) == 
0 { - return + + process := func(l labels.Label) { + orig := p.m[l.Name][l.Value] + repl := make([]storage.SeriesRef, 0, len(orig)) + for _, id := range orig { + if _, ok := deleted[id]; !ok { + repl = append(repl, id) + } + } + if len(repl) > 0 { + p.m[l.Name][l.Value] = repl + } else { + delete(p.m[l.Name], l.Value) + // Delete the key if we removed all values. + if len(p.m[l.Name]) == 0 { + delete(p.m, l.Name) + } + } } - // Deleting label names mutates p.m map, so it should be done from a single goroutine after nobody else is reading it. - deleteLabelNames := make(chan string, len(p.m)) - - process, wait := processWithBoundedParallelismAndConsistentWorkers( - runtime.GOMAXPROCS(0), - func(l labels.Label) uint64 { return xxhash.Sum64String(l.Name) }, - func(l labels.Label) { - orig := p.m[l.Name][l.Value] - repl := make([]storage.SeriesRef, 0, len(orig)) - for _, id := range orig { - if _, ok := deleted[id]; !ok { - repl = append(repl, id) - } - } - if len(repl) > 0 { - p.m[l.Name][l.Value] = repl - } else { - delete(p.m[l.Name], l.Value) - if len(p.m[l.Name]) == 0 { - // Delete the key if we removed all values. - deleteLabelNames <- l.Name - } - } - }, - ) - for l := range affected { process(l) } process(allPostingsKey) - wait() - - // Close deleteLabelNames channel and delete the label names requested. - close(deleteLabelNames) - for name := range deleteLabelNames { - delete(p.m, name) - } -} - -// processWithBoundedParallelismAndConsistentWorkers will call f() with bounded parallelism, -// making sure that elements with same hash(T) will always be processed by the same worker. -// Call process() to add more jobs to process, and once finished adding, call wait() to ensure that all jobs are processed. 
-func processWithBoundedParallelismAndConsistentWorkers[T any](workers int, hash func(T) uint64, f func(T)) (process func(T), wait func()) { - wg := &sync.WaitGroup{} - jobs := make([]chan T, workers) - for i := 0; i < workers; i++ { - wg.Add(1) - jobs[i] = make(chan T, 128) - go func(jobs <-chan T) { - defer wg.Done() - for l := range jobs { - f(l) - } - }(jobs[i]) - } - - process = func(job T) { - jobs[hash(job)%uint64(workers)] <- job - } - wait = func() { - for i := range jobs { - close(jobs[i]) - } - wg.Wait() - } - return process, wait } // Iter calls f for each postings list. It aborts if f returns an error and returns it. diff --git a/tsdb/index/postings_test.go b/tsdb/index/postings_test.go index 8ee9b9943..7d0b717bf 100644 --- a/tsdb/index/postings_test.go +++ b/tsdb/index/postings_test.go @@ -973,69 +973,37 @@ func TestMemPostingsStats(t *testing.T) { } func TestMemPostings_Delete(t *testing.T) { - t.Run("some postings", func(t *testing.T) { - p := NewMemPostings() - p.Add(1, labels.FromStrings("lbl1", "a")) - p.Add(2, labels.FromStrings("lbl1", "b")) - p.Add(3, labels.FromStrings("lbl2", "a")) + p := NewMemPostings() + p.Add(1, labels.FromStrings("lbl1", "a")) + p.Add(2, labels.FromStrings("lbl1", "b")) + p.Add(3, labels.FromStrings("lbl2", "a")) - before := p.Get(allPostingsKey.Name, allPostingsKey.Value) - deletedRefs := map[storage.SeriesRef]struct{}{ - 2: {}, - } - affectedLabels := map[labels.Label]struct{}{ - {Name: "lbl1", Value: "b"}: {}, - } - p.Delete(deletedRefs, affectedLabels) - after := p.Get(allPostingsKey.Name, allPostingsKey.Value) + before := p.Get(allPostingsKey.Name, allPostingsKey.Value) + deletedRefs := map[storage.SeriesRef]struct{}{ + 2: {}, + } + affectedLabels := map[labels.Label]struct{}{ + {Name: "lbl1", Value: "b"}: {}, + } + p.Delete(deletedRefs, affectedLabels) + after := p.Get(allPostingsKey.Name, allPostingsKey.Value) - // Make sure postings gotten before the delete have the old data when - // iterated over. 
- expanded, err := ExpandPostings(before) - require.NoError(t, err) - require.Equal(t, []storage.SeriesRef{1, 2, 3}, expanded) + // Make sure postings gotten before the delete have the old data when + // iterated over. + expanded, err := ExpandPostings(before) + require.NoError(t, err) + require.Equal(t, []storage.SeriesRef{1, 2, 3}, expanded) - // Make sure postings gotten after the delete have the new data when - // iterated over. - expanded, err = ExpandPostings(after) - require.NoError(t, err) - require.Equal(t, []storage.SeriesRef{1, 3}, expanded) + // Make sure postings gotten after the delete have the new data when + // iterated over. + expanded, err = ExpandPostings(after) + require.NoError(t, err) + require.Equal(t, []storage.SeriesRef{1, 3}, expanded) - deleted := p.Get("lbl1", "b") - expanded, err = ExpandPostings(deleted) - require.NoError(t, err) - require.Empty(t, expanded, "expected empty postings, got %v", expanded) - }) - - t.Run("all postings", func(t *testing.T) { - p := NewMemPostings() - p.Add(1, labels.FromStrings("lbl1", "a")) - p.Add(2, labels.FromStrings("lbl1", "b")) - p.Add(3, labels.FromStrings("lbl2", "a")) - - deletedRefs := map[storage.SeriesRef]struct{}{1: {}, 2: {}, 3: {}} - affectedLabels := map[labels.Label]struct{}{ - {Name: "lbl1", Value: "a"}: {}, - {Name: "lbl1", Value: "b"}: {}, - {Name: "lbl1", Value: "c"}: {}, - } - p.Delete(deletedRefs, affectedLabels) - after := p.Get(allPostingsKey.Name, allPostingsKey.Value) - expanded, err := ExpandPostings(after) - require.NoError(t, err) - require.Empty(t, expanded) - }) - - t.Run("nothing on empty mempostings", func(t *testing.T) { - p := NewMemPostings() - deletedRefs := map[storage.SeriesRef]struct{}{} - affectedLabels := map[labels.Label]struct{}{} - p.Delete(deletedRefs, affectedLabels) - after := p.Get(allPostingsKey.Name, allPostingsKey.Value) - expanded, err := ExpandPostings(after) - require.NoError(t, err) - require.Empty(t, expanded) - }) + deleted := p.Get("lbl1", "b") + 
expanded, err = ExpandPostings(deleted) + require.NoError(t, err) + require.Empty(t, expanded, "expected empty postings, got %v", expanded) } // BenchmarkMemPostings_Delete is quite heavy, so consider running it with @@ -1057,7 +1025,7 @@ func BenchmarkMemPostings_Delete(b *testing.B) { return s } - const total = 2e6 + const total = 1e6 allSeries := [total]labels.Labels{} nameValues := make([]string, 0, 100) for i := 0; i < total; i++ { From c861b31b72b6716d166b74b2701a22a528342c2a Mon Sep 17 00:00:00 2001 From: Julius Volz Date: Tue, 29 Oct 2024 19:40:12 +0100 Subject: [PATCH 325/339] Support UTF-8 metric names and labels in web UI Fixes most of https://github.com/prometheus/prometheus/issues/15202 This should address all areas of the UI except for the autocompletion in the codemirror-promql text editor. The strategy here is that any time we print or internally serialize (like for the PromLens tree view) either a metric name or a label name as part of a selector or in other relevant parts of PromQL, we check whether it contains characters beyond what was previously supported, and if so, quote and escape it. In the case of metric names, we also have to move them from the beginning of the selector into the curly braces. 
Signed-off-by: Julius Volz --- .../mantine-ui/src/components/LabelBadges.tsx | 3 +- web/ui/mantine-ui/src/lib/formatSeries.ts | 14 ++- .../mantine-ui/src/pages/query/SeriesName.tsx | 39 +++++-- web/ui/mantine-ui/src/promql/format.tsx | 63 +++++++--- web/ui/mantine-ui/src/promql/serialize.ts | 44 ++++--- .../src/promql/serializeAndFormat.test.ts | 109 +++++++++++++++++- web/ui/mantine-ui/src/promql/utils.ts | 15 +++ 7 files changed, 242 insertions(+), 45 deletions(-) diff --git a/web/ui/mantine-ui/src/components/LabelBadges.tsx b/web/ui/mantine-ui/src/components/LabelBadges.tsx index f60a37f03..8aa713556 100644 --- a/web/ui/mantine-ui/src/components/LabelBadges.tsx +++ b/web/ui/mantine-ui/src/components/LabelBadges.tsx @@ -2,6 +2,7 @@ import { Badge, BadgeVariant, Group, MantineColor, Stack } from "@mantine/core"; import { FC } from "react"; import { escapeString } from "../lib/escapeString"; import badgeClasses from "../Badge.module.css"; +import { maybeQuoteLabelName } from "../promql/utils"; export interface LabelBadgesProps { labels: Record; @@ -30,7 +31,7 @@ export const LabelBadges: FC = ({ }} key={k} > - {k}="{escapeString(v)}" + {maybeQuoteLabelName(k)}="{escapeString(v)}" ); })} diff --git a/web/ui/mantine-ui/src/lib/formatSeries.ts b/web/ui/mantine-ui/src/lib/formatSeries.ts index b79c40076..007659070 100644 --- a/web/ui/mantine-ui/src/lib/formatSeries.ts +++ b/web/ui/mantine-ui/src/lib/formatSeries.ts @@ -1,12 +1,24 @@ +import { + maybeQuoteLabelName, + metricContainsExtendedCharset, +} from "../promql/utils"; import { escapeString } from "./escapeString"; +// TODO: Maybe replace this with the new PromLens-derived serialization code in src/promql/serialize.ts? 
export const formatSeries = (labels: { [key: string]: string }): string => { if (labels === null) { return "scalar"; } + if (metricContainsExtendedCharset(labels.__name__ || "")) { + return `{"${escapeString(labels.__name__)}",${Object.entries(labels) + .filter(([k]) => k !== "__name__") + .map(([k, v]) => `${maybeQuoteLabelName(k)}="${escapeString(v)}"`) + .join(", ")}}`; + } + return `${labels.__name__ || ""}{${Object.entries(labels) .filter(([k]) => k !== "__name__") - .map(([k, v]) => `${k}="${escapeString(v)}"`) + .map(([k, v]) => `${maybeQuoteLabelName(k)}="${escapeString(v)}"`) .join(", ")}}`; }; diff --git a/web/ui/mantine-ui/src/pages/query/SeriesName.tsx b/web/ui/mantine-ui/src/pages/query/SeriesName.tsx index 66a7856f5..61bc62eee 100644 --- a/web/ui/mantine-ui/src/pages/query/SeriesName.tsx +++ b/web/ui/mantine-ui/src/pages/query/SeriesName.tsx @@ -5,6 +5,10 @@ import classes from "./SeriesName.module.css"; import { escapeString } from "../../lib/escapeString"; import { useClipboard } from "@mantine/hooks"; import { notifications } from "@mantine/notifications"; +import { + maybeQuoteLabelName, + metricContainsExtendedCharset, +} from "../../promql/utils"; interface SeriesNameProps { labels: { [key: string]: string } | null; @@ -15,8 +19,26 @@ const SeriesName: FC = ({ labels, format }) => { const clipboard = useClipboard(); const renderFormatted = (): React.ReactElement => { + const metricExtendedCharset = + labels && metricContainsExtendedCharset(labels.__name__ || ""); + const labelNodes: React.ReactElement[] = []; let first = true; + + // If the metric name uses the extended new charset, we need to escape it, + // put it into the label matcher list, and make sure it's the first item. 
+ if (metricExtendedCharset) { + labelNodes.push( + + + "{escapeString(labels.__name__)}" + + + ); + + first = false; + } + for (const label in labels) { if (label === "__name__") { continue; @@ -37,7 +59,10 @@ const SeriesName: FC = ({ labels, format }) => { }} title="Click to copy label matcher" > - {label}= + + {maybeQuoteLabelName(label)} + + = "{escapeString(labels[label])}" @@ -52,9 +77,11 @@ const SeriesName: FC = ({ labels, format }) => { return ( - - {labels ? labels.__name__ : ""} - + {!metricExtendedCharset && ( + + {labels ? labels.__name__ : ""} + + )} {"{"} {labelNodes} {"}"} @@ -62,10 +89,6 @@ const SeriesName: FC = ({ labels, format }) => { ); }; - if (labels === null) { - return <>scalar; - } - if (format) { return renderFormatted(); } diff --git a/web/ui/mantine-ui/src/promql/format.tsx b/web/ui/mantine-ui/src/promql/format.tsx index 05dd7d410..399644408 100644 --- a/web/ui/mantine-ui/src/promql/format.tsx +++ b/web/ui/mantine-ui/src/promql/format.tsx @@ -8,14 +8,21 @@ import ASTNode, { MatrixSelector, } from "./ast"; import { formatPrometheusDuration } from "../lib/formatTime"; -import { maybeParenthesizeBinopChild, escapeString } from "./utils"; +import { + maybeParenthesizeBinopChild, + escapeString, + maybeQuoteLabelName, + metricContainsExtendedCharset, +} from "./utils"; export const labelNameList = (labels: string[]): React.ReactNode[] => { return labels.map((l, i) => { return ( {i !== 0 && ", "} - {l} + + {maybeQuoteLabelName(l)} + ); }); @@ -69,27 +76,45 @@ const formatAtAndOffset = ( const formatSelector = ( node: VectorSelector | MatrixSelector ): ReactElement => { - const matchLabels = node.matchers - .filter( - (m) => - !( - m.name === "__name__" && - m.type === matchType.equal && - m.value === node.name - ) - ) - .map((m, i) => ( - - {i !== 0 && ","} - {m.name} - {m.type} - "{escapeString(m.value)}" + const matchLabels: JSX.Element[] = []; + + // If the metric name contains the new extended charset, we need to escape it + // and add 
it at the beginning of the matchers list in the curly braces. + const metricName = + node.name || + node.matchers.find( + (m) => m.name === "__name__" && m.type === matchType.equal + )?.value || + ""; + const metricExtendedCharset = metricContainsExtendedCharset(metricName); + if (metricExtendedCharset) { + matchLabels.push( + + "{escapeString(metricName)}" - )); + ); + } + + matchLabels.push( + ...node.matchers + .filter((m) => !(m.name === "__name__" && m.type === matchType.equal)) + .map((m, i) => ( + + {(i !== 0 || metricExtendedCharset) && ","} + + {maybeQuoteLabelName(m.name)} + + {m.type} + "{escapeString(m.value)}" + + )) + ); return ( <> - {node.name} + {!metricExtendedCharset && ( + {metricName} + )} {matchLabels.length > 0 && ( <> {"{"} diff --git a/web/ui/mantine-ui/src/promql/serialize.ts b/web/ui/mantine-ui/src/promql/serialize.ts index af9c6ef15..1d2c63f4f 100644 --- a/web/ui/mantine-ui/src/promql/serialize.ts +++ b/web/ui/mantine-ui/src/promql/serialize.ts @@ -11,8 +11,14 @@ import { aggregatorsWithParam, maybeParenthesizeBinopChild, escapeString, + metricContainsExtendedCharset, + maybeQuoteLabelName, } from "./utils"; +const labelNameList = (labels: string[]): string => { + return labels.map((ln) => maybeQuoteLabelName(ln)).join(", "); +}; + const serializeAtAndOffset = ( timestamp: number | null, startOrEnd: StartOrEnd, @@ -28,15 +34,23 @@ const serializeAtAndOffset = ( const serializeSelector = (node: VectorSelector | MatrixSelector): string => { const matchers = node.matchers - .filter( - (m) => - !( - m.name === "__name__" && - m.type === matchType.equal && - m.value === node.name - ) - ) - .map((m) => `${m.name}${m.type}"${escapeString(m.value)}"`); + .filter((m) => !(m.name === "__name__" && m.type === matchType.equal)) + .map( + (m) => `${maybeQuoteLabelName(m.name)}${m.type}"${escapeString(m.value)}"` + ); + + // If the metric name contains the new extended charset, we need to escape it + // and add it at the beginning of the matchers list 
in the curly braces. + const metricName = + node.name || + node.matchers.find( + (m) => m.name === "__name__" && m.type === matchType.equal + )?.value || + ""; + const metricExtendedCharset = metricContainsExtendedCharset(metricName); + if (metricExtendedCharset) { + matchers.unshift(`"${escapeString(metricName)}"`); + } const range = node.type === nodeType.matrixSelector @@ -48,7 +62,7 @@ const serializeSelector = (node: VectorSelector | MatrixSelector): string => { node.offset ); - return `${node.name}${matchers.length > 0 ? `{${matchers.join(",")}}` : ""}${range}${atAndOffset}`; + return `${!metricExtendedCharset ? metricName : ""}${matchers.length > 0 ? `{${matchers.join(",")}}` : ""}${range}${atAndOffset}`; }; const serializeNode = ( @@ -68,9 +82,9 @@ const serializeNode = ( case nodeType.aggregation: return `${initialInd}${node.op}${ node.without - ? ` without(${node.grouping.join(", ")}) ` + ? ` without(${labelNameList(node.grouping)}) ` : node.grouping.length > 0 - ? ` by(${node.grouping.join(", ")}) ` + ? ` by(${labelNameList(node.grouping)}) ` : "" }(${childListSeparator}${ aggregatorsWithParam.includes(node.op) && node.param !== null @@ -119,16 +133,16 @@ const serializeNode = ( const vm = node.matching; if (vm !== null && (vm.labels.length > 0 || vm.on)) { if (vm.on) { - matching = ` on(${vm.labels.join(", ")})`; + matching = ` on(${labelNameList(vm.labels)})`; } else { - matching = ` ignoring(${vm.labels.join(", ")})`; + matching = ` ignoring(${labelNameList(vm.labels)})`; } if ( vm.card === vectorMatchCardinality.manyToOne || vm.card === vectorMatchCardinality.oneToMany ) { - grouping = ` group_${vm.card === vectorMatchCardinality.manyToOne ? "left" : "right"}(${vm.include.join(",")})`; + grouping = ` group_${vm.card === vectorMatchCardinality.manyToOne ? 
"left" : "right"}(${labelNameList(vm.include)})`; } } diff --git a/web/ui/mantine-ui/src/promql/serializeAndFormat.test.ts b/web/ui/mantine-ui/src/promql/serializeAndFormat.test.ts index ea045612c..a2b97ec90 100644 --- a/web/ui/mantine-ui/src/promql/serializeAndFormat.test.ts +++ b/web/ui/mantine-ui/src/promql/serializeAndFormat.test.ts @@ -99,7 +99,7 @@ describe("serializeNode and formatNode", () => { timestamp: null, startOrEnd: null, }, - output: '{__name__="metric_name"} offset 1m', + output: "metric_name offset 1m", }, { // Escaping in label values. @@ -642,6 +642,113 @@ describe("serializeNode and formatNode", () => { == bool on(label1, label2) group_right(label3) …`, }, + // Test new Prometheus 3.0 UTF-8 support. + { + node: { + bool: false, + lhs: { + bool: false, + lhs: { + expr: { + matchers: [ + { + name: "__name__", + type: matchType.equal, + value: "metric_ä", + }, + { + name: "foo", + type: matchType.equal, + value: "bar", + }, + ], + name: "", + offset: 0, + startOrEnd: null, + timestamp: null, + type: nodeType.vectorSelector, + }, + grouping: ["a", "ä"], + op: aggregationType.sum, + param: null, + type: nodeType.aggregation, + without: false, + }, + matching: { + card: vectorMatchCardinality.manyToOne, + include: ["c", "ü"], + labels: ["b", "ö"], + on: true, + }, + op: binaryOperatorType.div, + rhs: { + expr: { + matchers: [ + { + name: "__name__", + type: matchType.equal, + value: "metric_ö", + }, + { + name: "bar", + type: matchType.equal, + value: "foo", + }, + ], + name: "", + offset: 0, + startOrEnd: null, + timestamp: null, + type: nodeType.vectorSelector, + }, + grouping: ["d", "ä"], + op: aggregationType.sum, + param: null, + type: nodeType.aggregation, + without: true, + }, + type: nodeType.binaryExpr, + }, + matching: { + card: vectorMatchCardinality.oneToOne, + include: [], + labels: ["e", "ö"], + on: false, + }, + op: binaryOperatorType.add, + rhs: { + expr: { + matchers: [ + { + name: "__name__", + type: matchType.equal, + value: 
"metric_ü", + }, + ], + name: "", + offset: 0, + startOrEnd: null, + timestamp: null, + type: nodeType.vectorSelector, + }, + type: nodeType.parenExpr, + }, + type: nodeType.binaryExpr, + }, + output: + 'sum by(a, "ä") ({"metric_ä",foo="bar"}) / on(b, "ö") group_left(c, "ü") sum without(d, "ä") ({"metric_ö",bar="foo"}) + ignoring(e, "ö") ({"metric_ü"})', + prettyOutput: ` sum by(a, "ä") ( + {"metric_ä",foo="bar"} + ) + / on(b, "ö") group_left(c, "ü") + sum without(d, "ä") ( + {"metric_ö",bar="foo"} + ) ++ ignoring(e, "ö") + ( + {"metric_ü"} + )`, + }, ]; tests.forEach((t) => { diff --git a/web/ui/mantine-ui/src/promql/utils.ts b/web/ui/mantine-ui/src/promql/utils.ts index 5477ee596..2f1cc11d2 100644 --- a/web/ui/mantine-ui/src/promql/utils.ts +++ b/web/ui/mantine-ui/src/promql/utils.ts @@ -267,6 +267,21 @@ export const humanizedValueType: Record = { [valueType.matrix]: "range vector", }; +const metricNameRe = /^[a-zA-Z_:][a-zA-Z0-9_:]*$/; +const labelNameCharsetRe = /^[a-zA-Z_][a-zA-Z0-9_]*$/; + +export const metricContainsExtendedCharset = (str: string) => { + return !metricNameRe.test(str); +}; + +export const labelNameContainsExtendedCharset = (str: string) => { + return !labelNameCharsetRe.test(str); +}; + export const escapeString = (str: string) => { return str.replace(/([\\"])/g, "\\$1"); }; + +export const maybeQuoteLabelName = (str: string) => { + return labelNameContainsExtendedCharset(str) ? 
`"${escapeString(str)}"` : str; +}; From 2e7c739d4475102e894b1ac538dc4e205573158c Mon Sep 17 00:00:00 2001 From: Juraj Michalek Date: Fri, 25 Oct 2024 16:46:00 +0200 Subject: [PATCH 326/339] chore: add tcp events to remote store span Signed-off-by: Juraj Michalek --- storage/remote/client.go | 33 ++++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/storage/remote/client.go b/storage/remote/client.go index 62218cfba..54c8b34fc 100644 --- a/storage/remote/client.go +++ b/storage/remote/client.go @@ -18,8 +18,10 @@ import ( "context" "errors" "fmt" + "go.opentelemetry.io/otel/attribute" "io" "net/http" + "net/http/httptrace" "strconv" "strings" "time" @@ -279,7 +281,36 @@ func (c *Client) Store(ctx context.Context, req []byte, attempt int) (WriteRespo ctx, span := otel.Tracer("").Start(ctx, "Remote Store", trace.WithSpanKind(trace.SpanKindClient)) defer span.End() - httpResp, err := c.Client.Do(httpReq.WithContext(ctx)) + httpReqTrace := &httptrace.ClientTrace{ + GetConn: func(hostPort string) { + span.AddEvent("GetConn", trace.WithAttributes(attribute.String("host", hostPort))) + }, + GotConn: func(info httptrace.GotConnInfo) { + span.AddEvent("GotConn", trace.WithAttributes( + attribute.Bool("reused", info.Reused), + attribute.Bool("wasIdle", info.WasIdle), + //attribute.Duration("idleTime", info.IdleTime), + )) + }, + DNSStart: func(info httptrace.DNSStartInfo) { + span.AddEvent("DNSStart", trace.WithAttributes(attribute.String("host", info.Host))) + }, + DNSDone: func(info httptrace.DNSDoneInfo) { + span.AddEvent("DNSDone", trace.WithAttributes(attribute.Bool("coalesced", info.Coalesced))) + }, + ConnectStart: func(network, addr string) { + span.AddEvent("ConnectStart", trace.WithAttributes(attribute.String("network", network), attribute.String("addr", addr))) + }, + ConnectDone: func(network, addr string, err error) { + attrs := []attribute.KeyValue{attribute.String("network", network), attribute.String("addr", addr)} + 
if err != nil { + attrs = append(attrs, attribute.String("error", err.Error())) + } + span.AddEvent("ConnectDone", trace.WithAttributes(attrs...)) + }, + } + + httpResp, err := c.Client.Do(httpReq.WithContext(httptrace.WithClientTrace(ctx, httpReqTrace))) if err != nil { // Errors from Client.Do are from (for example) network errors, so are // recoverable. From 3c1ffbb2fdb646ff7382d6debfe6771b5590066c Mon Sep 17 00:00:00 2001 From: Juraj Michalek Date: Fri, 25 Oct 2024 17:01:52 +0200 Subject: [PATCH 327/339] chore: added idleTimeMs and fixed linting issues Signed-off-by: Juraj Michalek --- storage/remote/client.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/storage/remote/client.go b/storage/remote/client.go index 54c8b34fc..ad898cd3f 100644 --- a/storage/remote/client.go +++ b/storage/remote/client.go @@ -18,7 +18,6 @@ import ( "context" "errors" "fmt" - "go.opentelemetry.io/otel/attribute" "io" "net/http" "net/http/httptrace" @@ -26,6 +25,8 @@ import ( "strings" "time" + "go.opentelemetry.io/otel/attribute" + "github.com/gogo/protobuf/proto" "github.com/golang/snappy" "github.com/prometheus/client_golang/prometheus" @@ -289,7 +290,7 @@ func (c *Client) Store(ctx context.Context, req []byte, attempt int) (WriteRespo span.AddEvent("GotConn", trace.WithAttributes( attribute.Bool("reused", info.Reused), attribute.Bool("wasIdle", info.WasIdle), - //attribute.Duration("idleTime", info.IdleTime), + attribute.Float64("idleTimeMs", float64(info.IdleTime.Milliseconds())), )) }, DNSStart: func(info httptrace.DNSStartInfo) { From 7ecdb55b02345dc7c52f078840c75f9ec65b7c14 Mon Sep 17 00:00:00 2001 From: Juraj Michalek Date: Mon, 28 Oct 2024 16:02:09 +0100 Subject: [PATCH 328/339] chore: use otelhttptrace instead Signed-off-by: Juraj Michalek --- go.mod | 9 +++++---- go.sum | 18 ++++++++++-------- storage/remote/client.go | 41 +++++++--------------------------------- 3 files changed, 22 insertions(+), 46 deletions(-) diff --git a/go.mod b/go.mod index 
6d33d2ed2..3399ffb00 100644 --- a/go.mod +++ b/go.mod @@ -62,13 +62,14 @@ require ( github.com/vultr/govultr/v2 v2.17.2 go.opentelemetry.io/collector/pdata v1.16.0 go.opentelemetry.io/collector/semconv v0.110.0 - go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.55.0 - go.opentelemetry.io/otel v1.30.0 + go.opentelemetry.io/contrib/instrumentation/net/http/httptrace/otelhttptrace v0.56.0 + go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.56.0 + go.opentelemetry.io/otel v1.31.0 go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.30.0 go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.30.0 go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.30.0 go.opentelemetry.io/otel/sdk v1.30.0 - go.opentelemetry.io/otel/trace v1.30.0 + go.opentelemetry.io/otel/trace v1.31.0 go.uber.org/atomic v1.11.0 go.uber.org/automaxprocs v1.6.0 go.uber.org/goleak v1.3.0 @@ -184,7 +185,7 @@ require ( github.com/xhit/go-str2duration/v2 v2.1.0 // indirect go.mongodb.org/mongo-driver v1.14.0 // indirect go.opencensus.io v0.24.0 // indirect - go.opentelemetry.io/otel/metric v1.30.0 // indirect + go.opentelemetry.io/otel/metric v1.31.0 // indirect go.opentelemetry.io/proto/otlp v1.3.1 // indirect golang.org/x/crypto v0.28.0 // indirect golang.org/x/exp v0.0.0-20240119083558-1b970713d09a // indirect diff --git a/go.sum b/go.sum index 3d415cf34..1dce748ba 100644 --- a/go.sum +++ b/go.sum @@ -591,22 +591,24 @@ go.opentelemetry.io/collector/pdata v1.16.0 h1:g02K8jlRnmQ7TQDuXpdgVL6vIxIVqr5Gb go.opentelemetry.io/collector/pdata v1.16.0/go.mod h1:YZZJIt2ehxosYf/Y1pbvexjNWsIGNNrzzlCTO9jC1F4= go.opentelemetry.io/collector/semconv v0.110.0 h1:KHQnOHe3gUz0zsxe8ph9kN5OTypCFD4V+06AiBTfeNk= go.opentelemetry.io/collector/semconv v0.110.0/go.mod h1:zCJ5njhWpejR+A40kiEoeFm1xq1uzyZwMnRNX6/D82A= -go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.55.0 h1:ZIg3ZT/aQ7AfKqdwp7ECpOK6vHqquXXuyTjIO8ZdmPs= 
-go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.55.0/go.mod h1:DQAwmETtZV00skUwgD6+0U89g80NKsJE3DCKeLLPQMI= -go.opentelemetry.io/otel v1.30.0 h1:F2t8sK4qf1fAmY9ua4ohFS/K+FUuOPemHUIXHtktrts= -go.opentelemetry.io/otel v1.30.0/go.mod h1:tFw4Br9b7fOS+uEao81PJjVMjW/5fvNCbpsDIXqP0pc= +go.opentelemetry.io/contrib/instrumentation/net/http/httptrace/otelhttptrace v0.56.0 h1:4BZHA+B1wXEQoGNHxW8mURaLhcdGwvRnmhGbm+odRbc= +go.opentelemetry.io/contrib/instrumentation/net/http/httptrace/otelhttptrace v0.56.0/go.mod h1:3qi2EEwMgB4xnKgPLqsDP3j9qxnHDZeHsnAxfjQqTko= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.56.0 h1:UP6IpuHFkUgOQL9FFQFrZ+5LiwhhYRbi7VZSIx6Nj5s= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.56.0/go.mod h1:qxuZLtbq5QDtdeSHsS7bcf6EH6uO6jUAgk764zd3rhM= +go.opentelemetry.io/otel v1.31.0 h1:NsJcKPIW0D0H3NgzPDHmo0WW6SptzPdqg/L1zsIm2hY= +go.opentelemetry.io/otel v1.31.0/go.mod h1:O0C14Yl9FgkjqcCZAsE053C13OaddMYr/hz6clDkEJE= go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.30.0 h1:lsInsfvhVIfOI6qHVyysXMNDnjO9Npvl7tlDPJFBVd4= go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.30.0/go.mod h1:KQsVNh4OjgjTG0G6EiNi1jVpnaeeKsKMRwbLN+f1+8M= go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.30.0 h1:m0yTiGDLUvVYaTFbAvCkVYIYcvwKt3G7OLoN77NUs/8= go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.30.0/go.mod h1:wBQbT4UekBfegL2nx0Xk1vBcnzyBPsIVm9hRG4fYcr4= go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.30.0 h1:umZgi92IyxfXd/l4kaDhnKgY8rnN/cZcF1LKc6I8OQ8= go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.30.0/go.mod h1:4lVs6obhSVRb1EW5FhOuBTyiQhtRtAnnva9vD3yRfq8= -go.opentelemetry.io/otel/metric v1.30.0 h1:4xNulvn9gjzo4hjg+wzIKG7iNFEaBMX00Qd4QIZs7+w= -go.opentelemetry.io/otel/metric v1.30.0/go.mod h1:aXTfST94tswhWEb+5QjlSqG+cZlmyXy/u8jFpor3WqQ= +go.opentelemetry.io/otel/metric v1.31.0 h1:FSErL0ATQAmYHUIzSezZibnyVlft1ybhy4ozRPcF2fE= 
+go.opentelemetry.io/otel/metric v1.31.0/go.mod h1:C3dEloVbLuYoX41KpmAhOqNriGbA+qqH6PQ5E5mUfnY= go.opentelemetry.io/otel/sdk v1.30.0 h1:cHdik6irO49R5IysVhdn8oaiR9m8XluDaJAs4DfOrYE= go.opentelemetry.io/otel/sdk v1.30.0/go.mod h1:p14X4Ok8S+sygzblytT1nqG98QG2KYKv++HE0LY/mhg= -go.opentelemetry.io/otel/trace v1.30.0 h1:7UBkkYzeg3C7kQX8VAidWh2biiQbtAKjyIML8dQ9wmc= -go.opentelemetry.io/otel/trace v1.30.0/go.mod h1:5EyKqTzzmyqB9bwtCCq6pDLktPK6fmGf/Dph+8VI02o= +go.opentelemetry.io/otel/trace v1.31.0 h1:ffjsj1aRouKewfr85U2aGagJ46+MvodynlQ1HYdmJys= +go.opentelemetry.io/otel/trace v1.31.0/go.mod h1:TXZkRk7SM2ZQLtR6eoAWQFIHPvzQ06FJAsO1tJg480A= go.opentelemetry.io/proto/otlp v1.3.1 h1:TrMUixzpM0yuc/znrFTP9MMRh8trP93mkCiDVeXrui0= go.opentelemetry.io/proto/otlp v1.3.1/go.mod h1:0X1WI4de4ZsLrrJNLAQbFeLCm3T7yBkR0XqQ7niQU+8= go.uber.org/atomic v1.11.0 h1:ZvwS0R+56ePWxUNi+Atn9dWONBPp/AUETXlHW0DxSjE= diff --git a/storage/remote/client.go b/storage/remote/client.go index ad898cd3f..d37b1505c 100644 --- a/storage/remote/client.go +++ b/storage/remote/client.go @@ -25,8 +25,6 @@ import ( "strings" "time" - "go.opentelemetry.io/otel/attribute" - "github.com/gogo/protobuf/proto" "github.com/golang/snappy" "github.com/prometheus/client_golang/prometheus" @@ -34,6 +32,7 @@ import ( "github.com/prometheus/common/model" "github.com/prometheus/common/sigv4" "github.com/prometheus/common/version" + "go.opentelemetry.io/contrib/instrumentation/net/http/httptrace/otelhttptrace" "go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp" "go.opentelemetry.io/otel" "go.opentelemetry.io/otel/trace" @@ -216,8 +215,11 @@ func NewWriteClient(name string, conf *ClientConfig) (WriteClient, error) { if conf.WriteProtoMsg != "" { writeProtoMsg = conf.WriteProtoMsg } - - httpClient.Transport = otelhttp.NewTransport(t) + httpClient.Transport = otelhttp.NewTransport( + t, + otelhttp.WithClientTrace(func(ctx context.Context) *httptrace.ClientTrace { + return otelhttptrace.NewClientTrace(ctx) + })) return 
&Client{ remoteName: name, urlString: conf.URL.String(), @@ -282,36 +284,7 @@ func (c *Client) Store(ctx context.Context, req []byte, attempt int) (WriteRespo ctx, span := otel.Tracer("").Start(ctx, "Remote Store", trace.WithSpanKind(trace.SpanKindClient)) defer span.End() - httpReqTrace := &httptrace.ClientTrace{ - GetConn: func(hostPort string) { - span.AddEvent("GetConn", trace.WithAttributes(attribute.String("host", hostPort))) - }, - GotConn: func(info httptrace.GotConnInfo) { - span.AddEvent("GotConn", trace.WithAttributes( - attribute.Bool("reused", info.Reused), - attribute.Bool("wasIdle", info.WasIdle), - attribute.Float64("idleTimeMs", float64(info.IdleTime.Milliseconds())), - )) - }, - DNSStart: func(info httptrace.DNSStartInfo) { - span.AddEvent("DNSStart", trace.WithAttributes(attribute.String("host", info.Host))) - }, - DNSDone: func(info httptrace.DNSDoneInfo) { - span.AddEvent("DNSDone", trace.WithAttributes(attribute.Bool("coalesced", info.Coalesced))) - }, - ConnectStart: func(network, addr string) { - span.AddEvent("ConnectStart", trace.WithAttributes(attribute.String("network", network), attribute.String("addr", addr))) - }, - ConnectDone: func(network, addr string, err error) { - attrs := []attribute.KeyValue{attribute.String("network", network), attribute.String("addr", addr)} - if err != nil { - attrs = append(attrs, attribute.String("error", err.Error())) - } - span.AddEvent("ConnectDone", trace.WithAttributes(attrs...)) - }, - } - - httpResp, err := c.Client.Do(httpReq.WithContext(httptrace.WithClientTrace(ctx, httpReqTrace))) + httpResp, err := c.Client.Do(httpReq.WithContext(ctx)) if err != nil { // Errors from Client.Do are from (for example) network errors, so are // recoverable. 
From 76ff12b32a9ec946c4c2241ea5fea148a2f952c2 Mon Sep 17 00:00:00 2001 From: Juraj Michalek Date: Wed, 30 Oct 2024 09:41:16 +0100 Subject: [PATCH 329/339] chore: only create span events Signed-off-by: Juraj Michalek --- storage/remote/client.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/storage/remote/client.go b/storage/remote/client.go index d37b1505c..23775122e 100644 --- a/storage/remote/client.go +++ b/storage/remote/client.go @@ -218,7 +218,7 @@ func NewWriteClient(name string, conf *ClientConfig) (WriteClient, error) { httpClient.Transport = otelhttp.NewTransport( t, otelhttp.WithClientTrace(func(ctx context.Context) *httptrace.ClientTrace { - return otelhttptrace.NewClientTrace(ctx) + return otelhttptrace.NewClientTrace(ctx, otelhttptrace.WithoutSubSpans()) })) return &Client{ remoteName: name, From 76ca7d08d9ae9cb23ea0a498f87370a26f16dc1e Mon Sep 17 00:00:00 2001 From: Julius Volz Date: Wed, 30 Oct 2024 16:43:10 +0100 Subject: [PATCH 330/339] Fixup: re-add erroneously removed lines Signed-off-by: Julius Volz --- web/ui/mantine-ui/src/pages/query/SeriesName.tsx | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/web/ui/mantine-ui/src/pages/query/SeriesName.tsx b/web/ui/mantine-ui/src/pages/query/SeriesName.tsx index 61bc62eee..d03b530f0 100644 --- a/web/ui/mantine-ui/src/pages/query/SeriesName.tsx +++ b/web/ui/mantine-ui/src/pages/query/SeriesName.tsx @@ -89,6 +89,10 @@ const SeriesName: FC = ({ labels, format }) => { ); }; + if (labels === null) { + return <>scalar; + } + if (format) { return renderFormatted(); } From 8588289c246304dd2a736154a6a0904707b02cd2 Mon Sep 17 00:00:00 2001 From: Arthur Silva Sens Date: Wed, 30 Oct 2024 18:07:51 -0300 Subject: [PATCH 331/339] otlp translator: Add test showing bugs Signed-off-by: Arthur Silva Sens --- .../prometheus/normalize_label_test.go | 45 +++++++++++++++++++ .../prometheus/normalize_name_test.go | 1 + 2 files changed, 46 insertions(+) create mode 100644 
storage/remote/otlptranslator/prometheus/normalize_label_test.go diff --git a/storage/remote/otlptranslator/prometheus/normalize_label_test.go b/storage/remote/otlptranslator/prometheus/normalize_label_test.go new file mode 100644 index 000000000..3ceb8760c --- /dev/null +++ b/storage/remote/otlptranslator/prometheus/normalize_label_test.go @@ -0,0 +1,45 @@ +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package prometheus + +import ( + "fmt" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestNormalizeLabel(t *testing.T) { + tests := []struct { + label string + expected string + }{ + {"", ""}, + {"label:with:colons", "label_with_colons"}, // Without UTF-8 support, colons are only allowed in metric names + {"LabelWithCapitalLetters", "LabelWithCapitalLetters"}, + {"label!with&special$chars)", "label_with_special_chars_"}, + {"label_with_foreign_characteres_字符", "label_with_foreign_characteres___"}, + {"label.with.dots", "label_with_dots"}, + {"123label", "key_123label"}, + {"_label_starting_with_underscore", "key_label_starting_with_underscore"}, + {"__label_starting_with_2underscores", "__label_starting_with_2underscores"}, + } + + for i, test := range tests { + t.Run(fmt.Sprintf("test_%d", i), func(t *testing.T) { + result := NormalizeLabel(test.label) + require.Equal(t, test.expected, result) + }) + } +} diff --git a/storage/remote/otlptranslator/prometheus/normalize_name_test.go 
b/storage/remote/otlptranslator/prometheus/normalize_name_test.go index 07b9b0a78..4e5520941 100644 --- a/storage/remote/otlptranslator/prometheus/normalize_name_test.go +++ b/storage/remote/otlptranslator/prometheus/normalize_name_test.go @@ -202,4 +202,5 @@ func TestBuildCompliantNameWithoutSuffixes(t *testing.T) { require.Equal(t, ":foo::bar", BuildCompliantName(createCounter(":foo::bar", ""), "", false)) require.Equal(t, "foo_bar", BuildCompliantName(createGauge("foo.bar", "1"), "", false)) require.Equal(t, "system_io", BuildCompliantName(createCounter("system.io", "foo/bar"), "", false)) + require.Equal(t, "metric_with___foreign_characteres", BuildCompliantName(createCounter("metric_with_字符_foreign_characteres", ""), "", false)) } From ea06f1a1d1ea4571d040e952b626da2e7fa74668 Mon Sep 17 00:00:00 2001 From: Arthur Silva Sens Date: Wed, 30 Oct 2024 19:29:31 -0300 Subject: [PATCH 332/339] bugfix: Fix otlp translation of foreign characters Signed-off-by: Arthur Silva Sens --- storage/remote/otlptranslator/prometheus/normalize_label.go | 2 +- storage/remote/otlptranslator/prometheus/normalize_name.go | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/storage/remote/otlptranslator/prometheus/normalize_label.go b/storage/remote/otlptranslator/prometheus/normalize_label.go index a112b9bbc..c22c76132 100644 --- a/storage/remote/otlptranslator/prometheus/normalize_label.go +++ b/storage/remote/otlptranslator/prometheus/normalize_label.go @@ -49,7 +49,7 @@ func NormalizeLabel(label string) string { // Return '_' for anything non-alphanumeric. 
func sanitizeRune(r rune) rune { - if unicode.IsLetter(r) || unicode.IsDigit(r) { + if unicode.IsLower(r) || unicode.IsUpper(r) || unicode.IsDigit(r) { return r } return '_' diff --git a/storage/remote/otlptranslator/prometheus/normalize_name.go b/storage/remote/otlptranslator/prometheus/normalize_name.go index 0f472b80a..36b647f51 100644 --- a/storage/remote/otlptranslator/prometheus/normalize_name.go +++ b/storage/remote/otlptranslator/prometheus/normalize_name.go @@ -237,11 +237,13 @@ func removeSuffix(tokens []string, suffix string) []string { // Clean up specified string so it's Prometheus compliant func CleanUpString(s string) string { - return strings.Join(strings.FieldsFunc(s, func(r rune) bool { return !unicode.IsLetter(r) && !unicode.IsDigit(r) }), "_") + return strings.Join(strings.FieldsFunc(s, func(r rune) bool { return !unicode.IsUpper(r) && !unicode.IsLower(r) && !unicode.IsDigit(r) }), "_") } func RemovePromForbiddenRunes(s string) string { - return strings.Join(strings.FieldsFunc(s, func(r rune) bool { return !unicode.IsLetter(r) && !unicode.IsDigit(r) && r != '_' && r != ':' }), "_") + return strings.Join(strings.FieldsFunc(s, func(r rune) bool { + return !unicode.IsUpper(r) && !unicode.IsLower(r) && !unicode.IsDigit(r) && r != '_' && r != ':' + }), "_") } // Retrieve the Prometheus "basic" unit corresponding to the specified "basic" unit From 5fdd382ec364d4bfa4e000960253c700f1d32390 Mon Sep 17 00:00:00 2001 From: Jan Fajerski Date: Thu, 31 Oct 2024 16:22:59 +0100 Subject: [PATCH 333/339] CHANGELOG: remove duplicate entry Signed-off-by: Jan Fajerski --- CHANGELOG.md | 1 - 1 file changed, 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f71b701ab..de97354f2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,6 @@ * [CHANGE] Scraping: normalize "le" and "quantile" label values upon ingestion. #15164 * [CHANGE] Scraping: config `scrape_classic_histograms` was renamed to `always_scrape_classic_histograms`. 
#15178 * [CHANGE] Config: remove expand-external-labels flag, expand external labels env vars by default. #14657 -* [CHANGE] Adopt log/slog and remove go-kit/log. #14906 * [CHANGE] Disallow configuring AM with the v1 api. #13883 * [ENHANCEMENT] Scraping, rules: handle targets reappearing, or rules moving group, when out-of-order is enabled. #14710 * [ENHANCEMENT] Tools: add debug printouts to promtool rules unit testing #15196 From adcc873d5119c1d1046869c8d9746768689c491d Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Sat, 2 Nov 2024 11:24:44 +0000 Subject: [PATCH 334/339] [BUILD] React-app: replace 0.55.0-rc.0 with 0.55.0 Try to stop build errors. Signed-off-by: Bryan Boreham --- web/ui/react-app/package-lock.json | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/web/ui/react-app/package-lock.json b/web/ui/react-app/package-lock.json index f8d1cfb3e..d3de03e5e 100644 --- a/web/ui/react-app/package-lock.json +++ b/web/ui/react-app/package-lock.json @@ -4341,12 +4341,11 @@ } }, "node_modules/@prometheus-io/codemirror-promql": { - "version": "0.55.0-rc.0", - "resolved": "https://registry.npmjs.org/@prometheus-io/codemirror-promql/-/codemirror-promql-0.55.0-rc.0.tgz", - "integrity": "sha512-BlDKH2eB8Sd9bQmQjvJvncvZ+VTtrtReSO6qWZXULyrXp+FEjONybOH3Ejq/0a2hat0GpZzcEfwKqPbdy4WdCQ==", - "license": "Apache-2.0", + "version": "0.55.0", + "resolved": "https://registry.npmjs.org/@prometheus-io/codemirror-promql/-/codemirror-promql-0.55.0.tgz", + "integrity": "sha512-W+aBBToIvxHbcDsQYJSpgaMtcLUCy3SMIK6jluaEgJrkpOfEJnItZu/rvCC/ehCz2c+h+6WkPJklH8WogsXyEg==", "dependencies": { - "@prometheus-io/lezer-promql": "0.55.0-rc.0", + "@prometheus-io/lezer-promql": "0.55.0", "lru-cache": "^7.18.3" }, "engines": { @@ -4362,10 +4361,9 @@ } }, "node_modules/@prometheus-io/lezer-promql": { - "version": "0.55.0-rc.0", - "resolved": "https://registry.npmjs.org/@prometheus-io/lezer-promql/-/lezer-promql-0.55.0-rc.0.tgz", - "integrity": 
"sha512-Ikaabw8gfu0HI2D2rKykLBWio+ytTEE03bdZDMpILYULoeGVPdKgbeGLLI9Kafyv48Qiis55o60EfDoywiRHqA==", - "license": "Apache-2.0", + "version": "0.55.0", + "resolved": "https://registry.npmjs.org/@prometheus-io/lezer-promql/-/lezer-promql-0.55.0.tgz", + "integrity": "sha512-DHg6l6pfDnE8eLsj4DyXhFDe7OsqSBw2EnSVG4biddzLsIF5gXKazIswYTGHJ26CGHHiDPcbXjhlm9dEWI2aJA==", "peerDependencies": { "@lezer/highlight": "^1.1.2", "@lezer/lr": "^1.2.3" From 034d2b24bcae90fce3ac337b4ddd399bd2ff4bc4 Mon Sep 17 00:00:00 2001 From: Arthur Silva Sens Date: Sat, 2 Nov 2024 11:38:51 -0300 Subject: [PATCH 335/339] Fix typos in tests (#15312) Signed-off-by: Arthur Silva Sens --- .../remote/otlptranslator/prometheus/normalize_label_test.go | 2 +- storage/remote/otlptranslator/prometheus/normalize_name_test.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/storage/remote/otlptranslator/prometheus/normalize_label_test.go b/storage/remote/otlptranslator/prometheus/normalize_label_test.go index 3ceb8760c..21d4d6a6d 100644 --- a/storage/remote/otlptranslator/prometheus/normalize_label_test.go +++ b/storage/remote/otlptranslator/prometheus/normalize_label_test.go @@ -29,7 +29,7 @@ func TestNormalizeLabel(t *testing.T) { {"label:with:colons", "label_with_colons"}, // Without UTF-8 support, colons are only allowed in metric names {"LabelWithCapitalLetters", "LabelWithCapitalLetters"}, {"label!with&special$chars)", "label_with_special_chars_"}, - {"label_with_foreign_characteres_字符", "label_with_foreign_characteres___"}, + {"label_with_foreign_characters_字符", "label_with_foreign_characters___"}, {"label.with.dots", "label_with_dots"}, {"123label", "key_123label"}, {"_label_starting_with_underscore", "key_label_starting_with_underscore"}, diff --git a/storage/remote/otlptranslator/prometheus/normalize_name_test.go b/storage/remote/otlptranslator/prometheus/normalize_name_test.go index 4e5520941..8c5dc7d1f 100644 --- a/storage/remote/otlptranslator/prometheus/normalize_name_test.go +++ 
b/storage/remote/otlptranslator/prometheus/normalize_name_test.go @@ -202,5 +202,5 @@ func TestBuildCompliantNameWithoutSuffixes(t *testing.T) { require.Equal(t, ":foo::bar", BuildCompliantName(createCounter(":foo::bar", ""), "", false)) require.Equal(t, "foo_bar", BuildCompliantName(createGauge("foo.bar", "1"), "", false)) require.Equal(t, "system_io", BuildCompliantName(createCounter("system.io", "foo/bar"), "", false)) - require.Equal(t, "metric_with___foreign_characteres", BuildCompliantName(createCounter("metric_with_字符_foreign_characteres", ""), "", false)) + require.Equal(t, "metric_with___foreign_characters", BuildCompliantName(createCounter("metric_with_字符_foreign_characters", ""), "", false)) } From 1a22b1d84671750d5e61d5d14631342cfa8aec0a Mon Sep 17 00:00:00 2001 From: Arthur Silva Sens Date: Sun, 3 Nov 2024 09:15:49 -0300 Subject: [PATCH 336/339] bugfix: Fix otlp translator switching colons to undescores (#15251) Signed-off-by: Arthur Silva Sens --- storage/remote/otlptranslator/prometheus/normalize_name.go | 2 +- .../remote/otlptranslator/prometheus/normalize_name_test.go | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/storage/remote/otlptranslator/prometheus/normalize_name.go b/storage/remote/otlptranslator/prometheus/normalize_name.go index 36b647f51..94fb4465f 100644 --- a/storage/remote/otlptranslator/prometheus/normalize_name.go +++ b/storage/remote/otlptranslator/prometheus/normalize_name.go @@ -114,7 +114,7 @@ func normalizeName(metric pmetric.Metric, namespace string) string { // Split metric name into "tokens" (remove all non-alphanumerics) nameTokens := strings.FieldsFunc( metric.Name(), - func(r rune) bool { return !unicode.IsLetter(r) && !unicode.IsDigit(r) }, + func(r rune) bool { return !unicode.IsLetter(r) && !unicode.IsDigit(r) && r != ':' }, ) // Split unit at the '/' if any diff --git a/storage/remote/otlptranslator/prometheus/normalize_name_test.go 
b/storage/remote/otlptranslator/prometheus/normalize_name_test.go index 8c5dc7d1f..3ea4b70a5 100644 --- a/storage/remote/otlptranslator/prometheus/normalize_name_test.go +++ b/storage/remote/otlptranslator/prometheus/normalize_name_test.go @@ -184,8 +184,8 @@ func TestBuildCompliantNameWithNormalize(t *testing.T) { require.Equal(t, "system_network_io_bytes_total", BuildCompliantName(createCounter("network.io", "By"), "system", true)) require.Equal(t, "_3_14_digits", BuildCompliantName(createGauge("3.14 digits", ""), "", true)) require.Equal(t, "envoy_rule_engine_zlib_buf_error", BuildCompliantName(createGauge("envoy__rule_engine_zlib_buf_error", ""), "", true)) - require.Equal(t, "foo_bar", BuildCompliantName(createGauge(":foo::bar", ""), "", true)) - require.Equal(t, "foo_bar_total", BuildCompliantName(createCounter(":foo::bar", ""), "", true)) + require.Equal(t, ":foo::bar", BuildCompliantName(createGauge(":foo::bar", ""), "", true)) + require.Equal(t, ":foo::bar_total", BuildCompliantName(createCounter(":foo::bar", ""), "", true)) // Gauges with unit 1 are considered ratios. require.Equal(t, "foo_bar_ratio", BuildCompliantName(createGauge("foo.bar", "1"), "", true)) // Slashes in units are converted. From 2fbbfc3da800d3b33c7f7b430e403f66b781b962 Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Sun, 3 Nov 2024 11:56:44 +0000 Subject: [PATCH 337/339] Revert "Fix `MemPostings.Add` and `MemPostings.Get` data race (#15141)" This reverts commit 50ef0dc954592666a13ff92ef20811f0127c3b49. Memory allocation goes so high in Prombench that the system is unusable. 
Signed-off-by: Bryan Boreham --- tsdb/head_bench_test.go | 8 +++--- tsdb/index/postings.go | 27 ++++++------------ tsdb/index/postings_test.go | 55 ------------------------------------- 3 files changed, 12 insertions(+), 78 deletions(-) diff --git a/tsdb/head_bench_test.go b/tsdb/head_bench_test.go index aa2cf2214..dc682602b 100644 --- a/tsdb/head_bench_test.go +++ b/tsdb/head_bench_test.go @@ -43,7 +43,7 @@ func BenchmarkHeadStripeSeriesCreate(b *testing.B) { defer h.Close() for i := 0; i < b.N; i++ { - h.getOrCreate(uint64(i), labels.FromStrings(labels.MetricName, "test", "a", strconv.Itoa(i), "b", strconv.Itoa(i%10), "c", strconv.Itoa(i%100), "d", strconv.Itoa(i/2), "e", strconv.Itoa(i/4))) + h.getOrCreate(uint64(i), labels.FromStrings("a", strconv.Itoa(i))) } } @@ -61,8 +61,8 @@ func BenchmarkHeadStripeSeriesCreateParallel(b *testing.B) { b.RunParallel(func(pb *testing.PB) { for pb.Next() { - i := int(count.Inc()) - h.getOrCreate(uint64(i), labels.FromStrings(labels.MetricName, "test", "a", strconv.Itoa(i), "b", strconv.Itoa(i%10), "c", strconv.Itoa(i%100), "d", strconv.Itoa(i/2), "e", strconv.Itoa(i/4))) + i := count.Inc() + h.getOrCreate(uint64(i), labels.FromStrings("a", strconv.Itoa(int(i)))) } }) } @@ -82,7 +82,7 @@ func BenchmarkHeadStripeSeriesCreate_PreCreationFailure(b *testing.B) { defer h.Close() for i := 0; i < b.N; i++ { - h.getOrCreate(uint64(i), labels.FromStrings(labels.MetricName, "test", "a", strconv.Itoa(i), "b", strconv.Itoa(i%10), "c", strconv.Itoa(i%100), "d", strconv.Itoa(i/2), "e", strconv.Itoa(i/4))) + h.getOrCreate(uint64(i), labels.FromStrings("a", strconv.Itoa(i))) } } diff --git a/tsdb/index/postings.go b/tsdb/index/postings.go index 58f3473da..5ed41f769 100644 --- a/tsdb/index/postings.go +++ b/tsdb/index/postings.go @@ -345,14 +345,13 @@ func (p *MemPostings) Add(id storage.SeriesRef, lset labels.Labels) { p.mtx.Unlock() } -func appendWithExponentialGrowth[T any](a []T, v T) (_ []T, copied bool) { +func 
appendWithExponentialGrowth[T any](a []T, v T) []T { if cap(a) < len(a)+1 { newList := make([]T, len(a), len(a)*2+1) copy(newList, a) a = newList - copied = true } - return append(a, v), copied + return append(a, v) } func (p *MemPostings) addFor(id storage.SeriesRef, l labels.Label) { @@ -361,26 +360,16 @@ func (p *MemPostings) addFor(id storage.SeriesRef, l labels.Label) { nm = map[string][]storage.SeriesRef{} p.m[l.Name] = nm } - list, copied := appendWithExponentialGrowth(nm[l.Value], id) + list := appendWithExponentialGrowth(nm[l.Value], id) nm[l.Value] = list - // Return if it shouldn't be ordered, if it only has one element or if it's already ordered. - // The invariant is that the first n-1 items in the list are already sorted. - if !p.ordered || len(list) == 1 || list[len(list)-1] >= list[len(list)-2] { + if !p.ordered { return } - - if !copied { - // We have appended to the existing slice, - // and readers may already have a copy of this postings slice, - // so we need to copy it before sorting. - old := list - list = make([]storage.SeriesRef, len(old), cap(old)) - copy(list, old) - nm[l.Value] = list - } - - // Repair order violations. + // There is no guarantee that no higher ID was inserted before as they may + // be generated independently before adding them to postings. + // We repair order violations on insert. The invariant is that the first n-1 + // items in the list are already sorted. for i := len(list) - 1; i >= 1; i-- { if list[i] >= list[i-1] { break diff --git a/tsdb/index/postings_test.go b/tsdb/index/postings_test.go index 7d0b717bf..96c9ed124 100644 --- a/tsdb/index/postings_test.go +++ b/tsdb/index/postings_test.go @@ -1475,58 +1475,3 @@ func TestMemPostings_PostingsForLabelMatchingHonorsContextCancel(t *testing.T) { require.Error(t, p.Err()) require.Equal(t, failAfter+1, ctx.Count()) // Plus one for the Err() call that puts the error in the result. 
} - -func TestMemPostings_Unordered_Add_Get(t *testing.T) { - mp := NewMemPostings() - for ref := storage.SeriesRef(1); ref < 8; ref += 2 { - // First, add next series. - next := ref + 1 - mp.Add(next, labels.FromStrings(labels.MetricName, "test", "series", strconv.Itoa(int(next)))) - nextPostings := mp.Get(labels.MetricName, "test") - - // Now add current ref. - mp.Add(ref, labels.FromStrings(labels.MetricName, "test", "series", strconv.Itoa(int(ref)))) - - // Next postings should still reference the next series. - nextExpanded, err := ExpandPostings(nextPostings) - require.NoError(t, err) - require.Len(t, nextExpanded, int(ref)) - require.Equal(t, next, nextExpanded[len(nextExpanded)-1]) - } -} - -func TestMemPostings_Concurrent_Add_Get(t *testing.T) { - refs := make(chan storage.SeriesRef) - wg := sync.WaitGroup{} - wg.Add(1) - t.Cleanup(wg.Wait) - t.Cleanup(func() { close(refs) }) - - mp := NewMemPostings() - go func() { - defer wg.Done() - for ref := range refs { - mp.Add(ref, labels.FromStrings(labels.MetricName, "test", "series", strconv.Itoa(int(ref)))) - p := mp.Get(labels.MetricName, "test") - - _, err := ExpandPostings(p) - if err != nil { - t.Errorf("unexpected error: %s", err) - } - } - }() - - for ref := storage.SeriesRef(1); ref < 8; ref += 2 { - // Add next ref in another goroutine so they would race. - refs <- ref + 1 - // Add current ref here - mp.Add(ref, labels.FromStrings(labels.MetricName, "test", "series", strconv.Itoa(int(ref)))) - - // We don't read the value of the postings here, - // this is tested in TestMemPostings_Unordered_Add_Get where it's easier to achieve the determinism. - // This test just checks that there's no data race. 
- p := mp.Get(labels.MetricName, "test") - _, err := ExpandPostings(p) - require.NoError(t, err) - } -} From 7c4f8778815f56f7beb28fa2a15be0c33171c860 Mon Sep 17 00:00:00 2001 From: Arve Knudsen Date: Sun, 3 Nov 2024 13:56:54 +0100 Subject: [PATCH 338/339] otlptranslator: Harmonize non-UTF8 sanitization w/ naming rules. (#15314) * otlptranslator: Harmonize non-UTF8 sanitization w/ naming rules. Harmonize non-UTF8 sanitization w/ Prometheus naming rules. --------- Signed-off-by: Arve Knudsen --- CHANGELOG.md | 1 + .../prometheus/normalize_label.go | 14 +--- .../prometheus/normalize_name.go | 83 +++++++++++-------- .../prometheus/normalize_name_test.go | 17 ++-- .../prometheusremotewrite/helper_test.go | 3 +- 5 files changed, 63 insertions(+), 55 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index de97354f2..cdfed5ba5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,7 @@ * [ENHANCEMENT] Scraping: support Created-Timestamp feature on native histograms. #14694 * [BUGFIX] PromQL: Fix stddev+stdvar aggregations to always ignore native histograms. #14941 * [BUGFIX] PromQL: Fix stddev+stdvar aggregations to treat Infinity consistently. #14941 +* [BUGFIX] OTLP receiver: Preserve colons when generating metric names in suffix adding mode (this mode is always enabled, unless one uses Prometheus as a library). #15251 ## 3.0.0-beta.1 / 2024-10-09 diff --git a/storage/remote/otlptranslator/prometheus/normalize_label.go b/storage/remote/otlptranslator/prometheus/normalize_label.go index c22c76132..d5de2c765 100644 --- a/storage/remote/otlptranslator/prometheus/normalize_label.go +++ b/storage/remote/otlptranslator/prometheus/normalize_label.go @@ -19,6 +19,8 @@ package prometheus import ( "strings" "unicode" + + "github.com/prometheus/prometheus/util/strutil" ) // Normalizes the specified label to follow Prometheus label names standard. @@ -26,7 +28,6 @@ import ( // See rules at https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels. 
// // Labels that start with non-letter rune will be prefixed with "key_". -// // An exception is made for double-underscores which are allowed. func NormalizeLabel(label string) string { // Trivial case @@ -34,8 +35,7 @@ func NormalizeLabel(label string) string { return label } - // Replace all non-alphanumeric runes with underscores - label = strings.Map(sanitizeRune, label) + label = strutil.SanitizeLabelName(label) // If label starts with a number, prepend with "key_" if unicode.IsDigit(rune(label[0])) { @@ -46,11 +46,3 @@ func NormalizeLabel(label string) string { return label } - -// Return '_' for anything non-alphanumeric. -func sanitizeRune(r rune) rune { - if unicode.IsLower(r) || unicode.IsUpper(r) || unicode.IsDigit(r) { - return r - } - return '_' -} diff --git a/storage/remote/otlptranslator/prometheus/normalize_name.go b/storage/remote/otlptranslator/prometheus/normalize_name.go index 94fb4465f..0119b64df 100644 --- a/storage/remote/otlptranslator/prometheus/normalize_name.go +++ b/storage/remote/otlptranslator/prometheus/normalize_name.go @@ -17,9 +17,12 @@ package prometheus import ( + "regexp" + "slices" "strings" "unicode" + "github.com/prometheus/prometheus/util/strutil" "go.opentelemetry.io/collector/pdata/pmetric" ) @@ -84,24 +87,27 @@ var perUnitMap = map[string]string{ // // See rules at https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels, // https://prometheus.io/docs/practices/naming/#metric-and-label-naming -// and https://github.com/open-telemetry/opentelemetry-specification/blob/v1.33.0/specification/compatibility/prometheus_and_openmetrics.md#otlp-metric-points-to-prometheus. +// and https://github.com/open-telemetry/opentelemetry-specification/blob/v1.38.0/specification/compatibility/prometheus_and_openmetrics.md#otlp-metric-points-to-prometheus. 
func BuildCompliantName(metric pmetric.Metric, namespace string, addMetricSuffixes bool) string { - var metricName string - // Full normalization following standard Prometheus naming conventions if addMetricSuffixes { return normalizeName(metric, namespace) } - // Simple case (no full normalization, no units, etc.), we simply trim out forbidden chars - metricName = RemovePromForbiddenRunes(metric.Name()) + // Regexp for metric name characters that should be replaced with _. + invalidMetricCharRE := regexp.MustCompile(`[^a-zA-Z0-9:_]`) + + // Simple case (no full normalization, no units, etc.). + metricName := strings.Join(strings.FieldsFunc(metric.Name(), func(r rune) bool { + return invalidMetricCharRE.MatchString(string(r)) + }), "_") // Namespace? if namespace != "" { return namespace + "_" + metricName } - // Metric name starts with a digit? Prefix it with an underscore + // Metric name starts with a digit? Prefix it with an underscore. if metricName != "" && unicode.IsDigit(rune(metricName[0])) { metricName = "_" + metricName } @@ -109,12 +115,17 @@ func BuildCompliantName(metric pmetric.Metric, namespace string, addMetricSuffix return metricName } -// Build a normalized name for the specified metric +// Build a normalized name for the specified metric. func normalizeName(metric pmetric.Metric, namespace string) string { - // Split metric name into "tokens" (remove all non-alphanumerics) + // Regexp for characters that can't be in a metric name token. + nonTokenMetricCharRE := regexp.MustCompile(`[^a-zA-Z0-9:]`) + + // Split metric name into "tokens" (of supported metric name runes). + // Note that this has the side effect of replacing multiple consecutive underscores with a single underscore. + // This is part of the OTel to Prometheus specification: https://github.com/open-telemetry/opentelemetry-specification/blob/v1.38.0/specification/compatibility/prometheus_and_openmetrics.md#otlp-metric-points-to-prometheus. 
nameTokens := strings.FieldsFunc( metric.Name(), - func(r rune) bool { return !unicode.IsLetter(r) && !unicode.IsDigit(r) && r != ':' }, + func(r rune) bool { return nonTokenMetricCharRE.MatchString(string(r)) }, ) // Split unit at the '/' if any @@ -123,11 +134,12 @@ func normalizeName(metric pmetric.Metric, namespace string) string { // Main unit // Append if not blank, doesn't contain '{}', and is not present in metric name already if len(unitTokens) > 0 { + var mainUnitProm, perUnitProm string mainUnitOTel := strings.TrimSpace(unitTokens[0]) if mainUnitOTel != "" && !strings.ContainsAny(mainUnitOTel, "{}") { - mainUnitProm := CleanUpString(unitMapGetOrDefault(mainUnitOTel)) - if mainUnitProm != "" && !contains(nameTokens, mainUnitProm) { - nameTokens = append(nameTokens, mainUnitProm) + mainUnitProm = cleanUpUnit(unitMapGetOrDefault(mainUnitOTel)) + if slices.Contains(nameTokens, mainUnitProm) { + mainUnitProm = "" } } @@ -136,13 +148,26 @@ func normalizeName(metric pmetric.Metric, namespace string) string { if len(unitTokens) > 1 && unitTokens[1] != "" { perUnitOTel := strings.TrimSpace(unitTokens[1]) if perUnitOTel != "" && !strings.ContainsAny(perUnitOTel, "{}") { - perUnitProm := CleanUpString(perUnitMapGetOrDefault(perUnitOTel)) - if perUnitProm != "" && !contains(nameTokens, perUnitProm) { - nameTokens = append(nameTokens, "per", perUnitProm) + perUnitProm = cleanUpUnit(perUnitMapGetOrDefault(perUnitOTel)) + } + if perUnitProm != "" { + perUnitProm = "per_" + perUnitProm + if slices.Contains(nameTokens, perUnitProm) { + perUnitProm = "" } } } + if perUnitProm != "" { + mainUnitProm = strings.TrimSuffix(mainUnitProm, "_") + } + + if mainUnitProm != "" { + nameTokens = append(nameTokens, mainUnitProm) + } + if perUnitProm != "" { + nameTokens = append(nameTokens, perUnitProm) + } } // Append _total for Counters @@ -235,15 +260,15 @@ func removeSuffix(tokens []string, suffix string) []string { return tokens } -// Clean up specified string so it's Prometheus 
compliant -func CleanUpString(s string) string { - return strings.Join(strings.FieldsFunc(s, func(r rune) bool { return !unicode.IsUpper(r) && !unicode.IsLower(r) && !unicode.IsDigit(r) }), "_") -} - -func RemovePromForbiddenRunes(s string) string { - return strings.Join(strings.FieldsFunc(s, func(r rune) bool { - return !unicode.IsUpper(r) && !unicode.IsLower(r) && !unicode.IsDigit(r) && r != '_' && r != ':' - }), "_") +// cleanUpUnit cleans up unit so it matches model.LabelNameRE. +func cleanUpUnit(unit string) string { + // Multiple consecutive underscores are replaced with a single underscore. + // This is part of the OTel to Prometheus specification: https://github.com/open-telemetry/opentelemetry-specification/blob/v1.38.0/specification/compatibility/prometheus_and_openmetrics.md#otlp-metric-points-to-prometheus. + multipleUnderscoresRE := regexp.MustCompile(`__+`) + return strings.TrimPrefix(multipleUnderscoresRE.ReplaceAllString( + strutil.SanitizeLabelName(unit), + "_", + ), "_") } // Retrieve the Prometheus "basic" unit corresponding to the specified "basic" unit @@ -264,16 +289,6 @@ func perUnitMapGetOrDefault(perUnit string) string { return perUnit } -// Returns whether the slice contains the specified value -func contains(slice []string, value string) bool { - for _, sliceEntry := range slice { - if sliceEntry == value { - return true - } - } - return false -} - // Remove the specified value from the slice func removeItem(slice []string, value string) []string { newSlice := make([]string, 0, len(slice)) diff --git a/storage/remote/otlptranslator/prometheus/normalize_name_test.go b/storage/remote/otlptranslator/prometheus/normalize_name_test.go index 3ea4b70a5..2d5648e84 100644 --- a/storage/remote/otlptranslator/prometheus/normalize_name_test.go +++ b/storage/remote/otlptranslator/prometheus/normalize_name_test.go @@ -148,13 +148,13 @@ func TestNamespace(t *testing.T) { require.Equal(t, "space_test", normalizeName(createGauge("#test", ""), "space")) } 
-func TestCleanUpString(t *testing.T) { - require.Equal(t, "", CleanUpString("")) - require.Equal(t, "a_b", CleanUpString("a b")) - require.Equal(t, "hello_world", CleanUpString("hello, world!")) - require.Equal(t, "hello_you_2", CleanUpString("hello you 2")) - require.Equal(t, "1000", CleanUpString("$1000")) - require.Equal(t, "", CleanUpString("*+$^=)")) +func TestCleanUpUnit(t *testing.T) { + require.Equal(t, "", cleanUpUnit("")) + require.Equal(t, "a_b", cleanUpUnit("a b")) + require.Equal(t, "hello_world", cleanUpUnit("hello, world")) + require.Equal(t, "hello_you_2", cleanUpUnit("hello you 2")) + require.Equal(t, "1000", cleanUpUnit("$1000")) + require.Equal(t, "", cleanUpUnit("*+$^=)")) } func TestUnitMapGetOrDefault(t *testing.T) { @@ -179,7 +179,7 @@ func TestRemoveItem(t *testing.T) { require.Equal(t, []string{"b", "c"}, removeItem([]string{"a", "b", "c"}, "a")) } -func TestBuildCompliantNameWithNormalize(t *testing.T) { +func TestBuildCompliantNameWithSuffixes(t *testing.T) { require.Equal(t, "system_io_bytes_total", BuildCompliantName(createCounter("system.io", "By"), "", true)) require.Equal(t, "system_network_io_bytes_total", BuildCompliantName(createCounter("network.io", "By"), "system", true)) require.Equal(t, "_3_14_digits", BuildCompliantName(createGauge("3.14 digits", ""), "", true)) @@ -190,6 +190,7 @@ func TestBuildCompliantNameWithNormalize(t *testing.T) { require.Equal(t, "foo_bar_ratio", BuildCompliantName(createGauge("foo.bar", "1"), "", true)) // Slashes in units are converted. 
require.Equal(t, "system_io_foo_per_bar_total", BuildCompliantName(createCounter("system.io", "foo/bar"), "", true)) + require.Equal(t, "metric_with_foreign_characters_total", BuildCompliantName(createCounter("metric_with_字符_foreign_characters", ""), "", true)) } func TestBuildCompliantNameWithoutSuffixes(t *testing.T) { diff --git a/storage/remote/otlptranslator/prometheusremotewrite/helper_test.go b/storage/remote/otlptranslator/prometheusremotewrite/helper_test.go index 9a994c5a4..b22282097 100644 --- a/storage/remote/otlptranslator/prometheusremotewrite/helper_test.go +++ b/storage/remote/otlptranslator/prometheusremotewrite/helper_test.go @@ -48,7 +48,6 @@ func TestCreateAttributes(t *testing.T) { resource.Attributes().PutStr(k, v) } attrs := pcommon.NewMap() - attrs.PutStr("__name__", "test_metric") attrs.PutStr("metric-attr", "metric value") testCases := []struct { @@ -162,7 +161,7 @@ func TestCreateAttributes(t *testing.T) { settings := Settings{ PromoteResourceAttributes: tc.promoteResourceAttributes, } - lbls := createAttributes(resource, attrs, settings, nil, false) + lbls := createAttributes(resource, attrs, settings, nil, false, model.MetricNameLabel, "test_metric") assert.ElementsMatch(t, lbls, tc.expectedLabels) }) From 4b56af7eb824d8e7dc13994b662b5c5b39928629 Mon Sep 17 00:00:00 2001 From: Alban Hurtaud Date: Mon, 4 Nov 2024 08:26:26 +0100 Subject: [PATCH 339/339] Add hidden flag for the delayed compaction random time window (#14919) * Add hidden flag for the delayed compaction random time window Signed-off-by: Alban HURTAUD * Update cmd/prometheus/main.go Co-authored-by: Ayoub Mrini Signed-off-by: Alban Hurtaud * Update cmd/prometheus/main.go Co-authored-by: Ayoub Mrini Signed-off-by: Alban Hurtaud * Update tsdb/db.go Co-authored-by: Ayoub Mrini Signed-off-by: Alban Hurtaud * Fix flag name according to review - add test for delay Signed-off-by: Alban HURTAUD * Fix afer main rebase Signed-off-by: Alban HURTAUD * Implement review comments 
Signed-off-by: Alban HURTAUD * Update generatedelaytest to try with limit values Signed-off-by: Alban HURTAUD --------- Signed-off-by: Alban HURTAUD Signed-off-by: Alban Hurtaud Co-authored-by: Ayoub Mrini --- cmd/prometheus/main.go | 11 +++++++++++ tsdb/db.go | 9 +++++++-- tsdb/db_test.go | 45 +++++++++++++++++++++++++++++++----------- 3 files changed, 51 insertions(+), 14 deletions(-) diff --git a/cmd/prometheus/main.go b/cmd/prometheus/main.go index 045389770..8fb6d4d38 100644 --- a/cmd/prometheus/main.go +++ b/cmd/prometheus/main.go @@ -433,6 +433,9 @@ func main() { serverOnlyFlag(a, "storage.tsdb.samples-per-chunk", "Target number of samples per chunk."). Default("120").Hidden().IntVar(&cfg.tsdb.SamplesPerChunk) + serverOnlyFlag(a, "storage.tsdb.delayed-compaction.max-percent", "Sets the upper limit for the random compaction delay, specified as a percentage of the head chunk range. 100 means the compaction can be delayed by up to the entire head chunk range. Only effective when the delayed-compaction feature flag is enabled."). + Default("10").Hidden().IntVar(&cfg.tsdb.CompactionDelayMaxPercent) + agentOnlyFlag(a, "storage.agent.path", "Base path for metrics storage."). Default("data-agent/").StringVar(&cfg.agentStoragePath) @@ -663,6 +666,12 @@ func main() { cfg.tsdb.MaxBlockDuration = maxBlockDuration } + + // Delayed compaction checks + if cfg.tsdb.EnableDelayedCompaction && (cfg.tsdb.CompactionDelayMaxPercent > 100 || cfg.tsdb.CompactionDelayMaxPercent <= 0) { + logger.Warn("The --storage.tsdb.delayed-compaction.max-percent should have a value between 1 and 100. 
Using default", "default", tsdb.DefaultCompactionDelayMaxPercent) + cfg.tsdb.CompactionDelayMaxPercent = tsdb.DefaultCompactionDelayMaxPercent + } } noStepSubqueryInterval := &safePromQLNoStepSubqueryInterval{} @@ -1796,6 +1805,7 @@ type tsdbOptions struct { EnableMemorySnapshotOnShutdown bool EnableNativeHistograms bool EnableDelayedCompaction bool + CompactionDelayMaxPercent int EnableOverlappingCompaction bool EnableOOONativeHistograms bool } @@ -1820,6 +1830,7 @@ func (opts tsdbOptions) ToTSDBOptions() tsdb.Options { EnableOOONativeHistograms: opts.EnableOOONativeHistograms, OutOfOrderTimeWindow: opts.OutOfOrderTimeWindow, EnableDelayedCompaction: opts.EnableDelayedCompaction, + CompactionDelayMaxPercent: opts.CompactionDelayMaxPercent, EnableOverlappingCompaction: opts.EnableOverlappingCompaction, } } diff --git a/tsdb/db.go b/tsdb/db.go index 997bad36c..bb9fe6ad7 100644 --- a/tsdb/db.go +++ b/tsdb/db.go @@ -52,6 +52,9 @@ const ( // DefaultBlockDuration in milliseconds. DefaultBlockDuration = int64(2 * time.Hour / time.Millisecond) + // DefaultCompactionDelayMaxPercent in percentage. + DefaultCompactionDelayMaxPercent = 10 + // Block dir suffixes to make deletion and creation operations atomic. // We decided to do suffixes instead of creating meta.json as last (or delete as first) one, // because in error case you still can recover meta.json from the block content within local TSDB dir. @@ -86,6 +89,7 @@ func DefaultOptions() *Options { EnableOverlappingCompaction: true, EnableSharding: false, EnableDelayedCompaction: false, + CompactionDelayMaxPercent: DefaultCompactionDelayMaxPercent, CompactionDelay: time.Duration(0), } } @@ -204,6 +208,8 @@ type Options struct { // CompactionDelay delays the start time of auto compactions. // It can be increased by up to one minute if the DB does not commit too often. CompactionDelay time.Duration + // CompactionDelayMaxPercent is the upper limit for CompactionDelay, specified as a percentage of the head chunk range. 
+ CompactionDelayMaxPercent int // NewCompactorFunc is a function that returns a TSDB compactor. NewCompactorFunc NewCompactorFunc @@ -1986,8 +1992,7 @@ func (db *DB) EnableCompactions() { } func (db *DB) generateCompactionDelay() time.Duration { - // Up to 10% of the head's chunkRange. - return time.Duration(rand.Int63n(db.head.chunkRange.Load()/10)) * time.Millisecond + return time.Duration(rand.Int63n(db.head.chunkRange.Load()*int64(db.opts.CompactionDelayMaxPercent)/100)) * time.Millisecond } // ForceHeadMMap is intended for use only in tests and benchmarks. diff --git a/tsdb/db_test.go b/tsdb/db_test.go index 3f0fc0c84..50f50a3a2 100644 --- a/tsdb/db_test.go +++ b/tsdb/db_test.go @@ -8896,24 +8896,45 @@ func TestBlockQuerierAndBlockChunkQuerier(t *testing.T) { } func TestGenerateCompactionDelay(t *testing.T) { - assertDelay := func(delay time.Duration) { + assertDelay := func(delay time.Duration, expectedMaxPercentDelay int) { t.Helper() require.GreaterOrEqual(t, delay, time.Duration(0)) - // Less than 10% of the chunkRange. - require.LessOrEqual(t, delay, 6000*time.Millisecond) + // Expect to generate a delay up to MaxPercentDelay of the head chunk range + require.LessOrEqual(t, delay, (time.Duration(60000*expectedMaxPercentDelay/100) * time.Millisecond)) } opts := DefaultOptions() - opts.EnableDelayedCompaction = true - db := openTestDB(t, opts, []int64{60000}) - defer func() { - require.NoError(t, db.Close()) - }() - // The offset is generated and changed while opening. 
- assertDelay(db.opts.CompactionDelay) + cases := []struct { + compactionDelayPercent int + }{ + { + compactionDelayPercent: 1, + }, + { + compactionDelayPercent: 10, + }, + { + compactionDelayPercent: 60, + }, + { + compactionDelayPercent: 100, + }, + } - for i := 0; i < 1000; i++ { - assertDelay(db.generateCompactionDelay()) + opts.EnableDelayedCompaction = true + + for _, c := range cases { + opts.CompactionDelayMaxPercent = c.compactionDelayPercent + db := openTestDB(t, opts, []int64{60000}) + defer func() { + require.NoError(t, db.Close()) + }() + // The offset is generated and changed while opening. + assertDelay(db.opts.CompactionDelay, c.compactionDelayPercent) + + for i := 0; i < 1000; i++ { + assertDelay(db.generateCompactionDelay(), c.compactionDelayPercent) + } } }