2021-08-03 12:14:26 +00:00
|
|
|
// Copyright 2021 The Prometheus Authors
|
|
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
// you may not use this file except in compliance with the License.
|
|
|
|
// You may obtain a copy of the License at
|
|
|
|
//
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
//
|
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
// limitations under the License.
|
|
|
|
|
|
|
|
package tsdb
|
|
|
|
|
|
|
|
import (
|
|
|
|
"context"
|
2023-11-16 18:54:41 +00:00
|
|
|
"errors"
|
|
|
|
"fmt"
|
2021-08-03 12:14:26 +00:00
|
|
|
"math"
|
2024-01-15 16:24:46 +00:00
|
|
|
"slices"
|
2022-09-15 07:52:09 +00:00
|
|
|
"sync"
|
2021-08-03 12:14:26 +00:00
|
|
|
|
2021-11-08 14:23:17 +00:00
|
|
|
"github.com/prometheus/prometheus/model/labels"
|
2021-08-03 12:14:26 +00:00
|
|
|
"github.com/prometheus/prometheus/storage"
|
|
|
|
"github.com/prometheus/prometheus/tsdb/chunkenc"
|
|
|
|
"github.com/prometheus/prometheus/tsdb/chunks"
|
|
|
|
"github.com/prometheus/prometheus/tsdb/index"
|
|
|
|
)
|
|
|
|
|
|
|
|
func (h *Head) ExemplarQuerier(ctx context.Context) (storage.ExemplarQuerier, error) {
|
|
|
|
return h.exemplars.ExemplarQuerier(ctx)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Index returns an IndexReader against the block.
|
|
|
|
func (h *Head) Index() (IndexReader, error) {
|
|
|
|
return h.indexRange(math.MinInt64, math.MaxInt64), nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (h *Head) indexRange(mint, maxt int64) *headIndexReader {
|
|
|
|
if hmin := h.MinTime(); hmin > mint {
|
|
|
|
mint = hmin
|
|
|
|
}
|
|
|
|
return &headIndexReader{head: h, mint: mint, maxt: maxt}
|
|
|
|
}
|
|
|
|
|
|
|
|
type headIndexReader struct {
|
|
|
|
head *Head
|
|
|
|
mint, maxt int64
|
|
|
|
}
|
|
|
|
|
|
|
|
func (h *headIndexReader) Close() error {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (h *headIndexReader) Symbols() index.StringIter {
|
2021-09-08 09:18:48 +00:00
|
|
|
return h.head.postings.Symbols()
|
2021-08-03 12:14:26 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// SortedLabelValues returns label values present in the head for the
|
|
|
|
// specific label name that are within the time range mint to maxt.
|
|
|
|
// If matchers are specified the returned result set is reduced
|
|
|
|
// to label values of metrics matching the matchers.
|
2023-09-14 14:02:04 +00:00
|
|
|
func (h *headIndexReader) SortedLabelValues(ctx context.Context, name string, matchers ...*labels.Matcher) ([]string, error) {
|
|
|
|
values, err := h.LabelValues(ctx, name, matchers...)
|
2021-08-03 12:14:26 +00:00
|
|
|
if err == nil {
|
2022-09-30 14:33:56 +00:00
|
|
|
slices.Sort(values)
|
2021-08-03 12:14:26 +00:00
|
|
|
}
|
|
|
|
return values, err
|
|
|
|
}
|
|
|
|
|
|
|
|
// LabelValues returns label values present in the head for the
|
|
|
|
// specific label name that are within the time range mint to maxt.
|
|
|
|
// If matchers are specified the returned result set is reduced
|
|
|
|
// to label values of metrics matching the matchers.
|
2023-09-14 14:02:04 +00:00
|
|
|
func (h *headIndexReader) LabelValues(ctx context.Context, name string, matchers ...*labels.Matcher) ([]string, error) {
|
2021-08-03 12:14:26 +00:00
|
|
|
if h.maxt < h.head.MinTime() || h.mint > h.head.MaxTime() {
|
|
|
|
return []string{}, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
if len(matchers) == 0 {
|
2023-09-14 14:02:04 +00:00
|
|
|
return h.head.postings.LabelValues(ctx, name), nil
|
2021-08-03 12:14:26 +00:00
|
|
|
}
|
|
|
|
|
2023-09-14 14:02:04 +00:00
|
|
|
return labelValuesWithMatchers(ctx, h, name, matchers...)
|
2021-08-03 12:14:26 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// LabelNames returns all the unique label names present in the head
|
|
|
|
// that are within the time range mint to maxt.
|
2023-09-14 08:39:51 +00:00
|
|
|
func (h *headIndexReader) LabelNames(ctx context.Context, matchers ...*labels.Matcher) ([]string, error) {
|
2021-08-03 12:14:26 +00:00
|
|
|
if h.maxt < h.head.MinTime() || h.mint > h.head.MaxTime() {
|
|
|
|
return []string{}, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
if len(matchers) == 0 {
|
|
|
|
labelNames := h.head.postings.LabelNames()
|
2022-09-30 14:33:56 +00:00
|
|
|
slices.Sort(labelNames)
|
2021-08-03 12:14:26 +00:00
|
|
|
return labelNames, nil
|
|
|
|
}
|
|
|
|
|
2023-09-14 08:39:51 +00:00
|
|
|
return labelNamesWithMatchers(ctx, h, matchers...)
|
2021-08-03 12:14:26 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Postings returns the postings list iterator for the label pairs.
|
2023-09-13 15:45:06 +00:00
|
|
|
func (h *headIndexReader) Postings(ctx context.Context, name string, values ...string) (index.Postings, error) {
|
Label values with matchers by intersecting postings (#9907)
* LabelValues w/matchers by intersecting postings
Instead of iterating all matched series to find the values, this
checks if each one of the label values is present in the matched series
(postings).
Pending to be benchmarked.
Signed-off-by: Oleg Zaytsev <mail@olegzaytsev.com>
* Benchmark labelValuesWithMatchers
name old time/op new time/op
Querier/Head/labelValuesWithMatchers/i_with_n="1" 157ms ± 0% 48ms ± 0%
Querier/Head/labelValuesWithMatchers/i_with_n="^.+$" 1.80s ± 0% 0.46s ± 0%
Querier/Head/labelValuesWithMatchers/i_with_n="1",j!="foo" 144ms ± 0% 57ms ± 0%
Querier/Head/labelValuesWithMatchers/i_with_n="1",i=~"^.*$",j!="foo" 304ms ± 0% 111ms ± 0%
Querier/Head/labelValuesWithMatchers/n_with_j!="foo" 761ms ± 0% 164ms ± 0%
Querier/Head/labelValuesWithMatchers/n_with_i="1" 6.11µs ± 0% 6.62µs ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="1" 117ms ± 0% 62ms ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="^.+$" 1.44s ± 0% 0.24s ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="1",j!="foo" 92.1ms ± 0% 70.3ms ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="1",i=~"^.*$",j!="foo" 196ms ± 0% 115ms ± 0%
Querier/Block/labelValuesWithMatchers/n_with_j!="foo" 1.23s ± 0% 0.21s ± 0%
Querier/Block/labelValuesWithMatchers/n_with_i="1" 1.06ms ± 0% 0.88ms ± 0%
name old alloc/op new alloc/op
Querier/Head/labelValuesWithMatchers/i_with_n="1" 29.5MB ± 0% 26.9MB ± 0%
Querier/Head/labelValuesWithMatchers/i_with_n="^.+$" 46.8MB ± 0% 251.5MB ± 0%
Querier/Head/labelValuesWithMatchers/i_with_n="1",j!="foo" 29.5MB ± 0% 22.3MB ± 0%
Querier/Head/labelValuesWithMatchers/i_with_n="1",i=~"^.*$",j!="foo" 46.8MB ± 0% 23.9MB ± 0%
Querier/Head/labelValuesWithMatchers/n_with_j!="foo" 10.3kB ± 0% 138535.2kB ± 0%
Querier/Head/labelValuesWithMatchers/n_with_i="1" 5.54kB ± 0% 7.09kB ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="1" 39.1MB ± 0% 28.5MB ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="^.+$" 287MB ± 0% 253MB ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="1",j!="foo" 34.3MB ± 0% 23.9MB ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="1",i=~"^.*$",j!="foo" 51.6MB ± 0% 25.5MB ± 0%
Querier/Block/labelValuesWithMatchers/n_with_j!="foo" 144MB ± 0% 139MB ± 0%
Querier/Block/labelValuesWithMatchers/n_with_i="1" 6.43kB ± 0% 8.66kB ± 0%
name old allocs/op new allocs/op
Querier/Head/labelValuesWithMatchers/i_with_n="1" 104k ± 0% 500k ± 0%
Querier/Head/labelValuesWithMatchers/i_with_n="^.+$" 204k ± 0% 600k ± 0%
Querier/Head/labelValuesWithMatchers/i_with_n="1",j!="foo" 104k ± 0% 500k ± 0%
Querier/Head/labelValuesWithMatchers/i_with_n="1",i=~"^.*$",j!="foo" 204k ± 0% 500k ± 0%
Querier/Head/labelValuesWithMatchers/n_with_j!="foo" 66.0 ± 0% 255.0 ± 0%
Querier/Head/labelValuesWithMatchers/n_with_i="1" 61.0 ± 0% 205.0 ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="1" 304k ± 0% 600k ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="^.+$" 5.20M ± 0% 0.70M ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="1",j!="foo" 204k ± 0% 600k ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="1",i=~"^.*$",j!="foo" 304k ± 0% 600k ± 0%
Querier/Block/labelValuesWithMatchers/n_with_j!="foo" 3.00M ± 0% 0.00M ± 0%
Querier/Block/labelValuesWithMatchers/n_with_i="1" 61.0 ± 0% 247.0 ± 0%
Signed-off-by: Oleg Zaytsev <mail@olegzaytsev.com>
* Don't expand postings to intersect them
Using a min heap we can check whether matched postings intersect with
each one of the label values postings. This avoid expanding postings
(and thus having all of them in memory at any point).
Slightly slower than the expanding postings version for some cases, but
definitely pays the price once the cardinality grows.
Still offers 10x latency improvement where previous latencies were
reaching 1s.
Benchmark results:
name \ time/op old.txt intersect.txt intersect_noexpand.txt
Querier/Head/labelValuesWithMatchers/i_with_n="1" 157ms ± 0% 48ms ± 0% 110ms ± 0%
Querier/Head/labelValuesWithMatchers/i_with_n="^.+$" 1.80s ± 0% 0.46s ± 0% 0.18s ± 0%
Querier/Head/labelValuesWithMatchers/i_with_n="1",j!="foo" 144ms ± 0% 57ms ± 0% 125ms ± 0%
Querier/Head/labelValuesWithMatchers/i_with_n="1",i=~"^.*$",j!="foo" 304ms ± 0% 111ms ± 0% 177ms ± 0%
Querier/Head/labelValuesWithMatchers/n_with_j!="foo" 761ms ± 0% 164ms ± 0% 134ms ± 0%
Querier/Head/labelValuesWithMatchers/n_with_i="1" 6.11µs ± 0% 6.62µs ± 0% 4.29µs ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="1" 117ms ± 0% 62ms ± 0% 120ms ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="^.+$" 1.44s ± 0% 0.24s ± 0% 0.15s ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="1",j!="foo" 92.1ms ± 0% 70.3ms ± 0% 125.4ms ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="1",i=~"^.*$",j!="foo" 196ms ± 0% 115ms ± 0% 170ms ± 0%
Querier/Block/labelValuesWithMatchers/n_with_j!="foo" 1.23s ± 0% 0.21s ± 0% 0.14s ± 0%
Querier/Block/labelValuesWithMatchers/n_with_i="1" 1.06ms ± 0% 0.88ms ± 0% 0.92ms ± 0%
name \ alloc/op old.txt intersect.txt intersect_noexpand.txt
Querier/Head/labelValuesWithMatchers/i_with_n="1" 29.5MB ± 0% 26.9MB ± 0% 19.1MB ± 0%
Querier/Head/labelValuesWithMatchers/i_with_n="^.+$" 46.8MB ± 0% 251.5MB ± 0% 36.3MB ± 0%
Querier/Head/labelValuesWithMatchers/i_with_n="1",j!="foo" 29.5MB ± 0% 22.3MB ± 0% 19.1MB ± 0%
Querier/Head/labelValuesWithMatchers/i_with_n="1",i=~"^.*$",j!="foo" 46.8MB ± 0% 23.9MB ± 0% 20.7MB ± 0%
Querier/Head/labelValuesWithMatchers/n_with_j!="foo" 10.3kB ± 0% 138535.2kB ± 0% 6.4kB ± 0%
Querier/Head/labelValuesWithMatchers/n_with_i="1" 5.54kB ± 0% 7.09kB ± 0% 4.30kB ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="1" 39.1MB ± 0% 28.5MB ± 0% 20.7MB ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="^.+$" 287MB ± 0% 253MB ± 0% 38MB ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="1",j!="foo" 34.3MB ± 0% 23.9MB ± 0% 20.7MB ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="1",i=~"^.*$",j!="foo" 51.6MB ± 0% 25.5MB ± 0% 22.3MB ± 0%
Querier/Block/labelValuesWithMatchers/n_with_j!="foo" 144MB ± 0% 139MB ± 0% 0MB ± 0%
Querier/Block/labelValuesWithMatchers/n_with_i="1" 6.43kB ± 0% 8.66kB ± 0% 5.86kB ± 0%
name \ allocs/op old.txt intersect.txt intersect_noexpand.txt
Querier/Head/labelValuesWithMatchers/i_with_n="1" 104k ± 0% 500k ± 0% 300k ± 0%
Querier/Head/labelValuesWithMatchers/i_with_n="^.+$" 204k ± 0% 600k ± 0% 400k ± 0%
Querier/Head/labelValuesWithMatchers/i_with_n="1",j!="foo" 104k ± 0% 500k ± 0% 300k ± 0%
Querier/Head/labelValuesWithMatchers/i_with_n="1",i=~"^.*$",j!="foo" 204k ± 0% 500k ± 0% 300k ± 0%
Querier/Head/labelValuesWithMatchers/n_with_j!="foo" 66.0 ± 0% 255.0 ± 0% 139.0 ± 0%
Querier/Head/labelValuesWithMatchers/n_with_i="1" 61.0 ± 0% 205.0 ± 0% 87.0 ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="1" 304k ± 0% 600k ± 0% 400k ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="^.+$" 5.20M ± 0% 0.70M ± 0% 0.50M ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="1",j!="foo" 204k ± 0% 600k ± 0% 400k ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="1",i=~"^.*$",j!="foo" 304k ± 0% 600k ± 0% 400k ± 0%
Querier/Block/labelValuesWithMatchers/n_with_j!="foo" 3.00M ± 0% 0.00M ± 0% 0.00M ± 0%
Querier/Block/labelValuesWithMatchers/n_with_i="1" 61.0 ± 0% 247.0 ± 0% 129.0 ± 0%
Signed-off-by: Oleg Zaytsev <mail@olegzaytsev.com>
* Apply comment suggestions from the code review
Signed-off-by: Oleg Zaytsev <mail@olegzaytsev.com>
Co-authored-by: Ganesh Vernekar <15064823+codesome@users.noreply.github.com>
* Change else { if } to else if
Signed-off-by: Oleg Zaytsev <mail@olegzaytsev.com>
* Remove sorting of label values
We were not sorting them before, so no need to sort them now
Signed-off-by: Oleg Zaytsev <mail@olegzaytsev.com>
Co-authored-by: Ganesh Vernekar <15064823+codesome@users.noreply.github.com>
2021-12-28 14:59:03 +00:00
|
|
|
switch len(values) {
|
|
|
|
case 0:
|
|
|
|
return index.EmptyPostings(), nil
|
|
|
|
case 1:
|
|
|
|
return h.head.postings.Get(name, values[0]), nil
|
|
|
|
default:
|
|
|
|
res := make([]index.Postings, 0, len(values))
|
|
|
|
for _, value := range values {
|
2023-01-10 09:51:49 +00:00
|
|
|
if p := h.head.postings.Get(name, value); !index.IsEmptyPostingsType(p) {
|
|
|
|
res = append(res, p)
|
|
|
|
}
|
Label values with matchers by intersecting postings (#9907)
* LabelValues w/matchers by intersecting postings
Instead of iterating all matched series to find the values, this
checks if each one of the label values is present in the matched series
(postings).
Pending to be benchmarked.
Signed-off-by: Oleg Zaytsev <mail@olegzaytsev.com>
* Benchmark labelValuesWithMatchers
name old time/op new time/op
Querier/Head/labelValuesWithMatchers/i_with_n="1" 157ms ± 0% 48ms ± 0%
Querier/Head/labelValuesWithMatchers/i_with_n="^.+$" 1.80s ± 0% 0.46s ± 0%
Querier/Head/labelValuesWithMatchers/i_with_n="1",j!="foo" 144ms ± 0% 57ms ± 0%
Querier/Head/labelValuesWithMatchers/i_with_n="1",i=~"^.*$",j!="foo" 304ms ± 0% 111ms ± 0%
Querier/Head/labelValuesWithMatchers/n_with_j!="foo" 761ms ± 0% 164ms ± 0%
Querier/Head/labelValuesWithMatchers/n_with_i="1" 6.11µs ± 0% 6.62µs ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="1" 117ms ± 0% 62ms ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="^.+$" 1.44s ± 0% 0.24s ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="1",j!="foo" 92.1ms ± 0% 70.3ms ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="1",i=~"^.*$",j!="foo" 196ms ± 0% 115ms ± 0%
Querier/Block/labelValuesWithMatchers/n_with_j!="foo" 1.23s ± 0% 0.21s ± 0%
Querier/Block/labelValuesWithMatchers/n_with_i="1" 1.06ms ± 0% 0.88ms ± 0%
name old alloc/op new alloc/op
Querier/Head/labelValuesWithMatchers/i_with_n="1" 29.5MB ± 0% 26.9MB ± 0%
Querier/Head/labelValuesWithMatchers/i_with_n="^.+$" 46.8MB ± 0% 251.5MB ± 0%
Querier/Head/labelValuesWithMatchers/i_with_n="1",j!="foo" 29.5MB ± 0% 22.3MB ± 0%
Querier/Head/labelValuesWithMatchers/i_with_n="1",i=~"^.*$",j!="foo" 46.8MB ± 0% 23.9MB ± 0%
Querier/Head/labelValuesWithMatchers/n_with_j!="foo" 10.3kB ± 0% 138535.2kB ± 0%
Querier/Head/labelValuesWithMatchers/n_with_i="1" 5.54kB ± 0% 7.09kB ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="1" 39.1MB ± 0% 28.5MB ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="^.+$" 287MB ± 0% 253MB ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="1",j!="foo" 34.3MB ± 0% 23.9MB ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="1",i=~"^.*$",j!="foo" 51.6MB ± 0% 25.5MB ± 0%
Querier/Block/labelValuesWithMatchers/n_with_j!="foo" 144MB ± 0% 139MB ± 0%
Querier/Block/labelValuesWithMatchers/n_with_i="1" 6.43kB ± 0% 8.66kB ± 0%
name old allocs/op new allocs/op
Querier/Head/labelValuesWithMatchers/i_with_n="1" 104k ± 0% 500k ± 0%
Querier/Head/labelValuesWithMatchers/i_with_n="^.+$" 204k ± 0% 600k ± 0%
Querier/Head/labelValuesWithMatchers/i_with_n="1",j!="foo" 104k ± 0% 500k ± 0%
Querier/Head/labelValuesWithMatchers/i_with_n="1",i=~"^.*$",j!="foo" 204k ± 0% 500k ± 0%
Querier/Head/labelValuesWithMatchers/n_with_j!="foo" 66.0 ± 0% 255.0 ± 0%
Querier/Head/labelValuesWithMatchers/n_with_i="1" 61.0 ± 0% 205.0 ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="1" 304k ± 0% 600k ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="^.+$" 5.20M ± 0% 0.70M ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="1",j!="foo" 204k ± 0% 600k ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="1",i=~"^.*$",j!="foo" 304k ± 0% 600k ± 0%
Querier/Block/labelValuesWithMatchers/n_with_j!="foo" 3.00M ± 0% 0.00M ± 0%
Querier/Block/labelValuesWithMatchers/n_with_i="1" 61.0 ± 0% 247.0 ± 0%
Signed-off-by: Oleg Zaytsev <mail@olegzaytsev.com>
* Don't expand postings to intersect them
Using a min heap we can check whether matched postings intersect with
each one of the label values postings. This avoid expanding postings
(and thus having all of them in memory at any point).
Slightly slower than the expanding postings version for some cases, but
definitely pays the price once the cardinality grows.
Still offers 10x latency improvement where previous latencies were
reaching 1s.
Benchmark results:
name \ time/op old.txt intersect.txt intersect_noexpand.txt
Querier/Head/labelValuesWithMatchers/i_with_n="1" 157ms ± 0% 48ms ± 0% 110ms ± 0%
Querier/Head/labelValuesWithMatchers/i_with_n="^.+$" 1.80s ± 0% 0.46s ± 0% 0.18s ± 0%
Querier/Head/labelValuesWithMatchers/i_with_n="1",j!="foo" 144ms ± 0% 57ms ± 0% 125ms ± 0%
Querier/Head/labelValuesWithMatchers/i_with_n="1",i=~"^.*$",j!="foo" 304ms ± 0% 111ms ± 0% 177ms ± 0%
Querier/Head/labelValuesWithMatchers/n_with_j!="foo" 761ms ± 0% 164ms ± 0% 134ms ± 0%
Querier/Head/labelValuesWithMatchers/n_with_i="1" 6.11µs ± 0% 6.62µs ± 0% 4.29µs ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="1" 117ms ± 0% 62ms ± 0% 120ms ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="^.+$" 1.44s ± 0% 0.24s ± 0% 0.15s ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="1",j!="foo" 92.1ms ± 0% 70.3ms ± 0% 125.4ms ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="1",i=~"^.*$",j!="foo" 196ms ± 0% 115ms ± 0% 170ms ± 0%
Querier/Block/labelValuesWithMatchers/n_with_j!="foo" 1.23s ± 0% 0.21s ± 0% 0.14s ± 0%
Querier/Block/labelValuesWithMatchers/n_with_i="1" 1.06ms ± 0% 0.88ms ± 0% 0.92ms ± 0%
name \ alloc/op old.txt intersect.txt intersect_noexpand.txt
Querier/Head/labelValuesWithMatchers/i_with_n="1" 29.5MB ± 0% 26.9MB ± 0% 19.1MB ± 0%
Querier/Head/labelValuesWithMatchers/i_with_n="^.+$" 46.8MB ± 0% 251.5MB ± 0% 36.3MB ± 0%
Querier/Head/labelValuesWithMatchers/i_with_n="1",j!="foo" 29.5MB ± 0% 22.3MB ± 0% 19.1MB ± 0%
Querier/Head/labelValuesWithMatchers/i_with_n="1",i=~"^.*$",j!="foo" 46.8MB ± 0% 23.9MB ± 0% 20.7MB ± 0%
Querier/Head/labelValuesWithMatchers/n_with_j!="foo" 10.3kB ± 0% 138535.2kB ± 0% 6.4kB ± 0%
Querier/Head/labelValuesWithMatchers/n_with_i="1" 5.54kB ± 0% 7.09kB ± 0% 4.30kB ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="1" 39.1MB ± 0% 28.5MB ± 0% 20.7MB ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="^.+$" 287MB ± 0% 253MB ± 0% 38MB ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="1",j!="foo" 34.3MB ± 0% 23.9MB ± 0% 20.7MB ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="1",i=~"^.*$",j!="foo" 51.6MB ± 0% 25.5MB ± 0% 22.3MB ± 0%
Querier/Block/labelValuesWithMatchers/n_with_j!="foo" 144MB ± 0% 139MB ± 0% 0MB ± 0%
Querier/Block/labelValuesWithMatchers/n_with_i="1" 6.43kB ± 0% 8.66kB ± 0% 5.86kB ± 0%
name \ allocs/op old.txt intersect.txt intersect_noexpand.txt
Querier/Head/labelValuesWithMatchers/i_with_n="1" 104k ± 0% 500k ± 0% 300k ± 0%
Querier/Head/labelValuesWithMatchers/i_with_n="^.+$" 204k ± 0% 600k ± 0% 400k ± 0%
Querier/Head/labelValuesWithMatchers/i_with_n="1",j!="foo" 104k ± 0% 500k ± 0% 300k ± 0%
Querier/Head/labelValuesWithMatchers/i_with_n="1",i=~"^.*$",j!="foo" 204k ± 0% 500k ± 0% 300k ± 0%
Querier/Head/labelValuesWithMatchers/n_with_j!="foo" 66.0 ± 0% 255.0 ± 0% 139.0 ± 0%
Querier/Head/labelValuesWithMatchers/n_with_i="1" 61.0 ± 0% 205.0 ± 0% 87.0 ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="1" 304k ± 0% 600k ± 0% 400k ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="^.+$" 5.20M ± 0% 0.70M ± 0% 0.50M ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="1",j!="foo" 204k ± 0% 600k ± 0% 400k ± 0%
Querier/Block/labelValuesWithMatchers/i_with_n="1",i=~"^.*$",j!="foo" 304k ± 0% 600k ± 0% 400k ± 0%
Querier/Block/labelValuesWithMatchers/n_with_j!="foo" 3.00M ± 0% 0.00M ± 0% 0.00M ± 0%
Querier/Block/labelValuesWithMatchers/n_with_i="1" 61.0 ± 0% 247.0 ± 0% 129.0 ± 0%
Signed-off-by: Oleg Zaytsev <mail@olegzaytsev.com>
* Apply comment suggestions from the code review
Signed-off-by: Oleg Zaytsev <mail@olegzaytsev.com>
Co-authored-by: Ganesh Vernekar <15064823+codesome@users.noreply.github.com>
* Change else { if } to else if
Signed-off-by: Oleg Zaytsev <mail@olegzaytsev.com>
* Remove sorting of label values
We were not sorting them before, so no need to sort them now
Signed-off-by: Oleg Zaytsev <mail@olegzaytsev.com>
Co-authored-by: Ganesh Vernekar <15064823+codesome@users.noreply.github.com>
2021-12-28 14:59:03 +00:00
|
|
|
}
|
2023-09-13 15:45:06 +00:00
|
|
|
return index.Merge(ctx, res...), nil
|
2021-08-03 12:14:26 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-05-09 09:55:30 +00:00
|
|
|
func (h *headIndexReader) PostingsForLabelMatching(ctx context.Context, name string, match func(string) bool) index.Postings {
|
|
|
|
return h.head.postings.PostingsForLabelMatching(ctx, name, match)
|
|
|
|
}
|
|
|
|
|
2024-11-19 15:49:01 +00:00
|
|
|
func (h *headIndexReader) PostingsForAllLabelValues(ctx context.Context, name string) index.Postings {
|
|
|
|
return h.head.postings.PostingsForAllLabelValues(ctx, name)
|
|
|
|
}
|
|
|
|
|
2021-08-03 12:14:26 +00:00
|
|
|
func (h *headIndexReader) SortedPostings(p index.Postings) index.Postings {
|
|
|
|
series := make([]*memSeries, 0, 128)
|
|
|
|
|
|
|
|
// Fetch all the series only once.
|
|
|
|
for p.Next() {
|
2021-11-06 10:10:04 +00:00
|
|
|
s := h.head.series.getByID(chunks.HeadSeriesRef(p.At()))
|
2021-08-03 12:14:26 +00:00
|
|
|
if s == nil {
|
2024-09-10 01:41:53 +00:00
|
|
|
h.head.logger.Debug("Looked up series not found")
|
2021-08-03 12:14:26 +00:00
|
|
|
} else {
|
|
|
|
series = append(series, s)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if err := p.Err(); err != nil {
|
2023-11-16 18:54:41 +00:00
|
|
|
return index.ErrPostings(fmt.Errorf("expand postings: %w", err))
|
2021-08-03 12:14:26 +00:00
|
|
|
}
|
|
|
|
|
2023-09-21 20:53:51 +00:00
|
|
|
slices.SortFunc(series, func(a, b *memSeries) int {
|
2024-07-05 09:11:32 +00:00
|
|
|
return labels.Compare(a.labels(), b.labels())
|
2021-08-03 12:14:26 +00:00
|
|
|
})
|
|
|
|
|
|
|
|
// Convert back to list.
|
2021-11-06 10:10:04 +00:00
|
|
|
ep := make([]storage.SeriesRef, 0, len(series))
|
2021-08-03 12:14:26 +00:00
|
|
|
for _, p := range series {
|
2021-11-06 10:10:04 +00:00
|
|
|
ep = append(ep, storage.SeriesRef(p.ref))
|
2021-08-03 12:14:26 +00:00
|
|
|
}
|
|
|
|
return index.NewListPostings(ep)
|
|
|
|
}
|
|
|
|
|
2024-01-29 11:57:27 +00:00
|
|
|
// ShardedPostings implements IndexReader. This function returns an failing postings list if sharding
|
|
|
|
// has not been enabled in the Head.
|
|
|
|
func (h *headIndexReader) ShardedPostings(p index.Postings, shardIndex, shardCount uint64) index.Postings {
|
|
|
|
if !h.head.opts.EnableSharding {
|
|
|
|
return index.ErrPostings(errors.New("sharding is disabled"))
|
|
|
|
}
|
|
|
|
|
|
|
|
out := make([]storage.SeriesRef, 0, 128)
|
|
|
|
|
|
|
|
for p.Next() {
|
|
|
|
s := h.head.series.getByID(chunks.HeadSeriesRef(p.At()))
|
|
|
|
if s == nil {
|
2024-09-10 01:41:53 +00:00
|
|
|
h.head.logger.Debug("Looked up series not found")
|
2024-01-29 11:57:27 +00:00
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
// Check if the series belong to the shard.
|
2024-08-26 06:48:37 +00:00
|
|
|
if s.shardHash%shardCount != shardIndex {
|
2024-01-29 11:57:27 +00:00
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
out = append(out, storage.SeriesRef(s.ref))
|
|
|
|
}
|
|
|
|
|
|
|
|
return index.NewListPostings(out)
|
|
|
|
}
|
|
|
|
|
2021-08-03 12:14:26 +00:00
|
|
|
// Series returns the series for the given reference.
|
2024-01-29 11:57:27 +00:00
|
|
|
// Chunks are skipped if chks is nil.
|
2022-12-15 18:19:15 +00:00
|
|
|
func (h *headIndexReader) Series(ref storage.SeriesRef, builder *labels.ScratchBuilder, chks *[]chunks.Meta) error {
|
2021-11-06 10:10:04 +00:00
|
|
|
s := h.head.series.getByID(chunks.HeadSeriesRef(ref))
|
2021-08-03 12:14:26 +00:00
|
|
|
|
|
|
|
if s == nil {
|
|
|
|
h.head.metrics.seriesNotFound.Inc()
|
|
|
|
return storage.ErrNotFound
|
|
|
|
}
|
2024-07-05 09:11:32 +00:00
|
|
|
builder.Assign(s.labels())
|
2021-08-03 12:14:26 +00:00
|
|
|
|
2024-01-29 11:57:27 +00:00
|
|
|
if chks == nil {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2021-08-03 12:14:26 +00:00
|
|
|
s.Lock()
|
|
|
|
defer s.Unlock()
|
|
|
|
|
|
|
|
*chks = (*chks)[:0]
|
2024-08-12 16:14:41 +00:00
|
|
|
*chks = appendSeriesChunks(s, h.mint, h.maxt, *chks)
|
2024-06-24 20:06:50 +00:00
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2024-08-12 16:14:41 +00:00
|
|
|
func appendSeriesChunks(s *memSeries, mint, maxt int64, chks []chunks.Meta) []chunks.Meta {
|
2021-08-03 12:14:26 +00:00
|
|
|
for i, c := range s.mmappedChunks {
|
|
|
|
// Do not expose chunks that are outside of the specified range.
|
2024-06-24 20:06:50 +00:00
|
|
|
if !c.OverlapsClosedInterval(mint, maxt) {
|
2021-08-03 12:14:26 +00:00
|
|
|
continue
|
|
|
|
}
|
2024-08-12 16:14:41 +00:00
|
|
|
chks = append(chks, chunks.Meta{
|
2021-08-03 12:14:26 +00:00
|
|
|
MinTime: c.minTime,
|
|
|
|
MaxTime: c.maxTime,
|
2021-11-17 13:05:10 +00:00
|
|
|
Ref: chunks.ChunkRef(chunks.NewHeadChunkRef(s.ref, s.headChunkID(i))),
|
2021-08-03 12:14:26 +00:00
|
|
|
})
|
|
|
|
}
|
Use a linked list for memSeries.headChunk (#11818)
Currently memSeries holds a single head chunk in-memory and a slice of mmapped chunks.
When append() is called on memSeries it might decide that a new headChunk is needed to use for given append() call.
If that happens it will first mmap existing head chunk and only after that happens it will create a new empty headChunk and continue appending
our sample to it.
Since appending samples uses write lock on memSeries no other read or write can happen until any append is completed.
When we have an append() that must create a new head chunk the whole memSeries is blocked until mmapping of existing head chunk finishes.
Mmapping itself uses a lock as it needs to be serialised, which means that the more chunks to mmap we have the longer each chunk might wait
for it to be mmapped.
If there's enough chunks that require mmapping some memSeries will be locked for long enough that it will start affecting
queries and scrapes.
Queries might timeout, since by default they have a 2 minute timeout set.
Scrapes will be blocked inside append() call, which means there will be a gap between samples. This will first affect range queries
or calls using rate() and such, since the time range requested in the query might have too few samples to calculate anything.
To avoid this we need to remove mmapping from append path, since mmapping is blocking.
But this means that when we cut a new head chunk we need to keep the old one around, so we can mmap it later.
This change makes memSeries.headChunk a linked list, memSeries.headChunk still points to the 'open' head chunk that receives new samples,
while older, yet to be mmapped, chunks are linked to it.
Mmapping is done on a schedule by iterating all memSeries one by one. Thanks to this we control when mmapping is done, since we trigger
it manually, which reduces the risk that it will have to compete for mmap locks with other chunks.
Signed-off-by: Łukasz Mierzwa <l.mierzwa@gmail.com>
2023-07-31 09:10:24 +00:00
|
|
|
|
|
|
|
if s.headChunks != nil {
|
|
|
|
var maxTime int64
|
|
|
|
var i, j int
|
|
|
|
for i = s.headChunks.len() - 1; i >= 0; i-- {
|
|
|
|
chk := s.headChunks.atOffset(i)
|
|
|
|
if i == 0 {
|
|
|
|
// Set the head chunk as open (being appended to) for the first headChunk.
|
|
|
|
maxTime = math.MaxInt64
|
|
|
|
} else {
|
|
|
|
maxTime = chk.maxTime
|
|
|
|
}
|
2024-06-24 20:06:50 +00:00
|
|
|
if chk.OverlapsClosedInterval(mint, maxt) {
|
2024-08-12 16:14:41 +00:00
|
|
|
chks = append(chks, chunks.Meta{
|
Use a linked list for memSeries.headChunk (#11818)
Currently memSeries holds a single head chunk in-memory and a slice of mmapped chunks.
When append() is called on memSeries it might decide that a new headChunk is needed to use for given append() call.
If that happens it will first mmap existing head chunk and only after that happens it will create a new empty headChunk and continue appending
our sample to it.
Since appending samples uses write lock on memSeries no other read or write can happen until any append is completed.
When we have an append() that must create a new head chunk the whole memSeries is blocked until mmapping of existing head chunk finishes.
Mmapping itself uses a lock as it needs to be serialised, which means that the more chunks to mmap we have the longer each chunk might wait
for it to be mmapped.
If there's enough chunks that require mmapping some memSeries will be locked for long enough that it will start affecting
queries and scrapes.
Queries might timeout, since by default they have a 2 minute timeout set.
Scrapes will be blocked inside append() call, which means there will be a gap between samples. This will first affect range queries
or calls using rate() and such, since the time range requested in the query might have too few samples to calculate anything.
To avoid this we need to remove mmapping from append path, since mmapping is blocking.
But this means that when we cut a new head chunk we need to keep the old one around, so we can mmap it later.
This change makes memSeries.headChunk a linked list, memSeries.headChunk still points to the 'open' head chunk that receives new samples,
while older, yet to be mmapped, chunks are linked to it.
Mmapping is done on a schedule by iterating all memSeries one by one. Thanks to this we control when mmapping is done, since we trigger
it manually, which reduces the risk that it will have to compete for mmap locks with other chunks.
Signed-off-by: Łukasz Mierzwa <l.mierzwa@gmail.com>
2023-07-31 09:10:24 +00:00
|
|
|
MinTime: chk.minTime,
|
|
|
|
MaxTime: maxTime,
|
|
|
|
Ref: chunks.ChunkRef(chunks.NewHeadChunkRef(s.ref, s.headChunkID(len(s.mmappedChunks)+j))),
|
|
|
|
})
|
|
|
|
}
|
|
|
|
j++
|
|
|
|
}
|
2021-08-03 12:14:26 +00:00
|
|
|
}
|
2024-08-12 16:14:41 +00:00
|
|
|
return chks
|
2021-08-03 12:14:26 +00:00
|
|
|
}
|
|
|
|
|
2022-09-20 17:05:50 +00:00
|
|
|
// headChunkID returns the HeadChunkID referred to by the given position.
|
|
|
|
// * 0 <= pos < len(s.mmappedChunks) refer to s.mmappedChunks[pos]
|
2023-10-03 20:09:25 +00:00
|
|
|
// * pos >= len(s.mmappedChunks) refers to s.headChunks linked list.
|
2021-11-17 13:05:10 +00:00
|
|
|
func (s *memSeries) headChunkID(pos int) chunks.HeadChunkID {
|
|
|
|
return chunks.HeadChunkID(pos) + s.firstChunkID
|
2021-08-03 12:14:26 +00:00
|
|
|
}
|
|
|
|
|
2024-06-24 12:41:44 +00:00
|
|
|
const oooChunkIDMask = 1 << 23
|
|
|
|
|
2022-09-20 17:05:50 +00:00
|
|
|
// oooHeadChunkID returns the HeadChunkID referred to by the given position.
|
2024-06-24 12:41:44 +00:00
|
|
|
// Only the bottom 24 bits are used. Bit 23 is always 1 for an OOO chunk; for the rest:
|
2022-09-20 17:05:50 +00:00
|
|
|
// * 0 <= pos < len(s.oooMmappedChunks) refer to s.oooMmappedChunks[pos]
|
|
|
|
// * pos == len(s.oooMmappedChunks) refers to s.oooHeadChunk
|
2022-12-28 10:19:41 +00:00
|
|
|
// The caller must ensure that s.ooo is not nil.
|
2022-09-20 17:05:50 +00:00
|
|
|
func (s *memSeries) oooHeadChunkID(pos int) chunks.HeadChunkID {
|
2024-06-24 12:41:44 +00:00
|
|
|
return (chunks.HeadChunkID(pos) + s.ooo.firstOOOChunkID) | oooChunkIDMask
|
|
|
|
}
|
|
|
|
|
2024-08-12 16:14:41 +00:00
|
|
|
func unpackHeadChunkRef(ref chunks.ChunkRef) (seriesID chunks.HeadSeriesRef, chunkID chunks.HeadChunkID, isOOO bool) {
|
2024-06-24 12:41:44 +00:00
|
|
|
sid, cid := chunks.HeadChunkRef(ref).Unpack()
|
|
|
|
return sid, (cid & (oooChunkIDMask - 1)), (cid & oooChunkIDMask) != 0
|
2022-09-20 17:05:50 +00:00
|
|
|
}
|
|
|
|
|
2021-08-03 12:14:26 +00:00
|
|
|
// LabelValueFor returns label value for the given label name in the series referred to by ID.
|
2023-09-14 14:02:04 +00:00
|
|
|
func (h *headIndexReader) LabelValueFor(_ context.Context, id storage.SeriesRef, label string) (string, error) {
|
2021-11-06 10:10:04 +00:00
|
|
|
memSeries := h.head.series.getByID(chunks.HeadSeriesRef(id))
|
2021-08-03 12:14:26 +00:00
|
|
|
if memSeries == nil {
|
|
|
|
return "", storage.ErrNotFound
|
|
|
|
}
|
|
|
|
|
2024-07-05 09:11:32 +00:00
|
|
|
value := memSeries.labels().Get(label)
|
2021-08-03 12:14:26 +00:00
|
|
|
if value == "" {
|
|
|
|
return "", storage.ErrNotFound
|
|
|
|
}
|
|
|
|
|
|
|
|
return value, nil
|
|
|
|
}
|
|
|
|
|
2024-06-11 13:36:46 +00:00
|
|
|
// LabelNamesFor returns all the label names for the series referred to by the postings.
|
2021-08-03 12:14:26 +00:00
|
|
|
// The names returned are sorted.
|
2024-06-11 13:36:46 +00:00
|
|
|
func (h *headIndexReader) LabelNamesFor(ctx context.Context, series index.Postings) ([]string, error) {
|
2021-08-03 12:14:26 +00:00
|
|
|
namesMap := make(map[string]struct{})
|
2024-06-11 13:36:46 +00:00
|
|
|
i := 0
|
|
|
|
for series.Next() {
|
|
|
|
i++
|
|
|
|
if i%checkContextEveryNIterations == 0 && ctx.Err() != nil {
|
2023-09-14 08:39:51 +00:00
|
|
|
return nil, ctx.Err()
|
|
|
|
}
|
2024-06-11 13:36:46 +00:00
|
|
|
memSeries := h.head.series.getByID(chunks.HeadSeriesRef(series.At()))
|
2021-08-03 12:14:26 +00:00
|
|
|
if memSeries == nil {
|
2024-06-07 14:09:53 +00:00
|
|
|
// Series not found, this happens during compaction,
|
|
|
|
// when series was garbage collected after the caller got the series IDs.
|
|
|
|
continue
|
2021-08-03 12:14:26 +00:00
|
|
|
}
|
2024-07-05 09:11:32 +00:00
|
|
|
memSeries.labels().Range(func(lbl labels.Label) {
|
2021-08-03 12:14:26 +00:00
|
|
|
namesMap[lbl.Name] = struct{}{}
|
2022-03-09 22:17:40 +00:00
|
|
|
})
|
2021-08-03 12:14:26 +00:00
|
|
|
}
|
2024-06-11 13:36:46 +00:00
|
|
|
if err := series.Err(); err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2021-08-03 12:14:26 +00:00
|
|
|
names := make([]string, 0, len(namesMap))
|
|
|
|
for name := range namesMap {
|
|
|
|
names = append(names, name)
|
|
|
|
}
|
2022-09-30 14:33:56 +00:00
|
|
|
slices.Sort(names)
|
2021-08-03 12:14:26 +00:00
|
|
|
return names, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Chunks returns a ChunkReader against the block.
|
|
|
|
func (h *Head) Chunks() (ChunkReader, error) {
|
|
|
|
return h.chunksRange(math.MinInt64, math.MaxInt64, h.iso.State(math.MinInt64, math.MaxInt64))
|
|
|
|
}
|
|
|
|
|
|
|
|
func (h *Head) chunksRange(mint, maxt int64, is *isolationState) (*headChunkReader, error) {
|
|
|
|
h.closedMtx.Lock()
|
|
|
|
defer h.closedMtx.Unlock()
|
|
|
|
if h.closed {
|
|
|
|
return nil, errors.New("can't read from a closed head")
|
|
|
|
}
|
|
|
|
if hmin := h.MinTime(); hmin > mint {
|
|
|
|
mint = hmin
|
|
|
|
}
|
|
|
|
return &headChunkReader{
|
|
|
|
head: h,
|
|
|
|
mint: mint,
|
|
|
|
maxt: maxt,
|
|
|
|
isoState: is,
|
|
|
|
}, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
type headChunkReader struct {
|
|
|
|
head *Head
|
|
|
|
mint, maxt int64
|
|
|
|
isoState *isolationState
|
|
|
|
}
|
|
|
|
|
|
|
|
func (h *headChunkReader) Close() error {
|
2022-09-27 14:01:23 +00:00
|
|
|
if h.isoState != nil {
|
|
|
|
h.isoState.Close()
|
|
|
|
}
|
2021-08-03 12:14:26 +00:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2023-11-28 10:14:29 +00:00
|
|
|
// ChunkOrIterable returns the chunk for the reference number.
|
|
|
|
func (h *headChunkReader) ChunkOrIterable(meta chunks.Meta) (chunkenc.Chunk, chunkenc.Iterable, error) {
|
2023-02-19 17:34:51 +00:00
|
|
|
chk, _, err := h.chunk(meta, false)
|
2023-11-28 10:14:29 +00:00
|
|
|
return chk, nil, err
|
2023-02-19 17:34:51 +00:00
|
|
|
}
|
|
|
|
|
2024-06-24 12:41:44 +00:00
|
|
|
type ChunkReaderWithCopy interface {
|
|
|
|
ChunkOrIterableWithCopy(meta chunks.Meta) (chunkenc.Chunk, chunkenc.Iterable, int64, error)
|
|
|
|
}
|
|
|
|
|
|
|
|
// ChunkOrIterableWithCopy returns the chunk for the reference number.
|
|
|
|
// If the chunk is the in-memory chunk, then it makes a copy and returns the copied chunk, plus the max time of the chunk.
|
|
|
|
func (h *headChunkReader) ChunkOrIterableWithCopy(meta chunks.Meta) (chunkenc.Chunk, chunkenc.Iterable, int64, error) {
|
|
|
|
chk, maxTime, err := h.chunk(meta, true)
|
|
|
|
return chk, nil, maxTime, err
|
2023-02-19 17:34:51 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// chunk returns the chunk for the reference number.
|
|
|
|
// If copyLastChunk is true, then it makes a copy of the head chunk if asked for it.
|
|
|
|
// Also returns max time of the chunk.
|
|
|
|
func (h *headChunkReader) chunk(meta chunks.Meta, copyLastChunk bool) (chunkenc.Chunk, int64, error) {
|
2024-08-23 16:10:02 +00:00
|
|
|
sid, cid, isOOO := unpackHeadChunkRef(meta.Ref)
|
2021-08-03 12:14:26 +00:00
|
|
|
|
|
|
|
s := h.head.series.getByID(sid)
|
|
|
|
// This means that the series has been garbage collected.
|
|
|
|
if s == nil {
|
2023-02-19 17:34:51 +00:00
|
|
|
return nil, 0, storage.ErrNotFound
|
2021-08-03 12:14:26 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
s.Lock()
|
2024-06-26 19:48:39 +00:00
|
|
|
defer s.Unlock()
|
2024-08-23 16:10:02 +00:00
|
|
|
return h.head.chunkFromSeries(s, cid, isOOO, h.mint, h.maxt, h.isoState, copyLastChunk)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Dumb thing to defeat chunk pool.
|
|
|
|
type wrapOOOHeadChunk struct {
|
|
|
|
chunkenc.Chunk
|
2024-06-26 19:48:39 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Call with s locked.
|
2024-08-23 16:10:02 +00:00
|
|
|
func (h *Head) chunkFromSeries(s *memSeries, cid chunks.HeadChunkID, isOOO bool, mint, maxt int64, isoState *isolationState, copyLastChunk bool) (chunkenc.Chunk, int64, error) {
|
|
|
|
if isOOO {
|
|
|
|
chk, maxTime, err := s.oooChunk(cid, h.chunkDiskMapper, &h.memChunkPool)
|
|
|
|
return wrapOOOHeadChunk{chk}, maxTime, err
|
|
|
|
}
|
2024-08-23 15:57:43 +00:00
|
|
|
c, headChunk, isOpen, err := s.chunk(cid, h.chunkDiskMapper, &h.memChunkPool)
|
2021-08-03 12:14:26 +00:00
|
|
|
if err != nil {
|
2023-02-19 17:34:51 +00:00
|
|
|
return nil, 0, err
|
2021-08-03 12:14:26 +00:00
|
|
|
}
|
|
|
|
defer func() {
|
2023-02-19 17:34:51 +00:00
|
|
|
if !headChunk {
|
2021-08-03 12:14:26 +00:00
|
|
|
// Set this to nil so that Go GC can collect it after it has been used.
|
|
|
|
c.chunk = nil
|
Use a linked list for memSeries.headChunk (#11818)
Currently memSeries holds a single head chunk in-memory and a slice of mmapped chunks.
When append() is called on memSeries it might decide that a new headChunk is needed to use for given append() call.
If that happens it will first mmap existing head chunk and only after that happens it will create a new empty headChunk and continue appending
our sample to it.
Since appending samples uses write lock on memSeries no other read or write can happen until any append is completed.
When we have an append() that must create a new head chunk the whole memSeries is blocked until mmapping of existing head chunk finishes.
Mmapping itself uses a lock as it needs to be serialised, which means that the more chunks to mmap we have the longer each chunk might wait
for it to be mmapped.
If there's enough chunks that require mmapping some memSeries will be locked for long enough that it will start affecting
queries and scrapes.
Queries might timeout, since by default they have a 2 minute timeout set.
Scrapes will be blocked inside append() call, which means there will be a gap between samples. This will first affect range queries
or calls using rate() and such, since the time range requested in the query might have too few samples to calculate anything.
To avoid this we need to remove mmapping from append path, since mmapping is blocking.
But this means that when we cut a new head chunk we need to keep the old one around, so we can mmap it later.
This change makes memSeries.headChunk a linked list, memSeries.headChunk still points to the 'open' head chunk that receives new samples,
while older, yet to be mmapped, chunks are linked to it.
Mmapping is done on a schedule by iterating all memSeries one by one. Thanks to this we control when mmapping is done, since we trigger
it manually, which reduces the risk that it will have to compete for mmap locks with other chunks.
Signed-off-by: Łukasz Mierzwa <l.mierzwa@gmail.com>
2023-07-31 09:10:24 +00:00
|
|
|
c.prev = nil
|
2024-08-23 15:57:43 +00:00
|
|
|
h.memChunkPool.Put(c)
|
2021-08-03 12:14:26 +00:00
|
|
|
}
|
|
|
|
}()
|
|
|
|
|
|
|
|
// This means that the chunk is outside the specified range.
|
2024-08-23 15:57:43 +00:00
|
|
|
if !c.OverlapsClosedInterval(mint, maxt) {
|
2023-02-19 17:34:51 +00:00
|
|
|
return nil, 0, storage.ErrNotFound
|
|
|
|
}
|
|
|
|
|
|
|
|
chk, maxTime := c.chunk, c.maxTime
|
Use a linked list for memSeries.headChunk (#11818)
Currently memSeries holds a single head chunk in-memory and a slice of mmapped chunks.
When append() is called on memSeries it might decide that a new headChunk is needed to use for given append() call.
If that happens it will first mmap existing head chunk and only after that happens it will create a new empty headChunk and continue appending
our sample to it.
Since appending samples uses write lock on memSeries no other read or write can happen until any append is completed.
When we have an append() that must create a new head chunk the whole memSeries is blocked until mmapping of existing head chunk finishes.
Mmapping itself uses a lock as it needs to be serialised, which means that the more chunks to mmap we have the longer each chunk might wait
for it to be mmapped.
If there's enough chunks that require mmapping some memSeries will be locked for long enough that it will start affecting
queries and scrapes.
Queries might timeout, since by default they have a 2 minute timeout set.
Scrapes will be blocked inside append() call, which means there will be a gap between samples. This will first affect range queries
or calls using rate() and such, since the time range requested in the query might have too few samples to calculate anything.
To avoid this we need to remove mmapping from append path, since mmapping is blocking.
But this means that when we cut a new head chunk we need to keep the old one around, so we can mmap it later.
This change makes memSeries.headChunk a linked list, memSeries.headChunk still points to the 'open' head chunk that receives new samples,
while older, yet to be mmapped, chunks are linked to it.
Mmapping is done on a schedule by iterating all memSeries one by one. Thanks to this we control when mmapping is done, since we trigger
it manually, which reduces the risk that it will have to compete for mmap locks with other chunks.
Signed-off-by: Łukasz Mierzwa <l.mierzwa@gmail.com>
2023-07-31 09:10:24 +00:00
|
|
|
if headChunk && isOpen && copyLastChunk {
|
2023-02-19 17:34:51 +00:00
|
|
|
// The caller may ask to copy the head chunk in order to take the
|
|
|
|
// bytes of the chunk without causing the race between read and append.
|
Use a linked list for memSeries.headChunk (#11818)
Currently memSeries holds a single head chunk in-memory and a slice of mmapped chunks.
When append() is called on memSeries it might decide that a new headChunk is needed to use for given append() call.
If that happens it will first mmap existing head chunk and only after that happens it will create a new empty headChunk and continue appending
our sample to it.
Since appending samples uses write lock on memSeries no other read or write can happen until any append is completed.
When we have an append() that must create a new head chunk the whole memSeries is blocked until mmapping of existing head chunk finishes.
Mmapping itself uses a lock as it needs to be serialised, which means that the more chunks to mmap we have the longer each chunk might wait
for it to be mmapped.
If there's enough chunks that require mmapping some memSeries will be locked for long enough that it will start affecting
queries and scrapes.
Queries might timeout, since by default they have a 2 minute timeout set.
Scrapes will be blocked inside append() call, which means there will be a gap between samples. This will first affect range queries
or calls using rate() and such, since the time range requested in the query might have too few samples to calculate anything.
To avoid this we need to remove mmapping from append path, since mmapping is blocking.
But this means that when we cut a new head chunk we need to keep the old one around, so we can mmap it later.
This change makes memSeries.headChunk a linked list, memSeries.headChunk still points to the 'open' head chunk that receives new samples,
while older, yet to be mmapped, chunks are linked to it.
Mmapping is done on a schedule by iterating all memSeries one by one. Thanks to this we control when mmapping is done, since we trigger
it manually, which reduces the risk that it will have to compete for mmap locks with other chunks.
Signed-off-by: Łukasz Mierzwa <l.mierzwa@gmail.com>
2023-07-31 09:10:24 +00:00
|
|
|
b := s.headChunks.chunk.Bytes()
|
2023-02-19 17:34:51 +00:00
|
|
|
newB := make([]byte, len(b))
|
|
|
|
copy(newB, b) // TODO(codesome): Use bytes.Clone() when we upgrade to Go 1.20.
|
|
|
|
// TODO(codesome): Put back in the pool (non-trivial).
|
2024-08-23 15:57:43 +00:00
|
|
|
chk, err = h.opts.ChunkPool.Get(s.headChunks.chunk.Encoding(), newB)
|
2023-02-19 17:34:51 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, 0, err
|
|
|
|
}
|
2021-08-03 12:14:26 +00:00
|
|
|
}
|
|
|
|
|
2023-05-19 08:24:06 +00:00
|
|
|
return &safeHeadChunk{
|
2023-02-19 17:34:51 +00:00
|
|
|
Chunk: chk,
|
2023-02-21 09:30:11 +00:00
|
|
|
s: s,
|
|
|
|
cid: cid,
|
2024-08-23 15:57:43 +00:00
|
|
|
isoState: isoState,
|
2023-02-19 17:34:51 +00:00
|
|
|
}, maxTime, nil
|
2021-08-03 12:14:26 +00:00
|
|
|
}
|
|
|
|
|
2021-11-17 13:05:10 +00:00
|
|
|
// chunk returns the chunk for the HeadChunkID from memory or by m-mapping it from the disk.
|
2023-03-21 09:45:36 +00:00
|
|
|
// If headChunk is false, it means that the returned *memChunk
|
2021-11-17 10:21:27 +00:00
|
|
|
// (and not the chunkenc.Chunk inside it) can be garbage collected after its usage.
|
Use a linked list for memSeries.headChunk (#11818)
Currently memSeries holds a single head chunk in-memory and a slice of mmapped chunks.
When append() is called on memSeries it might decide that a new headChunk is needed to use for given append() call.
If that happens it will first mmap existing head chunk and only after that happens it will create a new empty headChunk and continue appending
our sample to it.
Since appending samples uses write lock on memSeries no other read or write can happen until any append is completed.
When we have an append() that must create a new head chunk the whole memSeries is blocked until mmapping of existing head chunk finishes.
Mmapping itself uses a lock as it needs to be serialised, which means that the more chunks to mmap we have the longer each chunk might wait
for it to be mmapped.
If there's enough chunks that require mmapping some memSeries will be locked for long enough that it will start affecting
queries and scrapes.
Queries might timeout, since by default they have a 2 minute timeout set.
Scrapes will be blocked inside append() call, which means there will be a gap between samples. This will first affect range queries
or calls using rate() and such, since the time range requested in the query might have too few samples to calculate anything.
To avoid this we need to remove mmapping from append path, since mmapping is blocking.
But this means that when we cut a new head chunk we need to keep the old one around, so we can mmap it later.
This change makes memSeries.headChunk a linked list, memSeries.headChunk still points to the 'open' head chunk that receives new samples,
while older, yet to be mmapped, chunks are linked to it.
Mmapping is done on a schedule by iterating all memSeries one by one. Thanks to this we control when mmapping is done, since we trigger
it manually, which reduces the risk that it will have to compete for mmap locks with other chunks.
Signed-off-by: Łukasz Mierzwa <l.mierzwa@gmail.com>
2023-07-31 09:10:24 +00:00
|
|
|
// if isOpen is true, it means that the returned *memChunk is used for appends.
|
|
|
|
func (s *memSeries) chunk(id chunks.HeadChunkID, chunkDiskMapper *chunks.ChunkDiskMapper, memChunkPool *sync.Pool) (chunk *memChunk, headChunk, isOpen bool, err error) {
|
2021-08-03 12:14:26 +00:00
|
|
|
// ix represents the index of chunk in the s.mmappedChunks slice. The chunk id's are
|
|
|
|
// incremented by 1 when new chunk is created, hence (id - firstChunkID) gives the slice index.
|
|
|
|
// The max index for the s.mmappedChunks slice can be len(s.mmappedChunks)-1, hence if the ix
|
Use a linked list for memSeries.headChunk (#11818)
Currently memSeries holds a single head chunk in-memory and a slice of mmapped chunks.
When append() is called on memSeries it might decide that a new headChunk is needed to use for given append() call.
If that happens it will first mmap existing head chunk and only after that happens it will create a new empty headChunk and continue appending
our sample to it.
Since appending samples uses write lock on memSeries no other read or write can happen until any append is completed.
When we have an append() that must create a new head chunk the whole memSeries is blocked until mmapping of existing head chunk finishes.
Mmapping itself uses a lock as it needs to be serialised, which means that the more chunks to mmap we have the longer each chunk might wait
for it to be mmapped.
If there's enough chunks that require mmapping some memSeries will be locked for long enough that it will start affecting
queries and scrapes.
Queries might timeout, since by default they have a 2 minute timeout set.
Scrapes will be blocked inside append() call, which means there will be a gap between samples. This will first affect range queries
or calls using rate() and such, since the time range requested in the query might have too few samples to calculate anything.
To avoid this we need to remove mmapping from append path, since mmapping is blocking.
But this means that when we cut a new head chunk we need to keep the old one around, so we can mmap it later.
This change makes memSeries.headChunk a linked list, memSeries.headChunk still points to the 'open' head chunk that receives new samples,
while older, yet to be mmapped, chunks are linked to it.
Mmapping is done on a schedule by iterating all memSeries one by one. Thanks to this we control when mmapping is done, since we trigger
it manually, which reduces the risk that it will have to compete for mmap locks with other chunks.
Signed-off-by: Łukasz Mierzwa <l.mierzwa@gmail.com>
2023-07-31 09:10:24 +00:00
|
|
|
// is >= len(s.mmappedChunks), it represents one of the chunks on s.headChunks linked list.
|
2024-08-28 03:26:57 +00:00
|
|
|
// The order of elements is different for slice and linked list.
|
Use a linked list for memSeries.headChunk (#11818)
Currently memSeries holds a single head chunk in-memory and a slice of mmapped chunks.
When append() is called on memSeries it might decide that a new headChunk is needed to use for given append() call.
If that happens it will first mmap existing head chunk and only after that happens it will create a new empty headChunk and continue appending
our sample to it.
Since appending samples uses write lock on memSeries no other read or write can happen until any append is completed.
When we have an append() that must create a new head chunk the whole memSeries is blocked until mmapping of existing head chunk finishes.
Mmapping itself uses a lock as it needs to be serialised, which means that the more chunks to mmap we have the longer each chunk might wait
for it to be mmapped.
If there's enough chunks that require mmapping some memSeries will be locked for long enough that it will start affecting
queries and scrapes.
Queries might timeout, since by default they have a 2 minute timeout set.
Scrapes will be blocked inside append() call, which means there will be a gap between samples. This will first affect range queries
or calls using rate() and such, since the time range requested in the query might have too few samples to calculate anything.
To avoid this we need to remove mmapping from append path, since mmapping is blocking.
But this means that when we cut a new head chunk we need to keep the old one around, so we can mmap it later.
This change makes memSeries.headChunk a linked list, memSeries.headChunk still points to the 'open' head chunk that receives new samples,
while older, yet to be mmapped, chunks are linked to it.
Mmapping is done on a schedule by iterating all memSeries one by one. Thanks to this we control when mmapping is done, since we trigger
it manually, which reduces the risk that it will have to compete for mmap locks with other chunks.
Signed-off-by: Łukasz Mierzwa <l.mierzwa@gmail.com>
2023-07-31 09:10:24 +00:00
|
|
|
// For s.mmappedChunks slice newer chunks are appended to it.
|
|
|
|
// For s.headChunks list newer chunks are prepended to it.
|
|
|
|
//
|
|
|
|
// memSeries {
|
|
|
|
// mmappedChunks: [t0, t1, t2]
|
|
|
|
// headChunk: {t5}->{t4}->{t3}
|
|
|
|
// }
|
2021-11-17 13:05:10 +00:00
|
|
|
ix := int(id) - int(s.firstChunkID)
|
Use a linked list for memSeries.headChunk (#11818)
Currently memSeries holds a single head chunk in-memory and a slice of mmapped chunks.
When append() is called on memSeries it might decide that a new headChunk is needed to use for given append() call.
If that happens it will first mmap existing head chunk and only after that happens it will create a new empty headChunk and continue appending
our sample to it.
Since appending samples uses write lock on memSeries no other read or write can happen until any append is completed.
When we have an append() that must create a new head chunk the whole memSeries is blocked until mmapping of existing head chunk finishes.
Mmapping itself uses a lock as it needs to be serialised, which means that the more chunks to mmap we have the longer each chunk might wait
for it to be mmapped.
If there's enough chunks that require mmapping some memSeries will be locked for long enough that it will start affecting
queries and scrapes.
Queries might timeout, since by default they have a 2 minute timeout set.
Scrapes will be blocked inside append() call, which means there will be a gap between samples. This will first affect range queries
or calls using rate() and such, since the time range requested in the query might have too few samples to calculate anything.
To avoid this we need to remove mmapping from append path, since mmapping is blocking.
But this means that when we cut a new head chunk we need to keep the old one around, so we can mmap it later.
This change makes memSeries.headChunk a linked list, memSeries.headChunk still points to the 'open' head chunk that receives new samples,
while older, yet to be mmapped, chunks are linked to it.
Mmapping is done on a schedule by iterating all memSeries one by one. Thanks to this we control when mmapping is done, since we trigger
it manually, which reduces the risk that it will have to compete for mmap locks with other chunks.
Signed-off-by: Łukasz Mierzwa <l.mierzwa@gmail.com>
2023-07-31 09:10:24 +00:00
|
|
|
|
|
|
|
var headChunksLen int
|
|
|
|
if s.headChunks != nil {
|
|
|
|
headChunksLen = s.headChunks.len()
|
2021-08-03 12:14:26 +00:00
|
|
|
}
|
2023-02-19 17:34:51 +00:00
|
|
|
|
Use a linked list for memSeries.headChunk (#11818)
Currently memSeries holds a single head chunk in-memory and a slice of mmapped chunks.
When append() is called on memSeries it might decide that a new headChunk is needed to use for given append() call.
If that happens it will first mmap existing head chunk and only after that happens it will create a new empty headChunk and continue appending
our sample to it.
Since appending samples uses write lock on memSeries no other read or write can happen until any append is completed.
When we have an append() that must create a new head chunk the whole memSeries is blocked until mmapping of existing head chunk finishes.
Mmapping itself uses a lock as it needs to be serialised, which means that the more chunks to mmap we have the longer each chunk might wait
for it to be mmapped.
If there's enough chunks that require mmapping some memSeries will be locked for long enough that it will start affecting
queries and scrapes.
Queries might timeout, since by default they have a 2 minute timeout set.
Scrapes will be blocked inside append() call, which means there will be a gap between samples. This will first affect range queries
or calls using rate() and such, since the time range requested in the query might have too few samples to calculate anything.
To avoid this we need to remove mmapping from append path, since mmapping is blocking.
But this means that when we cut a new head chunk we need to keep the old one around, so we can mmap it later.
This change makes memSeries.headChunk a linked list, memSeries.headChunk still points to the 'open' head chunk that receives new samples,
while older, yet to be mmapped, chunks are linked to it.
Mmapping is done on a schedule by iterating all memSeries one by one. Thanks to this we control when mmapping is done, since we trigger
it manually, which reduces the risk that it will have to compete for mmap locks with other chunks.
Signed-off-by: Łukasz Mierzwa <l.mierzwa@gmail.com>
2023-07-31 09:10:24 +00:00
|
|
|
if ix < 0 || ix > len(s.mmappedChunks)+headChunksLen-1 {
|
|
|
|
return nil, false, false, storage.ErrNotFound
|
2021-08-03 12:14:26 +00:00
|
|
|
}
|
Use a linked list for memSeries.headChunk (#11818)
Currently memSeries holds a single head chunk in-memory and a slice of mmapped chunks.
When append() is called on memSeries it might decide that a new headChunk is needed to use for given append() call.
If that happens it will first mmap existing head chunk and only after that happens it will create a new empty headChunk and continue appending
our sample to it.
Since appending samples uses write lock on memSeries no other read or write can happen until any append is completed.
When we have an append() that must create a new head chunk the whole memSeries is blocked until mmapping of existing head chunk finishes.
Mmapping itself uses a lock as it needs to be serialised, which means that the more chunks to mmap we have the longer each chunk might wait
for it to be mmapped.
If there's enough chunks that require mmapping some memSeries will be locked for long enough that it will start affecting
queries and scrapes.
Queries might timeout, since by default they have a 2 minute timeout set.
Scrapes will be blocked inside append() call, which means there will be a gap between samples. This will first affect range queries
or calls using rate() and such, since the time range requested in the query might have too few samples to calculate anything.
To avoid this we need to remove mmapping from append path, since mmapping is blocking.
But this means that when we cut a new head chunk we need to keep the old one around, so we can mmap it later.
This change makes memSeries.headChunk a linked list, memSeries.headChunk still points to the 'open' head chunk that receives new samples,
while older, yet to be mmapped, chunks are linked to it.
Mmapping is done on a schedule by iterating all memSeries one by one. Thanks to this we control when mmapping is done, since we trigger
it manually, which reduces the risk that it will have to compete for mmap locks with other chunks.
Signed-off-by: Łukasz Mierzwa <l.mierzwa@gmail.com>
2023-07-31 09:10:24 +00:00
|
|
|
|
|
|
|
if ix < len(s.mmappedChunks) {
|
|
|
|
chk, err := chunkDiskMapper.Chunk(s.mmappedChunks[ix].ref)
|
|
|
|
if err != nil {
|
2023-11-16 18:54:41 +00:00
|
|
|
var cerr *chunks.CorruptionErr
|
|
|
|
if errors.As(err, &cerr) {
|
Use a linked list for memSeries.headChunk (#11818)
Currently memSeries holds a single head chunk in-memory and a slice of mmapped chunks.
When append() is called on memSeries it might decide that a new headChunk is needed to use for given append() call.
If that happens it will first mmap existing head chunk and only after that happens it will create a new empty headChunk and continue appending
our sample to it.
Since appending samples uses write lock on memSeries no other read or write can happen until any append is completed.
When we have an append() that must create a new head chunk the whole memSeries is blocked until mmapping of existing head chunk finishes.
Mmapping itself uses a lock as it needs to be serialised, which means that the more chunks to mmap we have the longer each chunk might wait
for it to be mmapped.
If there's enough chunks that require mmapping some memSeries will be locked for long enough that it will start affecting
queries and scrapes.
Queries might timeout, since by default they have a 2 minute timeout set.
Scrapes will be blocked inside append() call, which means there will be a gap between samples. This will first affect range queries
or calls using rate() and such, since the time range requested in the query might have too few samples to calculate anything.
To avoid this we need to remove mmapping from append path, since mmapping is blocking.
But this means that when we cut a new head chunk we need to keep the old one around, so we can mmap it later.
This change makes memSeries.headChunk a linked list, memSeries.headChunk still points to the 'open' head chunk that receives new samples,
while older, yet to be mmapped, chunks are linked to it.
Mmapping is done on a schedule by iterating all memSeries one by one. Thanks to this we control when mmapping is done, since we trigger
it manually, which reduces the risk that it will have to compete for mmap locks with other chunks.
Signed-off-by: Łukasz Mierzwa <l.mierzwa@gmail.com>
2023-07-31 09:10:24 +00:00
|
|
|
panic(err)
|
|
|
|
}
|
|
|
|
return nil, false, false, err
|
2021-08-03 12:14:26 +00:00
|
|
|
}
|
Use a linked list for memSeries.headChunk (#11818)
Currently memSeries holds a single head chunk in-memory and a slice of mmapped chunks.
When append() is called on memSeries it might decide that a new headChunk is needed to use for given append() call.
If that happens it will first mmap existing head chunk and only after that happens it will create a new empty headChunk and continue appending
our sample to it.
Since appending samples uses write lock on memSeries no other read or write can happen until any append is completed.
When we have an append() that must create a new head chunk the whole memSeries is blocked until mmapping of existing head chunk finishes.
Mmapping itself uses a lock as it needs to be serialised, which means that the more chunks to mmap we have the longer each chunk might wait
for it to be mmapped.
If there's enough chunks that require mmapping some memSeries will be locked for long enough that it will start affecting
queries and scrapes.
Queries might timeout, since by default they have a 2 minute timeout set.
Scrapes will be blocked inside append() call, which means there will be a gap between samples. This will first affect range queries
or calls using rate() and such, since the time range requested in the query might have too few samples to calculate anything.
To avoid this we need to remove mmapping from append path, since mmapping is blocking.
But this means that when we cut a new head chunk we need to keep the old one around, so we can mmap it later.
This change makes memSeries.headChunk a linked list, memSeries.headChunk still points to the 'open' head chunk that receives new samples,
while older, yet to be mmapped, chunks are linked to it.
Mmapping is done on a schedule by iterating all memSeries one by one. Thanks to this we control when mmapping is done, since we trigger
it manually, which reduces the risk that it will have to compete for mmap locks with other chunks.
Signed-off-by: Łukasz Mierzwa <l.mierzwa@gmail.com>
2023-07-31 09:10:24 +00:00
|
|
|
mc := memChunkPool.Get().(*memChunk)
|
|
|
|
mc.chunk = chk
|
|
|
|
mc.minTime = s.mmappedChunks[ix].minTime
|
|
|
|
mc.maxTime = s.mmappedChunks[ix].maxTime
|
|
|
|
return mc, false, false, nil
|
2021-08-03 12:14:26 +00:00
|
|
|
}
|
Use a linked list for memSeries.headChunk (#11818)
Currently memSeries holds a single head chunk in-memory and a slice of mmapped chunks.
When append() is called on memSeries it might decide that a new headChunk is needed to use for given append() call.
If that happens it will first mmap existing head chunk and only after that happens it will create a new empty headChunk and continue appending
our sample to it.
Since appending samples uses write lock on memSeries no other read or write can happen until any append is completed.
When we have an append() that must create a new head chunk the whole memSeries is blocked until mmapping of existing head chunk finishes.
Mmapping itself uses a lock as it needs to be serialised, which means that the more chunks to mmap we have the longer each chunk might wait
for it to be mmapped.
If there's enough chunks that require mmapping some memSeries will be locked for long enough that it will start affecting
queries and scrapes.
Queries might timeout, since by default they have a 2 minute timeout set.
Scrapes will be blocked inside append() call, which means there will be a gap between samples. This will first affect range queries
or calls using rate() and such, since the time range requested in the query might have too few samples to calculate anything.
To avoid this we need to remove mmapping from append path, since mmapping is blocking.
But this means that when we cut a new head chunk we need to keep the old one around, so we can mmap it later.
This change makes memSeries.headChunk a linked list, memSeries.headChunk still points to the 'open' head chunk that receives new samples,
while older, yet to be mmapped, chunks are linked to it.
Mmapping is done on a schedule by iterating all memSeries one by one. Thanks to this we control when mmapping is done, since we trigger
it manually, which reduces the risk that it will have to compete for mmap locks with other chunks.
Signed-off-by: Łukasz Mierzwa <l.mierzwa@gmail.com>
2023-07-31 09:10:24 +00:00
|
|
|
|
|
|
|
ix -= len(s.mmappedChunks)
|
|
|
|
|
|
|
|
offset := headChunksLen - ix - 1
|
|
|
|
// headChunks is a linked list where first element is the most recent one and the last one is the oldest.
|
|
|
|
// This order is reversed when compared with mmappedChunks, since mmappedChunks[0] is the oldest chunk,
|
|
|
|
// while headChunk.atOffset(0) would give us the most recent chunk.
|
|
|
|
// So when calling headChunk.atOffset() we need to reverse the value of ix.
|
|
|
|
elem := s.headChunks.atOffset(offset)
|
|
|
|
if elem == nil {
|
|
|
|
// This should never really happen and would mean that headChunksLen value is NOT equal
|
|
|
|
// to the length of the headChunks list.
|
|
|
|
return nil, false, false, storage.ErrNotFound
|
|
|
|
}
|
|
|
|
return elem, true, offset == 0, nil
|
2021-08-03 12:14:26 +00:00
|
|
|
}
|
|
|
|
|
2024-08-23 16:10:02 +00:00
|
|
|
// oooChunk returns the chunk for the HeadChunkID by m-mapping it from the disk.
|
|
|
|
// It never returns the head OOO chunk.
|
|
|
|
func (s *memSeries) oooChunk(id chunks.HeadChunkID, chunkDiskMapper *chunks.ChunkDiskMapper, memChunkPool *sync.Pool) (chunk chunkenc.Chunk, maxTime int64, err error) {
|
|
|
|
// ix represents the index of chunk in the s.ooo.oooMmappedChunks slice. The chunk id's are
|
|
|
|
// incremented by 1 when new chunk is created, hence (id - firstOOOChunkID) gives the slice index.
|
|
|
|
ix := int(id) - int(s.ooo.firstOOOChunkID)
|
2022-09-20 17:05:50 +00:00
|
|
|
|
2024-08-23 16:10:02 +00:00
|
|
|
if ix < 0 || ix >= len(s.ooo.oooMmappedChunks) {
|
|
|
|
return nil, 0, storage.ErrNotFound
|
2022-09-20 17:05:50 +00:00
|
|
|
}
|
|
|
|
|
2024-08-23 16:10:02 +00:00
|
|
|
chk, err := chunkDiskMapper.Chunk(s.ooo.oooMmappedChunks[ix].ref)
|
|
|
|
return chk, s.ooo.oooMmappedChunks[ix].maxTime, err
|
2022-09-20 17:05:50 +00:00
|
|
|
}
|
|
|
|
|
2023-10-03 20:09:25 +00:00
|
|
|
// safeHeadChunk makes sure that the chunk can be accessed without a race condition.
|
2023-05-19 08:24:06 +00:00
|
|
|
type safeHeadChunk struct {
|
2021-08-03 12:14:26 +00:00
|
|
|
chunkenc.Chunk
|
2023-02-21 09:30:11 +00:00
|
|
|
s *memSeries
|
|
|
|
cid chunks.HeadChunkID
|
|
|
|
isoState *isolationState
|
2021-08-03 12:14:26 +00:00
|
|
|
}
|
|
|
|
|
2023-05-19 08:24:06 +00:00
|
|
|
func (c *safeHeadChunk) Iterator(reuseIter chunkenc.Iterator) chunkenc.Iterator {
|
2021-08-03 12:14:26 +00:00
|
|
|
c.s.Lock()
|
2023-02-21 09:30:11 +00:00
|
|
|
it := c.s.iterator(c.cid, c.Chunk, c.isoState, reuseIter)
|
2021-08-03 12:14:26 +00:00
|
|
|
c.s.Unlock()
|
|
|
|
return it
|
|
|
|
}
|
|
|
|
|
2021-11-17 13:05:10 +00:00
|
|
|
// iterator returns a chunk iterator for the requested chunkID, or a NopIterator if the requested ID is out of range.
|
2021-08-03 12:14:26 +00:00
|
|
|
// It is unsafe to call this concurrently with s.append(...) without holding the series lock.
|
2023-02-21 09:30:11 +00:00
|
|
|
func (s *memSeries) iterator(id chunks.HeadChunkID, c chunkenc.Chunk, isoState *isolationState, it chunkenc.Iterator) chunkenc.Iterator {
|
2021-11-17 13:05:10 +00:00
|
|
|
ix := int(id) - int(s.firstChunkID)
|
2021-08-03 12:14:26 +00:00
|
|
|
|
2023-02-21 09:30:11 +00:00
|
|
|
numSamples := c.NumSamples()
|
2021-08-03 12:14:26 +00:00
|
|
|
stopAfter := numSamples
|
|
|
|
|
2021-11-19 10:11:32 +00:00
|
|
|
if isoState != nil && !isoState.IsolationDisabled() {
|
2021-08-03 12:14:26 +00:00
|
|
|
totalSamples := 0 // Total samples in this series.
|
|
|
|
previousSamples := 0 // Samples before this chunk.
|
|
|
|
|
|
|
|
for j, d := range s.mmappedChunks {
|
|
|
|
totalSamples += int(d.numSamples)
|
|
|
|
if j < ix {
|
|
|
|
previousSamples += int(d.numSamples)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
Use a linked list for memSeries.headChunk (#11818)
Currently memSeries holds a single head chunk in-memory and a slice of mmapped chunks.
When append() is called on memSeries it might decide that a new headChunk is needed to use for given append() call.
If that happens it will first mmap existing head chunk and only after that happens it will create a new empty headChunk and continue appending
our sample to it.
Since appending samples uses write lock on memSeries no other read or write can happen until any append is completed.
When we have an append() that must create a new head chunk the whole memSeries is blocked until mmapping of existing head chunk finishes.
Mmapping itself uses a lock as it needs to be serialised, which means that the more chunks to mmap we have the longer each chunk might wait
for it to be mmapped.
If there's enough chunks that require mmapping some memSeries will be locked for long enough that it will start affecting
queries and scrapes.
Queries might timeout, since by default they have a 2 minute timeout set.
Scrapes will be blocked inside append() call, which means there will be a gap between samples. This will first affect range queries
or calls using rate() and such, since the time range requested in the query might have too few samples to calculate anything.
To avoid this we need to remove mmapping from append path, since mmapping is blocking.
But this means that when we cut a new head chunk we need to keep the old one around, so we can mmap it later.
This change makes memSeries.headChunk a linked list, memSeries.headChunk still points to the 'open' head chunk that receives new samples,
while older, yet to be mmapped, chunks are linked to it.
Mmapping is done on a schedule by iterating all memSeries one by one. Thanks to this we control when mmapping is done, since we trigger
it manually, which reduces the risk that it will have to compete for mmap locks with other chunks.
Signed-off-by: Łukasz Mierzwa <l.mierzwa@gmail.com>
2023-07-31 09:10:24 +00:00
|
|
|
ix -= len(s.mmappedChunks)
|
|
|
|
if s.headChunks != nil {
|
|
|
|
// Iterate all head chunks from the oldest to the newest.
|
|
|
|
headChunksLen := s.headChunks.len()
|
|
|
|
for j := headChunksLen - 1; j >= 0; j-- {
|
|
|
|
chk := s.headChunks.atOffset(j)
|
|
|
|
chkSamples := chk.chunk.NumSamples()
|
|
|
|
totalSamples += chkSamples
|
|
|
|
// Chunk ID is len(s.mmappedChunks) + $(headChunks list position).
|
|
|
|
// Where $(headChunks list position) is zero for the oldest chunk and $(s.headChunks.len() - 1)
|
|
|
|
// for the newest (open) chunk.
|
|
|
|
if headChunksLen-1-j < ix {
|
|
|
|
previousSamples += chkSamples
|
|
|
|
}
|
|
|
|
}
|
2021-08-03 12:14:26 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Removing the extra transactionIDs that are relevant for samples that
|
|
|
|
// come after this chunk, from the total transactionIDs.
|
2023-10-21 12:38:01 +00:00
|
|
|
appendIDsToConsider := int(s.txs.txIDCount) - (totalSamples - (previousSamples + numSamples))
|
2021-08-03 12:14:26 +00:00
|
|
|
|
|
|
|
// Iterate over the appendIDs, find the first one that the isolation state says not
|
|
|
|
// to return.
|
|
|
|
it := s.txs.iterator()
|
|
|
|
for index := 0; index < appendIDsToConsider; index++ {
|
|
|
|
appendID := it.At()
|
|
|
|
if appendID <= isoState.maxAppendID { // Easy check first.
|
|
|
|
if _, ok := isoState.incompleteAppends[appendID]; !ok {
|
|
|
|
it.Next()
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
}
|
|
|
|
stopAfter = numSamples - (appendIDsToConsider - index)
|
|
|
|
if stopAfter < 0 {
|
|
|
|
stopAfter = 0 // Stopped in a previous chunk.
|
|
|
|
}
|
|
|
|
break
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if stopAfter == 0 {
|
|
|
|
return chunkenc.NewNopIterator()
|
|
|
|
}
|
2022-09-27 14:02:05 +00:00
|
|
|
if stopAfter == numSamples {
|
2023-02-21 09:30:11 +00:00
|
|
|
return c.Iterator(it)
|
2021-08-03 12:14:26 +00:00
|
|
|
}
|
2023-02-21 09:30:11 +00:00
|
|
|
return makeStopIterator(c, it, stopAfter)
|
2021-08-03 12:14:26 +00:00
|
|
|
}
|
|
|
|
|
2021-11-17 10:21:27 +00:00
|
|
|
// stopIterator wraps an Iterator, but only returns the first
|
|
|
|
// stopAfter values, if initialized with i=-1.
|
2021-08-03 12:14:26 +00:00
|
|
|
type stopIterator struct {
|
|
|
|
chunkenc.Iterator
|
|
|
|
|
|
|
|
i, stopAfter int
|
|
|
|
}
|
|
|
|
|
2021-11-29 07:54:23 +00:00
|
|
|
func (it *stopIterator) Next() chunkenc.ValueType {
|
2021-08-03 12:14:26 +00:00
|
|
|
if it.i+1 >= it.stopAfter {
|
2021-11-29 07:54:23 +00:00
|
|
|
return chunkenc.ValNone
|
2021-08-03 12:14:26 +00:00
|
|
|
}
|
|
|
|
it.i++
|
|
|
|
return it.Iterator.Next()
|
|
|
|
}
|
2022-10-05 20:14:49 +00:00
|
|
|
|
|
|
|
func makeStopIterator(c chunkenc.Chunk, it chunkenc.Iterator, stopAfter int) chunkenc.Iterator {
|
|
|
|
// Re-use the Iterator object if it is a stopIterator.
|
|
|
|
if stopIter, ok := it.(*stopIterator); ok {
|
|
|
|
stopIter.Iterator = c.Iterator(stopIter.Iterator)
|
|
|
|
stopIter.i = -1
|
|
|
|
stopIter.stopAfter = stopAfter
|
|
|
|
return stopIter
|
|
|
|
}
|
|
|
|
|
|
|
|
return &stopIterator{
|
|
|
|
Iterator: c.Iterator(it),
|
|
|
|
i: -1,
|
|
|
|
stopAfter: stopAfter,
|
|
|
|
}
|
|
|
|
}
|