Browse Source

Merge branch 'release-2.53' into merge-2.53.1

pull/14455/head
Bryan Boreham 4 months ago
parent
commit
646e1e847d
  1. 8
      CHANGELOG.md
  2. 2
      VERSION
  3. 21
      tsdb/wlog/watcher.go
  4. 110
      tsdb/wlog/watcher_test.go
  5. 4
      web/ui/module/codemirror-promql/package.json
  6. 2
      web/ui/module/lezer-promql/package.json
  7. 14
      web/ui/package-lock.json
  8. 2
      web/ui/package.json
  9. 4
      web/ui/react-app/package.json

8
CHANGELOG.md

@ -2,6 +2,14 @@
## unreleased
## 2.53.1 / 2024-07-10
Fix a bug which would drop samples in remote-write if the sending flow stalled
for longer than it takes to write one "WAL segment". How long this takes depends on the size
of your Prometheus; as a rough guide with 10 million series it is about 2-3 minutes.
* [BUGFIX] Remote-write: stop dropping samples in catch-up #14446
## 2.53.0 / 2024-06-16
This release changes the default for GOGC, the Go runtime control for the trade-off between excess memory use and CPU usage. We have found that Prometheus operates with minimal additional CPU usage, but greatly reduced memory by adjusting the upstream Go default from 100 to 75.

2
VERSION

@ -1 +1 @@
2.53.0
2.53.1

21
tsdb/wlog/watcher.go

@ -265,6 +265,11 @@ func (w *Watcher) loop() {
// Run the watcher, which will tail the WAL until the quit channel is closed
// or an error case is hit.
func (w *Watcher) Run() error {
_, lastSegment, err := w.firstAndLast()
if err != nil {
return fmt.Errorf("wal.Segments: %w", err)
}
// We want to ensure this is false across iterations since
// Run will be called again if there was a failure to read the WAL.
w.sendSamples = false
@ -289,20 +294,14 @@ func (w *Watcher) Run() error {
return err
}
level.Debug(w.logger).Log("msg", "Tailing WAL", "lastCheckpoint", lastCheckpoint, "checkpointIndex", checkpointIndex, "currentSegment", currentSegment)
level.Debug(w.logger).Log("msg", "Tailing WAL", "lastCheckpoint", lastCheckpoint, "checkpointIndex", checkpointIndex, "currentSegment", currentSegment, "lastSegment", lastSegment)
for !isClosed(w.quit) {
w.currentSegmentMetric.Set(float64(currentSegment))
// Re-check on each iteration in case a new segment was added,
// because watch() will wait for notifications on the last segment.
_, lastSegment, err := w.firstAndLast()
if err != nil {
return fmt.Errorf("wal.Segments: %w", err)
}
tail := currentSegment >= lastSegment
level.Debug(w.logger).Log("msg", "Processing segment", "currentSegment", currentSegment, "lastSegment", lastSegment)
if err := w.watch(currentSegment, tail); err != nil && !errors.Is(err, ErrIgnorable) {
// On start, after reading the existing WAL for series records, we have a pointer to what is the latest segment.
// On subsequent calls to this function, currentSegment will have been incremented and we should open that segment.
level.Debug(w.logger).Log("msg", "Processing segment", "currentSegment", currentSegment)
if err := w.watch(currentSegment, currentSegment >= lastSegment); err != nil && !errors.Is(err, ErrIgnorable) {
return err
}

110
tsdb/wlog/watcher_test.go

@ -17,6 +17,7 @@ import (
"math/rand"
"os"
"path"
"runtime"
"sync"
"testing"
"time"
@ -700,11 +701,46 @@ func TestRun_StartupTime(t *testing.T) {
}
}
func generateWALRecords(w *WL, segment, seriesCount, samplesCount int) error {
enc := record.Encoder{}
for j := 0; j < seriesCount; j++ {
ref := j + (segment * 100)
series := enc.Series([]record.RefSeries{
{
Ref: chunks.HeadSeriesRef(ref),
Labels: labels.FromStrings("__name__", fmt.Sprintf("metric_%d", segment)),
},
}, nil)
if err := w.Log(series); err != nil {
return err
}
for k := 0; k < samplesCount; k++ {
inner := rand.Intn(ref + 1)
sample := enc.Samples([]record.RefSample{
{
Ref: chunks.HeadSeriesRef(inner),
T: int64(segment),
V: float64(segment),
},
}, nil)
if err := w.Log(sample); err != nil {
return err
}
}
}
return nil
}
func TestRun_AvoidNotifyWhenBehind(t *testing.T) {
const pageSize = 32 * 1024
const segments = 10
const seriesCount = 20
const samplesCount = 300
if runtime.GOOS == "windows" { // Takes a really long time, perhaps because min sleep time is 15ms.
t.SkipNow()
}
const segmentSize = pageSize // Smallest allowed segment size.
const segmentsToWrite = 5
const segmentsToRead = segmentsToWrite - 1
const seriesCount = 10
const samplesCount = 50
// This test can take longer than intended to finish in cloud CI.
readTimeout := 10 * time.Second
@ -717,73 +753,37 @@ func TestRun_AvoidNotifyWhenBehind(t *testing.T) {
err := os.Mkdir(wdir, 0o777)
require.NoError(t, err)
enc := record.Encoder{}
w, err := NewSize(nil, nil, wdir, pageSize, compress)
w, err := NewSize(nil, nil, wdir, segmentSize, compress)
require.NoError(t, err)
var wg sync.WaitGroup
// add one segment initially to ensure there's a value > 0 for the last segment id
for i := 0; i < 1; i++ {
for j := 0; j < seriesCount; j++ {
ref := j + (i * 100)
series := enc.Series([]record.RefSeries{
{
Ref: chunks.HeadSeriesRef(ref),
Labels: labels.FromStrings("__name__", fmt.Sprintf("metric_%d", i)),
},
}, nil)
require.NoError(t, w.Log(series))
for k := 0; k < samplesCount; k++ {
inner := rand.Intn(ref + 1)
sample := enc.Samples([]record.RefSample{
{
Ref: chunks.HeadSeriesRef(inner),
T: int64(i),
V: float64(i),
},
}, nil)
require.NoError(t, w.Log(sample))
}
}
}
// Generate one segment initially to ensure that watcher.Run() finds at least one segment on disk.
require.NoError(t, generateWALRecords(w, 0, seriesCount, samplesCount))
w.NextSegment() // Force creation of the next segment
wg.Add(1)
go func() {
defer wg.Done()
for i := 1; i < segments; i++ {
for j := 0; j < seriesCount; j++ {
ref := j + (i * 100)
series := enc.Series([]record.RefSeries{
{
Ref: chunks.HeadSeriesRef(ref),
Labels: labels.FromStrings("__name__", fmt.Sprintf("metric_%d", i)),
},
}, nil)
require.NoError(t, w.Log(series))
for k := 0; k < samplesCount; k++ {
inner := rand.Intn(ref + 1)
sample := enc.Samples([]record.RefSample{
{
Ref: chunks.HeadSeriesRef(inner),
T: int64(i),
V: float64(i),
},
}, nil)
require.NoError(t, w.Log(sample))
}
}
for i := 1; i < segmentsToWrite; i++ {
require.NoError(t, generateWALRecords(w, i, seriesCount, samplesCount))
w.NextSegment()
}
}()
wt := newWriteToMock(time.Millisecond)
watcher := NewWatcher(wMetrics, nil, nil, "", wt, dir, false, false, false)
watcher.MaxSegment = segments
watcher.MaxSegment = segmentsToRead
watcher.setMetrics()
startTime := time.Now()
err = watcher.Run()
wg.Wait()
require.Less(t, time.Since(startTime), readTimeout)
// But samples records shouldn't get dropped
retry(t, defaultRetryInterval, defaultRetries, func() bool {
return wt.checkNumSeries() > 0
})
require.Equal(t, segmentsToRead*seriesCount*samplesCount, wt.samplesAppended)
require.NoError(t, err)
require.NoError(t, w.Close())
})

4
web/ui/module/codemirror-promql/package.json

@ -1,6 +1,6 @@
{
"name": "@prometheus-io/codemirror-promql",
"version": "0.53.0",
"version": "0.53.1",
"description": "a CodeMirror mode for the PromQL language",
"types": "dist/esm/index.d.ts",
"module": "dist/esm/index.js",
@ -29,7 +29,7 @@
},
"homepage": "https://github.com/prometheus/prometheus/blob/main/web/ui/module/codemirror-promql/README.md",
"dependencies": {
"@prometheus-io/lezer-promql": "0.53.0",
"@prometheus-io/lezer-promql": "0.53.1",
"lru-cache": "^7.18.3"
},
"devDependencies": {

2
web/ui/module/lezer-promql/package.json

@ -1,6 +1,6 @@
{
"name": "@prometheus-io/lezer-promql",
"version": "0.53.0",
"version": "0.53.1",
"description": "lezer-based PromQL grammar",
"main": "dist/index.cjs",
"type": "module",

14
web/ui/package-lock.json generated

@ -1,12 +1,12 @@
{
"name": "prometheus-io",
"version": "0.53.0",
"version": "0.53.1",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "prometheus-io",
"version": "0.53.0",
"version": "0.53.1",
"workspaces": [
"react-app",
"module/*"
@ -30,10 +30,10 @@
},
"module/codemirror-promql": {
"name": "@prometheus-io/codemirror-promql",
"version": "0.53.0",
"version": "0.53.1",
"license": "Apache-2.0",
"dependencies": {
"@prometheus-io/lezer-promql": "0.53.0",
"@prometheus-io/lezer-promql": "0.53.1",
"lru-cache": "^7.18.3"
},
"devDependencies": {
@ -69,7 +69,7 @@
},
"module/lezer-promql": {
"name": "@prometheus-io/lezer-promql",
"version": "0.53.0",
"version": "0.53.1",
"license": "Apache-2.0",
"devDependencies": {
"@lezer/generator": "^1.7.0",
@ -19331,7 +19331,7 @@
},
"react-app": {
"name": "@prometheus-io/app",
"version": "0.53.0",
"version": "0.53.1",
"dependencies": {
"@codemirror/autocomplete": "^6.16.2",
"@codemirror/commands": "^6.6.0",
@ -19349,7 +19349,7 @@
"@lezer/lr": "^1.4.1",
"@nexucis/fuzzy": "^0.4.1",
"@nexucis/kvsearch": "^0.8.1",
"@prometheus-io/codemirror-promql": "0.53.0",
"@prometheus-io/codemirror-promql": "0.53.1",
"bootstrap": "^4.6.2",
"css.escape": "^1.5.1",
"downshift": "^9.0.6",

2
web/ui/package.json

@ -28,5 +28,5 @@
"ts-jest": "^29.1.4",
"typescript": "^4.9.5"
},
"version": "0.53.0"
"version": "0.53.1"
}

4
web/ui/react-app/package.json

@ -1,6 +1,6 @@
{
"name": "@prometheus-io/app",
"version": "0.53.0",
"version": "0.53.1",
"private": true,
"dependencies": {
"@codemirror/autocomplete": "^6.16.2",
@ -19,7 +19,7 @@
"@lezer/lr": "^1.4.1",
"@nexucis/fuzzy": "^0.4.1",
"@nexucis/kvsearch": "^0.8.1",
"@prometheus-io/codemirror-promql": "0.53.0",
"@prometheus-io/codemirror-promql": "0.53.1",
"bootstrap": "^4.6.2",
"css.escape": "^1.5.1",
"downshift": "^9.0.6",

Loading…
Cancel
Save