fix!: stop unbounded memory usage from query log

Resolves: #15433

When I converted prometheus to use slog in #14906, I update both the
`QueryLogger` interface, as well as how the log calls to the
`QueryLogger` were built up in `promql.Engine.exec()`. The backing
logger for the `QueryLogger` in the engine is a
`util/logging.JSONFileLogger`, and it's implementation of the `With()`
method updates the logger the logger in place with the new keyvals added
onto the underlying slog.Logger, which means they get inherited onto
everything after. All subsequent calls to `With()`, even in later
queries, would continue to then append on more and more keyvals for the
various params and fields built up in the logger. In turn, this causes
unbounded growth of the logger, leading to increased memory usage, and
in at least one report was the likely cause of an OOM kill. More
information can be found in the issue and the linked slack thread.

This commit does a few things:

- It was referenced in feedback in #14906 that it would've been better
  to not change the `QueryLogger` interface if possible, this PR
proposes changes that bring it closer to alignment with the pre-3.0
`QueryLogger` interface contract
- reverts `promql.Engine.exec()`'s usage of the query logger to the
  pattern of building up an array of args to pass at once to the end log
call. Avoiding the repetitious calls to `.With()` are what resolve the
issue with the logger growth/memory usage.
- updates the scrape failure logger to use the update `QueryLogger`
  methods in the contract.
- updates tests accordingly
- cleans up unused methods

Builds and passes tests successfully. Tested locally and confirmed I
could no longer reproduce the issue/it resolved the issue.

Signed-off-by: TJ Hoplock <t.hoplock@gmail.com>
pull/15434/head
TJ Hoplock 6 days ago
parent bafb4d68de
commit 3e24e84172

@ -126,10 +126,7 @@ type QueryEngine interface {
// QueryLogger is an interface that can be used to log all the queries logged // QueryLogger is an interface that can be used to log all the queries logged
// by the engine. // by the engine.
type QueryLogger interface { type QueryLogger interface {
Error(msg string, args ...any) Log(context.Context, slog.Level, string, ...any)
Info(msg string, args ...any)
Debug(msg string, args ...any)
Warn(msg string, args ...any)
With(args ...any) With(args ...any)
Close() error Close() error
} }
@ -637,20 +634,20 @@ func (ng *Engine) exec(ctx context.Context, q *query) (v parser.Value, ws annota
// The step provided by the user is in seconds. // The step provided by the user is in seconds.
params["step"] = int64(eq.Interval / (time.Second / time.Nanosecond)) params["step"] = int64(eq.Interval / (time.Second / time.Nanosecond))
} }
l.With("params", params) f := []interface{}{"params", params}
if err != nil { if err != nil {
l.With("error", err) f = append(f, "error", err)
} }
l.With("stats", stats.NewQueryStats(q.Stats())) f = append(f, "stats", stats.NewQueryStats(q.Stats()))
if span := trace.SpanFromContext(ctx); span != nil { if span := trace.SpanFromContext(ctx); span != nil {
l.With("spanID", span.SpanContext().SpanID()) f = append(f, "spanID", span.SpanContext().SpanID())
} }
if origin := ctx.Value(QueryOrigin{}); origin != nil { if origin := ctx.Value(QueryOrigin{}); origin != nil {
for k, v := range origin.(map[string]interface{}) { for k, v := range origin.(map[string]interface{}) {
l.With(k, v) f = append(f, k, v)
} }
} }
l.Info("promql query logged") l.Log(context.Background(), slog.LevelInfo, "promql query logged", f...)
// TODO: @tjhop -- do we still need this metric/error log if logger doesn't return errors? // TODO: @tjhop -- do we still need this metric/error log if logger doesn't return errors?
// ng.metrics.queryLogFailures.Inc() // ng.metrics.queryLogFailures.Inc()
// ng.logger.Error("can't log query", "err", err) // ng.logger.Error("can't log query", "err", err)

@ -17,6 +17,7 @@ import (
"context" "context"
"errors" "errors"
"fmt" "fmt"
"log/slog"
"math" "math"
"sort" "sort"
"strconv" "strconv"
@ -2169,31 +2170,10 @@ func (f *FakeQueryLogger) Close() error {
} }
// It implements the promql.QueryLogger interface. // It implements the promql.QueryLogger interface.
func (f *FakeQueryLogger) Info(msg string, args ...any) { func (f *FakeQueryLogger) Log(ctx context.Context, level slog.Level, msg string, args ...any) {
log := append([]any{msg}, args...) // Test usage only really cares about existence of keyvals passed in
log = append(log, f.attrs...) // via args, just append in the log message before handling the
f.attrs = f.attrs[:0] // provided args and any embedded kvs added via `.With()` on f.attrs.
f.logs = append(f.logs, log...)
}
// It implements the promql.QueryLogger interface.
func (f *FakeQueryLogger) Error(msg string, args ...any) {
log := append([]any{msg}, args...)
log = append(log, f.attrs...)
f.attrs = f.attrs[:0]
f.logs = append(f.logs, log...)
}
// It implements the promql.QueryLogger interface.
func (f *FakeQueryLogger) Warn(msg string, args ...any) {
log := append([]any{msg}, args...)
log = append(log, f.attrs...)
f.attrs = f.attrs[:0]
f.logs = append(f.logs, log...)
}
// It implements the promql.QueryLogger interface.
func (f *FakeQueryLogger) Debug(msg string, args ...any) {
log := append([]any{msg}, args...) log := append([]any{msg}, args...)
log = append(log, f.attrs...) log = append(log, f.attrs...)
f.attrs = f.attrs[:0] f.attrs = f.attrs[:0]

@ -1421,7 +1421,7 @@ func (sl *scrapeLoop) scrapeAndReport(last, appendTime time.Time, errc chan<- er
sl.l.Debug("Scrape failed", "err", scrapeErr) sl.l.Debug("Scrape failed", "err", scrapeErr)
sl.scrapeFailureLoggerMtx.RLock() sl.scrapeFailureLoggerMtx.RLock()
if sl.scrapeFailureLogger != nil { if sl.scrapeFailureLogger != nil {
sl.scrapeFailureLogger.Error("err", scrapeErr) sl.scrapeFailureLogger.Log(context.Background(), slog.LevelError, scrapeErr.Error())
} }
sl.scrapeFailureLoggerMtx.RUnlock() sl.scrapeFailureLoggerMtx.RUnlock()
if errc != nil { if errc != nil {

@ -14,6 +14,7 @@
package logging package logging
import ( import (
"context"
"fmt" "fmt"
"log/slog" "log/slog"
"os" "os"
@ -57,26 +58,8 @@ func (l *JSONFileLogger) With(args ...any) {
l.logger = l.logger.With(args...) l.logger = l.logger.With(args...)
} }
// Info calls the `Info()` method on the underlying `log/slog.Logger` with the // Log calls the `Log()` method on the underlying `log/slog.Logger` with the
// provided msg and args. It implements the promql.QueryLogger interface. // provided msg and args. It implements the promql.QueryLogger interface.
func (l *JSONFileLogger) Info(msg string, args ...any) { func (l *JSONFileLogger) Log(ctx context.Context, level slog.Level, msg string, args ...any) {
l.logger.Info(msg, args...) l.logger.Log(ctx, level, msg, args...)
}
// Error calls the `Error()` method on the underlying `log/slog.Logger` with the
// provided msg and args. It implements the promql.QueryLogger interface.
func (l *JSONFileLogger) Error(msg string, args ...any) {
l.logger.Error(msg, args...)
}
// Debug calls the `Debug()` method on the underlying `log/slog.Logger` with the
// provided msg and args. It implements the promql.QueryLogger interface.
func (l *JSONFileLogger) Debug(msg string, args ...any) {
l.logger.Debug(msg, args...)
}
// Warn calls the `Warn()` method on the underlying `log/slog.Logger` with the
// provided msg and args. It implements the promql.QueryLogger interface.
func (l *JSONFileLogger) Warn(msg string, args ...any) {
l.logger.Warn(msg, args...)
} }

@ -14,6 +14,8 @@
package logging package logging
import ( import (
"context"
"log/slog"
"os" "os"
"strings" "strings"
"testing" "testing"
@ -34,7 +36,7 @@ func TestJSONFileLogger_basic(t *testing.T) {
require.NoError(t, err) require.NoError(t, err)
require.NotNil(t, l, "logger can't be nil") require.NotNil(t, l, "logger can't be nil")
l.Info("test", "hello", "world") l.Log(context.Background(), slog.LevelInfo, "test", "hello", "world")
require.NoError(t, err) require.NoError(t, err)
r := make([]byte, 1024) r := make([]byte, 1024)
_, err = f.Read(r) _, err = f.Read(r)
@ -64,14 +66,14 @@ func TestJSONFileLogger_parallel(t *testing.T) {
require.NoError(t, err) require.NoError(t, err)
require.NotNil(t, l, "logger can't be nil") require.NotNil(t, l, "logger can't be nil")
l.Info("test", "hello", "world") l.Log(context.Background(), slog.LevelInfo, "test", "hello", "world")
require.NoError(t, err) require.NoError(t, err)
l2, err := NewJSONFileLogger(f.Name()) l2, err := NewJSONFileLogger(f.Name())
require.NoError(t, err) require.NoError(t, err)
require.NotNil(t, l, "logger can't be nil") require.NotNil(t, l, "logger can't be nil")
l2.Info("test", "hello", "world") l2.Log(context.Background(), slog.LevelInfo, "test", "hello", "world")
require.NoError(t, err) require.NoError(t, err)
err = l.Close() err = l.Close()

Loading…
Cancel
Save