From 82b5f1d8b12cd4a676f1c77051691d29bd211cad Mon Sep 17 00:00:00 2001 From: Julien Pivotto Date: Tue, 8 Dec 2020 21:55:45 +0100 Subject: [PATCH 1/2] Backfill: Use mmap to reuse parser code Signed-off-by: Julien Pivotto --- cmd/promtool/backfill.go | 44 +++---------------------- cmd/promtool/backfill_test.go | 62 ++++++++++++++++++++++++----------- cmd/promtool/tsdb.go | 7 ++-- 3 files changed, 51 insertions(+), 62 deletions(-) diff --git a/cmd/promtool/backfill.go b/cmd/promtool/backfill.go index b98d430e2..6848bb37a 100644 --- a/cmd/promtool/backfill.go +++ b/cmd/promtool/backfill.go @@ -14,51 +14,18 @@ package main import ( - "bufio" "context" "io" "math" - "os" "github.com/go-kit/kit/log" "github.com/pkg/errors" - "github.com/prometheus/common/expfmt" "github.com/prometheus/prometheus/pkg/labels" "github.com/prometheus/prometheus/pkg/textparse" "github.com/prometheus/prometheus/tsdb" tsdb_errors "github.com/prometheus/prometheus/tsdb/errors" ) -// OpenMetricsParser implements textparse.Parser. -type OpenMetricsParser struct { - textparse.Parser - s *bufio.Scanner -} - -// NewOpenMetricsParser returns an OpenMetricsParser reading from the provided reader. -func NewOpenMetricsParser(r io.Reader) *OpenMetricsParser { - return &OpenMetricsParser{s: bufio.NewScanner(r)} -} - -// Next advances the parser to the next sample. It returns io.EOF if no -// more samples were read. -func (p *OpenMetricsParser) Next() (textparse.Entry, error) { - for p.s.Scan() { - line := p.s.Bytes() - line = append(line, '\n') - - p.Parser = textparse.New(line, string(expfmt.FmtOpenMetrics)) - if et, err := p.Parser.Next(); err != io.EOF { - return et, err - } - } - - if err := p.s.Err(); err != nil { - return 0, err - } - return 0, io.EOF -} - func getMinAndMaxTimestamps(p textparse.Parser) (int64, int64, error) { var maxt, mint int64 = math.MinInt64, math.MaxInt64 @@ -98,7 +65,7 @@ func getMinAndMaxTimestamps(p textparse.Parser) (int64, int64, error) { return maxt, mint, nil } -func createBlocks(input *os.File, mint, maxt int64, maxSamplesInAppender int, outputDir string) (returnErr error) { +func createBlocks(input []byte, mint, maxt int64, maxSamplesInAppender int, outputDir string) (returnErr error) { blockDuration := tsdb.DefaultBlockDuration mint = blockDuration * (mint / blockDuration) @@ -120,12 +87,9 @@ func createBlocks(input *os.File, mint, maxt int64, maxSamplesInAppender int, ou err = tsdb_errors.NewMulti(err, w.Close()).Err() }() - if _, err := input.Seek(0, 0); err != nil { - return errors.Wrap(err, "seek file") - } ctx := context.Background() app := w.Appender(ctx) - p := NewOpenMetricsParser(input) + p := textparse.NewOpenMetricsParser(input) tsUpper := t + blockDuration samplesCount := 0 for { @@ -194,8 +158,8 @@ func createBlocks(input *os.File, mint, maxt int64, maxSamplesInAppender int, ou return nil } -func backfill(maxSamplesInAppender int, input *os.File, outputDir string) (err error) { - p := NewOpenMetricsParser(input) +func backfill(maxSamplesInAppender int, input []byte, outputDir string) (err error) { + p := textparse.NewOpenMetricsParser(input) maxt, mint, err := getMinAndMaxTimestamps(p) if err != nil { return errors.Wrap(err, "getting min and max timestamp") diff --git a/cmd/promtool/backfill_test.go b/cmd/promtool/backfill_test.go index 612ee2fb8..4fe983ac1 100644 --- a/cmd/promtool/backfill_test.go +++ b/cmd/promtool/backfill_test.go @@ -33,17 +33,6 @@ type backfillSample struct { Labels labels.Labels } -func createTemporaryOpenMetricsFile(t *testing.T, text string) string { - newf, err := ioutil.TempFile("", "") - require.NoError(t, err) - - _, err = newf.WriteString(text) - require.NoError(t, err) - require.NoError(t, newf.Close()) - - return newf.Name() -} - func sortSamples(samples []backfillSample) { sort.Slice(samples, func(x, y int) bool { sx, sy := samples[x], samples[y] @@ -329,6 +318,30 @@ http_requests_total{code="400"} 1 1565166113.989 }, }, }, + { + ToParse: `no_newline_after_eof 42 6900 +# EOF`, + IsOk: true, + Description: "Sample without newline after # EOF.", + MaxSamplesInAppender: 5000, + Expected: struct { + MinTime int64 + MaxTime int64 + NumBlocks int + Samples []backfillSample + }{ + MinTime: 6900000, + MaxTime: 6900000, + NumBlocks: 1, + Samples: []backfillSample{ + { + Timestamp: 6900000, + Value: 42, + Labels: labels.FromStrings("__name__", "no_newline_after_eof"), + }, + }, + }, + }, { ToParse: `bare_metric 42.24 1001 # EOF @@ -382,24 +395,35 @@ no_nl{type="no newline"} IsOk: false, Description: "No newline.", }, + { + ToParse: `# HELP no_eof This test has no EOF so will fail +# TYPE no_eof gauge +no_eof 1 1 +`, + IsOk: false, + Description: "No EOF.", + }, + { + ToParse: `# HELP after_eof There is data after EOF. +# TYPE after_eof gauge +after_eof 1 1 +# EOF +after_eof 1 2 +`, + IsOk: false, + Description: "Data after EOF.", + }, } for _, test := range tests { t.Logf("Test:%s", test.Description) - openMetricsFile := createTemporaryOpenMetricsFile(t, test.ToParse) - input, err := os.Open(openMetricsFile) - require.NoError(t, err) - defer func() { - require.NoError(t, input.Close()) - }() - outputDir, err := ioutil.TempDir("", "myDir") require.NoError(t, err) defer func() { require.NoError(t, os.RemoveAll(outputDir)) }() - err = backfill(test.MaxSamplesInAppender, input, outputDir) + err = backfill(test.MaxSamplesInAppender, []byte(test.ToParse), outputDir) if !test.IsOk { require.Error(t, err, test.Description) diff --git a/cmd/promtool/tsdb.go b/cmd/promtool/tsdb.go index c2ccf82d0..c44cf28be 100644 --- a/cmd/promtool/tsdb.go +++ b/cmd/promtool/tsdb.go @@ -39,6 +39,7 @@ import ( "github.com/prometheus/prometheus/tsdb" "github.com/prometheus/prometheus/tsdb/chunks" tsdb_errors "github.com/prometheus/prometheus/tsdb/errors" + "github.com/prometheus/prometheus/tsdb/fileutil" ) const timeDelta = 30000 @@ -615,12 +616,12 @@ func checkErr(err error) int { } func backfillOpenMetrics(path string, outputDir string) (err error) { - input, err := os.Open(path) + inputFile, err := fileutil.OpenMmapFile(path) if err != nil { return err } defer func() { - input.Close() + inputFile.Close() }() - return backfill(5000, input, outputDir) + return backfill(5000, inputFile.Bytes(), outputDir) } From 7957731339b5abb9c398626f3c54187c9ee7310f Mon Sep 17 00:00:00 2001 From: Julien Pivotto Date: Wed, 9 Dec 2020 09:23:39 +0100 Subject: [PATCH 2/2] Inline defer Signed-off-by: Julien Pivotto --- cmd/promtool/tsdb.go | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/cmd/promtool/tsdb.go b/cmd/promtool/tsdb.go index c44cf28be..b2ca5da92 100644 --- a/cmd/promtool/tsdb.go +++ b/cmd/promtool/tsdb.go @@ -620,8 +620,6 @@ func backfillOpenMetrics(path string, outputDir string) (err error) { if err != nil { return err } - defer func() { - inputFile.Close() - }() + defer inputFile.Close() return backfill(5000, inputFile.Bytes(), outputDir) }