From c271ef456a0b696a389f7e607472c8c3573eafd1 Mon Sep 17 00:00:00 2001 From: Brian Brazil Date: Thu, 4 Oct 2018 17:57:47 +0100 Subject: [PATCH] Add an OpenMetrics parser. This is based on the existing parser, and has slightly better performance (few %) than it. Signed-off-by: Brian Brazil --- Makefile | 1 + pkg/textparse/interface.go | 21 +- pkg/textparse/omlex.l | 71 ++++ pkg/textparse/omlex.l.go | 586 ++++++++++++++++++++++++++ pkg/textparse/omparse.go | 346 +++++++++++++++ pkg/textparse/omparse_test.go | 440 +++++++++++++++++++ pkg/textparse/promparse.go | 16 +- pkg/textparse/promparse_test.go | 236 ++++++----- pkg/textparse/promtestdata.nometa.txt | 1 + pkg/textparse/promtestdata.txt | 3 +- 10 files changed, 1598 insertions(+), 123 deletions(-) create mode 100644 pkg/textparse/omlex.l create mode 100644 pkg/textparse/omlex.l.go create mode 100644 pkg/textparse/omparse.go create mode 100644 pkg/textparse/omparse_test.go diff --git a/Makefile b/Makefile index 64423ccf0..268c639db 100644 --- a/Makefile +++ b/Makefile @@ -18,6 +18,7 @@ STATICCHECK_IGNORE = \ github.com/prometheus/prometheus/discovery/kubernetes/node.go:SA1019 \ github.com/prometheus/prometheus/documentation/examples/remote_storage/remote_storage_adapter/main.go:SA1019 \ github.com/prometheus/prometheus/pkg/textparse/promlex.l.go:SA4006 \ + github.com/prometheus/prometheus/pkg/textparse/omlex.l.go:SA4006 \ github.com/prometheus/prometheus/pkg/pool/pool.go:SA6002 \ github.com/prometheus/prometheus/promql/engine.go:SA6002 \ github.com/prometheus/prometheus/prompb/rpc.pb.gw.go:SA1019 diff --git a/pkg/textparse/interface.go b/pkg/textparse/interface.go index 75549a287..ed7775226 100644 --- a/pkg/textparse/interface.go +++ b/pkg/textparse/interface.go @@ -18,7 +18,7 @@ import ( ) // Parser parses samples from a byte slice of samples in the official -// Prometheus text exposition format. +// Prometheus and OpenMetrics text exposition formats. 
type Parser interface { // Series returns the bytes of the series, the timestamp if set, and the value // of the current sample. @@ -34,6 +34,11 @@ type Parser interface { // The returned byte slices become invalid after the next call to Next. Type() ([]byte, MetricType) + // Unit returns the metric name and unit in the current entry. + // Must only be called after Next returned a unit entry. + // The returned byte slices become invalid after the next call to Next. + Unit() ([]byte, []byte) + // Comment returns the text of the current comment. // Must only be called after Next returned a comment entry. // The returned byte slice becomes invalid after the next call to Next. @@ -62,15 +67,19 @@ const ( EntryHelp Entry = 1 EntrySeries Entry = 2 EntryComment Entry = 3 + EntryUnit Entry = 4 ) // MetricType represents metric type values. type MetricType string const ( - MetricTypeCounter = "counter" - MetricTypeGauge = "gauge" - MetricTypeHistogram = "histogram" - MetricTypeSummary = "summary" - MetricTypeUntyped = "untyped" + MetricTypeCounter = "counter" + MetricTypeGauge = "gauge" + MetricTypeHistogram = "histogram" + MetricTypeGaugeHistogram = "gaugehistogram" + MetricTypeSummary = "summary" + MetricTypeInfo = "info" + MetricTypeStateset = "stateset" + MetricTypeUnknown = "unknown" ) diff --git a/pkg/textparse/omlex.l b/pkg/textparse/omlex.l new file mode 100644 index 000000000..15d158140 --- /dev/null +++ b/pkg/textparse/omlex.l @@ -0,0 +1,71 @@ +%{ +// Copyright 2017 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package textparse + +import ( + "fmt" +) + +// Lex is called by the parser generated by "go tool yacc" to obtain each +// token. The method is opened before the matching rules block and closed at +// the end of the file. +func (l *omlexer) Lex() token { + if l.i >= len(l.b) { + return tEOF + } + c := l.b[l.i] + l.start = l.i + +%} + +D [0-9] +L [a-zA-Z_] +M [a-zA-Z_:] +C [^\n] +S [ ] + +%x sComment sMeta1 sMeta2 sLabels sLValue sValue sTimestamp + +%yyc c +%yyn c = l.next() +%yyt l.state + + +%% + +#{S} l.state = sComment +HELP{S} l.state = sMeta1; return tHelp +TYPE{S} l.state = sMeta1; return tType +UNIT{S} l.state = sMeta1; return tUnit +"EOF"\n? l.state = sInit; return tEofWord +{M}({M}|{D})* l.state = sMeta2; return tMName +{S}{C}*\n l.state = sInit; return tText + +{M}({M}|{D})* l.state = sValue; return tMName +\{ l.state = sLabels; return tBraceOpen +{L}({L}|{D})* return tLName +\} l.state = sValue; return tBraceClose += l.state = sLValue; return tEqual +, return tComma +\"(\\.|[^\\"\n])*\" l.state = sLabels; return tLValue +{S}[^ \n]+ l.state = sTimestamp; return tValue +{S}[^ \n]+ return tTimestamp +{S}#{S}{C}*\n l.state = sInit; return tLinebreak +\n l.state = sInit; return tLinebreak + +%% + + return tInvalid +} diff --git a/pkg/textparse/omlex.l.go b/pkg/textparse/omlex.l.go new file mode 100644 index 000000000..72372ee9f --- /dev/null +++ b/pkg/textparse/omlex.l.go @@ -0,0 +1,586 @@ +// Code generated by golex. DO NOT EDIT. + +// Copyright 2017 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package textparse + +import ( + "fmt" +) + +// Lex is called by the parser generated by "go tool yacc" to obtain each +// token. The method is opened before the matching rules block and closed at +// the end of the file. +func (l *omlexer) Lex() token { + if l.i >= len(l.b) { + return tEOF + } + c := l.b[l.i] + l.start = l.i + +yystate0: + + switch yyt := l.state; yyt { + default: + panic(fmt.Errorf(`invalid start condition %d`, yyt)) + case 0: // start condition: INITIAL + goto yystart1 + case 1: // start condition: sComment + goto yystart5 + case 2: // start condition: sMeta1 + goto yystart25 + case 3: // start condition: sMeta2 + goto yystart27 + case 4: // start condition: sLabels + goto yystart30 + case 5: // start condition: sLValue + goto yystart35 + case 6: // start condition: sValue + goto yystart39 + case 7: // start condition: sTimestamp + goto yystart43 + } + + goto yystate0 // silence unused label error + goto yystate1 // silence unused label error +yystate1: + c = l.next() +yystart1: + switch { + default: + goto yyabort + case c == '#': + goto yystate2 + case c == ':' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z': + goto yystate4 + } + +yystate2: + c = l.next() + switch { + default: + goto yyabort + case c == ' ': + goto yystate3 + } + +yystate3: + c = l.next() + goto yyrule1 + +yystate4: + c = l.next() + switch { + default: + goto yyrule8 + case c >= '0' && c <= ':' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z': + goto yystate4 + } + + goto yystate5 // silence unused label error +yystate5: + c = 
l.next() +yystart5: + switch { + default: + goto yyabort + case c == 'E': + goto yystate6 + case c == 'H': + goto yystate10 + case c == 'T': + goto yystate15 + case c == 'U': + goto yystate20 + } + +yystate6: + c = l.next() + switch { + default: + goto yyabort + case c == 'O': + goto yystate7 + } + +yystate7: + c = l.next() + switch { + default: + goto yyabort + case c == 'F': + goto yystate8 + } + +yystate8: + c = l.next() + switch { + default: + goto yyrule5 + case c == '\n': + goto yystate9 + } + +yystate9: + c = l.next() + goto yyrule5 + +yystate10: + c = l.next() + switch { + default: + goto yyabort + case c == 'E': + goto yystate11 + } + +yystate11: + c = l.next() + switch { + default: + goto yyabort + case c == 'L': + goto yystate12 + } + +yystate12: + c = l.next() + switch { + default: + goto yyabort + case c == 'P': + goto yystate13 + } + +yystate13: + c = l.next() + switch { + default: + goto yyabort + case c == ' ': + goto yystate14 + } + +yystate14: + c = l.next() + goto yyrule2 + +yystate15: + c = l.next() + switch { + default: + goto yyabort + case c == 'Y': + goto yystate16 + } + +yystate16: + c = l.next() + switch { + default: + goto yyabort + case c == 'P': + goto yystate17 + } + +yystate17: + c = l.next() + switch { + default: + goto yyabort + case c == 'E': + goto yystate18 + } + +yystate18: + c = l.next() + switch { + default: + goto yyabort + case c == ' ': + goto yystate19 + } + +yystate19: + c = l.next() + goto yyrule3 + +yystate20: + c = l.next() + switch { + default: + goto yyabort + case c == 'N': + goto yystate21 + } + +yystate21: + c = l.next() + switch { + default: + goto yyabort + case c == 'I': + goto yystate22 + } + +yystate22: + c = l.next() + switch { + default: + goto yyabort + case c == 'T': + goto yystate23 + } + +yystate23: + c = l.next() + switch { + default: + goto yyabort + case c == ' ': + goto yystate24 + } + +yystate24: + c = l.next() + goto yyrule4 + + goto yystate25 // silence unused label error +yystate25: + c = 
l.next() +yystart25: + switch { + default: + goto yyabort + case c == ':' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z': + goto yystate26 + } + +yystate26: + c = l.next() + switch { + default: + goto yyrule6 + case c >= '0' && c <= ':' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z': + goto yystate26 + } + + goto yystate27 // silence unused label error +yystate27: + c = l.next() +yystart27: + switch { + default: + goto yyabort + case c == ' ': + goto yystate28 + } + +yystate28: + c = l.next() + switch { + default: + goto yyabort + case c == '\n': + goto yystate29 + case c >= '\x01' && c <= '\t' || c >= '\v' && c <= 'ÿ': + goto yystate28 + } + +yystate29: + c = l.next() + goto yyrule7 + + goto yystate30 // silence unused label error +yystate30: + c = l.next() +yystart30: + switch { + default: + goto yyabort + case c == ',': + goto yystate31 + case c == '=': + goto yystate32 + case c == '}': + goto yystate34 + case c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z': + goto yystate33 + } + +yystate31: + c = l.next() + goto yyrule13 + +yystate32: + c = l.next() + goto yyrule12 + +yystate33: + c = l.next() + switch { + default: + goto yyrule10 + case c >= '0' && c <= '9' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z': + goto yystate33 + } + +yystate34: + c = l.next() + goto yyrule11 + + goto yystate35 // silence unused label error +yystate35: + c = l.next() +yystart35: + switch { + default: + goto yyabort + case c == '"': + goto yystate36 + } + +yystate36: + c = l.next() + switch { + default: + goto yyabort + case c == '"': + goto yystate37 + case c == '\\': + goto yystate38 + case c >= '\x01' && c <= '\t' || c >= '\v' && c <= '!' 
|| c >= '#' && c <= '[' || c >= ']' && c <= 'ÿ': + goto yystate36 + } + +yystate37: + c = l.next() + goto yyrule14 + +yystate38: + c = l.next() + switch { + default: + goto yyabort + case c >= '\x01' && c <= '\t' || c >= '\v' && c <= 'ÿ': + goto yystate36 + } + + goto yystate39 // silence unused label error +yystate39: + c = l.next() +yystart39: + switch { + default: + goto yyabort + case c == ' ': + goto yystate40 + case c == '{': + goto yystate42 + } + +yystate40: + c = l.next() + switch { + default: + goto yyabort + case c >= '\x01' && c <= '\t' || c >= '\v' && c <= '\x1f' || c >= '!' && c <= 'ÿ': + goto yystate41 + } + +yystate41: + c = l.next() + switch { + default: + goto yyrule15 + case c >= '\x01' && c <= '\t' || c >= '\v' && c <= '\x1f' || c >= '!' && c <= 'ÿ': + goto yystate41 + } + +yystate42: + c = l.next() + goto yyrule9 + + goto yystate43 // silence unused label error +yystate43: + c = l.next() +yystart43: + switch { + default: + goto yyabort + case c == ' ': + goto yystate45 + case c == '\n': + goto yystate44 + } + +yystate44: + c = l.next() + goto yyrule18 + +yystate45: + c = l.next() + switch { + default: + goto yyabort + case c == '#': + goto yystate47 + case c >= '\x01' && c <= '\t' || c >= '\v' && c <= '\x1f' || c == '!' || c == '"' || c >= '$' && c <= 'ÿ': + goto yystate46 + } + +yystate46: + c = l.next() + switch { + default: + goto yyrule16 + case c >= '\x01' && c <= '\t' || c >= '\v' && c <= '\x1f' || c >= '!' && c <= 'ÿ': + goto yystate46 + } + +yystate47: + c = l.next() + switch { + default: + goto yyrule16 + case c == ' ': + goto yystate48 + case c >= '\x01' && c <= '\t' || c >= '\v' && c <= '\x1f' || c >= '!' 
&& c <= 'ÿ': + goto yystate46 + } + +yystate48: + c = l.next() + switch { + default: + goto yyabort + case c == '\n': + goto yystate49 + case c >= '\x01' && c <= '\t' || c >= '\v' && c <= 'ÿ': + goto yystate48 + } + +yystate49: + c = l.next() + goto yyrule17 + +yyrule1: // #{S} + { + l.state = sComment + goto yystate0 + } +yyrule2: // HELP{S} + { + l.state = sMeta1 + return tHelp + goto yystate0 + } +yyrule3: // TYPE{S} + { + l.state = sMeta1 + return tType + goto yystate0 + } +yyrule4: // UNIT{S} + { + l.state = sMeta1 + return tUnit + goto yystate0 + } +yyrule5: // "EOF"\n? + { + l.state = sInit + return tEofWord + goto yystate0 + } +yyrule6: // {M}({M}|{D})* + { + l.state = sMeta2 + return tMName + goto yystate0 + } +yyrule7: // {S}{C}*\n + { + l.state = sInit + return tText + goto yystate0 + } +yyrule8: // {M}({M}|{D})* + { + l.state = sValue + return tMName + goto yystate0 + } +yyrule9: // \{ + { + l.state = sLabels + return tBraceOpen + goto yystate0 + } +yyrule10: // {L}({L}|{D})* + { + return tLName + } +yyrule11: // \} + { + l.state = sValue + return tBraceClose + goto yystate0 + } +yyrule12: // = + { + l.state = sLValue + return tEqual + goto yystate0 + } +yyrule13: // , + { + return tComma + } +yyrule14: // \"(\\.|[^\\"\n])*\" + { + l.state = sLabels + return tLValue + goto yystate0 + } +yyrule15: // {S}[^ \n]+ + { + l.state = sTimestamp + return tValue + goto yystate0 + } +yyrule16: // {S}[^ \n]+ + { + return tTimestamp + } +yyrule17: // {S}#{S}{C}*\n + { + l.state = sInit + return tLinebreak + goto yystate0 + } +yyrule18: // \n + { + l.state = sInit + return tLinebreak + goto yystate0 + } + panic("unreachable") + + goto yyabort // silence unused label error + +yyabort: // no lexem recognized + + return tInvalid +} diff --git a/pkg/textparse/omparse.go b/pkg/textparse/omparse.go new file mode 100644 index 000000000..a41c8e45c --- /dev/null +++ b/pkg/textparse/omparse.go @@ -0,0 +1,346 @@ +// Copyright 2017 The Prometheus Authors +// Licensed under the 
Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//go:generate go get github.com/cznic/golex +//go:generate golex -o=omlex.l.go omlex.l + +package textparse + +import ( + "errors" + "fmt" + "io" + "math" + "sort" + "strconv" + "strings" + "unicode/utf8" + + "github.com/prometheus/prometheus/pkg/labels" + "github.com/prometheus/prometheus/pkg/value" +) + +type omlexer struct { + b []byte + i int + start int + err error + state int +} + +// buf returns the buffer of the current token. +func (l *omlexer) buf() []byte { + return l.b[l.start:l.i] +} + +func (l *omlexer) cur() byte { + return l.b[l.i] +} + +// next advances the omlexer to the next character. +func (l *omlexer) next() byte { + l.i++ + if l.i >= len(l.b) { + l.err = io.EOF + return byte(tEOF) + } + // Lex struggles with null bytes. If we are in a label value or help string, where + // they are allowed, consume them here immediately. + for l.b[l.i] == 0 && (l.state == sLValue || l.state == sMeta2 || l.state == sComment) { + l.i++ + if l.i >= len(l.b) { + l.err = io.EOF + return byte(tEOF) + } + } + return l.b[l.i] +} + +func (l *omlexer) Error(es string) { + l.err = errors.New(es) +} + +// OMParser parses samples from a byte slice of samples in the official +// OpenMetrics text exposition format. +type OMParser struct { + l *omlexer + series []byte + text []byte + mtype MetricType + val float64 + ts int64 + hasTS bool + start int + offsets []int +} + +// NewOMParser returns a new OpenMetrics parser of the byte slice.
+func NewOMParser(b []byte) Parser { + return &OMParser{l: &omlexer{b: b}} +} + +// Series returns the bytes of the series, the timestamp if set, and the value +// of the current sample. +func (p *OMParser) Series() ([]byte, *int64, float64) { + if p.hasTS { + return p.series, &p.ts, p.val + } + return p.series, nil, p.val +} + +// Help returns the metric name and help text in the current entry. +// Must only be called after Next returned a help entry. +// The returned byte slices become invalid after the next call to Next. +func (p *OMParser) Help() ([]byte, []byte) { + m := p.l.b[p.offsets[0]:p.offsets[1]] + + // Replacer causes allocations. Replace only when necessary. + if strings.IndexByte(yoloString(p.text), byte('\\')) >= 0 { + // OpenMetrics always uses the Prometheus format label value escaping. + return m, []byte(lvalReplacer.Replace(string(p.text))) + } + return m, p.text +} + +// Type returns the metric name and type in the current entry. +// Must only be called after Next returned a type entry. +// The returned byte slices become invalid after the next call to Next. +func (p *OMParser) Type() ([]byte, MetricType) { + return p.l.b[p.offsets[0]:p.offsets[1]], p.mtype +} + +// Unit returns the metric name and unit in the current entry. +// Must only be called after Next returned a unit entry. +// The returned byte slices become invalid after the next call to Next. +func (p *OMParser) Unit() ([]byte, []byte) { + // The unit is the text that followed the metric name on the UNIT line. + return p.l.b[p.offsets[0]:p.offsets[1]], p.text +} + +// Comment returns the text of the current comment. +// Must only be called after Next returned a comment entry. +// The returned byte slice becomes invalid after the next call to Next. +func (p *OMParser) Comment() []byte { + return p.text +} + +// Metric writes the labels of the current sample into the passed labels. +// It returns the string from which the metric was parsed.
+func (p *OMParser) Metric(l *labels.Labels) string { + // Allocate the full immutable string immediately, so we just + // have to create references on it below. + s := string(p.series) + + *l = append(*l, labels.Label{ + Name: labels.MetricName, + Value: s[:p.offsets[0]-p.start], + }) + + for i := 1; i < len(p.offsets); i += 4 { + a := p.offsets[i] - p.start + b := p.offsets[i+1] - p.start + c := p.offsets[i+2] - p.start + d := p.offsets[i+3] - p.start + + // Replacer causes allocations. Replace only when necessary. + if strings.IndexByte(s[c:d], byte('\\')) >= 0 { + *l = append(*l, labels.Label{Name: s[a:b], Value: lvalReplacer.Replace(s[c:d])}) + continue + } + *l = append(*l, labels.Label{Name: s[a:b], Value: s[c:d]}) + } + + // Sort labels. We can skip the first entry since the metric name is + // already at the right place. + sort.Sort((*l)[1:]) + + return s +} + +// nextToken returns the next token from the omlexer. +func (p *OMParser) nextToken() token { + tok := p.l.Lex() + return tok +} + +// Next advances the parser to the next entry. It returns EntryInvalid and a +// non-nil error if no more entries were read or an error occurred.
+func (p *OMParser) Next() (Entry, error) { + var err error + + p.start = p.l.i + p.offsets = p.offsets[:0] + + switch t := p.nextToken(); t { + case tEofWord: + if t := p.nextToken(); t != tEOF { + return EntryInvalid, fmt.Errorf("unexpected data after # EOF") + } + return EntryInvalid, io.EOF + case tEOF: + return EntryInvalid, parseError("unexpected end of data", t) + case tHelp, tType, tUnit: + switch t := p.nextToken(); t { + case tMName: + p.offsets = append(p.offsets, p.l.start, p.l.i) + default: + return EntryInvalid, parseError("expected metric name after HELP", t) + } + switch t := p.nextToken(); t { + case tText: + if len(p.l.buf()) > 1 { + p.text = p.l.buf()[1 : len(p.l.buf())-1] + } else { + p.text = []byte{} + } + default: + return EntryInvalid, parseError("expected text in HELP", t) + } + switch t { + case tType: + switch s := yoloString(p.text); s { + case "counter": + p.mtype = MetricTypeCounter + case "gauge": + p.mtype = MetricTypeGauge + case "histogram": + p.mtype = MetricTypeHistogram + case "gaugehistogram": + p.mtype = MetricTypeGaugeHistogram + case "summary": + p.mtype = MetricTypeSummary + case "info": + p.mtype = MetricTypeInfo + case "stateset": + p.mtype = MetricTypeStateset + case "unknown": + p.mtype = MetricTypeUnknown + default: + return EntryInvalid, fmt.Errorf("invalid metric type %q", s) + } + case tHelp: + if !utf8.Valid(p.text) { + return EntryInvalid, fmt.Errorf("help text is not a valid utf8 string") + } + } + switch t { + case tHelp: + return EntryHelp, nil + case tType: + return EntryType, nil + case tUnit: + m := yoloString(p.l.b[p.offsets[0]:p.offsets[1]]) + u := yoloString(p.text) + if len(u) > 0 { + if !strings.HasSuffix(m, u) || len(m) < len(u)+1 || p.l.b[p.offsets[1]-len(u)-1] != '_' { + return EntryInvalid, fmt.Errorf("unit not a suffix of metric %q", m) + } + } + return EntryUnit, nil + } + + case tMName: + p.offsets = append(p.offsets, p.l.i) + p.series = p.l.b[p.start:p.l.i] + + t2 := p.nextToken() + if t2 == 
tBraceOpen { + if err := p.parseLVals(); err != nil { + return EntryInvalid, err + } + p.series = p.l.b[p.start:p.l.i] + t2 = p.nextToken() + } + if t2 != tValue { + return EntryInvalid, parseError("expected value after metric", t) + } + if p.val, err = strconv.ParseFloat(yoloString(p.l.buf()[1:]), 64); err != nil { + return EntryInvalid, err + } + // Ensure canonical NaN value. + if math.IsNaN(p.val) { + p.val = math.Float64frombits(value.NormalNaN) + } + p.hasTS = false + switch p.nextToken() { + case tLinebreak: + break + case tTimestamp: + p.hasTS = true + var ts float64 + // A float is enough to hold what we need for millisecond resolution. + if ts, err = strconv.ParseFloat(yoloString(p.l.buf()[1:]), 64); err != nil { + return EntryInvalid, err + } + p.ts = int64(ts * 1000) + if t2 := p.nextToken(); t2 != tLinebreak { + return EntryInvalid, parseError("expected next entry after timestamp", t) + } + default: + return EntryInvalid, parseError("expected timestamp or new record", t) + } + return EntrySeries, nil + + default: + err = fmt.Errorf("%q %q is not a valid start token", t, string(p.l.cur())) + } + return EntryInvalid, err +} + +func (p *OMParser) parseLVals() error { + first := true + for { + t := p.nextToken() + switch t { + case tBraceClose: + return nil + case tComma: + if first { + return parseError("expected label name or left brace", t) + } + t = p.nextToken() + if t != tLName { + return parseError("expected label name", t) + } + case tLName: + if !first { + return parseError("expected comma", t) + } + default: + if first { + return parseError("expected label name or left brace", t) + } + return parseError("expected comma or left brace", t) + + } + first = false + // t is now a label name. 
+ + p.offsets = append(p.offsets, p.l.start, p.l.i) + + if t := p.nextToken(); t != tEqual { + return parseError("expected equal", t) + } + if t := p.nextToken(); t != tLValue { + return parseError("expected label value", t) + } + if !utf8.Valid(p.l.buf()) { + return fmt.Errorf("invalid UTF-8 label value") + } + + // The omlexer ensures the value string is quoted. Strip first + // and last character. + p.offsets = append(p.offsets, p.l.start+1, p.l.i-1) + + } +} diff --git a/pkg/textparse/omparse_test.go b/pkg/textparse/omparse_test.go new file mode 100644 index 000000000..13809e4e5 --- /dev/null +++ b/pkg/textparse/omparse_test.go @@ -0,0 +1,440 @@ +// Copyright 2017 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package textparse + +import ( + "io" + "testing" + + "github.com/prometheus/prometheus/pkg/labels" + "github.com/stretchr/testify/require" +) + +func TestOMParse(t *testing.T) { + input := `# HELP go_gc_duration_seconds A summary of the GC invocation durations.
+# TYPE go_gc_duration_seconds summary +# UNIT go_gc_duration_seconds seconds +go_gc_duration_seconds{quantile="0"} 4.9351e-05 +go_gc_duration_seconds{quantile="0.25"} 7.424100000000001e-05 +go_gc_duration_seconds{quantile="0.5",a="b"} 8.3835e-05 +# HELP nohelp1 +# HELP help2 escape \ \n \\ \" \x chars +# UNIT nounit +go_gc_duration_seconds{quantile="1.0",a="b"} 8.3835e-05 +go_gc_duration_seconds_count 99 +some:aggregate:rate5m{a_b="c"} 1 +# HELP go_goroutines Number of goroutines that currently exist. +# TYPE go_goroutines gauge +go_goroutines 33 123.123 +# TYPE hh histogram +hh_bucket{le="+Inf"} 1 # {} 4 +# TYPE gh gaugehistogram +gh_bucket{le="+Inf"} 1 # {} 4 +# TYPE ii info +ii{foo="bar"} 1 +# TYPE ss stateset +ss{ss="foo"} 1 +ss{ss="bar"} 0 +# TYPE un unknown +_metric_starting_with_underscore 1 +testmetric{_label_starting_with_underscore="foo"} 1 +testmetric{label="\"bar\""} 1` + + input += "\n# HELP metric foo\x00bar" + input += "\nnull_byte_metric{a=\"abc\x00\"} 1" + input += "\n# EOF\n" + + int64p := func(x int64) *int64 { return &x } + + exp := []struct { + lset labels.Labels + m string + t *int64 + v float64 + typ MetricType + help string + unit string + comment string + }{ + { + m: "go_gc_duration_seconds", + help: "A summary of the GC invocation durations.", + }, { + m: "go_gc_duration_seconds", + typ: MetricTypeSummary, + }, { + m: "go_gc_duration_seconds", + unit: "seconds", + }, { + m: `go_gc_duration_seconds{quantile="0"}`, + v: 4.9351e-05, + lset: labels.FromStrings("__name__", "go_gc_duration_seconds", "quantile", "0"), + }, { + m: `go_gc_duration_seconds{quantile="0.25"}`, + v: 7.424100000000001e-05, + lset: labels.FromStrings("__name__", "go_gc_duration_seconds", "quantile", "0.25"), + }, { + m: `go_gc_duration_seconds{quantile="0.5",a="b"}`, + v: 8.3835e-05, + lset: labels.FromStrings("__name__", "go_gc_duration_seconds", "quantile", "0.5", "a", "b"), + }, { + m: "nohelp1", + help: "", + }, { + m: "help2", + help: "escape \\ \n \\ \" \\x 
chars", + }, { + m: "nounit", + unit: "", + }, { + m: `go_gc_duration_seconds{quantile="1.0",a="b"}`, + v: 8.3835e-05, + lset: labels.FromStrings("__name__", "go_gc_duration_seconds", "quantile", "1.0", "a", "b"), + }, { + m: `go_gc_duration_seconds_count`, + v: 99, + lset: labels.FromStrings("__name__", "go_gc_duration_seconds_count"), + }, { + m: `some:aggregate:rate5m{a_b="c"}`, + v: 1, + lset: labels.FromStrings("__name__", "some:aggregate:rate5m", "a_b", "c"), + }, { + m: "go_goroutines", + help: "Number of goroutines that currently exist.", + }, { + m: "go_goroutines", + typ: MetricTypeGauge, + }, { + m: `go_goroutines`, + v: 33, + t: int64p(123123), + lset: labels.FromStrings("__name__", "go_goroutines"), + }, { + m: "hh", + typ: MetricTypeHistogram, + }, { + m: `hh_bucket{le="+Inf"}`, + v: 1, + lset: labels.FromStrings("__name__", "hh_bucket", "le", "+Inf"), + }, { + m: "gh", + typ: MetricTypeGaugeHistogram, + }, { + m: `gh_bucket{le="+Inf"}`, + v: 1, + lset: labels.FromStrings("__name__", "gh_bucket", "le", "+Inf"), + }, { + m: "ii", + typ: MetricTypeInfo, + }, { + m: `ii{foo="bar"}`, + v: 1, + lset: labels.FromStrings("__name__", "ii", "foo", "bar"), + }, { + m: "ss", + typ: MetricTypeStateset, + }, { + m: `ss{ss="foo"}`, + v: 1, + lset: labels.FromStrings("__name__", "ss", "ss", "foo"), + }, { + m: `ss{ss="bar"}`, + v: 0, + lset: labels.FromStrings("__name__", "ss", "ss", "bar"), + }, { + m: "un", + typ: MetricTypeUnknown, + }, { + m: "_metric_starting_with_underscore", + v: 1, + lset: labels.FromStrings("__name__", "_metric_starting_with_underscore"), + }, { + m: "testmetric{_label_starting_with_underscore=\"foo\"}", + v: 1, + lset: labels.FromStrings("__name__", "testmetric", "_label_starting_with_underscore", "foo"), + }, { + m: "testmetric{label=\"\\\"bar\\\"\"}", + v: 1, + lset: labels.FromStrings("__name__", "testmetric", "label", `"bar"`), + }, { + m: "metric", + help: "foo\x00bar", + }, { + m: "null_byte_metric{a=\"abc\x00\"}", + v: 1, + lset: 
labels.FromStrings("__name__", "null_byte_metric", "a", "abc\x00"), + }, + } + + p := NewOMParser([]byte(input)) + i := 0 + + var res labels.Labels + + for { + et, err := p.Next() + if err == io.EOF { + break + } + require.NoError(t, err) + + switch et { + case EntrySeries: + m, ts, v := p.Series() + + p.Metric(&res) + + require.Equal(t, exp[i].m, string(m)) + require.Equal(t, exp[i].t, ts) + require.Equal(t, exp[i].v, v) + require.Equal(t, exp[i].lset, res) + res = res[:0] + + case EntryType: + m, typ := p.Type() + require.Equal(t, exp[i].m, string(m)) + require.Equal(t, exp[i].typ, typ) + + case EntryHelp: + m, h := p.Help() + require.Equal(t, exp[i].m, string(m)) + require.Equal(t, exp[i].help, string(h)) + + case EntryUnit: + m, u := p.Unit() + require.Equal(t, exp[i].m, string(m)) + require.Equal(t, exp[i].unit, string(u)) + + case EntryComment: + require.Equal(t, exp[i].comment, string(p.Comment())) + } + + i++ + } + require.Equal(t, len(exp), i) +} + +func TestOMParseErrors(t *testing.T) { + cases := []struct { + input string + err string + }{ + { + input: "", + err: "unexpected end of data, got \"EOF\"", + }, + { + input: "a", + err: "expected value after metric, got \"MNAME\"", + }, + { + input: "\n", + err: "\"INVALID\" \"\\n\" is not a valid start token", + }, + { + input: " a 1\n", + err: "\"INVALID\" \" \" is not a valid start token", + }, + { + input: "9\n", + err: "\"INVALID\" \"9\" is not a valid start token", + }, + { + input: "# TYPE u untyped\n", + err: "invalid metric type \"untyped\"", + }, + { + input: "# TYPE c counter \n", + err: "invalid metric type \"counter \"", + }, + { + input: "# TYPE c counter\n", + err: "\"INVALID\" \" \" is not a valid start token", + }, + { + input: "# UNIT metric suffix\n", + err: "unit not a suffix of metric \"metric\"", + }, + { + input: "# UNIT metricsuffix suffix\n", + err: "unit not a suffix of metric \"metricsuffix\"", + }, + { + input: "# UNIT m suffix\n", + err: "unit not a suffix of metric \"m\"", + }, + 
{ + input: "# HELP m\n", + err: "expected text in HELP, got \"INVALID\"", + }, + { + input: "a\t1\n", + err: "expected value after metric, got \"MNAME\"", + }, + { + input: "a 1\t2\n", + err: "strconv.ParseFloat: parsing \"1\\t2\": invalid syntax", + }, + { + input: "a 1 2 \n", + err: "expected next entry after timestamp, got \"MNAME\"", + }, + { + input: "a 1 2 #\n", + err: "expected next entry after timestamp, got \"MNAME\"", + }, + { + input: "a 1 1z\n", + err: "strconv.ParseFloat: parsing \"1z\": invalid syntax", + }, + { + input: " # EOF\n", + err: "\"INVALID\" \" \" is not a valid start token", + }, + { + input: "# EOF\na 1", + err: "unexpected data after # EOF", + }, + { + input: "# EOF\n\n", + err: "unexpected data after # EOF", + }, + { + input: "# EOFa 1", + err: "unexpected data after # EOF", + }, + { + input: "#\tTYPE c counter\n", + err: "\"INVALID\" \"\\t\" is not a valid start token", + }, + { + input: "# TYPE c counter\n", + err: "invalid metric type \" counter\"", + }, + { + input: "a 1 1 1\n", + err: "expected next entry after timestamp, got \"MNAME\"", + }, + { + input: "a{b='c'} 1\n", + err: "expected label value, got \"INVALID\"", + }, + { + input: "a{b=\"c\",} 1\n", + err: "expected label name, got \"BCLOSE\"", + }, + { + input: "a{,b=\"c\"} 1\n", + err: "expected label name or left brace, got \"COMMA\"", + }, + { + input: "a{b=\"c\"d=\"e\"} 1\n", + err: "expected comma, got \"LNAME\"", + }, + { + input: "a{b=\"c\",,d=\"e\"} 1\n", + err: "expected label name, got \"COMMA\"", + }, + { + input: "a{b=\n", + err: "expected label value, got \"INVALID\"", + }, + { + input: "a{\xff=\"foo\"} 1\n", + err: "expected label name or left brace, got \"INVALID\"", + }, + { + input: "a{b=\"\xff\"} 1\n", + err: "invalid UTF-8 label value", + }, + { + input: "a true\n", + err: "strconv.ParseFloat: parsing \"true\": invalid syntax", + }, + { + input: "something_weird{problem=\"", + err: "expected label value, got \"INVALID\"", + }, + { + input: 
"empty_label_name{=\"\"} 0", + err: "expected label name or left brace, got \"EQUAL\"", + }, + } + + for i, c := range cases { + p := NewOMParser([]byte(c.input)) + var err error + for err == nil { + _, err = p.Next() + } + require.NotNil(t, err) + require.Equal(t, c.err, err.Error(), "test %d", i) + } +} + +func TestOMNullByteHandling(t *testing.T) { + cases := []struct { + input string + err string + }{ + { + input: "null_byte_metric{a=\"abc\x00\"} 1\n# EOF\n", + err: "", + }, + { + input: "a{b=\"\x00ss\"} 1\n# EOF\n", + err: "", + }, + { + input: "a{b=\"\x00\"} 1\n# EOF\n", + err: "", + }, + { + input: "a{b=\"\x00\"} 1\n# EOF", + err: "", + }, + { + input: "a{b=\x00\"ssss\"} 1\n# EOF\n", + err: "expected label value, got \"INVALID\"", + }, + { + input: "a{b=\"\x00", + err: "expected label value, got \"INVALID\"", + }, + { + input: "a{b\x00=\"hiih\"} 1", + err: "expected equal, got \"INVALID\"", + }, + { + input: "a\x00{b=\"ddd\"} 1", + err: "expected value after metric, got \"MNAME\"", + }, + } + + for i, c := range cases { + p := NewOMParser([]byte(c.input)) + var err error + for err == nil { + _, err = p.Next() + } + + if c.err == "" { + require.Equal(t, io.EOF, err, "test %d", i) + continue + } + + require.Error(t, err) + require.Equal(t, c.err, err.Error(), "test %d", i) + } +} diff --git a/pkg/textparse/promparse.go b/pkg/textparse/promparse.go index b1fa90a1d..2b2cbdaba 100644 --- a/pkg/textparse/promparse.go +++ b/pkg/textparse/promparse.go @@ -48,6 +48,8 @@ const ( tWhitespace tHelp tType + tUnit + tEofWord tText tComment tBlank @@ -76,6 +78,10 @@ func (t token) String() string { return "HELP" case tType: return "TYPE" + case tUnit: + return "UNIT" + case tEofWord: + return "EOFWORD" case tText: return "TEXT" case tComment: @@ -180,6 +186,14 @@ func (p *PromParser) Type() ([]byte, MetricType) { return p.l.b[p.offsets[0]:p.offsets[1]], p.mtype } +// Unit returns the metric name and unit in the current entry. 
+// Must only be called after Next returned a unit entry. +// The returned byte slices become invalid after the next call to Next. +func (p *PromParser) Unit() ([]byte, []byte) { + // The Prometheus format does not have units. + return nil, nil +} + // Comment returns the text of the current comment. // Must only be called after Next returned a comment entry. // The returned byte slice becomes invalid after the next call to Next. @@ -278,7 +292,7 @@ func (p *PromParser) Next() (Entry, error) { case "summary": p.mtype = MetricTypeSummary case "untyped": - p.mtype = MetricTypeUntyped + p.mtype = MetricTypeUnknown default: return EntryInvalid, fmt.Errorf("invalid metric type %q", s) } diff --git a/pkg/textparse/promparse_test.go b/pkg/textparse/promparse_test.go index 3e493f3ab..ae822e3fa 100644 --- a/pkg/textparse/promparse_test.go +++ b/pkg/textparse/promparse_test.go @@ -317,136 +317,142 @@ const ( promtestdataSampleCount = 410 ) -func BenchmarkPromParse(b *testing.B) { - for _, fn := range []string{"promtestdata.txt", "promtestdata.nometa.txt"} { - f, err := os.Open(fn) - require.NoError(b, err) - defer f.Close() - - buf, err := ioutil.ReadAll(f) - require.NoError(b, err) - - b.Run("no-decode-metric/"+fn, func(b *testing.B) { - total := 0 +func BenchmarkParse(b *testing.B) { + for parserName, parser := range map[string]func([]byte) Parser{ + "prometheus": NewPromParser, + "openmetrics": NewOMParser, + } { - b.SetBytes(int64(len(buf) * (b.N / promtestdataSampleCount))) - b.ReportAllocs() - b.ResetTimer() + for _, fn := range []string{"promtestdata.txt", "promtestdata.nometa.txt"} { + f, err := os.Open(fn) + require.NoError(b, err) + defer f.Close() - for i := 0; i < b.N; i += promtestdataSampleCount { - p := NewPromParser(buf) + buf, err := ioutil.ReadAll(f) + require.NoError(b, err) - Outer: - for i < b.N { - t, err := p.Next() - switch t { - case EntryInvalid: - if err == io.EOF { - break Outer + b.Run(parserName+"/no-decode-metric/"+fn, func(b *testing.B) { + 
total := 0 + + b.SetBytes(int64(len(buf) * (b.N / promtestdataSampleCount))) + b.ReportAllocs() + b.ResetTimer() + + for i := 0; i < b.N; i += promtestdataSampleCount { + p := parser(buf) + + Outer: + for i < b.N { + t, err := p.Next() + switch t { + case EntryInvalid: + if err == io.EOF { + break Outer + } + b.Fatal(err) + case EntrySeries: + m, _, _ := p.Series() + total += len(m) + i++ } - b.Fatal(err) - case EntrySeries: - m, _, _ := p.Series() - total += len(m) - i++ } } - } - _ = total - }) - b.Run("decode-metric/"+fn, func(b *testing.B) { - total := 0 - - b.SetBytes(int64(len(buf) * (b.N / promtestdataSampleCount))) - b.ReportAllocs() - b.ResetTimer() - - for i := 0; i < b.N; i += promtestdataSampleCount { - p := NewPromParser(buf) - - Outer: - for i < b.N { - t, err := p.Next() - switch t { - case EntryInvalid: - if err == io.EOF { - break Outer + _ = total + }) + b.Run(parserName+"/decode-metric/"+fn, func(b *testing.B) { + total := 0 + + b.SetBytes(int64(len(buf) * (b.N / promtestdataSampleCount))) + b.ReportAllocs() + b.ResetTimer() + + for i := 0; i < b.N; i += promtestdataSampleCount { + p := parser(buf) + + Outer: + for i < b.N { + t, err := p.Next() + switch t { + case EntryInvalid: + if err == io.EOF { + break Outer + } + b.Fatal(err) + case EntrySeries: + m, _, _ := p.Series() + + res := make(labels.Labels, 0, 5) + p.Metric(&res) + + total += len(m) + i++ } - b.Fatal(err) - case EntrySeries: - m, _, _ := p.Series() - - res := make(labels.Labels, 0, 5) - p.Metric(&res) - - total += len(m) - i++ } } - } - _ = total - }) - b.Run("decode-metric-reuse/"+fn, func(b *testing.B) { - total := 0 - res := make(labels.Labels, 0, 5) - - b.SetBytes(int64(len(buf) * (b.N / promtestdataSampleCount))) - b.ReportAllocs() - b.ResetTimer() - - for i := 0; i < b.N; i += promtestdataSampleCount { - p := NewPromParser(buf) - - Outer: - for i < b.N { - t, err := p.Next() - switch t { - case EntryInvalid: - if err == io.EOF { - break Outer + _ = total + }) + 
b.Run(parserName+"/decode-metric-reuse/"+fn, func(b *testing.B) { + total := 0 + res := make(labels.Labels, 0, 5) + + b.SetBytes(int64(len(buf) * (b.N / promtestdataSampleCount))) + b.ReportAllocs() + b.ResetTimer() + + for i := 0; i < b.N; i += promtestdataSampleCount { + p := parser(buf) + + Outer: + for i < b.N { + t, err := p.Next() + switch t { + case EntryInvalid: + if err == io.EOF { + break Outer + } + b.Fatal(err) + case EntrySeries: + m, _, _ := p.Series() + + p.Metric(&res) + + total += len(m) + i++ + res = res[:0] } - b.Fatal(err) - case EntrySeries: - m, _, _ := p.Series() - - p.Metric(&res) - - total += len(m) - i++ - res = res[:0] } } - } - _ = total - }) - b.Run("expfmt-text/"+fn, func(b *testing.B) { - b.SetBytes(int64(len(buf) * (b.N / promtestdataSampleCount))) - b.ReportAllocs() - b.ResetTimer() - - total := 0 - - for i := 0; i < b.N; i += promtestdataSampleCount { - var ( - decSamples = make(model.Vector, 0, 50) - ) - sdec := expfmt.SampleDecoder{ - Dec: expfmt.NewDecoder(bytes.NewReader(buf), expfmt.FmtText), - Opts: &expfmt.DecodeOptions{ - Timestamp: model.TimeFromUnixNano(0), - }, - } + _ = total + }) + b.Run("expfmt-text/"+fn, func(b *testing.B) { + b.SetBytes(int64(len(buf) * (b.N / promtestdataSampleCount))) + b.ReportAllocs() + b.ResetTimer() + + total := 0 + + for i := 0; i < b.N; i += promtestdataSampleCount { + var ( + decSamples = make(model.Vector, 0, 50) + ) + sdec := expfmt.SampleDecoder{ + Dec: expfmt.NewDecoder(bytes.NewReader(buf), expfmt.FmtText), + Opts: &expfmt.DecodeOptions{ + Timestamp: model.TimeFromUnixNano(0), + }, + } - for { - if err = sdec.Decode(&decSamples); err != nil { - break + for { + if err = sdec.Decode(&decSamples); err != nil { + break + } + total += len(decSamples) + decSamples = decSamples[:0] } - total += len(decSamples) - decSamples = decSamples[:0] } - } - _ = total - }) + _ = total + }) + } } } func BenchmarkGzip(b *testing.B) { diff --git a/pkg/textparse/promtestdata.nometa.txt 
b/pkg/textparse/promtestdata.nometa.txt index e760ad268..235f0aa46 100644 --- a/pkg/textparse/promtestdata.nometa.txt +++ b/pkg/textparse/promtestdata.nometa.txt @@ -408,3 +408,4 @@ prometheus_target_sync_length_seconds_sum{scrape_job="prometheus"} 0.00020043300 prometheus_target_sync_length_seconds_count{scrape_job="prometheus"} 1 prometheus_treecache_watcher_goroutines 0 prometheus_treecache_zookeeper_failures_total 0 +# EOF diff --git a/pkg/textparse/promtestdata.txt b/pkg/textparse/promtestdata.txt index c7f2a7af0..174f383e9 100644 --- a/pkg/textparse/promtestdata.txt +++ b/pkg/textparse/promtestdata.txt @@ -525,4 +525,5 @@ prometheus_target_sync_length_seconds_count{scrape_job="prometheus"} 1 prometheus_treecache_watcher_goroutines 0 # HELP prometheus_treecache_zookeeper_failures_total The total number of ZooKeeper failures. # TYPE prometheus_treecache_zookeeper_failures_total counter -prometheus_treecache_zookeeper_failures_total 0 \ No newline at end of file +prometheus_treecache_zookeeper_failures_total 0 +# EOF