From 0e78b7e7c8f45b38fb4fe6fbc0759e6bb3c800ae Mon Sep 17 00:00:00 2001 From: Goutham Veeramachaneni Date: Thu, 22 Jun 2017 13:08:55 +0530 Subject: [PATCH] parser: support spaces everywhere Signed-off-by: Goutham Veeramachaneni --- pkg/textparse/lex.l | 29 ++-- pkg/textparse/lex.l.go | 316 +++++++++++++++++++++++------------- pkg/textparse/parse.go | 5 +- pkg/textparse/parse_test.go | 31 ++++ 4 files changed, 260 insertions(+), 121 deletions(-) diff --git a/pkg/textparse/lex.l b/pkg/textparse/lex.l index a2336cce4..c8963640c 100644 --- a/pkg/textparse/lex.l +++ b/pkg/textparse/lex.l @@ -30,11 +30,13 @@ import ( func (l *lexer) Lex() int { const ( lstateInit = iota + lstateName lstateValue lstateTimestamp lstateLabels lstateLName lstateLValue + lstateLValueIn ) s := lstateInit @@ -53,7 +55,7 @@ S [a-zA-Z] L [a-zA-Z_] M [a-zA-Z_:] -%x lstateValue lstateTimestamp lstateLabels lstateLName lstateLValue +%x lstateName lstateValue lstateTimestamp lstateLabels lstateLName lstateLValue lstateLValueIn %yyc c @@ -67,23 +69,30 @@ M [a-zA-Z_:] #[^\r\n]*\n l.mstart = l.i [\r\n \t]+ l.mstart = l.i -{S}({M}|{D})*\{ s = lstateLabels - l.offsets = append(l.offsets, l.i-1) -{S}({M}|{D})* s = lstateValue - l.mend = l.i +{S}({M}|{D})* s = lstateName l.offsets = append(l.offsets, l.i) + l.mend = l.i + +([ \t]*)\{ s = lstateLabels + +[ \t]+ s = lstateValue + l.vstart = l.i + [ \t]+ ,?\} s = lstateValue l.mend = l.i -,? s = lstateLName +(,?[ \t]*) s = lstateLName l.offsets = append(l.offsets, l.i) -{S}({L}|{D})*= s = lstateLValue - l.offsets = append(l.offsets, l.i-1) +{S}({L}|{D})* l.offsets = append(l.offsets, l.i) +[ \t]*= s = lstateLValue -\"(\\.|[^\\"]|\0)*\" s = lstateLabels - if !utf8.Valid(l.b[l.offsets[len(l.offsets)-1]+2:l.i-1]) { +[ \t]+ +\" s = lstateLValueIn + l.offsets = append(l.offsets, l.i) +(\\.|[^\\"]|\0)*\" s = lstateLabels + if !utf8.Valid(l.b[l.offsets[len(l.offsets)-1]:l.i-1]) { l.err = fmt.Errorf("Invalid UTF-8 label value.") return -1 } diff --git a/pkg/textparse/lex.l.go b/pkg/textparse/lex.l.go index 1879e766f..cf7978048 100644 --- a/pkg/textparse/lex.l.go +++ b/pkg/textparse/lex.l.go @@ -30,11 +30,13 @@ import ( func (l *lexer) Lex() int { const ( lstateInit = iota + lstateName lstateValue lstateTimestamp lstateLabels lstateLName lstateLValue + lstateLValueIn ) s := lstateInit @@ -54,16 +56,20 @@ yystate0: panic(fmt.Errorf(`invalid start condition %d`, yyt)) case 0: // start condition: INITIAL goto yystart1 - case 1: // start condition: lstateValue - goto yystart8 - case 2: // start condition: lstateTimestamp - goto yystart14 - case 3: // start condition: lstateLabels - goto yystart19 - case 4: // start condition: lstateLName - goto yystart23 - case 5: // start condition: lstateLValue + case 1: // start condition: lstateName + goto yystart7 + case 2: // start condition: lstateValue + goto yystart10 + case 3: // start condition: lstateTimestamp + goto yystart16 + case 4: // start condition: lstateLabels + goto yystart21 + case 5: // start condition: lstateLName goto yystart26 + case 6: // start condition: lstateLValue + goto yystart30 + case 7: // start condition: lstateLValueIn + goto yystart33 } goto yystate0 // silence unused label error @@ -116,225 +122,299 @@ yystate6: c = l.next() switch { default: - goto yyrule5 - case c == '{': - goto yystate7 + goto yyrule4 case c >= '0' && c <= ':' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z': goto yystate6 } + goto yystate7 // silence unused label error yystate7: c = l.next() - goto yyrule4 +yystart7: + switch { + default: + goto yyabort + case c == '\t' || c == ' ': + goto yystate8 + case c == '{': + goto yystate9 + } - goto yystate8 // silence unused label error yystate8: c = l.next() -yystart8: + switch { + default: + goto yyrule6 + case c == '\t' || c == ' ': + goto yystate8 + case c == '{': + goto yystate9 + } + +yystate9: + c = l.next() + goto yyrule5 + + goto yystate10 // silence unused label error +yystate10: + c = l.next() +yystart10: switch { default: goto yyabort case c == 'N': - goto yystate11 + goto yystate13 case c == '\t' || c == ' ': - goto yystate10 + goto yystate12 case c >= '\x01' && c <= '\b' || c == '\v' || c == '\f' || c >= '\x0e' && c <= '\x1f' || c >= '!' && c <= 'M' || c >= 'O' && c <= 'ÿ': - goto yystate9 + goto yystate11 } -yystate9: +yystate11: c = l.next() switch { default: - goto yyrule13 + goto yyrule17 case c >= '\x01' && c <= '\b' || c == '\v' || c == '\f' || c >= '\x0e' && c <= '\x1f' || c >= '!' && c <= 'ÿ': - goto yystate9 + goto yystate11 } -yystate10: +yystate12: c = l.next() switch { default: - goto yyrule11 + goto yyrule15 case c == '\t' || c == ' ': - goto yystate10 + goto yystate12 } -yystate11: +yystate13: c = l.next() switch { default: - goto yyrule13 + goto yyrule17 case c == 'a': - goto yystate12 + goto yystate14 case c >= '\x01' && c <= '\b' || c == '\v' || c == '\f' || c >= '\x0e' && c <= '\x1f' || c >= '!' && c <= '`' || c >= 'b' && c <= 'ÿ': - goto yystate9 + goto yystate11 } -yystate12: +yystate14: c = l.next() switch { default: - goto yyrule13 + goto yyrule17 case c == 'N': - goto yystate13 + goto yystate15 case c >= '\x01' && c <= '\b' || c == '\v' || c == '\f' || c >= '\x0e' && c <= '\x1f' || c >= '!' && c <= 'M' || c >= 'O' && c <= 'ÿ': - goto yystate9 + goto yystate11 } -yystate13: +yystate15: c = l.next() switch { default: - goto yyrule12 + goto yyrule16 case c >= '\x01' && c <= '\b' || c == '\v' || c == '\f' || c >= '\x0e' && c <= '\x1f' || c >= '!' && c <= 'ÿ': - goto yystate9 + goto yystate11 } - goto yystate14 // silence unused label error -yystate14: + goto yystate16 // silence unused label error +yystate16: c = l.next() -yystart14: +yystart16: switch { default: goto yyabort case c == '\n' || c == '\r': - goto yystate17 + goto yystate19 case c == '\t' || c == ' ': - goto yystate16 + goto yystate18 case c == '\x00': - goto yystate15 + goto yystate17 case c >= '0' && c <= '9': - goto yystate18 + goto yystate20 } -yystate15: +yystate17: c = l.next() - goto yyrule17 + goto yyrule21 -yystate16: +yystate18: c = l.next() switch { default: - goto yyrule14 + goto yyrule18 case c == '\t' || c == ' ': - goto yystate16 + goto yystate18 } -yystate17: +yystate19: c = l.next() switch { default: - goto yyrule16 + goto yyrule20 case c == '\n' || c == '\r': - goto yystate17 + goto yystate19 } -yystate18: +yystate20: c = l.next() switch { default: - goto yyrule15 + goto yyrule19 case c >= '0' && c <= '9': - goto yystate18 + goto yystate20 } - goto yystate19 // silence unused label error -yystate19: + goto yystate21 // silence unused label error +yystate21: c = l.next() -yystart19: +yystart21: switch { default: - goto yyrule8 + goto yyrule9 case c == ',': - goto yystate21 + goto yystate23 case c == '\t' || c == ' ': - goto yystate20 - case c == '}': goto yystate22 + case c == '}': + goto yystate25 } -yystate20: +yystate22: c = l.next() switch { default: - goto yyrule6 + goto yyrule7 case c == '\t' || c == ' ': - goto yystate20 + goto yystate22 } -yystate21: +yystate23: c = l.next() switch { default: - goto yyrule8 + goto yyrule9 + case c == '\t' || c == ' ': + goto yystate24 case c == '}': - goto yystate22 + goto yystate25 } -yystate22: +yystate24: c = l.next() - goto yyrule7 + switch { + default: + goto yyrule9 + case c == '\t' || c == ' ': + goto yystate24 + } - goto yystate23 // silence unused label error -yystate23: +yystate25: + c = l.next() + goto yyrule8 + + goto yystate26 // silence unused label error +yystate26: c = l.next() -yystart23: +yystart26: switch { default: goto yyabort + case c == '=': + goto yystate28 + case c == '\t' || c == ' ': + goto yystate27 case c >= 'A' && c <= 'Z' || c >= 'a' && c <= 'z': - goto yystate24 + goto yystate29 } -yystate24: +yystate27: c = l.next() switch { default: goto yyabort case c == '=': - goto yystate25 - case c >= '0' && c <= '9' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z': - goto yystate24 + goto yystate28 + case c == '\t' || c == ' ': + goto yystate27 } -yystate25: +yystate28: c = l.next() - goto yyrule9 + goto yyrule11 - goto yystate26 // silence unused label error -yystate26: +yystate29: c = l.next() -yystart26: + switch { + default: + goto yyrule10 + case c >= '0' && c <= '9' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z': + goto yystate29 + } + + goto yystate30 // silence unused label error +yystate30: + c = l.next() +yystart30: switch { default: goto yyabort case c == '"': - goto yystate27 + goto yystate32 + case c == '\t' || c == ' ': + goto yystate31 } -yystate27: +yystate31: c = l.next() switch { default: - goto yystate27 // c >= '\x00' && c <= '!' || c >= '#' && c <= '[' || c >= ']' && c <= 'ÿ' + goto yyrule12 + case c == '\t' || c == ' ': + goto yystate31 + } + +yystate32: + c = l.next() + goto yyrule13 + + goto yystate33 // silence unused label error +yystate33: + c = l.next() +yystart33: + switch { + default: + goto yystate34 // c >= '\x00' && c <= '!' || c >= '#' && c <= '[' || c >= ']' && c <= 'ÿ' case c == '"': - goto yystate28 + goto yystate35 case c == '\\': - goto yystate29 + goto yystate36 } -yystate28: +yystate34: c = l.next() - goto yyrule10 + switch { + default: + goto yystate34 // c >= '\x00' && c <= '!' || c >= '#' && c <= '[' || c >= ']' && c <= 'ÿ' + case c == '"': + goto yystate35 + case c == '\\': + goto yystate36 + } -yystate29: +yystate35: + c = l.next() + goto yyrule14 + +yystate36: c = l.next() switch { default: goto yyabort case c >= '\x01' && c <= '\t' || c >= '\v' && c <= 'ÿ': - goto yystate27 + goto yystate34 } yyrule1: // \0 @@ -351,62 +431,80 @@ yyrule3: // [\r\n \t]+ l.mstart = l.i goto yystate0 } -yyrule4: // {S}({M}|{D})*\{ +yyrule4: // {S}({M}|{D})* + { + s = lstateName + l.offsets = append(l.offsets, l.i) + l.mend = l.i + goto yystate0 + } +yyrule5: // ([ \t]*)\{ { s = lstateLabels - l.offsets = append(l.offsets, l.i-1) goto yystate0 } -yyrule5: // {S}({M}|{D})* +yyrule6: // [ \t]+ { s = lstateValue - l.mend = l.i - l.offsets = append(l.offsets, l.i) + l.vstart = l.i goto yystate0 } -yyrule6: // [ \t]+ +yyrule7: // [ \t]+ goto yystate0 -yyrule7: // ,?\} +yyrule8: // ,?\} { s = lstateValue l.mend = l.i goto yystate0 } -yyrule8: // ,? +yyrule9: // (,?[ \t]*) { s = lstateLName l.offsets = append(l.offsets, l.i) goto yystate0 } -yyrule9: // {S}({L}|{D})*= +yyrule10: // {S}({L}|{D})* + { + l.offsets = append(l.offsets, l.i) + goto yystate0 + } +yyrule11: // [ \t]*= { s = lstateLValue - l.offsets = append(l.offsets, l.i-1) goto yystate0 } -yyrule10: // \"(\\.|[^\\"]|\0)*\" +yyrule12: // [ \t]+ + + goto yystate0 +yyrule13: // \" + { + s = lstateLValueIn + l.offsets = append(l.offsets, l.i) + goto yystate0 + } +yyrule14: // (\\.|[^\\"]|\0)*\" { s = lstateLabels - if !utf8.Valid(l.b[l.offsets[len(l.offsets)-1]+2 : l.i-1]) { + if !utf8.Valid(l.b[l.offsets[len(l.offsets)-1] : l.i-1]) { l.err = fmt.Errorf("Invalid UTF-8 label value.") return -1 } l.offsets = append(l.offsets, l.i-1) goto yystate0 } -yyrule11: // [ \t]+ +yyrule15: // [ \t]+ { l.vstart = l.i goto yystate0 } -yyrule12: // (NaN) +yyrule16: // (NaN) { l.val = math.Float64frombits(value.NormalNaN) s = lstateTimestamp goto yystate0 } -yyrule13: // [^\n \t\r]+ +yyrule17: // [^\n \t\r]+ { // We don't parse strictly correct floats as the conversion // repeats the effort anyway. @@ -417,12 +515,12 @@ yyrule13: // [^\n \t\r]+ s = lstateTimestamp goto yystate0 } -yyrule14: // [ \t]+ +yyrule18: // [ \t]+ { l.tstart = l.i goto yystate0 } -yyrule15: // {D}+ +yyrule19: // {D}+ { ts, err := strconv.ParseInt(yoloString(l.b[l.tstart:l.i]), 10, 64) if err != nil { @@ -432,12 +530,12 @@ yyrule15: // {D}+ l.ts = &ts goto yystate0 } -yyrule16: // [\r\n]+ +yyrule20: // [\r\n]+ { l.nextMstart = l.i return 1 } -yyrule17: // \0 +yyrule21: // \0 { return 1 diff --git a/pkg/textparse/parse.go b/pkg/textparse/parse.go index cfb0a4ce4..1792cecf2 100644 --- a/pkg/textparse/parse.go +++ b/pkg/textparse/parse.go @@ -110,12 +110,13 @@ func (p *Parser) Metric(l *labels.Labels) string { Value: s[:p.l.offsets[0]-p.l.mstart], }) - for i := 1; i < len(p.l.offsets); i += 3 { + for i := 1; i < len(p.l.offsets); i += 4 { a := p.l.offsets[i] - p.l.mstart b := p.l.offsets[i+1] - p.l.mstart c := p.l.offsets[i+2] - p.l.mstart + d := p.l.offsets[i+3] - p.l.mstart - *l = append(*l, labels.Label{Name: s[a:b], Value: s[b+2 : c]}) + *l = append(*l, labels.Label{Name: s[a:b], Value: s[c:d]}) } sort.Sort((*l)[1:]) diff --git a/pkg/textparse/parse_test.go b/pkg/textparse/parse_test.go index 3b9f15bb4..b11a9e8a5 100644 --- a/pkg/textparse/parse_test.go +++ b/pkg/textparse/parse_test.go @@ -33,6 +33,12 @@ func TestParse(t *testing.T) { go_gc_duration_seconds{quantile="0"} 4.9351e-05 go_gc_duration_seconds{quantile="0.25",} 7.424100000000001e-05 go_gc_duration_seconds{quantile="0.5",a="b"} 8.3835e-05 +go_gc_duration_seconds{quantile="0.8", a="b"} 8.3835e-05 +go_gc_duration_seconds{ quantile="0.9", a="b"} 8.3835e-05 +go_gc_duration_seconds{ quantile="1.0", a="b" } 8.3835e-05 +go_gc_duration_seconds { quantile="1.0", a="b" } 8.3835e-05 +go_gc_duration_seconds { quantile= "1.0", a= "b" } 8.3835e-05 +go_gc_duration_seconds { quantile = "1.0", a = "b" } 8.3835e-05 go_gc_duration_seconds_count 99 some:aggregate:rate5m{a_b="c"} 1 # HELP go_goroutines Number of goroutines that currently exist. @@ -60,6 +66,30 @@ go_goroutines 33 123123` m: `go_gc_duration_seconds{quantile="0.5",a="b"}`, v: 8.3835e-05, lset: labels.FromStrings("__name__", "go_gc_duration_seconds", "quantile", "0.5", "a", "b"), + }, { + m: `go_gc_duration_seconds{quantile="0.8", a="b"}`, + v: 8.3835e-05, + lset: labels.FromStrings("__name__", "go_gc_duration_seconds", "quantile", "0.8", "a", "b"), + }, { + m: `go_gc_duration_seconds{ quantile="0.9", a="b"}`, + v: 8.3835e-05, + lset: labels.FromStrings("__name__", "go_gc_duration_seconds", "quantile", "0.9", "a", "b"), + }, { + m: `go_gc_duration_seconds{ quantile="1.0", a="b" }`, + v: 8.3835e-05, + lset: labels.FromStrings("__name__", "go_gc_duration_seconds", "quantile", "1.0", "a", "b"), + }, { + m: `go_gc_duration_seconds { quantile="1.0", a="b" }`, + v: 8.3835e-05, + lset: labels.FromStrings("__name__", "go_gc_duration_seconds", "quantile", "1.0", "a", "b"), + }, { + m: `go_gc_duration_seconds { quantile= "1.0", a= "b" }`, + v: 8.3835e-05, + lset: labels.FromStrings("__name__", "go_gc_duration_seconds", "quantile", "1.0", "a", "b"), + }, { + m: `go_gc_duration_seconds { quantile = "1.0", a = "b" }`, + v: 8.3835e-05, + lset: labels.FromStrings("__name__", "go_gc_duration_seconds", "quantile", "1.0", "a", "b"), }, { m: `go_gc_duration_seconds_count`, v: 99, @@ -139,6 +169,7 @@ func TestParseErrors(t *testing.T) { p := New([]byte(c.input)) for p.Next() { } + require.NotNil(t, p.Err()) require.Equal(t, c.err, p.Err().Error()) } }