PromQL: Avoid lexer item copies and allocations (#6584)

* PromQL: Avoid lexer item copies and allocations

Signed-off-by: Tobias Guggenmos <tguggenm@redhat.com>
pull/6594/head
Tobias Guggenmos 2020-01-09 12:26:58 +01:00 committed by Brian Brazil
parent 097fda1440
commit b18b6cb332
4 changed files with 77 additions and 61 deletions

View File

@ -217,13 +217,14 @@ type Pos int
// Lexer holds the state of the scanner.
type Lexer struct {
input string // The string being scanned.
state stateFn // The next lexing function to enter.
pos Pos // Current position in the input.
start Pos // Start position of this Item.
width Pos // Width of last rune read from input.
lastPos Pos // Position of most recent Item returned by NextItem.
Items []Item // Slice buffer of scanned Items.
input string // The string being scanned.
state stateFn // The next lexing function to enter.
pos Pos // Current position in the input.
start Pos // Start position of this Item.
width Pos // Width of last rune read from input.
lastPos Pos // Position of most recent Item returned by NextItem.
itemp *Item // Pointer to where the next scanned item should be placed.
scannedItem bool // Set to true every time an item is scanned.
parenDepth int // Nesting depth of ( ) exprs.
braceOpen bool // Whether a { is opened.
@ -262,8 +263,9 @@ func (l *Lexer) backup() {
// emit passes an Item back to the client.
func (l *Lexer) emit(t ItemType) {
l.Items = append(l.Items, Item{t, l.start, l.input[l.start:l.pos]})
*l.itemp = Item{t, l.start, l.input[l.start:l.pos]}
l.start = l.pos
l.scannedItem = true
}
// ignore skips over the pending input before this point.
@ -308,23 +310,26 @@ func (l *Lexer) linePosition() int {
// errorf returns an error token and terminates the scan by passing
// back a nil pointer that will be the next state, terminating l.NextItem.
func (l *Lexer) errorf(format string, args ...interface{}) stateFn {
l.Items = append(l.Items, Item{ERROR, l.start, fmt.Sprintf(format, args...)})
*l.itemp = Item{ERROR, l.start, fmt.Sprintf(format, args...)}
l.scannedItem = true
return nil
}
// NextItem returns the next Item from the input.
func (l *Lexer) NextItem() Item {
for len(l.Items) == 0 {
if l.state != nil {
// NextItem writes the next item to the provided address.
func (l *Lexer) NextItem(itemp *Item) {
l.scannedItem = false
l.itemp = itemp
if l.state != nil {
for !l.scannedItem {
l.state = l.state(l)
} else {
l.emit(EOF)
}
} else {
l.emit(EOF)
}
Item := l.Items[0]
l.Items = l.Items[1:]
l.lastPos = Item.Pos
return Item
l.lastPos = l.itemp.Pos
}
// Lex creates a new scanner for the input string.
@ -336,13 +341,6 @@ func Lex(input string) *Lexer {
return l
}
// run runs the state machine for the lexer.
//
// It starts in the lexStatements state and repeatedly replaces l.state with
// the state function returned by the current one, stopping as soon as a state
// function returns nil.
func (l *Lexer) run() {
for l.state = lexStatements; l.state != nil; {
l.state = l.state(l)
}
}
// lineComment is the character that starts a line comment.
const lineComment = "#"
@ -442,7 +440,7 @@ func lexStatements(l *Lexer) stateFn {
case r == '{':
l.emit(LEFT_BRACE)
l.braceOpen = true
return lexInsideBraces(l)
return lexInsideBraces
case r == '[':
if l.bracketOpen {
return l.errorf("unexpected left bracket %q", r)
@ -559,14 +557,14 @@ func lexValueSequence(l *Lexer) stateFn {
// package of the Go standard library to work for Prometheus-style strings.
// None of the actual escaping/quoting logic was changed in this function - it
// was only modified to integrate with our lexer.
func lexEscape(l *Lexer) {
func lexEscape(l *Lexer) stateFn {
var n int
var base, max uint32
ch := l.next()
switch ch {
case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', l.stringOpen:
return
return lexString
case '0', '1', '2', '3', '4', '5', '6', '7':
n, base, max = 3, 8, 255
case 'x':
@ -580,8 +578,10 @@ func lexEscape(l *Lexer) {
n, base, max = 8, 16, unicode.MaxRune
case eof:
l.errorf("escape sequence not terminated")
return lexString
default:
l.errorf("unknown escape sequence %#U", ch)
return lexString
}
var x uint32
@ -590,8 +590,10 @@ func lexEscape(l *Lexer) {
if d >= base {
if ch == eof {
l.errorf("escape sequence not terminated")
return lexString
}
l.errorf("illegal character %#U in escape sequence", ch)
return lexString
}
x = x*base + d
ch = l.next()
@ -601,6 +603,7 @@ func lexEscape(l *Lexer) {
if x > max || 0xD800 <= x && x < 0xE000 {
l.errorf("escape sequence is an invalid Unicode code point")
}
return lexString
}
// digitVal returns the digit value of a rune or 16 in case the rune does not
@ -631,9 +634,10 @@ Loop:
for {
switch l.next() {
case '\\':
lexEscape(l)
return lexEscape
case utf8.RuneError:
return l.errorf("invalid UTF-8 rune")
l.errorf("invalid UTF-8 rune")
return lexString
case eof, '\n':
return l.errorf("unterminated quoted string")
case l.stringOpen:
@ -650,9 +654,11 @@ Loop:
for {
switch l.next() {
case utf8.RuneError:
return l.errorf("invalid UTF-8 rune")
l.errorf("invalid UTF-8 rune")
return lexRawString
case eof:
return l.errorf("unterminated raw string")
l.errorf("unterminated raw string")
return lexRawString
case l.stringOpen:
break Loop
}

View File

@ -697,13 +697,25 @@ func TestLexer(t *testing.T) {
input: test.input,
seriesDesc: test.seriesDesc,
}
l.run()
out := l.Items
var out []Item
for l.state = lexStatements; l.state != nil; {
out = append(out, Item{})
l.NextItem(&out[len(out)-1])
}
lastItem := out[len(out)-1]
if test.fail {
if lastItem.Typ != ERROR {
hasError := false
for _, item := range out {
if item.Typ == ERROR {
hasError = true
}
}
if !hasError {
t.Logf("%d: input %q", i, test.input)
t.Fatalf("expected lexing error but did not fail")
}

View File

@ -29,12 +29,13 @@ import (
)
type parser struct {
lex *Lexer
token Item
lex *Lexer
inject Item
inject ItemType
injecting bool
yyParser yyParserImpl
generatedParserResult interface{}
}
@ -129,21 +130,6 @@ func (p *parser) typecheck(node Node) (err error) {
return nil
}
// next returns the next non-comment token from the lexer.
//
// The returned Item is also stored in p.token. If the Item has type ERROR, its
// Val is reported through p.errorf.
func (p *parser) next() Item {
t := p.lex.NextItem()
// Skip comments.
for t.Typ == COMMENT {
t = p.lex.NextItem()
}
p.token = t
// Surface a lexer error as a parser error, using the lexer's message.
if p.token.Typ == ERROR {
p.errorf("%s", p.token.Val)
}
return p.token
}
// errorf formats the error and terminates processing.
func (p *parser) errorf(format string, args ...interface{}) {
p.error(errors.Errorf(format, args...))
@ -169,7 +155,7 @@ func (p *parser) unexpected(context string, expected string) {
var errMsg strings.Builder
errMsg.WriteString("unexpected ")
errMsg.WriteString(p.token.desc())
errMsg.WriteString(p.yyParser.lval.item.desc())
if context != "" {
errMsg.WriteString(" in ")
@ -211,16 +197,28 @@ func (p *parser) recover(errp *error) {
//
// For more information, see https://godoc.org/golang.org/x/tools/cmd/goyacc.
func (p *parser) Lex(lval *yySymType) int {
var typ ItemType
if p.injecting {
lval.item = p.inject
p.injecting = false
return int(p.inject)
} else {
lval.item = p.next()
// Skip comments.
for {
p.lex.NextItem(&lval.item)
typ = lval.item.Typ
if typ != COMMENT {
break
}
}
}
typ := lval.item.Typ
if typ == ERROR {
p.errorf("%s", lval.item.Val)
}
if typ == EOF {
lval.item.Typ = EOF
p.InjectItem(0)
}
@ -251,7 +249,7 @@ func (p *parser) InjectItem(typ ItemType) {
panic("cannot inject symbol that isn't start symbol")
}
p.inject = Item{Typ: typ}
p.inject = typ
p.injecting = true
}
func (p *parser) newBinaryExpression(lhs Node, op Item, modifiers Node, rhs Node) *BinaryExpr {
@ -525,7 +523,7 @@ func parseDuration(ds string) (time.Duration, error) {
func (p *parser) parseGenerated(startSymbol ItemType) interface{} {
p.InjectItem(startSymbol)
yyParse(p)
p.yyParser.Parse(p)
return p.generatedParserResult

View File

@ -233,7 +233,7 @@ var testExpr = []struct {
}, {
input: "(1))",
fail: true,
errMsg: "unexpected \")\"",
errMsg: "unexpected right parenthesis ')'",
}, {
input: "((1)",
fail: true,