PromQL: Avoid lexer item copies and allocations (#6584)

* PromQL: Avoid lexer item copies and allocations

Signed-off-by: Tobias Guggenmos <tguggenm@redhat.com>
5 years ago · b18b6cb332
parent 097fda1440
commit b18b6cb332
4 changed files with 77 additions and 61 deletions
--- a/promql/lex.go
+++ b/promql/lex.go
@@ -217,13 +217,14 @@ type Pos int

 // Lexer holds the state of the scanner.
 type Lexer struct {
-	input   string  // The string being scanned.
-	state   stateFn // The next lexing function to enter.
-	pos     Pos     // Current position in the input.
-	start   Pos     // Start position of this Item.
-	width   Pos     // Width of last rune read from input.
-	lastPos Pos     // Position of most recent Item returned by NextItem.
-	Items   []Item  // Slice buffer of scanned Items.
+	input       string  // The string being scanned.
+	state       stateFn // The next lexing function to enter.
+	pos         Pos     // Current position in the input.
+	start       Pos     // Start position of this Item.
+	width       Pos     // Width of last rune read from input.
+	lastPos     Pos     // Position of most recent Item returned by NextItem.
+	itemp       *Item   // Pointer to where the next scanned item should be placed.
+	scannedItem bool    // Set to true every time an item is scanned.

 	parenDepth  int  // Nesting depth of ( ) exprs.
 	braceOpen   bool // Whether a { is opened.
@@ -262,8 +263,9 @@ func (l *Lexer) backup() {

 // emit passes an Item back to the client.
 func (l *Lexer) emit(t ItemType) {
-	l.Items = append(l.Items, Item{t, l.start, l.input[l.start:l.pos]})
+	*l.itemp = Item{t, l.start, l.input[l.start:l.pos]}
 	l.start = l.pos
+	l.scannedItem = true
 }

 // ignore skips over the pending input before this point.
@@ -308,23 +310,26 @@ func (l *Lexer) linePosition() int {
 // errorf returns an error token and terminates the scan by passing
 // back a nil pointer that will be the next state, terminating l.NextItem.
 func (l *Lexer) errorf(format string, args ...interface{}) stateFn {
-	l.Items = append(l.Items, Item{ERROR, l.start, fmt.Sprintf(format, args...)})
+	*l.itemp = Item{ERROR, l.start, fmt.Sprintf(format, args...)}
+	l.scannedItem = true
+
 	return nil
 }

-// NextItem returns the next Item from the input.
-func (l *Lexer) NextItem() Item {
-	for len(l.Items) == 0 {
-		if l.state != nil {
+// NextItem writes the next item to the provided address.
+func (l *Lexer) NextItem(itemp *Item) {
+	l.scannedItem = false
+	l.itemp = itemp
+
+	if l.state != nil {
+		for !l.scannedItem {
 			l.state = l.state(l)
-		} else {
-			l.emit(EOF)
 		}
+	} else {
+		l.emit(EOF)
 	}
-	Item := l.Items[0]
-	l.Items = l.Items[1:]
-	l.lastPos = Item.Pos
-	return Item
+
+	l.lastPos = l.itemp.Pos
 }

 // lex creates a new scanner for the input string.
@@ -336,13 +341,6 @@ func Lex(input string) *Lexer {
 	return l
 }

-// run runs the state machine for the lexer.
-func (l *Lexer) run() {
-	for l.state = lexStatements; l.state != nil; {
-		l.state = l.state(l)
-	}
-}
-
 // lineComment is the character that starts a line comment.
 const lineComment = "#"

@@ -442,7 +440,7 @@ func lexStatements(l *Lexer) stateFn {
 	case r == '{':
 		l.emit(LEFT_BRACE)
 		l.braceOpen = true
-		return lexInsideBraces(l)
+		return lexInsideBraces
 	case r == '[':
 		if l.bracketOpen {
 			return l.errorf("unexpected left bracket %q", r)
@@ -559,14 +557,14 @@ func lexValueSequence(l *Lexer) stateFn {
 // package of the Go standard library to work for Prometheus-style strings.
 // None of the actual escaping/quoting logic was changed in this function - it
 // was only modified to integrate with our lexer.
-func lexEscape(l *Lexer) {
+func lexEscape(l *Lexer) stateFn {
 	var n int
 	var base, max uint32

 	ch := l.next()
 	switch ch {
 	case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', l.stringOpen:
-		return
+		return lexString
 	case '0', '1', '2', '3', '4', '5', '6', '7':
 		n, base, max = 3, 8, 255
 	case 'x':
@@ -580,8 +578,10 @@ func lexEscape(l *Lexer) {
 		n, base, max = 8, 16, unicode.MaxRune
 	case eof:
 		l.errorf("escape sequence not terminated")
+		return lexString
 	default:
 		l.errorf("unknown escape sequence %#U", ch)
+		return lexString
 	}

 	var x uint32
@@ -590,8 +590,10 @@ func lexEscape(l *Lexer) {
 		if d >= base {
 			if ch == eof {
 				l.errorf("escape sequence not terminated")
+				return lexString
 			}
 			l.errorf("illegal character %#U in escape sequence", ch)
+			return lexString
 		}
 		x = x*base + d
 		ch = l.next()
@@ -601,6 +603,7 @@ func lexEscape(l *Lexer) {
 	if x > max || 0xD800 <= x && x < 0xE000 {
 		l.errorf("escape sequence is an invalid Unicode code point")
 	}
+	return lexString
 }

 // digitVal returns the digit value of a rune or 16 in case the rune does not
@@ -631,9 +634,10 @@ Loop:
 	for {
 		switch l.next() {
 		case '\\':
-			lexEscape(l)
+			return lexEscape
 		case utf8.RuneError:
-			return l.errorf("invalid UTF-8 rune")
+			l.errorf("invalid UTF-8 rune")
+			return lexString
 		case eof, '\n':
 			return l.errorf("unterminated quoted string")
 		case l.stringOpen:
@@ -650,9 +654,11 @@ Loop:
 	for {
 		switch l.next() {
 		case utf8.RuneError:
-			return l.errorf("invalid UTF-8 rune")
+			l.errorf("invalid UTF-8 rune")
+			return lexRawString
 		case eof:
-			return l.errorf("unterminated raw string")
+			l.errorf("unterminated raw string")
+			return lexRawString
 		case l.stringOpen:
 			break Loop
 		}
--- a/promql/lex_test.go
+++ b/promql/lex_test.go
@@ -697,13 +697,25 @@ func TestLexer(t *testing.T) {
 					input:      test.input,
 					seriesDesc: test.seriesDesc,
 				}
-				l.run()

-				out := l.Items
+				var out []Item
+
+				for l.state = lexStatements; l.state != nil; {
+					out = append(out, Item{})
+
+					l.NextItem(&out[len(out)-1])
+				}

 				lastItem := out[len(out)-1]
 				if test.fail {
-					if lastItem.Typ != ERROR {
+					hasError := false
+					for _, item := range out {
+						if item.Typ == ERROR {
+							hasError = true
+						}
+
+					}
+					if !hasError {
 						t.Logf("%d: input %q", i, test.input)
 						t.Fatalf("expected lexing error but did not fail")
 					}
--- a/promql/parse.go
+++ b/promql/parse.go
@@ -29,12 +29,13 @@ import (
 )

 type parser struct {
-	lex   *Lexer
-	token Item
+	lex *Lexer

-	inject    Item
+	inject    ItemType
 	injecting bool

+	yyParser yyParserImpl
+
 	generatedParserResult interface{}
 }

@@ -129,21 +130,6 @@ func (p *parser) typecheck(node Node) (err error) {
 	return nil
 }

-// next returns the next token.
-func (p *parser) next() Item {
-	t := p.lex.NextItem()
-	// Skip comments.
-	for t.Typ == COMMENT {
-		t = p.lex.NextItem()
-	}
-	p.token = t
-
-	if p.token.Typ == ERROR {
-		p.errorf("%s", p.token.Val)
-	}
-	return p.token
-}
-
 // errorf formats the error and terminates processing.
 func (p *parser) errorf(format string, args ...interface{}) {
 	p.error(errors.Errorf(format, args...))
@@ -169,7 +155,7 @@ func (p *parser) unexpected(context string, expected string) {
 	var errMsg strings.Builder

 	errMsg.WriteString("unexpected ")
-	errMsg.WriteString(p.token.desc())
+	errMsg.WriteString(p.yyParser.lval.item.desc())

 	if context != "" {
 		errMsg.WriteString(" in ")
@@ -211,16 +197,28 @@ func (p *parser) recover(errp *error) {
 //
 // For more information, see https://godoc.org/golang.org/x/tools/cmd/goyacc.
 func (p *parser) Lex(lval *yySymType) int {
+	var typ ItemType
+
 	if p.injecting {
-		lval.item = p.inject
 		p.injecting = false
+		return int(p.inject)
 	} else {
-		lval.item = p.next()
+		// Skip comments.
+		for {
+			p.lex.NextItem(&lval.item)
+			typ = lval.item.Typ
+			if typ != COMMENT {
+				break
+			}
+		}
 	}

-	typ := lval.item.Typ
+	if typ == ERROR {
+		p.errorf("%s", lval.item.Val)
+	}

 	if typ == EOF {
+		lval.item.Typ = EOF
 		p.InjectItem(0)
 	}

@@ -251,7 +249,7 @@ func (p *parser) InjectItem(typ ItemType) {
 		panic("cannot inject symbol that isn't start symbol")
 	}

-	p.inject = Item{Typ: typ}
+	p.inject = typ
 	p.injecting = true
 }
 func (p *parser) newBinaryExpression(lhs Node, op Item, modifiers Node, rhs Node) *BinaryExpr {
@@ -525,7 +523,7 @@ func parseDuration(ds string) (time.Duration, error) {
 func (p *parser) parseGenerated(startSymbol ItemType) interface{} {
 	p.InjectItem(startSymbol)

-	yyParse(p)
+	p.yyParser.Parse(p)

 	return p.generatedParserResult

--- a/promql/parse_test.go
+++ b/promql/parse_test.go
@@ -233,7 +233,7 @@ var testExpr = []struct {
 	}, {
 		input:  "(1))",
 		fail:   true,
-		errMsg: "unexpected \")\"",
+		errMsg: "unexpected right parenthesis ')'",
 	}, {
 		input:  "((1)",
 		fail:   true,