package jmespath

import (
	"bytes"
	"encoding/json"
	"fmt"
	"strconv"
	"strings"
	"unicode/utf8"
)

type token struct {
	tokenType tokType
	value     string
	position  int
	length    int
}

type tokType int

const eof = -1
// Lexer contains information about the expression being tokenized.
type Lexer struct {
	expression string       // The expression provided by the user.
	currentPos int          // The current position in the string.
	lastWidth  int          // The width of the most recently read rune.
	buf        bytes.Buffer // Internal buffer used for building up values.
}

// SyntaxError is the main error used whenever a lexing or parsing error occurs.
type SyntaxError struct {
	msg        string // Error message displayed to user
	Expression string // Expression that generated a SyntaxError
	Offset     int    // The location in the string where the error occurred
}

func (e SyntaxError) Error() string {
	// In the future, it would be good to underline the specific
	// location where the error occurred.
	return "SyntaxError: " + e.msg
}

// HighlightLocation will show where the syntax error occurred.
// It will place a "^" character on a line below the expression
// at the point where the syntax error occurred.
func (e SyntaxError) HighlightLocation() string {
	return e.Expression + "\n" + strings.Repeat(" ", e.Offset) + "^"
}
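
// For illustration (hypothetical values, not from this file): a SyntaxError
// with Expression "foo.]bar" and Offset 4 renders as:
//
//	foo.]bar
//	    ^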

//go:generate stringer -type=tokType
const (
	tUnknown tokType = iota
	tStar
	tDot
	tFilter
	tFlatten
	tLparen
	tRparen
	tLbracket
	tRbracket
	tLbrace
	tRbrace
	tOr
	tPipe
	tNumber
	tUnquotedIdentifier
	tQuotedIdentifier
	tComma
	tColon
	tLT
	tLTE
	tGT
	tGTE
	tEQ
	tNE
	tJSONLiteral
	tStringLiteral
	tCurrent
	tExpref
	tAnd
	tNot
	tEOF
)

var basicTokens = map[rune]tokType{
	'.': tDot,
	'*': tStar,
	',': tComma,
	':': tColon,
	'{': tLbrace,
	'}': tRbrace,
	']': tRbracket, // tLbracket not included because it could be "[]"
	'(': tLparen,
	')': tRparen,
	'@': tCurrent,
}

// Bit mask for [a-zA-Z_] shifted down 64 bits to fit in a single uint64.
// When using this bitmask just be sure to shift the rune down 64 bits
// before checking it against identifierStartBits.
const identifierStartBits uint64 = 576460745995190270

// Bit masks for [0-9a-zA-Z_], covering runes 0-127 as 2 uint64s.
var identifierTrailingBits = [2]uint64{287948901175001088, 576460745995190270}
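
// Worked example of the start-of-identifier test used in tokenize: for
// r = 'a' (code point 97), the shifted bit index is 97-64 = 33, and
// identifierStartBits&(1<<33) is nonzero, so 'a' may begin an identifier.
// Runes below 64 produce a shift count of 64 or more, which yields 0 for
// a uint64 in Go, so they fail the test without an explicit range check.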

var whiteSpace = map[rune]bool{
	' ': true, '\t': true, '\n': true, '\r': true,
}

func (t token) String() string {
	return fmt.Sprintf("Token{%+v, %s, %d, %d}",
		t.tokenType, t.value, t.position, t.length)
}

// NewLexer creates a new JMESPath lexer.
func NewLexer() *Lexer {
	lexer := Lexer{}
	return &lexer
}

// next returns the next rune in the expression, advancing currentPos.
// It returns eof once the end of the expression has been reached.
func (lexer *Lexer) next() rune {
	if lexer.currentPos >= len(lexer.expression) {
		lexer.lastWidth = 0
		return eof
	}
	r, w := utf8.DecodeRuneInString(lexer.expression[lexer.currentPos:])
	lexer.lastWidth = w
	lexer.currentPos += w
	return r
}

// back steps back over the rune most recently returned by next.
func (lexer *Lexer) back() {
	lexer.currentPos -= lexer.lastWidth
}

// peek returns the next rune without consuming it.
func (lexer *Lexer) peek() rune {
	t := lexer.next()
	lexer.back()
	return t
}

// tokenize takes an expression and returns corresponding tokens.
func (lexer *Lexer) tokenize(expression string) ([]token, error) {
	var tokens []token
	lexer.expression = expression
	lexer.currentPos = 0
	lexer.lastWidth = 0
loop:
	for {
		r := lexer.next()
		if identifierStartBits&(1<<(uint64(r)-64)) > 0 {
			t := lexer.consumeUnquotedIdentifier()
			tokens = append(tokens, t)
		} else if val, ok := basicTokens[r]; ok {
			// Basic single char token.
			t := token{
				tokenType: val,
				value:     string(r),
				position:  lexer.currentPos - lexer.lastWidth,
				length:    1,
			}
			tokens = append(tokens, t)
		} else if r == '-' || (r >= '0' && r <= '9') {
			t := lexer.consumeNumber()
			tokens = append(tokens, t)
		} else if r == '[' {
			t := lexer.consumeLBracket()
			tokens = append(tokens, t)
		} else if r == '"' {
			t, err := lexer.consumeQuotedIdentifier()
			if err != nil {
				return tokens, err
			}
			tokens = append(tokens, t)
		} else if r == '\'' {
			t, err := lexer.consumeRawStringLiteral()
			if err != nil {
				return tokens, err
			}
			tokens = append(tokens, t)
		} else if r == '`' {
			t, err := lexer.consumeLiteral()
			if err != nil {
				return tokens, err
			}
			tokens = append(tokens, t)
		} else if r == '|' {
			t := lexer.matchOrElse(r, '|', tOr, tPipe)
			tokens = append(tokens, t)
		} else if r == '<' {
			t := lexer.matchOrElse(r, '=', tLTE, tLT)
			tokens = append(tokens, t)
		} else if r == '>' {
			t := lexer.matchOrElse(r, '=', tGTE, tGT)
			tokens = append(tokens, t)
		} else if r == '!' {
			t := lexer.matchOrElse(r, '=', tNE, tNot)
			tokens = append(tokens, t)
		} else if r == '=' {
			t := lexer.matchOrElse(r, '=', tEQ, tUnknown)
			tokens = append(tokens, t)
		} else if r == '&' {
			t := lexer.matchOrElse(r, '&', tAnd, tExpref)
			tokens = append(tokens, t)
		} else if r == eof {
			break loop
		} else if _, ok := whiteSpace[r]; ok {
			// Ignore whitespace
		} else {
			return tokens, lexer.syntaxError(fmt.Sprintf("Unknown char: %s", strconv.QuoteRuneToASCII(r)))
		}
	}
	tokens = append(tokens, token{tEOF, "", len(lexer.expression), 0})
	return tokens, nil
}
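
// A minimal usage sketch (hypothetical; tokenize is unexported, so this only
// works from within the package):
//
//	lexer := NewLexer()
//	tokens, err := lexer.tokenize("foo.bar[0] || baz")
//
// On success, tokens always ends with token{tEOF, "", len(expression), 0}.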

// Consume characters until the ending rune "r" is reached.
// If the end of the expression is reached before seeing the
// terminating rune "r", then an error is returned.
// If no error occurs then the matching substring is returned.
// The returned string will not include the ending rune.
func (lexer *Lexer) consumeUntil(end rune) (string, error) {
	start := lexer.currentPos
	current := lexer.next()
	for current != end && current != eof {
		if current == '\\' && lexer.peek() != eof {
			lexer.next()
		}
		current = lexer.next()
	}
	if lexer.lastWidth == 0 {
		// Then we hit an EOF so we never reached the closing
		// delimiter.
		return "", SyntaxError{
			msg:        "Unclosed delimiter: " + string(end),
			Expression: lexer.expression,
			Offset:     len(lexer.expression),
		}
	}
	return lexer.expression[start : lexer.currentPos-lexer.lastWidth], nil
}
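
// Note that consumeUntil skips over backslash-escaped runes but leaves the
// backslashes in the returned substring. For example, while lexing `"a\"b"`
// with the cursor just past the opening quote, consumeUntil('"') returns the
// raw substring a\"b; the escape is resolved later by the caller (here,
// consumeQuotedIdentifier via json.Unmarshal).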

func (lexer *Lexer) consumeLiteral() (token, error) {
	start := lexer.currentPos
	value, err := lexer.consumeUntil('`')
	if err != nil {
		return token{}, err
	}
	value = strings.Replace(value, "\\`", "`", -1)
	return token{
		tokenType: tJSONLiteral,
		value:     value,
		position:  start,
		length:    len(value),
	}, nil
}

func (lexer *Lexer) consumeRawStringLiteral() (token, error) {
	start := lexer.currentPos
	currentIndex := start
	current := lexer.next()
	for current != '\'' && lexer.peek() != eof {
		if current == '\\' && lexer.peek() == '\'' {
			chunk := lexer.expression[currentIndex : lexer.currentPos-1]
			lexer.buf.WriteString(chunk)
			lexer.buf.WriteString("'")
			lexer.next()
			currentIndex = lexer.currentPos
		}
		current = lexer.next()
	}
	if lexer.lastWidth == 0 {
		// Then we hit an EOF so we never reached the closing
		// delimiter.
		return token{}, SyntaxError{
			msg:        "Unclosed delimiter: '",
			Expression: lexer.expression,
			Offset:     len(lexer.expression),
		}
	}
	if currentIndex < lexer.currentPos {
		lexer.buf.WriteString(lexer.expression[currentIndex : lexer.currentPos-1])
	}
	value := lexer.buf.String()
	// Reset the buffer so it can be reused again.
	lexer.buf.Reset()
	return token{
		tokenType: tStringLiteral,
		value:     value,
		position:  start,
		length:    len(value),
	}, nil
}
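
// For example, the raw string literal 'it\'s' (after tokenize has read the
// opening quote) is consumed into a tStringLiteral token whose value is it's.
// Only the \' escape is special inside raw string literals; any other
// backslash is preserved as-is.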

func (lexer *Lexer) syntaxError(msg string) SyntaxError {
	return SyntaxError{
		msg:        msg,
		Expression: lexer.expression,
		Offset:     lexer.currentPos - 1,
	}
}

// Checks for a two char token, otherwise matches a single character
// token. This is used whenever a two char token overlaps a single
// char token, e.g. "||" -> tOr, "|" -> tPipe.
func (lexer *Lexer) matchOrElse(first rune, second rune, matchedType tokType, singleCharType tokType) token {
	start := lexer.currentPos - lexer.lastWidth
	nextRune := lexer.next()
	var t token
	if nextRune == second {
		t = token{
			tokenType: matchedType,
			value:     string(first) + string(second),
			position:  start,
			length:    2,
		}
	} else {
		lexer.back()
		t = token{
			tokenType: singleCharType,
			value:     string(first),
			position:  start,
			length:    1,
		}
	}
	return t
}
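
// For instance, when tokenize sees '<' it calls matchOrElse('<', '=', tLTE, tLT):
// the input "<=" yields a single tLTE token of length 2, while "<5" yields a
// tLT token and leaves '5' to be consumed as a number on the next iteration.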

func (lexer *Lexer) consumeLBracket() token {
	// There are three options here:
	// 1. A filter expression "[?"
	// 2. A flatten operator "[]"
	// 3. A bare lbracket "["
	start := lexer.currentPos - lexer.lastWidth
	nextRune := lexer.next()
	var t token
	if nextRune == '?' {
		t = token{
			tokenType: tFilter,
			value:     "[?",
			position:  start,
			length:    2,
		}
	} else if nextRune == ']' {
		t = token{
			tokenType: tFlatten,
			value:     "[]",
			position:  start,
			length:    2,
		}
	} else {
		t = token{
			tokenType: tLbracket,
			value:     "[",
			position:  start,
			length:    1,
		}
		lexer.back()
	}
	return t
}

func (lexer *Lexer) consumeQuotedIdentifier() (token, error) {
	start := lexer.currentPos
	value, err := lexer.consumeUntil('"')
	if err != nil {
		return token{}, err
	}
	var decoded string
	asJSON := []byte("\"" + value + "\"")
	if err := json.Unmarshal(asJSON, &decoded); err != nil {
		return token{}, err
	}
	return token{
		tokenType: tQuotedIdentifier,
		value:     decoded,
		position:  start - 1,
		length:    len(decoded),
	}, nil
}
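
// As an example of the decoding step: the quoted identifier "a\nb" in an
// expression reaches this function as the raw substring a\nb; wrapping it in
// quotes and passing it through json.Unmarshal produces an identifier whose
// value contains a literal newline.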

func (lexer *Lexer) consumeUnquotedIdentifier() token {
	// Consume runes until we reach the end of an unquoted
	// identifier.
	start := lexer.currentPos - lexer.lastWidth
	for {
		r := lexer.next()
		// Runes of 128 and above fall outside the two mask words, so
		// use >= to avoid indexing past identifierTrailingBits.
		if r < 0 || r >= 128 || identifierTrailingBits[uint64(r)/64]&(1<<(uint64(r)%64)) == 0 {
			lexer.back()
			break
		}
	}
	value := lexer.expression[start:lexer.currentPos]
	return token{
		tokenType: tUnquotedIdentifier,
		value:     value,
		position:  start,
		length:    lexer.currentPos - start,
	}
}

func (lexer *Lexer) consumeNumber() token {
	// Consume runes until we reach something that's not a number.
	start := lexer.currentPos - lexer.lastWidth
	for {
		r := lexer.next()
		if r < '0' || r > '9' {
			lexer.back()
			break
		}
	}
	value := lexer.expression[start:lexer.currentPos]
	return token{
		tokenType: tNumber,
		value:     value,
		position:  start,
		length:    lexer.currentPos - start,
	}
}