mirror of https://github.com/prometheus/prometheus
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
223 lines
5.1 KiB
223 lines
5.1 KiB
// Copyright 2015 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
|
|
|
package strutil |
|
|
|
import ( |
|
"errors" |
|
"unicode/utf8" |
|
) |
|
|
|
// ErrSyntax indicates that a value does not have the right syntax for the
// target type. It is the sentinel error returned by Unquote and its helpers
// for every malformed literal.
var ErrSyntax = errors.New("invalid syntax")
|
|
|
// Unquote interprets s as a single-quoted, double-quoted, or backquoted |
|
// Prometheus query language string literal, returning the string value that s |
|
// quotes. |
|
// |
|
// NOTE: This function as well as the necessary helper functions below |
|
// (unquoteChar, contains, unhex) and associated tests have been adapted from |
|
// the corresponding functions in the "strconv" package of the Go standard |
|
// library to work for Prometheus-style strings. Go's special-casing for single |
|
// quotes was removed and single quoted strings are now treated the same as |
|
// double quoted ones. |
|
func Unquote(s string) (t string, err error) { |
|
n := len(s) |
|
if n < 2 { |
|
return "", ErrSyntax |
|
} |
|
quote := s[0] |
|
if quote != s[n-1] { |
|
return "", ErrSyntax |
|
} |
|
s = s[1 : n-1] |
|
|
|
if quote == '`' { |
|
if contains(s, '`') { |
|
return "", ErrSyntax |
|
} |
|
return s, nil |
|
} |
|
if quote != '"' && quote != '\'' { |
|
return "", ErrSyntax |
|
} |
|
if contains(s, '\n') { |
|
return "", ErrSyntax |
|
} |
|
|
|
// Is it trivial? Avoid allocation. |
|
if !contains(s, '\\') && !contains(s, quote) { |
|
return s, nil |
|
} |
|
|
|
var runeTmp [utf8.UTFMax]byte |
|
buf := make([]byte, 0, 3*len(s)/2) // Try to avoid more allocations. |
|
for len(s) > 0 { |
|
c, multibyte, ss, err := unquoteChar(s, quote) |
|
if err != nil { |
|
return "", err |
|
} |
|
s = ss |
|
if c < utf8.RuneSelf || !multibyte { |
|
buf = append(buf, byte(c)) |
|
} else { |
|
n := utf8.EncodeRune(runeTmp[:], c) |
|
buf = append(buf, runeTmp[:n]...) |
|
} |
|
} |
|
return string(buf), nil |
|
} |
|
|
|
// unquoteChar decodes the first character or byte in the escaped string |
|
// or character literal represented by the string s. |
|
// It returns four values: |
|
// |
|
// 1) value, the decoded Unicode code point or byte value; |
|
// 2) multibyte, a boolean indicating whether the decoded character requires a multibyte UTF-8 representation; |
|
// 3) tail, the remainder of the string after the character; and |
|
// 4) an error that will be nil if the character is syntactically valid. |
|
// |
|
// The second argument, quote, specifies the type of literal being parsed |
|
// and therefore which escaped quote character is permitted. |
|
// If set to a single quote, it permits the sequence \' and disallows unescaped '. |
|
// If set to a double quote, it permits \" and disallows unescaped ". |
|
// If set to zero, it does not permit either escape and allows both quote characters to appear unescaped. |
|
func unquoteChar(s string, quote byte) (value rune, multibyte bool, tail string, err error) { |
|
// easy cases |
|
switch c := s[0]; { |
|
case c == quote && (quote == '\'' || quote == '"'): |
|
err = ErrSyntax |
|
return |
|
case c >= utf8.RuneSelf: |
|
r, size := utf8.DecodeRuneInString(s) |
|
return r, true, s[size:], nil |
|
case c != '\\': |
|
return rune(s[0]), false, s[1:], nil |
|
} |
|
|
|
// Hard case: c is backslash. |
|
if len(s) <= 1 { |
|
err = ErrSyntax |
|
return |
|
} |
|
c := s[1] |
|
s = s[2:] |
|
|
|
switch c { |
|
case 'a': |
|
value = '\a' |
|
case 'b': |
|
value = '\b' |
|
case 'f': |
|
value = '\f' |
|
case 'n': |
|
value = '\n' |
|
case 'r': |
|
value = '\r' |
|
case 't': |
|
value = '\t' |
|
case 'v': |
|
value = '\v' |
|
case 'x', 'u', 'U': |
|
n := 0 |
|
switch c { |
|
case 'x': |
|
n = 2 |
|
case 'u': |
|
n = 4 |
|
case 'U': |
|
n = 8 |
|
} |
|
var v rune |
|
if len(s) < n { |
|
err = ErrSyntax |
|
return |
|
} |
|
for j := 0; j < n; j++ { |
|
x, ok := unhex(s[j]) |
|
if !ok { |
|
err = ErrSyntax |
|
return |
|
} |
|
v = v<<4 | x |
|
} |
|
s = s[n:] |
|
if c == 'x' { |
|
// Single-byte string, possibly not UTF-8. |
|
value = v |
|
break |
|
} |
|
if v > utf8.MaxRune { |
|
err = ErrSyntax |
|
return |
|
} |
|
value = v |
|
multibyte = true |
|
case '0', '1', '2', '3', '4', '5', '6', '7': |
|
v := rune(c) - '0' |
|
if len(s) < 2 { |
|
err = ErrSyntax |
|
return |
|
} |
|
for j := 0; j < 2; j++ { // One digit already; two more. |
|
x := rune(s[j]) - '0' |
|
if x < 0 || x > 7 { |
|
err = ErrSyntax |
|
return |
|
} |
|
v = (v << 3) | x |
|
} |
|
s = s[2:] |
|
if v > 255 { |
|
err = ErrSyntax |
|
return |
|
} |
|
value = v |
|
case '\\': |
|
value = '\\' |
|
case '\'', '"': |
|
if c != quote { |
|
err = ErrSyntax |
|
return |
|
} |
|
value = rune(c) |
|
default: |
|
err = ErrSyntax |
|
return |
|
} |
|
tail = s |
|
return |
|
} |
|
|
|
// contains reports whether the string contains the byte c.
// It compares raw bytes, so c is matched against every byte of s, including
// the individual bytes of multi-byte UTF-8 sequences.
func contains(s string, c byte) bool {
	for i := 0; i < len(s); i++ {
		if s[i] == c {
			return true
		}
	}
	return false
}
|
|
|
// unhex converts a single hexadecimal digit (0-9, a-f or A-F) to its numeric
// value. When b is not a hexadecimal digit it returns 0 and ok == false.
func unhex(b byte) (v rune, ok bool) {
	c := rune(b)
	switch {
	case '0' <= c && c <= '9':
		return c - '0', true
	case 'a' <= c && c <= 'f':
		return c - 'a' + 10, true
	case 'A' <= c && c <= 'F':
		return c - 'A' + 10, true
	}
	return 0, false
}
|
|
|