k3s/vendor/go.starlark.net/syntax/quote.go

// Copyright 2017 The Bazel Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package syntax

// Starlark quoted string utilities.

import (
	"fmt"
	"strconv"
	"strings"
)

// unesc is the decoding table for single-character escapes: indexed by the
// byte that follows a backslash, it yields the byte the escape stands for.
// Entries for bytes that are not one-character escapes are zero; unquote
// only consults the table for the ten bytes listed here.
var unesc = [256]byte{
	'\\': '\\',
	'\'': '\'',
	'"':  '"',
	'a':  '\a',
	'b':  '\b',
	'f':  '\f',
	'n':  '\n',
	'r':  '\r',
	't':  '\t',
	'v':  '\v',
}

// esc is the encoding table for single-character escapes: indexed by a byte
// of the string value, it yields the character to write after a backslash,
// or zero when the byte has no one-character escape.
// Within quote the '\'' entry is never consulted, because quote emits
// single quotes unescaped before it looks at this table.
var esc = [256]byte{
	'\\': '\\',
	'\'': '\'',
	'"':  '"',
	'\a': 'a',
	'\b': 'b',
	'\f': 'f',
	'\n': 'n',
	'\r': 'r',
	'\t': 't',
	'\v': 'v',
}

// notEsc is a list of characters that can follow a \ in a string value
// without having to escape the \. That is, since ( is in this list, we
// quote the Go string "foo\\(bar" as the Python literal "foo\(bar".
// This really does happen in BUILD files, especially in strings
// being used as shell arguments containing regular expressions.
//
// The list contains the space, ASCII punctuation and the upper-case
// letters. It deliberately leaves out both quote characters, the
// backslash, the digits and the lower-case letters; unquote treats the
// quotes, the backslash, the octal digits and several lower-case letters
// as the start of an escape sequence, so a \ before one of those cannot be
// passed through unchanged.
const notEsc = " !#$%&()*+,-./:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ{|}~"

// unquote decodes the string literal quoted, which must include its
// delimiters and may carry a leading r (raw) prefix.
//
// It returns the decoded value s, whether the literal was triple-quoted,
// and an error describing invalid input. On error s is empty; triple still
// reports the delimiter form if the failure happened while decoding the
// contents.
func unquote(quoted string) (s string, triple bool, err error) {
	// A leading r marks a raw literal: backslashes inside it are kept as is.
	body := strings.TrimPrefix(quoted, "r")
	raw := len(body) != len(quoted)

	if len(body) < 2 {
		return "", false, fmt.Errorf("string literal too short")
	}

	// The literal must open with ' or " and close with the same byte.
	delim := body[0]
	if (delim != '"' && delim != '\'') || body[len(body)-1] != delim {
		return "", false, fmt.Errorf("string literal has invalid quotes")
	}

	// Strip the delimiters, detecting the triple-quoted form.
	if len(body) >= 6 && body[1] == delim && body[2] == delim && body[:3] == body[len(body)-3:] {
		triple = true
		body = body[3 : len(body)-3]
	} else {
		body = body[1 : len(body)-1]
	}

	// special lists the bytes that need rewriting. Carriage returns are
	// always normalized; backslashes only matter outside raw literals.
	special := "\\\r"
	if raw {
		special = "\r"
	}
	if !strings.ContainsAny(body, special) {
		// Fast path: the contents are already the value.
		return body, triple, nil
	}

	// Slow path: alternate between copying a run of plain text and
	// decoding the single special sequence that follows it.
	var out strings.Builder
	for len(body) > 0 {
		next := strings.IndexAny(body, special)
		if next < 0 {
			out.WriteString(body)
			break
		}
		out.WriteString(body[:next])
		body = body[next:]

		// A carriage return, alone or as part of CR LF, becomes one newline.
		if body[0] == '\r' {
			out.WriteByte('\n')
			body = strings.TrimPrefix(body[1:], "\n")
			continue
		}

		// body[0] is a backslash from here on; it needs a following byte.
		if len(body) == 1 {
			return "", triple, fmt.Errorf(`truncated escape sequence \`)
		}

		switch c := body[1]; c {
		case '\n':
			// Backslash-newline is a line continuation: drop both bytes.
			body = body[2:]

		case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', '\'', '"':
			// Single-character escape.
			out.WriteByte(unesc[c])
			body = body[2:]

		case '0', '1', '2', '3', '4', '5', '6', '7':
			// Octal escape of one to three digits.
			val := int(c - '0')
			body = body[2:]
			for digits := 1; digits < 3 && len(body) > 0 && '0' <= body[0] && body[0] <= '7'; digits++ {
				val = val*8 + int(body[0]-'0')
				body = body[1:]
			}
			if val >= 256 {
				// Python silently drops the high bit here, so that
				// '\541' == '\141' == 'a'; this function rejects it instead.
				return "", triple, fmt.Errorf(`invalid escape sequence \%03o`, val)
			}
			out.WriteByte(byte(val))

		case 'x':
			// Hexadecimal escape of exactly two digits.
			if len(body) < 4 {
				return "", triple, fmt.Errorf(`truncated escape sequence %s`, body)
			}
			val, perr := strconv.ParseUint(body[2:4], 16, 0)
			if perr != nil {
				return "", triple, fmt.Errorf(`invalid escape sequence %s`, body[:4])
			}
			out.WriteByte(byte(val))
			body = body[4:]

		default:
			// As in Python, an unrecognized \z is kept as literal text.
			// NOTE(review): a backslash directly followed by a carriage
			// return also lands here, so backslash-CRLF is not treated as
			// a line continuation; confirm whether that is intended.
			out.WriteString(body[:2])
			body = body[2:]
		}
	}

	return out.String(), triple, nil
}

// indexByte returns the index of the first instance of b in s, or else -1.
// It is a thin wrapper over strings.IndexByte, kept so existing callers in
// this package do not need to change.
func indexByte(s string, b byte) int {
	return strings.IndexByte(s, b)
}

// hex is a list of the hexadecimal digits, for use in quoting.
// We always print lower-case hexadecimal.
// quote currently indexes only the first eight entries, reusing them as
// octal digits; its \x output path is commented out.
const hex = "0123456789abcdef"

// quote returns the quoted form of the string value unquoted, always using
// double quotes as the delimiter.
// If triple is true, quote uses the triple-quoted form """x""".
func quote(unquoted string, triple bool) string {
	delim := `"`
	if triple {
		delim = `"""`
	}

	var out strings.Builder
	out.WriteString(delim)

	n := len(unquoted)
	for pos := 0; pos < n; pos++ {
		ch := unquoted[pos]

		// The cases are tried in order; the first match decides how ch is
		// written.
		switch {
		case triple && ch == '"' && ((pos+1 < n && unquoted[pos+1] != '"') || (pos+2 < n && unquoted[pos+2] != '"')):
			// Inside a triple-quoted literal a run of one or two quotes may
			// be written as is, provided a non-quote byte follows it.
			out.WriteByte(ch)
			if pos+1 < n && unquoted[pos+1] == '"' {
				out.WriteByte(ch)
				pos++
			}

		case triple && ch == '\n':
			// A triple-quoted literal may contain a real newline.
			out.WriteByte(ch)

		case ch == '\'':
			// The delimiter is always ", so ' never needs escaping.
			out.WriteByte(ch)

		case ch == '\\' && pos+1 < n && indexByte(notEsc, unquoted[pos+1]) >= 0:
			// A \ may pass through unescaped when the next byte is known
			// not to form a valid escape sequence with it and does not
			// need escaping itself. Various BUILD files rely on this.
			out.WriteByte('\\')
			pos++
			out.WriteByte(unquoted[pos])

		case esc[ch] != 0:
			// The byte has a single-character escape.
			out.WriteByte('\\')
			out.WriteByte(esc[ch])

		case ch < 0x20 || ch >= 0x80:
			// BUILD files are supposed to be Latin-1, so control and high
			// bytes are written as three-digit octal escapes. \x would be
			// preferable, but Blaze does not implement \x in quoted
			// strings (b/7272572). The octal digits come from the first
			// eight entries of hex.
			// NOTE: DEL (0x7f) is outside this range and is written raw.
			out.WriteByte('\\')
			out.WriteByte(hex[ch>>6])
			out.WriteByte(hex[(ch>>3)&7])
			out.WriteByte(hex[ch&7])

		default:
			out.WriteByte(ch)
		}
	}

	out.WriteString(delim)
	return out.String()
}