Merge pull request #28112 from liggitt/field-selector-value-escaping

Automatic merge from submit-queue Allow fieldSelectors to match arbitrary values Field selectors are intended to be able to match arbitrary data for specific field names. Because of how field selectors are parsed, data containing `,` characters is not possible to match today, and data containing leading `=` characters requires using the `==` operator to ensure the leading `=` is preserved. This PR adds the ability to escape/unescape those special characters in field selector values so they can be parsed unambiguously Process for escaping arbitrary data in field selector values (`fields.EscapeValue` helper provided): 1. Prefix literal `\` characters with a `\` 2. Prefix `,` characters with a `\` 3. Prefix `=` characters with a `\` When unescaping a field selector value (`fields.UnescapeValue` helper provided), the following escape sequences are honored: - `\\` -> `\` - `\,` -> `,` - `\=` -> `=` Any other instances of `\` result in a parse error Any unescaped instances of `,` and `=` in field selector values result in a parse error Compatibility: - `,` and `=` characters are currently unusable in fieldSelector values, so the `\,` and `\=` escape sequences have no compatibility impact - `\\` changes from being interpreted as `\\` to `\`, and any other uses of `\` result in errors (this is mostly theoretical; I couldn't find any field-selector-enabled fields which is currently using field selectors to match `\` values) ``` Field selectors may now match values containing the characters `,` `=` or `\` by escaping them with a `\` character. ```
2017-01-23 23:54:35 -08:00 · 2017-01-23 23:54:35 -08:00 · bcc1dc5c15
parent 43286a82c6 6f5598b1cb
commit bcc1dc5c15
2 changed files with 272 additions and 24 deletions
--- a/staging/src/k8s.io/apimachinery/pkg/fields/selector.go
+++ b/staging/src/k8s.io/apimachinery/pkg/fields/selector.go
@ -17,6 +17,7 @@ limitations under the License.
 package fields

 import (
+	"bytes"
 	"fmt"
 	"sort"
 	"strings"
@ -90,7 +91,7 @@ func (t *hasTerm) Requirements() Requirements {
 }

 func (t *hasTerm) String() string {
-	return fmt.Sprintf("%v=%v", t.field, t.value)
+	return fmt.Sprintf("%v=%v", t.field, EscapeValue(t.value))
 }

 type notHasTerm struct {
@ -126,7 +127,7 @@ func (t *notHasTerm) Requirements() Requirements {
 }

 func (t *notHasTerm) String() string {
-	return fmt.Sprintf("%v!=%v", t.field, t.value)
+	return fmt.Sprintf("%v!=%v", t.field, EscapeValue(t.value))
 }

 type andTerm []Selector
@ -212,6 +213,81 @@ func SelectorFromSet(ls Set) Selector {
 	return andTerm(items)
 }

+// valueEscaper prefixes \,= characters with a backslash
+var valueEscaper = strings.NewReplacer(
+	// escape \ characters
+	`\`, `\\`,
+	// then escape , and = characters to allow unambiguous parsing of the value in a fieldSelector
+	`,`, `\,`,
+	`=`, `\=`,
+)
+
+// Escapes an arbitrary literal string for use as a fieldSelector value
+func EscapeValue(s string) string {
+	return valueEscaper.Replace(s)
+}
+
+// InvalidEscapeSequence indicates an error occurred unescaping a field selector
+type InvalidEscapeSequence struct {
+	sequence string
+}
+
+func (i InvalidEscapeSequence) Error() string {
+	return fmt.Sprintf("invalid field selector: invalid escape sequence: %s", i.sequence)
+}
+
+// UnescapedRune indicates an error occurred unescaping a field selector
+type UnescapedRune struct {
+	r rune
+}
+
+func (i UnescapedRune) Error() string {
+	return fmt.Sprintf("invalid field selector: unescaped character in value: %v", i.r)
+}
+
+// Unescapes a fieldSelector value and returns the original literal value.
+// May return the original string if it contains no escaped or special characters.
+func UnescapeValue(s string) (string, error) {
+	// if there's no escaping or special characters, just return to avoid allocation
+	if !strings.ContainsAny(s, `\,=`) {
+		return s, nil
+	}
+
+	v := bytes.NewBuffer(make([]byte, 0, len(s)))
+	inSlash := false
+	for _, c := range s {
+		if inSlash {
+			switch c {
+			case '\\', ',', '=':
+				// omit the \ for recognized escape sequences
+				v.WriteRune(c)
+			default:
+				// error on unrecognized escape sequences
+				return "", InvalidEscapeSequence{sequence: string([]rune{'\\', c})}
+			}
+			inSlash = false
+			continue
+		}
+
+		switch c {
+		case '\\':
+			inSlash = true
+		case ',', '=':
+			// unescaped , and = characters are not allowed in field selector values
+			return "", UnescapedRune{r: c}
+		default:
+			v.WriteRune(c)
+		}
+	}
+
+	// Ending with a single backslash is an invalid sequence
+	if inSlash {
+		return "", InvalidEscapeSequence{sequence: "\\"}
+	}
+
+	return v.String(), nil
+}
+
 // ParseSelectorOrDie takes a string representing a selector and returns an
 // object suitable for matching, or panic when an error occur.
 func ParseSelectorOrDie(s string) Selector {
@ -239,29 +315,83 @@ func ParseAndTransformSelector(selector string, fn TransformFunc) (Selector, err
 // Function to transform selectors.
 type TransformFunc func(field, value string) (newField, newValue string, err error)

-func try(selectorPiece, op string) (lhs, rhs string, ok bool) {
-	pieces := strings.Split(selectorPiece, op)
-	if len(pieces) == 2 {
-		return pieces[0], pieces[1], true
+// splitTerms returns the comma-separated terms contained in the given fieldSelector.
+// Backslash-escaped commas are treated as data instead of delimiters, and are included in the returned terms, with the leading backslash preserved.
+func splitTerms(fieldSelector string) []string {
+	if len(fieldSelector) == 0 {
+		return nil
 	}
-	return "", "", false
+
+	terms := make([]string, 0, 1)
+	startIndex := 0
+	inSlash := false
+	for i, c := range fieldSelector {
+		switch {
+		case inSlash:
+			inSlash = false
+		case c == '\\':
+			inSlash = true
+		case c == ',':
+			terms = append(terms, fieldSelector[startIndex:i])
+			startIndex = i + 1
+		}
+	}
+
+	terms = append(terms, fieldSelector[startIndex:])
+
+	return terms
+}
+
+const (
+	notEqualOperator    = "!="
+	doubleEqualOperator = "=="
+	equalOperator       = "="
+)
+
+// termOperators holds the recognized operators supported in fieldSelectors.
+// doubleEqualOperator and equal are equivalent, but doubleEqualOperator is checked first
+// to avoid leaving a leading = character on the rhs value.
+var termOperators = []string{notEqualOperator, doubleEqualOperator, equalOperator}
+
+// splitTerm returns the lhs, operator, and rhs parsed from the given term, along with an indicator of whether the parse was successful.
+// no escaping of special characters is supported in the lhs value, so the first occurance of a recognized operator is used as the split point.
+// the literal rhs is returned, and the caller is responsible for applying any desired unescaping.
+func splitTerm(term string) (lhs, op, rhs string, ok bool) {
+	for i := range term {
+		remaining := term[i:]
+		for _, op := range termOperators {
+			if strings.HasPrefix(remaining, op) {
+				return term[0:i], op, term[i+len(op):], true
+			}
+		}
+	}
+	return "", "", "", false
 }

 func parseSelector(selector string, fn TransformFunc) (Selector, error) {
-	parts := strings.Split(selector, ",")
+	parts := splitTerms(selector)
 	sort.StringSlice(parts).Sort()
 	var items []Selector
 	for _, part := range parts {
 		if part == "" {
 			continue
 		}
-		if lhs, rhs, ok := try(part, "!="); ok {
-			items = append(items, &notHasTerm{field: lhs, value: rhs})
-		} else if lhs, rhs, ok := try(part, "=="); ok {
-			items = append(items, &hasTerm{field: lhs, value: rhs})
-		} else if lhs, rhs, ok := try(part, "="); ok {
-			items = append(items, &hasTerm{field: lhs, value: rhs})
-		} else {
+		lhs, op, rhs, ok := splitTerm(part)
+		if !ok {
+			return nil, fmt.Errorf("invalid selector: '%s'; can't understand '%s'", selector, part)
+		}
+		unescapedRHS, err := UnescapeValue(rhs)
+		if err != nil {
+			return nil, err
+		}
+		switch op {
+		case notEqualOperator:
+			items = append(items, &notHasTerm{field: lhs, value: unescapedRHS})
+		case doubleEqualOperator:
+			items = append(items, &hasTerm{field: lhs, value: unescapedRHS})
+		case equalOperator:
+			items = append(items, &hasTerm{field: lhs, value: unescapedRHS})
+		default:
 			return nil, fmt.Errorf("invalid selector: '%s'; can't understand '%s'", selector, part)
 		}
 	}
--- a/staging/src/k8s.io/apimachinery/pkg/fields/selector_test.go
+++ b/staging/src/k8s.io/apimachinery/pkg/fields/selector_test.go
@ -17,18 +17,134 @@ limitations under the License.
 package fields

 import (
+	"reflect"
 	"testing"
 )

+func TestSplitTerms(t *testing.T) {
+	testcases := map[string][]string{
+		// Simple selectors
+		`a`:                            {`a`},
+		`a=avalue`:                     {`a=avalue`},
+		`a=avalue,b=bvalue`:            {`a=avalue`, `b=bvalue`},
+		`a=avalue,b==bvalue,c!=cvalue`: {`a=avalue`, `b==bvalue`, `c!=cvalue`},
+
+		// Empty terms
+		``:     nil,
+		`a=a,`: {`a=a`, ``},
+		`,a=a`: {``, `a=a`},
+
+		// Escaped values
+		`k=\,,k2=v2`:   {`k=\,`, `k2=v2`},   // escaped comma in value
+		`k=\\,k2=v2`:   {`k=\\`, `k2=v2`},   // escaped backslash, unescaped comma
+		`k=\\\,,k2=v2`: {`k=\\\,`, `k2=v2`}, // escaped backslash and comma
+		`k=\a\b\`:      {`k=\a\b\`},         // non-escape sequences
+		`k=\`:          {`k=\`},             // orphan backslash
+
+		// Multi-byte
+		`함=수,목=록`: {`함=수`, `목=록`},
+	}
+
+	for selector, expectedTerms := range testcases {
+		if terms := splitTerms(selector); !reflect.DeepEqual(terms, expectedTerms) {
+			t.Errorf("splitSelectors(`%s`): Expected\n%#v\ngot\n%#v", selector, expectedTerms, terms)
+		}
+	}
+}
+
+func TestSplitTerm(t *testing.T) {
+	testcases := map[string]struct {
+		lhs string
+		op  string
+		rhs string
+		ok  bool
+	}{
+		// Simple terms
+		`a=value`:  {lhs: `a`, op: `=`, rhs: `value`, ok: true},
+		`b==value`: {lhs: `b`, op: `==`, rhs: `value`, ok: true},
+		`c!=value`: {lhs: `c`, op: `!=`, rhs: `value`, ok: true},
+
+		// Empty or invalid terms
+		``:  {lhs: ``, op: ``, rhs: ``, ok: false},
+		`a`: {lhs: ``, op: ``, rhs: ``, ok: false},
+
+		// Escaped values
+		`k=\,`:          {lhs: `k`, op: `=`, rhs: `\,`, ok: true},
+		`k=\=`:          {lhs: `k`, op: `=`, rhs: `\=`, ok: true},
+		`k=\\\a\b\=\,\`: {lhs: `k`, op: `=`, rhs: `\\\a\b\=\,\`, ok: true},
+
+		// Multi-byte
+		`함=수`: {lhs: `함`, op: `=`, rhs: `수`, ok: true},
+	}
+
+	for term, expected := range testcases {
+		lhs, op, rhs, ok := splitTerm(term)
+		if lhs != expected.lhs || op != expected.op || rhs != expected.rhs || ok != expected.ok {
+			t.Errorf(
+				"splitTerm(`%s`): Expected\n%s,%s,%s,%v\nGot\n%s,%s,%s,%v",
+				term,
+				expected.lhs, expected.op, expected.rhs, expected.ok,
+				lhs, op, rhs, ok,
+			)
+		}
+	}
+}
+
+func TestEscapeValue(t *testing.T) {
+	// map values to their normalized escaped values
+	testcases := map[string]string{
+		``:      ``,
+		`a`:     `a`,
+		`=`:     `\=`,
+		`,`:     `\,`,
+		`\`:     `\\`,
+		`\=\,\`: `\\\=\\\,\\`,
+	}
+
+	for unescapedValue, escapedValue := range testcases {
+		actualEscaped := EscapeValue(unescapedValue)
+		if actualEscaped != escapedValue {
+			t.Errorf("EscapeValue(%s): expected %s, got %s", unescapedValue, escapedValue, actualEscaped)
+		}
+
+		actualUnescaped, err := UnescapeValue(escapedValue)
+		if err != nil {
+			t.Errorf("UnescapeValue(%s): unexpected error %v", escapedValue, err)
+		}
+		if actualUnescaped != unescapedValue {
+			t.Errorf("UnescapeValue(%s): expected %s, got %s", escapedValue, unescapedValue, actualUnescaped)
+		}
+	}
+
+	// test invalid escape sequences
+	invalidTestcases := []string{
+		`\`,   // orphan slash is invalid
+		`\\\`, // orphan slash is invalid
+		`\a`,  // unrecognized escape sequence is invalid
+	}
+	for _, invalidValue := range invalidTestcases {
+		_, err := UnescapeValue(invalidValue)
+		if _, ok := err.(InvalidEscapeSequence); !ok || err == nil {
+			t.Errorf("UnescapeValue(%s): expected invalid escape sequence error, got %#v", invalidValue, err)
+		}
+	}
+}
+
 func TestSelectorParse(t *testing.T) {
 	testGoodStrings := []string{
 		"x=a,y=b,z=c",
 		"",
 		"x!=a,y=b",
+		`x=a||y\=b`,
+		`x=a\=\=b`,
 	}
 	testBadStrings := []string{
 		"x=a||y=b",
 		"x==a==b",
+		"x=a,b",
+		"x in (a)",
+		"x in (a,b,c)",
+		"x",
 	}
 	for _, test := range testGoodStrings {
 		lq, err := ParseSelector(test)
@ -99,16 +215,18 @@ func TestSelectorMatches(t *testing.T) {
 	expectNoMatch(t, "x=y,z=w", Set{"x": "w", "z": "w"})
 	expectNoMatch(t, "x!=y,z!=w", Set{"x": "z", "z": "w"})

-	labelset := Set{
-		"foo": "bar",
-		"baz": "blah",
+	fieldset := Set{
+		"foo":     "bar",
+		"baz":     "blah",
+		"complex": `=value\,\`,
 	}
-	expectMatch(t, "foo=bar", labelset)
-	expectMatch(t, "baz=blah", labelset)
-	expectMatch(t, "foo=bar,baz=blah", labelset)
-	expectNoMatch(t, "foo=blah", labelset)
-	expectNoMatch(t, "baz=bar", labelset)
-	expectNoMatch(t, "foo=bar,foobar=bar,baz=blah", labelset)
+	expectMatch(t, "foo=bar", fieldset)
+	expectMatch(t, "baz=blah", fieldset)
+	expectMatch(t, "foo=bar,baz=blah", fieldset)
+	expectMatch(t, `foo=bar,baz=blah,complex=\=value\\\,\\`, fieldset)
+	expectNoMatch(t, "foo=blah", fieldset)
+	expectNoMatch(t, "baz=bar", fieldset)
+	expectNoMatch(t, "foo=bar,foobar=bar,baz=blah", fieldset)
 }

 func TestOneTermEqualSelector(t *testing.T) {