mirror of https://github.com/EasyDarwin/EasyDarwin
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
215 lines
4.7 KiB
215 lines
4.7 KiB
package jsoniter |
|
|
|
import ( |
|
"fmt" |
|
"unicode/utf16" |
|
) |
|
|
|
// ReadString read string from iterator |
|
func (iter *Iterator) ReadString() (ret string) { |
|
c := iter.nextToken() |
|
if c == '"' { |
|
for i := iter.head; i < iter.tail; i++ { |
|
c := iter.buf[i] |
|
if c == '"' { |
|
ret = string(iter.buf[iter.head:i]) |
|
iter.head = i + 1 |
|
return ret |
|
} else if c == '\\' { |
|
break |
|
} else if c < ' ' { |
|
iter.ReportError("ReadString", |
|
fmt.Sprintf(`invalid control character found: %d`, c)) |
|
return |
|
} |
|
} |
|
return iter.readStringSlowPath() |
|
} else if c == 'n' { |
|
iter.skipThreeBytes('u', 'l', 'l') |
|
return "" |
|
} |
|
iter.ReportError("ReadString", `expects " or n, but found `+string([]byte{c})) |
|
return |
|
} |
|
|
|
func (iter *Iterator) readStringSlowPath() (ret string) { |
|
var str []byte |
|
var c byte |
|
for iter.Error == nil { |
|
c = iter.readByte() |
|
if c == '"' { |
|
return string(str) |
|
} |
|
if c == '\\' { |
|
c = iter.readByte() |
|
str = iter.readEscapedChar(c, str) |
|
} else { |
|
str = append(str, c) |
|
} |
|
} |
|
iter.ReportError("readStringSlowPath", "unexpected end of input") |
|
return |
|
} |
|
|
|
func (iter *Iterator) readEscapedChar(c byte, str []byte) []byte { |
|
switch c { |
|
case 'u': |
|
r := iter.readU4() |
|
if utf16.IsSurrogate(r) { |
|
c = iter.readByte() |
|
if iter.Error != nil { |
|
return nil |
|
} |
|
if c != '\\' { |
|
iter.unreadByte() |
|
str = appendRune(str, r) |
|
return str |
|
} |
|
c = iter.readByte() |
|
if iter.Error != nil { |
|
return nil |
|
} |
|
if c != 'u' { |
|
str = appendRune(str, r) |
|
return iter.readEscapedChar(c, str) |
|
} |
|
r2 := iter.readU4() |
|
if iter.Error != nil { |
|
return nil |
|
} |
|
combined := utf16.DecodeRune(r, r2) |
|
if combined == '\uFFFD' { |
|
str = appendRune(str, r) |
|
str = appendRune(str, r2) |
|
} else { |
|
str = appendRune(str, combined) |
|
} |
|
} else { |
|
str = appendRune(str, r) |
|
} |
|
case '"': |
|
str = append(str, '"') |
|
case '\\': |
|
str = append(str, '\\') |
|
case '/': |
|
str = append(str, '/') |
|
case 'b': |
|
str = append(str, '\b') |
|
case 'f': |
|
str = append(str, '\f') |
|
case 'n': |
|
str = append(str, '\n') |
|
case 'r': |
|
str = append(str, '\r') |
|
case 't': |
|
str = append(str, '\t') |
|
default: |
|
iter.ReportError("readEscapedChar", |
|
`invalid escape char after \`) |
|
return nil |
|
} |
|
return str |
|
} |
|
|
|
// ReadStringAsSlice read string from iterator without copying into string form. |
|
// The []byte can not be kept, as it will change after next iterator call. |
|
func (iter *Iterator) ReadStringAsSlice() (ret []byte) { |
|
c := iter.nextToken() |
|
if c == '"' { |
|
for i := iter.head; i < iter.tail; i++ { |
|
// require ascii string and no escape |
|
// for: field name, base64, number |
|
if iter.buf[i] == '"' { |
|
// fast path: reuse the underlying buffer |
|
ret = iter.buf[iter.head:i] |
|
iter.head = i + 1 |
|
return ret |
|
} |
|
} |
|
readLen := iter.tail - iter.head |
|
copied := make([]byte, readLen, readLen*2) |
|
copy(copied, iter.buf[iter.head:iter.tail]) |
|
iter.head = iter.tail |
|
for iter.Error == nil { |
|
c := iter.readByte() |
|
if c == '"' { |
|
return copied |
|
} |
|
copied = append(copied, c) |
|
} |
|
return copied |
|
} |
|
iter.ReportError("ReadStringAsSlice", `expects " or n, but found `+string([]byte{c})) |
|
return |
|
} |
|
|
|
func (iter *Iterator) readU4() (ret rune) { |
|
for i := 0; i < 4; i++ { |
|
c := iter.readByte() |
|
if iter.Error != nil { |
|
return |
|
} |
|
if c >= '0' && c <= '9' { |
|
ret = ret*16 + rune(c-'0') |
|
} else if c >= 'a' && c <= 'f' { |
|
ret = ret*16 + rune(c-'a'+10) |
|
} else if c >= 'A' && c <= 'F' { |
|
ret = ret*16 + rune(c-'A'+10) |
|
} else { |
|
iter.ReportError("readU4", "expects 0~9 or a~f, but found "+string([]byte{c})) |
|
return |
|
} |
|
} |
|
return ret |
|
} |
|
|
|
const ( |
|
t1 = 0x00 // 0000 0000 |
|
tx = 0x80 // 1000 0000 |
|
t2 = 0xC0 // 1100 0000 |
|
t3 = 0xE0 // 1110 0000 |
|
t4 = 0xF0 // 1111 0000 |
|
t5 = 0xF8 // 1111 1000 |
|
|
|
maskx = 0x3F // 0011 1111 |
|
mask2 = 0x1F // 0001 1111 |
|
mask3 = 0x0F // 0000 1111 |
|
mask4 = 0x07 // 0000 0111 |
|
|
|
rune1Max = 1<<7 - 1 |
|
rune2Max = 1<<11 - 1 |
|
rune3Max = 1<<16 - 1 |
|
|
|
surrogateMin = 0xD800 |
|
surrogateMax = 0xDFFF |
|
|
|
maxRune = '\U0010FFFF' // Maximum valid Unicode code point. |
|
runeError = '\uFFFD' // the "error" Rune or "Unicode replacement character" |
|
) |
|
|
|
func appendRune(p []byte, r rune) []byte { |
|
// Negative values are erroneous. Making it unsigned addresses the problem. |
|
switch i := uint32(r); { |
|
case i <= rune1Max: |
|
p = append(p, byte(r)) |
|
return p |
|
case i <= rune2Max: |
|
p = append(p, t2|byte(r>>6)) |
|
p = append(p, tx|byte(r)&maskx) |
|
return p |
|
case i > maxRune, surrogateMin <= i && i <= surrogateMax: |
|
r = runeError |
|
fallthrough |
|
case i <= rune3Max: |
|
p = append(p, t3|byte(r>>12)) |
|
p = append(p, tx|byte(r>>6)&maskx) |
|
p = append(p, tx|byte(r)&maskx) |
|
return p |
|
default: |
|
p = append(p, t4|byte(r>>18)) |
|
p = append(p, tx|byte(r>>12)&maskx) |
|
p = append(p, tx|byte(r>>6)&maskx) |
|
p = append(p, tx|byte(r)&maskx) |
|
return p |
|
} |
|
}
|
|
|