fixup func isBinary to handle CJK runes correctly
Former-commit-id: 880817e9e688f7126eb5e3010f5fc37110b28448 [formerly d44d541d75c625a474ca7c8f9adcc52e20ee69e0] [formerly fbbaf7b6a31ed09944700f1ffa98c0baca9ae0f3 [formerly 1c42539522
]]
Former-commit-id: 2a647332f2d80741f7ac9cd4eccffbf8a0dd5348 [formerly ef7355350d4d1276911bcdd8b4aaaf1b82efe50c]
Former-commit-id: 7c6d116b6bda492cc9544dc4a46d95cd225c1b39
pull/726/head
parent
2ca921b01b
commit
586bb63ee7
|
@ -164,7 +164,7 @@ func (i *FileInfo) detectType(modify, saveContent bool) error {
|
||||||
case strings.HasPrefix(mimetype, "image"):
|
case strings.HasPrefix(mimetype, "image"):
|
||||||
i.Type = "image"
|
i.Type = "image"
|
||||||
return nil
|
return nil
|
||||||
case isBinary(string(buffer[:n])) || i.Size > 10*1024*1024: // 10 MB
|
case isBinary(buffer[:n], n) || i.Size > 10*1024*1024: // 10 MB
|
||||||
i.Type = "blob"
|
i.Type = "blob"
|
||||||
return nil
|
return nil
|
||||||
default:
|
default:
|
||||||
|
|
|
@ -1,12 +1,46 @@
|
||||||
package files
|
package files
|
||||||
|
|
||||||
func isBinary(content string) bool {
|
import (
|
||||||
for _, b := range content {
|
"unicode/utf8"
|
||||||
// 65533 is the unknown char
|
)
|
||||||
|
|
||||||
|
func isBinary(content []byte, n int) bool {
|
||||||
|
maybeStr := string(content)
|
||||||
|
runeCnt := utf8.RuneCount(content)
|
||||||
|
runeIndex := 0
|
||||||
|
gotRuneErrCnt := 0
|
||||||
|
firstRuneErrIndex := -1
|
||||||
|
|
||||||
|
for _, b := range maybeStr {
|
||||||
// 8 and below are control chars (e.g. backspace, null, eof, etc)
|
// 8 and below are control chars (e.g. backspace, null, eof, etc)
|
||||||
if b <= 8 || b == 65533 {
|
if b <= 8 {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// 0xFFFD(65533) is the "error" Rune or "Unicode replacement character"
|
||||||
|
// see https://golang.org/pkg/unicode/utf8/#pkg-constants
|
||||||
|
if b == 0xFFFD {
|
||||||
|
//if it is not the last (utf8.UTFMax - x) rune
|
||||||
|
if runeCnt > utf8.UTFMax && runeIndex < runeCnt-utf8.UTFMax {
|
||||||
|
return true
|
||||||
|
} else {
|
||||||
|
//else it is the last (utf8.UTFMax - x) rune
|
||||||
|
//the truncated tail may be Vxxx, VVxx or VVVx, so we may get at most 3 0xFFFD runes (assume V is a byte we actually read)
|
||||||
|
//for Chinese it can only be Vxx or VVx, so we may get at most 2 0xFFFD runes
|
||||||
|
gotRuneErrCnt++
|
||||||
|
|
||||||
|
//mark the first time
|
||||||
|
if firstRuneErrIndex == -1 {
|
||||||
|
firstRuneErrIndex = runeIndex
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
runeIndex++
|
||||||
|
}
|
||||||
|
|
||||||
|
//if the last (utf8.UTFMax - x) runes contain the "error" Rune, but not every rune from the first "error" Rune to the end is one
|
||||||
|
if firstRuneErrIndex != -1 && gotRuneErrCnt != runeCnt-firstRuneErrIndex {
|
||||||
|
return true
|
||||||
}
|
}
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue