fixup func isBinary to handle CJK runes correctly
Former-commit-id: 880817e9e688f7126eb5e3010f5fc37110b28448 [formerly d44d541d75c625a474ca7c8f9adcc52e20ee69e0] [formerly fbbaf7b6a31ed09944700f1ffa98c0baca9ae0f3 [formerly 1c42539522]]
Former-commit-id: 2a647332f2d80741f7ac9cd4eccffbf8a0dd5348 [formerly ef7355350d4d1276911bcdd8b4aaaf1b82efe50c]
Former-commit-id: 7c6d116b6bda492cc9544dc4a46d95cd225c1b39
			
			
				pull/726/head
			
			
		
							parent
							
								
									2ca921b01b
								
							
						
					
					
						commit
						586bb63ee7
					
				|  | @ -164,7 +164,7 @@ func (i *FileInfo) detectType(modify, saveContent bool) error { | |||
| 	case strings.HasPrefix(mimetype, "image"): | ||||
| 		i.Type = "image" | ||||
| 		return nil | ||||
| 	case isBinary(string(buffer[:n])) || i.Size > 10*1024*1024: // 10 MB
 | ||||
| 	case isBinary(buffer[:n], n) || i.Size > 10*1024*1024: // 10 MB
 | ||||
| 		i.Type = "blob" | ||||
| 		return nil | ||||
| 	default: | ||||
|  |  | |||
|  | @ -1,12 +1,46 @@ | |||
| package files | ||||
| 
 | ||||
| func isBinary(content string) bool { | ||||
| 	for _, b := range content { | ||||
| 		// 65533 is the unknown char
 | ||||
| import ( | ||||
| 	"unicode/utf8" | ||||
| ) | ||||
| 
 | ||||
| func isBinary(content []byte, n int) bool { | ||||
| 	maybeStr := string(content) | ||||
| 	runeCnt := utf8.RuneCount(content) | ||||
| 	runeIndex := 0 | ||||
| 	gotRuneErrCnt := 0 | ||||
| 	firstRuneErrIndex := -1 | ||||
| 
 | ||||
| 	for _, b := range maybeStr { | ||||
| 		// 8 and below are control chars (e.g. backspace, null, eof, etc)
 | ||||
| 		if b <= 8 || b == 65533 { | ||||
| 		if b <= 8 { | ||||
| 			return true | ||||
| 		} | ||||
| 
 | ||||
| 		// 0xFFFD (65533) is the "error" Rune or "Unicode replacement character"
 | ||||
| 		// see https://golang.org/pkg/unicode/utf8/#pkg-constants
 | ||||
| 		if b == 0xFFFD { | ||||
| 			//if it is not the last (utf8.UTFMax - x) rune
 | ||||
| 			if runeCnt > utf8.UTFMax && runeIndex < runeCnt-utf8.UTFMax { | ||||
| 				return true | ||||
| 			} else { | ||||
| 				//else it is the last (utf8.UTFMax - x) rune
 | ||||
| 				//there may be Vxxx, VVxx, VVVx; thus we may get at most 3 0xFFFD runes (assume V is a byte we got)
 | ||||
| 				//for Chinese, it can only be Vxx or VVx, so we may get at most 2 0xFFFD runes
 | ||||
| 				gotRuneErrCnt++ | ||||
| 
 | ||||
| 				//mark the first time
 | ||||
| 				if firstRuneErrIndex == -1 { | ||||
| 					firstRuneErrIndex = runeIndex | ||||
| 				} | ||||
| 			} | ||||
| 		} | ||||
| 		runeIndex++ | ||||
| 	} | ||||
| 
 | ||||
| 	//if some, but not all, of the last (utf8.UTFMax - x) runes are the "error" Rune
 | ||||
| 	if firstRuneErrIndex != -1 && gotRuneErrCnt != runeCnt-firstRuneErrIndex { | ||||
| 		return true | ||||
| 	} | ||||
| 	return false | ||||
| } | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue
	
	 荒野無燈
						荒野無燈