🐛 fix GBK encoding detection

pull/548/head
微凉 2021-11-16 00:03:49 +08:00
parent 8987958e26
commit 5e982980dc
3 changed files with 68 additions and 44 deletions

View File

@ -99,18 +99,20 @@ func init() {
func Text(c *gin.Context, link string) { func Text(c *gin.Context, link string) {
res, err := client.R().Get(link) res, err := client.R().Get(link)
if err != nil {
ErrorResp(c, err, 500)
return
}
text := res.String() text := res.String()
if utils.IsGBK(res.Body()) { t := utils.GetStrCoding(res.Body())
log.Debugf("text type: %s", t)
if t != utils.UTF8 {
body, err := utils.GbkToUtf8(res.Body()) body, err := utils.GbkToUtf8(res.Body())
if err != nil { if err != nil {
ErrorResp(c,err,500) ErrorResp(c, err, 500)
return return
} }
text = string(body) text = string(body)
} }
if err != nil { c.String(200, text)
ErrorResp(c,err,500)
return
}
c.String(200,text)
} }

59
utils/code.go Normal file
View File

@ -0,0 +1,59 @@
package utils
import (
"bytes"
"golang.org/x/text/encoding/simplifiedchinese"
"golang.org/x/text/transform"
"io/ioutil"
"unicode/utf8"
)
// IsGBK reports whether data is a well-formed GBK byte sequence.
//
// Bytes in 0x00-0x7f are accepted as single-byte, ASCII-compatible
// characters. Every other byte must be a lead byte in 0x81-0xfe that is
// followed by a trail byte in 0x40-0xfe other than 0x7f. Empty input and
// pure ASCII input therefore both report true.
func IsGBK(data []byte) bool {
	n := len(data)
	for i := 0; i < n; {
		lead := data[i]
		if lead <= 0x7f {
			// Single-byte character, compatible with ASCII.
			i++
			continue
		}
		// A double-byte character needs a valid lead byte and one more byte
		// after it; a lead byte at the very end of the input is truncated
		// data and must not be indexed past.
		if lead < 0x81 || lead > 0xfe || i+1 >= n {
			return false
		}
		// 0x7f is the one value inside 0x40-0xfe that is not a legal GBK
		// trail byte.
		trail := data[i+1]
		if trail < 0x40 || trail > 0xfe || trail == 0x7f {
			return false
		}
		i += 2
	}
	return true
}
// Encoding names returned by GetStrCoding.
const (
// GBK is returned for data that is not valid UTF-8 but passes IsGBK.
GBK string = "GBK"
// UTF8 is returned for data that utf8.Valid accepts.
UTF8 string = "UTF8"
// UNKNOWN is returned for data that is neither valid UTF-8 nor accepted by IsGBK.
UNKNOWN string = "UNKNOWN"
)
// GetStrCoding classifies data by text encoding and returns one of the
// constants UTF8, GBK or UNKNOWN.
//
// UTF-8 validity is tested first, so input that satisfies both checks
// (for example pure ASCII) is reported as UTF8.
func GetStrCoding(data []byte) string {
	switch {
	case utf8.Valid(data):
		return UTF8
	case IsGBK(data):
		return GBK
	default:
		return UNKNOWN
	}
}
func GbkToUtf8(s []byte) ([]byte, error) {
reader := transform.NewReader(bytes.NewReader(s), simplifiedchinese.GBK.NewDecoder())
d, e := ioutil.ReadAll(reader)
if e != nil {
return nil, e
}
return d, nil
}

View File

@ -1,12 +1,9 @@
package utils package utils
import ( import (
"bytes"
"encoding/json" "encoding/json"
"github.com/Xhofe/alist/conf" "github.com/Xhofe/alist/conf"
log "github.com/sirupsen/logrus" log "github.com/sirupsen/logrus"
"golang.org/x/text/encoding/simplifiedchinese"
"golang.org/x/text/transform"
"io/ioutil" "io/ioutil"
"os" "os"
"path/filepath" "path/filepath"
@ -92,37 +89,3 @@ func ParsePath(path string) string {
return path return path
} }
// IsGBK reports whether data is a well-formed GBK byte sequence.
//
// Bytes in 0x00-0x7f are accepted as single-byte, ASCII-compatible
// characters. Every other byte must be a lead byte in 0x81-0xfe that is
// followed by a trail byte in 0x40-0xfe other than 0x7f. Empty input and
// pure ASCII input therefore both report true.
func IsGBK(data []byte) bool {
	n := len(data)
	for i := 0; i < n; {
		lead := data[i]
		// The single-byte range ends at 0x7f; comparing against 0xff would
		// match every byte and make the double-byte check unreachable.
		if lead <= 0x7f {
			i++
			continue
		}
		// A double-byte character needs a valid lead byte and one more byte
		// after it; a lead byte at the very end of the input is truncated
		// data and must not be indexed past.
		if lead < 0x81 || lead > 0xfe || i+1 >= n {
			return false
		}
		// 0x7f is the one value inside 0x40-0xfe that is not a legal GBK
		// trail byte.
		trail := data[i+1]
		if trail < 0x40 || trail > 0xfe || trail == 0x7f {
			return false
		}
		i += 2
	}
	return true
}
func GbkToUtf8(s []byte) ([]byte, error) {
reader := transform.NewReader(bytes.NewReader(s), simplifiedchinese.GBK.NewDecoder())
d, e := ioutil.ReadAll(reader)
if e != nil {
return nil, e
}
return d, nil
}