fix: 提高小文本文件字符编码检测的正确率;处理并发隐患

pull/241/head
“yaya” 2023-10-06 16:22:32 +08:00
parent 31c7b2dfb8
commit 42cf6b2955
4 changed files with 4619 additions and 5 deletions

View File

@ -18,6 +18,7 @@ import java.nio.file.Files;
public class EncodingDetects {
private static UniversalDetector detector = new UniversalDetector(null);
private static final int DEFAULT_LENGTH = 4096;
private static final int LIMIT = 50;
private static final Logger logger = LoggerFactory.getLogger(EncodingDetects.class);
public static String getJavaEncode(String filePath) {
@ -36,10 +37,16 @@ public class EncodingDetects {
}
public static String getJavaEncode(byte[] content) {
detector.reset();
detector.handleData(content, 0, content.length);
detector.dataEnd();
String charsetName = detector.getDetectedCharset();
if (content != null && content.length <= LIMIT) {
return SimpleEncodingDetects.getJavaEncode(content);
}
String charsetName;
synchronized (EncodingDetects.class) {
detector.reset();
detector.handleData(content, 0, content.length);
detector.dataEnd();
charsetName = detector.getDetectedCharset();
}
if (charsetName == null) {
charsetName = Charset.defaultCharset().name();
}

File diff suppressed because it is too large. Load Diff

View File

@ -17,7 +17,7 @@ import java.net.URISyntaxException;
public class EncodingTests {
@Test
void testCharDet() throws URISyntaxException {
for (int i = 0; i < 28; i++) {
for (int i = 0; i < 29; i++) {
File dir = new File(getClass().getClassLoader().getResource("testData\\" + i).toURI());
String dirPath = dir.getPath();
String textFileName = dir.list()[0];

View File

@ -0,0 +1,3 @@
发斯蒂芬斯蒂芬
顶顶顶顶~