refactor: optimize file type detection and support decision-making based on file name (#6675)

#### What type of PR is this?
/kind improvement
/area core
/milestone 2.20.x

#### What this PR does / why we need it:
优化文件类型检测并支持根据文件名作为决策依据

#### Does this PR introduce a user-facing change?
```release-note
优化文件类型检测并支持根据文件名作为决策依据
```
pull/6810/head
guqing 2024-10-10 15:45:00 +08:00 committed by GitHub
parent 2b3badc8e1
commit 605d52a86e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 91 additions and 12 deletions

View File

@ -1,29 +1,52 @@
package run.halo.app.infra.utils;
import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import lombok.experimental.UtilityClass;
import org.apache.tika.Tika;
import org.apache.tika.detect.DefaultDetector;
import org.apache.tika.detect.Detector;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.mime.MimeTypeException;
import org.apache.tika.mime.MimeTypes;
import org.springframework.util.Assert;
@UtilityClass
public class FileTypeDetectUtils {
private static final Tika tika = new Tika();
private static final Detector detector = new DefaultDetector();
/**
 * Detects the media type of a document from both its content and its name.
 *
 * <p>The name is passed to the detector as the resource name alongside the stream
 * content. The stream is closed once detection has run.</p>
 *
 * @param inputStream the document stream, must not be null
 * @param name the name of the document, must not be null
 * @return the detected media type in its string form
 * @throws IOException if the stream can not be read
 * @throws IllegalArgumentException if {@code name} or {@code inputStream} is null
 */
public static String detectMimeType(InputStream inputStream, String name) throws IOException {
    Assert.notNull(name, "The name of the document must not be null");
    var resourceHints = new Metadata();
    resourceHints.set(TikaCoreProperties.RESOURCE_NAME_KEY, name);
    return doDetectMimeType(inputStream, resourceHints);
}
/**
* Detect mime type.
*
* @param inputStream input stream will be closed after detection.
* @param inputStream input stream will be closed after detection, must not be null
*/
public static String detectMimeType(InputStream inputStream) throws IOException {
try {
return tika.detect(inputStream);
} finally {
if (inputStream != null) {
inputStream.close();
return doDetectMimeType(inputStream, new Metadata());
}
/**
 * Runs the shared detector against the stream using the supplied metadata.
 *
 * <p>The stream is always closed before this method returns, whether detection
 * succeeds or fails.</p>
 *
 * @param inputStream the document stream, must not be null
 * @param metadata detection hints such as the resource name; may carry no entries
 * @return the detected media type in its string form
 * @throws IOException if the stream can not be read
 */
private static String doDetectMimeType(InputStream inputStream, Metadata metadata)
    throws IOException {
    Assert.notNull(inputStream, "The inputStream must not be null");
    // The detector is expected to mark/reset the stream while sniffing the leading bytes,
    // so streams without mark support are wrapped in a buffer first.
    InputStream markable = inputStream.markSupported()
        ? inputStream
        : new BufferedInputStream(inputStream);
    try (markable) {
        var mediaType = detector.detect(markable, metadata);
        return mediaType.toString();
    }
}

View File

@ -158,7 +158,7 @@ class LocalAttachmentUploadHandler implements AttachmentHandler {
var typeValidator = file.content()
.next()
.handle((dataBuffer, sink) -> {
var mimeType = detectMimeType(dataBuffer.asInputStream());
var mimeType = detectMimeType(dataBuffer.asInputStream(), file.name());
var isAllow = setting.getAllowedFileTypes()
.stream()
.map(FileCategoryMatcher::of)
@ -178,9 +178,9 @@ class LocalAttachmentUploadHandler implements AttachmentHandler {
}
@NonNull
private String detectMimeType(InputStream inputStream) {
private String detectMimeType(InputStream inputStream, String name) {
try {
return FileTypeDetectUtils.detectMimeType(inputStream);
return FileTypeDetectUtils.detectMimeType(inputStream, name);
} catch (IOException e) {
log.warn("Failed to detect file type", e);
return "Unknown";

View File

@ -3,6 +3,7 @@ package run.halo.app.infra.utils;
import static org.assertj.core.api.Assertions.assertThat;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import org.apache.tika.mime.MimeTypeException;
import org.junit.jupiter.api.Test;
@ -31,6 +32,60 @@ class FileTypeDetectUtilsTest {
assertThat(mimeType).isEqualTo("application/zip");
}
/**
 * Checks detection results when a file name accompanies the content, and contrasts
 * them with name-less detection for the Office documents.
 */
@Test
void detectMimeTypeWithNameTest() throws IOException {
    // Text-based resources, each detected together with its real file name.
    assertThat(FileTypeDetectUtils.detectMimeType(
        getFileInputStream("classpath:file-type-detect/index.js"), "index.js"))
        .isEqualTo("application/javascript");

    assertThat(FileTypeDetectUtils.detectMimeType(
        getFileInputStream("classpath:file-type-detect/index.html"), "index.html"))
        .isEqualTo("text/html");

    assertThat(FileTypeDetectUtils.detectMimeType(
        getFileInputStream("classpath:file-type-detect/test.json"), "test.json"))
        .isEqualTo("application/json");

    // other.xlsx with its matching name resolves to the spreadsheet type.
    assertThat(FileTypeDetectUtils.detectMimeType(
        getFileInputStream("classpath:file-type-detect/other.xlsx"), "other.xlsx"))
        .isEqualTo("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet");

    // other.xlsx detected without a name.
    assertThat(FileTypeDetectUtils.detectMimeType(
        getFileInputStream("classpath:file-type-detect/other.xlsx")))
        .isEqualTo("application/zip");

    // other.xlsx detected with a wrong name.
    assertThat(FileTypeDetectUtils.detectMimeType(
        getFileInputStream("classpath:file-type-detect/other.xlsx"), "other.txt"))
        .isEqualTo("application/zip");

    // test.docx with its matching name resolves to the word-processing type.
    assertThat(FileTypeDetectUtils.detectMimeType(
        getFileInputStream("classpath:file-type-detect/test.docx"), "test.docx"))
        .isEqualTo("application/vnd.openxmlformats-officedocument.wordprocessingml.document");

    // test.docx detected without a file name.
    assertThat(FileTypeDetectUtils.detectMimeType(
        getFileInputStream("classpath:file-type-detect/test.docx")))
        .isEqualTo("application/zip");

    // Image resources, each detected together with its real file name.
    assertThat(FileTypeDetectUtils.detectMimeType(
        getFileInputStream("classpath:file-type-detect/test.svg"), "test.svg"))
        .isEqualTo("image/svg+xml");

    assertThat(FileTypeDetectUtils.detectMimeType(
        getFileInputStream("classpath:file-type-detect/test.png"), "test.png"))
        .isEqualTo("image/png");
}
/**
 * Opens a fresh input stream for the resource at the given location.
 *
 * @param location resource location, e.g. a {@code classpath:} prefixed path
 * @return a newly opened stream; the caller (or the code it is handed to) must close it
 * @throws IOException if the resource can not be resolved to a file or opened
 */
private static InputStream getFileInputStream(String location) throws IOException {
    return Files.newInputStream(ResourceUtils.getFile(location).toPath());
}
@Test
void detectFileExtensionTest() throws MimeTypeException {
var ext = FileTypeDetectUtils.detectFileExtension("application/x-x509-key; format=pem");

Binary file not shown.

After

Width:  |  Height:  |  Size: 17 KiB

View File

@ -0,0 +1 @@
<svg xmlns="http://www.w3.org/2000/svg" width="32" height="32" viewBox="0 0 24 24"><path fill="currentColor" d="M12 5q-.425 0-.712-.288Q11 4.425 11 4t.288-.713Q11.575 3 12 3t.713.287Q13 3.575 13 4t-.287.712Q12.425 5 12 5Zm0-3q-.425 0-.712-.288Q11 1.425 11 1t.288-.713Q11.575 0 12 0t.713.287Q13 .575 13 1t-.287.712Q12.425 2 12 2ZM8.4 22q-1.6-.95-2.5-2.538Q5 17.875 5 16q0-2.025 1.075-3.738Q7.15 10.55 9 9.675V7q0-.425.288-.713Q9.575 6 10 6h4q.425 0 .713.287Q15 6.575 15 7v2.675q1.825.875 2.913 2.587Q19 13.975 19 16q0 1.85-.913 3.45q-.912 1.6-2.487 2.55Zm.6-2h6q.95-.725 1.475-1.763Q17 17.2 17 16q0-1.45-.762-2.663q-.763-1.212-2.088-1.837L13 10.95V8h-2v2.95l-1.15.55q-1.325.625-2.087 1.837Q7 14.55 7 16q0 1.2.525 2.237Q8.05 19.275 9 20ZM7 3q-.425 0-.713-.288Q6 2.425 6 2t.287-.713Q6.575 1 7 1t.713.287Q8 1.575 8 2t-.287.712Q7.425 3 7 3Zm10 0q-.425 0-.712-.288Q16 2.425 16 2t.288-.713Q16.575 1 17 1t.712.287Q18 1.575 18 2t-.288.712Q17.425 3 17 3ZM9 5q-.425 0-.712-.288Q8 4.425 8 4t.288-.713Q8.575 3 9 3t.713.287Q10 3.575 10 4t-.287.712Q9.425 5 9 5Zm6 0q-.425 0-.712-.288Q14 4.425 14 4t.288-.713Q14.575 3 15 3t.713.287Q16 3.575 16 4t-.287.712Q15.425 5 15 5Zm-3 9Z"/></svg>

After

Width:  |  Height:  |  Size: 1.1 KiB