diff --git a/api/src/main/java/run/halo/app/infra/utils/FileTypeDetectUtils.java b/api/src/main/java/run/halo/app/infra/utils/FileTypeDetectUtils.java index f9326863e..7166260d4 100644 --- a/api/src/main/java/run/halo/app/infra/utils/FileTypeDetectUtils.java +++ b/api/src/main/java/run/halo/app/infra/utils/FileTypeDetectUtils.java @@ -1,29 +1,52 @@ package run.halo.app.infra.utils; +import java.io.BufferedInputStream; import java.io.IOException; import java.io.InputStream; import lombok.experimental.UtilityClass; -import org.apache.tika.Tika; +import org.apache.tika.detect.DefaultDetector; +import org.apache.tika.detect.Detector; +import org.apache.tika.metadata.Metadata; +import org.apache.tika.metadata.TikaCoreProperties; import org.apache.tika.mime.MimeTypeException; import org.apache.tika.mime.MimeTypes; +import org.springframework.util.Assert; @UtilityClass public class FileTypeDetectUtils { - private static final Tika tika = new Tika(); + private static final Detector detector = new DefaultDetector(); + + /** + *

<p>Detects the media type of the given document.</p>

+ *

<p>The type detection is based on the content of the given document stream and the name of + * the document.</p>

+ * + * @param inputStream the document stream must not be null + * @throws IOException if the stream can not be read + */ + public static String detectMimeType(InputStream inputStream, String name) throws IOException { + Assert.notNull(name, "The name of the document must not be null"); + var metadata = new Metadata(); + metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, name); + return doDetectMimeType(inputStream, metadata); + } /** * Detect mime type. * - * @param inputStream input stream will be closed after detection. + * @param inputStream input stream will be closed after detection, must not be null */ public static String detectMimeType(InputStream inputStream) throws IOException { - try { - return tika.detect(inputStream); - } finally { - if (inputStream != null) { - inputStream.close(); - } + return doDetectMimeType(inputStream, new Metadata()); + } + + private static String doDetectMimeType(InputStream inputStream, Metadata metadata) + throws IOException { + Assert.notNull(inputStream, "The inputStream must not be null"); + try (var stream = (!inputStream.markSupported() + ? 
new BufferedInputStream(inputStream) : inputStream)) { + return detector.detect(stream, metadata).toString(); } } diff --git a/application/src/main/java/run/halo/app/core/attachment/endpoint/LocalAttachmentUploadHandler.java b/application/src/main/java/run/halo/app/core/attachment/endpoint/LocalAttachmentUploadHandler.java index bb48574d4..2c4351112 100644 --- a/application/src/main/java/run/halo/app/core/attachment/endpoint/LocalAttachmentUploadHandler.java +++ b/application/src/main/java/run/halo/app/core/attachment/endpoint/LocalAttachmentUploadHandler.java @@ -158,7 +158,7 @@ class LocalAttachmentUploadHandler implements AttachmentHandler { var typeValidator = file.content() .next() .handle((dataBuffer, sink) -> { - var mimeType = detectMimeType(dataBuffer.asInputStream()); + var mimeType = detectMimeType(dataBuffer.asInputStream(), file.name()); var isAllow = setting.getAllowedFileTypes() .stream() .map(FileCategoryMatcher::of) @@ -178,9 +178,9 @@ class LocalAttachmentUploadHandler implements AttachmentHandler { } @NonNull - private String detectMimeType(InputStream inputStream) { + private String detectMimeType(InputStream inputStream, String name) { try { - return FileTypeDetectUtils.detectMimeType(inputStream); + return FileTypeDetectUtils.detectMimeType(inputStream, name); } catch (IOException e) { log.warn("Failed to detect file type", e); return "Unknown"; diff --git a/application/src/test/java/run/halo/app/infra/utils/FileTypeDetectUtilsTest.java b/application/src/test/java/run/halo/app/infra/utils/FileTypeDetectUtilsTest.java index a69ee0c80..1c3e407d0 100644 --- a/application/src/test/java/run/halo/app/infra/utils/FileTypeDetectUtilsTest.java +++ b/application/src/test/java/run/halo/app/infra/utils/FileTypeDetectUtilsTest.java @@ -3,6 +3,7 @@ package run.halo.app.infra.utils; import static org.assertj.core.api.Assertions.assertThat; import java.io.IOException; +import java.io.InputStream; import java.nio.file.Files; import 
org.apache.tika.mime.MimeTypeException; import org.junit.jupiter.api.Test; @@ -31,6 +32,60 @@ class FileTypeDetectUtilsTest { assertThat(mimeType).isEqualTo("application/zip"); } + @Test + void detectMimeTypeWithNameTest() throws IOException { + var stream = getFileInputStream("classpath:file-type-detect/index.js"); + String mimeType = FileTypeDetectUtils.detectMimeType(stream, "index.js"); + assertThat(mimeType).isEqualTo("application/javascript"); + + stream = getFileInputStream("classpath:file-type-detect/index.html"); + mimeType = + FileTypeDetectUtils.detectMimeType(stream, "index.html"); + assertThat(mimeType).isEqualTo("text/html"); + + stream = getFileInputStream("classpath:file-type-detect/test.json"); + mimeType = FileTypeDetectUtils.detectMimeType(stream, "test.json"); + assertThat(mimeType).isEqualTo("application/json"); + + stream = getFileInputStream("classpath:file-type-detect/other.xlsx"); + mimeType = FileTypeDetectUtils.detectMimeType(stream, "other.xlsx"); + assertThat(mimeType).isEqualTo( + "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"); + + // other.xlsx detect without name + stream = getFileInputStream("classpath:file-type-detect/other.xlsx"); + mimeType = FileTypeDetectUtils.detectMimeType(stream); + assertThat(mimeType).isEqualTo("application/zip"); + + // other.xlsx detect with wrong name + stream = getFileInputStream("classpath:file-type-detect/other.xlsx"); + mimeType = FileTypeDetectUtils.detectMimeType(stream, "other.txt"); + assertThat(mimeType).isEqualTo("application/zip"); + + stream = getFileInputStream("classpath:file-type-detect/test.docx"); + mimeType = FileTypeDetectUtils.detectMimeType(stream, "test.docx"); + assertThat(mimeType).isEqualTo( + "application/vnd.openxmlformats-officedocument.wordprocessingml.document"); + + // docx detect without file name + stream = getFileInputStream("classpath:file-type-detect/test.docx"); + mimeType = FileTypeDetectUtils.detectMimeType(stream); + 
assertThat(mimeType).isEqualTo("application/zip"); + + stream = getFileInputStream("classpath:file-type-detect/test.svg"); + mimeType = FileTypeDetectUtils.detectMimeType(stream, "test.svg"); + assertThat(mimeType).isEqualTo("image/svg+xml"); + + stream = getFileInputStream("classpath:file-type-detect/test.png"); + mimeType = FileTypeDetectUtils.detectMimeType(stream, "test.png"); + assertThat(mimeType).isEqualTo("image/png"); + } + + private static InputStream getFileInputStream(String location) throws IOException { + var file = ResourceUtils.getFile(location); + return Files.newInputStream(file.toPath()); + } + @Test void detectFileExtensionTest() throws MimeTypeException { var ext = FileTypeDetectUtils.detectFileExtension("application/x-x509-key; format=pem"); diff --git a/application/src/test/resources/file-type-detect/index.html b/application/src/test/resources/file-type-detect/index.html new file mode 100644 index 000000000..e69de29bb diff --git a/application/src/test/resources/file-type-detect/index.js b/application/src/test/resources/file-type-detect/index.js new file mode 100644 index 000000000..e69de29bb diff --git a/application/src/test/resources/file-type-detect/other.xlsx b/application/src/test/resources/file-type-detect/other.xlsx new file mode 100644 index 000000000..1d2b9647f Binary files /dev/null and b/application/src/test/resources/file-type-detect/other.xlsx differ diff --git a/application/src/test/resources/file-type-detect/test.docx b/application/src/test/resources/file-type-detect/test.docx new file mode 100644 index 000000000..2b083a6fa Binary files /dev/null and b/application/src/test/resources/file-type-detect/test.docx differ diff --git a/application/src/test/resources/file-type-detect/test.json b/application/src/test/resources/file-type-detect/test.json new file mode 100644 index 000000000..e69de29bb diff --git a/application/src/test/resources/file-type-detect/test.png b/application/src/test/resources/file-type-detect/test.png new file 
mode 100644 index 000000000..820549a92 Binary files /dev/null and b/application/src/test/resources/file-type-detect/test.png differ diff --git a/application/src/test/resources/file-type-detect/test.svg b/application/src/test/resources/file-type-detect/test.svg new file mode 100644 index 000000000..e93f41998 --- /dev/null +++ b/application/src/test/resources/file-type-detect/test.svg @@ -0,0 +1 @@ + \ No newline at end of file