diff --git a/docs-core/src/main/java/com/sismics/util/mime/MimeType.java b/docs-core/src/main/java/com/sismics/util/mime/MimeType.java index 1ea316b2..f45e1f96 100644 --- a/docs-core/src/main/java/com/sismics/util/mime/MimeType.java +++ b/docs-core/src/main/java/com/sismics/util/mime/MimeType.java @@ -13,7 +13,7 @@ public class MimeType { public static final String IMAGE_GIF = "image/gif"; public static final String APPLICATION_ZIP = "application/zip"; - + public static final String APPLICATION_PDF = "application/pdf"; public static final String OPEN_DOCUMENT_TEXT = "application/vnd.oasis.opendocument.text"; diff --git a/docs-core/src/main/java/com/sismics/util/mime/MimeTypeUtil.java b/docs-core/src/main/java/com/sismics/util/mime/MimeTypeUtil.java index 0a9ea3d7..546efcb4 100644 --- a/docs-core/src/main/java/com/sismics/util/mime/MimeTypeUtil.java +++ b/docs-core/src/main/java/com/sismics/util/mime/MimeTypeUtil.java @@ -1,16 +1,9 @@ package com.sismics.util.mime; -import com.google.common.base.Charsets; -import org.apache.commons.compress.utils.IOUtils; - import java.io.IOException; -import java.io.InputStream; import java.net.URLConnection; -import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; -import java.util.zip.ZipEntry; -import java.util.zip.ZipInputStream; /** * Utility to check MIME types. @@ -19,7 +12,7 @@ import java.util.zip.ZipInputStream; */ public class MimeTypeUtil { /** - * Try to guess the MIME type of a file by its magic number (header). + * Try to guess the MIME type of a file. * * @param file File to inspect * @param name File name @@ -27,59 +20,17 @@ public class MimeTypeUtil { * @throws IOException e */ public static String guessMimeType(Path file, String name) throws IOException { - String mimeType = name == null ? - null : URLConnection.getFileNameMap().getContentTypeFor(name); + String mimeType = Files.probeContentType(file); + + if (mimeType == null && name != null) { + mimeType = URLConnection.getFileNameMap().getContentTypeFor(name); + } + if (mimeType == null) { - try (InputStream is = Files.newInputStream(file)) { - final byte[] headerBytes = new byte[64]; - is.read(headerBytes); - mimeType = guessMimeType(headerBytes, name); - } + return MimeType.DEFAULT; } - return guessOpenDocumentFormat(mimeType, file); - } - - /** - * Try to guess the MIME type of a file by its magic number (header). - * - * @param headerBytes File header (first bytes) - * @param name File name - * @return MIME type - */ - public static String guessMimeType(byte[] headerBytes, String name) { - String header = new String(headerBytes, StandardCharsets.US_ASCII); - - // Detect by header bytes - if (header.startsWith("PK")) { - return MimeType.APPLICATION_ZIP; - } else if (header.startsWith("GIF87a") || header.startsWith("GIF89a")) { - return MimeType.IMAGE_GIF; - } else if (headerBytes[0] == ((byte) 0xff) && headerBytes[1] == ((byte) 0xd8)) { - return MimeType.IMAGE_JPEG; - } else if (headerBytes[0] == ((byte) 0x89) && headerBytes[1] == ((byte) 0x50) && headerBytes[2] == ((byte) 0x4e) && headerBytes[3] == ((byte) 0x47) && - headerBytes[4] == ((byte) 0x0d) && headerBytes[5] == ((byte) 0x0a) && headerBytes[6] == ((byte) 0x1a) && headerBytes[7] == ((byte) 0x0a)) { - return MimeType.IMAGE_PNG; - } else if (headerBytes[0] == ((byte) 0x25) && headerBytes[1] == ((byte) 0x50) && headerBytes[2] == ((byte) 0x44) && headerBytes[3] == ((byte) 0x46)) { - return MimeType.APPLICATION_PDF; - } else if (headerBytes[0] == ((byte) 0x00) && headerBytes[1] == ((byte) 0x00) && headerBytes[2] == ((byte) 0x00) - && (headerBytes[3] == ((byte) 0x14) || headerBytes[3] == ((byte) 0x18) || headerBytes[3] == ((byte) 0x20)) - && headerBytes[4] == ((byte) 0x66) && headerBytes[5] == ((byte) 0x74) && headerBytes[6] == ((byte) 0x79) && headerBytes[7] == ((byte) 0x70)) { - return MimeType.VIDEO_MP4; - } else if (headerBytes[0] == ((byte) 0x1a) && headerBytes[1] == ((byte) 0x45) && headerBytes[2] == ((byte) 0xdf) && headerBytes[3] == ((byte) 0xa3)) { - return MimeType.VIDEO_WEBM; - } - - // Detect by file extension - if (name != null) { - if (name.endsWith(".txt")) { - return MimeType.TEXT_PLAIN; - } else if (name.endsWith(".csv")) { - return MimeType.TEXT_CSV; - } - } - - return MimeType.DEFAULT; + return mimeType; } /** @@ -116,52 +67,4 @@ public class MimeTypeUtil { return "bin"; } } - - /** - * Guess the MIME type of open document formats (docx and odt). - * It's more costly than the simple header check, but needed because open document formats - * are simple ZIP files on the outside and much bigger on the inside. - * - * @param mimeType Currently detected MIME type - * @param file File on disk - * @return MIME type - */ - private static String guessOpenDocumentFormat(String mimeType, Path file) { - if (!MimeType.APPLICATION_ZIP.equals(mimeType)) { - // open document formats are ZIP files - return mimeType; - } - - try (InputStream inputStream = Files.newInputStream(file); - ZipInputStream zipInputStream = new ZipInputStream(inputStream, Charsets.ISO_8859_1)) { - ZipEntry archiveEntry = zipInputStream.getNextEntry(); - while (archiveEntry != null) { - if (archiveEntry.getName().equals("mimetype")) { - // Maybe it's an ODT file - String content = new String(IOUtils.toByteArray(zipInputStream), Charsets.ISO_8859_1); - if (MimeType.OPEN_DOCUMENT_TEXT.equals(content.trim())) { - mimeType = MimeType.OPEN_DOCUMENT_TEXT; - break; - } - } else if (archiveEntry.getName().equals("[Content_Types].xml")) { - // Maybe it's a DOCX file - String content = new String(IOUtils.toByteArray(zipInputStream), Charsets.ISO_8859_1); - if (content.contains(MimeType.OFFICE_DOCUMENT)) { - mimeType = MimeType.OFFICE_DOCUMENT; - break; - } else if (content.contains(MimeType.OFFICE_PRESENTATION)) { - mimeType = MimeType.OFFICE_PRESENTATION; - break; - } - } - - archiveEntry = zipInputStream.getNextEntry(); - } - } catch (Exception e) { - // In case of any error, just give up and keep the ZIP MIME type - return mimeType; - } - - return mimeType; - } } diff --git a/docs-core/src/test/java/com/sismics/util/TestMimeTypeUtil.java b/docs-core/src/test/java/com/sismics/util/TestMimeTypeUtil.java index 1fa18fb1..3f7d1cfe 100644 --- a/docs-core/src/test/java/com/sismics/util/TestMimeTypeUtil.java +++ b/docs-core/src/test/java/com/sismics/util/TestMimeTypeUtil.java @@ -15,7 +15,7 @@ import java.nio.file.Paths; */ public class TestMimeTypeUtil { @Test - public void guessOpenDocumentFormatTest() throws Exception { + public void test() throws Exception { // Detect ODT files Path path = Paths.get(ClassLoader.getSystemResource("file/document.odt").toURI()); Assert.assertEquals(MimeType.OPEN_DOCUMENT_TEXT, MimeTypeUtil.guessMimeType(path, "document.odt")); @@ -28,7 +28,44 @@ public class TestMimeTypeUtil { path = Paths.get(ClassLoader.getSystemResource("file/apache.pptx").toURI()); Assert.assertEquals(MimeType.OFFICE_PRESENTATION, MimeTypeUtil.guessMimeType(path, "apache.pptx")); + // Detect XLSX files + path = Paths.get(ClassLoader.getSystemResource("file/document.xlsx").toURI()); + Assert.assertEquals(MimeType.OFFICE_SHEET, MimeTypeUtil.guessMimeType(path, "document.xlsx")); + // Detect TXT files - Assert.assertEquals(MimeType.TEXT_PLAIN, MimeTypeUtil.guessMimeType(path, "file.txt")); + path = Paths.get(ClassLoader.getSystemResource("file/document.txt").toURI()); + Assert.assertEquals(MimeType.TEXT_PLAIN, MimeTypeUtil.guessMimeType(path, "document.txt")); + + // Detect CSV files + path = Paths.get(ClassLoader.getSystemResource("file/document.csv").toURI()); + Assert.assertEquals(MimeType.TEXT_CSV, MimeTypeUtil.guessMimeType(path, "document.csv")); + + // Detect PDF files + path = Paths.get(ClassLoader.getSystemResource("file/udhr.pdf").toURI()); + Assert.assertEquals(MimeType.APPLICATION_PDF, MimeTypeUtil.guessMimeType(path, "udhr.pdf")); + + // Detect JPEG files + path = Paths.get(ClassLoader.getSystemResource("file/apollo_portrait.jpg").toURI()); + Assert.assertEquals(MimeType.IMAGE_JPEG, MimeTypeUtil.guessMimeType(path, "apollo_portrait.jpg")); + + // Detect GIF files + path = Paths.get(ClassLoader.getSystemResource("file/image.gif").toURI()); + Assert.assertEquals(MimeType.IMAGE_GIF, MimeTypeUtil.guessMimeType(path, "image.gif")); + + // Detect PNG files + path = Paths.get(ClassLoader.getSystemResource("file/image.png").toURI()); + Assert.assertEquals(MimeType.IMAGE_PNG, MimeTypeUtil.guessMimeType(path, "image.png")); + + // Detect ZIP files + path = Paths.get(ClassLoader.getSystemResource("file/document.zip").toURI()); + Assert.assertEquals(MimeType.APPLICATION_ZIP, MimeTypeUtil.guessMimeType(path, "document.zip")); + + // Detect WEBM files + path = Paths.get(ClassLoader.getSystemResource("file/video.webm").toURI()); + Assert.assertEquals(MimeType.VIDEO_WEBM, MimeTypeUtil.guessMimeType(path, "video.webm")); + + // Detect MP4 files + path = Paths.get(ClassLoader.getSystemResource("file/video.mp4").toURI()); + Assert.assertEquals(MimeType.VIDEO_MP4, MimeTypeUtil.guessMimeType(path, "video.mp4")); } } diff --git a/docs-core/src/test/resources/file/document.csv b/docs-core/src/test/resources/file/document.csv new file mode 100644 index 00000000..f26e670a --- /dev/null +++ b/docs-core/src/test/resources/file/document.csv @@ -0,0 +1,2 @@ +col1,col2 +test,me \ No newline at end of file diff --git a/docs-core/src/test/resources/file/document.txt b/docs-core/src/test/resources/file/document.txt new file mode 100644 index 00000000..c076d962 --- /dev/null +++ b/docs-core/src/test/resources/file/document.txt @@ -0,0 +1 @@ +test me. \ No newline at end of file diff --git a/docs-core/src/test/resources/file/document.xlsx b/docs-core/src/test/resources/file/document.xlsx new file mode 100644 index 00000000..c67d3776 Binary files /dev/null and b/docs-core/src/test/resources/file/document.xlsx differ diff --git a/docs-core/src/test/resources/file/document.zip b/docs-core/src/test/resources/file/document.zip new file mode 100644 index 00000000..f9d8d7de Binary files /dev/null and b/docs-core/src/test/resources/file/document.zip differ diff --git a/docs-core/src/test/resources/file/image.gif b/docs-core/src/test/resources/file/image.gif new file mode 100644 index 00000000..6d302dc6 Binary files /dev/null and b/docs-core/src/test/resources/file/image.gif differ diff --git a/docs-core/src/test/resources/file/image.png b/docs-core/src/test/resources/file/image.png new file mode 100644 index 00000000..48b92ecc Binary files /dev/null and b/docs-core/src/test/resources/file/image.png differ diff --git a/docs-core/src/test/resources/file/video.mp4 b/docs-core/src/test/resources/file/video.mp4 new file mode 100644 index 00000000..3355ae77 Binary files /dev/null and b/docs-core/src/test/resources/file/video.mp4 differ diff --git a/docs-core/src/test/resources/file/video.webm b/docs-core/src/test/resources/file/video.webm new file mode 100644 index 00000000..0757a975 Binary files /dev/null and b/docs-core/src/test/resources/file/video.webm differ