mirror of
https://github.com/sismics/docs.git
synced 2024-11-22 14:07:55 +01:00
Closes #639: rework mime type resolution using java api
This commit is contained in:
parent
1ccce3f942
commit
1f7c0afc1e
@ -13,7 +13,7 @@ public class MimeType {
|
|||||||
public static final String IMAGE_GIF = "image/gif";
|
public static final String IMAGE_GIF = "image/gif";
|
||||||
|
|
||||||
public static final String APPLICATION_ZIP = "application/zip";
|
public static final String APPLICATION_ZIP = "application/zip";
|
||||||
|
|
||||||
public static final String APPLICATION_PDF = "application/pdf";
|
public static final String APPLICATION_PDF = "application/pdf";
|
||||||
|
|
||||||
public static final String OPEN_DOCUMENT_TEXT = "application/vnd.oasis.opendocument.text";
|
public static final String OPEN_DOCUMENT_TEXT = "application/vnd.oasis.opendocument.text";
|
||||||
|
@ -1,16 +1,9 @@
|
|||||||
package com.sismics.util.mime;
|
package com.sismics.util.mime;
|
||||||
|
|
||||||
import com.google.common.base.Charsets;
|
|
||||||
import org.apache.commons.compress.utils.IOUtils;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
|
||||||
import java.net.URLConnection;
|
import java.net.URLConnection;
|
||||||
import java.nio.charset.StandardCharsets;
|
|
||||||
import java.nio.file.Files;
|
import java.nio.file.Files;
|
||||||
import java.nio.file.Path;
|
import java.nio.file.Path;
|
||||||
import java.util.zip.ZipEntry;
|
|
||||||
import java.util.zip.ZipInputStream;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Utility to check MIME types.
|
* Utility to check MIME types.
|
||||||
@ -19,7 +12,7 @@ import java.util.zip.ZipInputStream;
|
|||||||
*/
|
*/
|
||||||
public class MimeTypeUtil {
|
public class MimeTypeUtil {
|
||||||
/**
|
/**
|
||||||
* Try to guess the MIME type of a file by its magic number (header).
|
* Try to guess the MIME type of a file.
|
||||||
*
|
*
|
||||||
* @param file File to inspect
|
* @param file File to inspect
|
||||||
* @param name File name
|
* @param name File name
|
||||||
@ -27,59 +20,17 @@ public class MimeTypeUtil {
|
|||||||
* @throws IOException e
|
* @throws IOException e
|
||||||
*/
|
*/
|
||||||
public static String guessMimeType(Path file, String name) throws IOException {
|
public static String guessMimeType(Path file, String name) throws IOException {
|
||||||
String mimeType = name == null ?
|
String mimeType = Files.probeContentType(file);
|
||||||
null : URLConnection.getFileNameMap().getContentTypeFor(name);
|
|
||||||
|
if (mimeType == null && name != null) {
|
||||||
|
mimeType = URLConnection.getFileNameMap().getContentTypeFor(name);
|
||||||
|
}
|
||||||
|
|
||||||
if (mimeType == null) {
|
if (mimeType == null) {
|
||||||
try (InputStream is = Files.newInputStream(file)) {
|
return MimeType.DEFAULT;
|
||||||
final byte[] headerBytes = new byte[64];
|
|
||||||
is.read(headerBytes);
|
|
||||||
mimeType = guessMimeType(headerBytes, name);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return guessOpenDocumentFormat(mimeType, file);
|
return mimeType;
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Try to guess the MIME type of a file by its magic number (header).
|
|
||||||
*
|
|
||||||
* @param headerBytes File header (first bytes)
|
|
||||||
* @param name File name
|
|
||||||
* @return MIME type
|
|
||||||
*/
|
|
||||||
public static String guessMimeType(byte[] headerBytes, String name) {
|
|
||||||
String header = new String(headerBytes, StandardCharsets.US_ASCII);
|
|
||||||
|
|
||||||
// Detect by header bytes
|
|
||||||
if (header.startsWith("PK")) {
|
|
||||||
return MimeType.APPLICATION_ZIP;
|
|
||||||
} else if (header.startsWith("GIF87a") || header.startsWith("GIF89a")) {
|
|
||||||
return MimeType.IMAGE_GIF;
|
|
||||||
} else if (headerBytes[0] == ((byte) 0xff) && headerBytes[1] == ((byte) 0xd8)) {
|
|
||||||
return MimeType.IMAGE_JPEG;
|
|
||||||
} else if (headerBytes[0] == ((byte) 0x89) && headerBytes[1] == ((byte) 0x50) && headerBytes[2] == ((byte) 0x4e) && headerBytes[3] == ((byte) 0x47) &&
|
|
||||||
headerBytes[4] == ((byte) 0x0d) && headerBytes[5] == ((byte) 0x0a) && headerBytes[6] == ((byte) 0x1a) && headerBytes[7] == ((byte) 0x0a)) {
|
|
||||||
return MimeType.IMAGE_PNG;
|
|
||||||
} else if (headerBytes[0] == ((byte) 0x25) && headerBytes[1] == ((byte) 0x50) && headerBytes[2] == ((byte) 0x44) && headerBytes[3] == ((byte) 0x46)) {
|
|
||||||
return MimeType.APPLICATION_PDF;
|
|
||||||
} else if (headerBytes[0] == ((byte) 0x00) && headerBytes[1] == ((byte) 0x00) && headerBytes[2] == ((byte) 0x00)
|
|
||||||
&& (headerBytes[3] == ((byte) 0x14) || headerBytes[3] == ((byte) 0x18) || headerBytes[3] == ((byte) 0x20))
|
|
||||||
&& headerBytes[4] == ((byte) 0x66) && headerBytes[5] == ((byte) 0x74) && headerBytes[6] == ((byte) 0x79) && headerBytes[7] == ((byte) 0x70)) {
|
|
||||||
return MimeType.VIDEO_MP4;
|
|
||||||
} else if (headerBytes[0] == ((byte) 0x1a) && headerBytes[1] == ((byte) 0x45) && headerBytes[2] == ((byte) 0xdf) && headerBytes[3] == ((byte) 0xa3)) {
|
|
||||||
return MimeType.VIDEO_WEBM;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Detect by file extension
|
|
||||||
if (name != null) {
|
|
||||||
if (name.endsWith(".txt")) {
|
|
||||||
return MimeType.TEXT_PLAIN;
|
|
||||||
} else if (name.endsWith(".csv")) {
|
|
||||||
return MimeType.TEXT_CSV;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return MimeType.DEFAULT;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -116,52 +67,4 @@ public class MimeTypeUtil {
|
|||||||
return "bin";
|
return "bin";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Guess the MIME type of open document formats (docx and odt).
|
|
||||||
* It's more costly than the simple header check, but needed because open document formats
|
|
||||||
* are simple ZIP files on the outside and much bigger on the inside.
|
|
||||||
*
|
|
||||||
* @param mimeType Currently detected MIME type
|
|
||||||
* @param file File on disk
|
|
||||||
* @return MIME type
|
|
||||||
*/
|
|
||||||
private static String guessOpenDocumentFormat(String mimeType, Path file) {
|
|
||||||
if (!MimeType.APPLICATION_ZIP.equals(mimeType)) {
|
|
||||||
// open document formats are ZIP files
|
|
||||||
return mimeType;
|
|
||||||
}
|
|
||||||
|
|
||||||
try (InputStream inputStream = Files.newInputStream(file);
|
|
||||||
ZipInputStream zipInputStream = new ZipInputStream(inputStream, Charsets.ISO_8859_1)) {
|
|
||||||
ZipEntry archiveEntry = zipInputStream.getNextEntry();
|
|
||||||
while (archiveEntry != null) {
|
|
||||||
if (archiveEntry.getName().equals("mimetype")) {
|
|
||||||
// Maybe it's an ODT file
|
|
||||||
String content = new String(IOUtils.toByteArray(zipInputStream), Charsets.ISO_8859_1);
|
|
||||||
if (MimeType.OPEN_DOCUMENT_TEXT.equals(content.trim())) {
|
|
||||||
mimeType = MimeType.OPEN_DOCUMENT_TEXT;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
} else if (archiveEntry.getName().equals("[Content_Types].xml")) {
|
|
||||||
// Maybe it's a DOCX file
|
|
||||||
String content = new String(IOUtils.toByteArray(zipInputStream), Charsets.ISO_8859_1);
|
|
||||||
if (content.contains(MimeType.OFFICE_DOCUMENT)) {
|
|
||||||
mimeType = MimeType.OFFICE_DOCUMENT;
|
|
||||||
break;
|
|
||||||
} else if (content.contains(MimeType.OFFICE_PRESENTATION)) {
|
|
||||||
mimeType = MimeType.OFFICE_PRESENTATION;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
archiveEntry = zipInputStream.getNextEntry();
|
|
||||||
}
|
|
||||||
} catch (Exception e) {
|
|
||||||
// In case of any error, just give up and keep the ZIP MIME type
|
|
||||||
return mimeType;
|
|
||||||
}
|
|
||||||
|
|
||||||
return mimeType;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
@ -15,7 +15,7 @@ import java.nio.file.Paths;
|
|||||||
*/
|
*/
|
||||||
public class TestMimeTypeUtil {
|
public class TestMimeTypeUtil {
|
||||||
@Test
|
@Test
|
||||||
public void guessOpenDocumentFormatTest() throws Exception {
|
public void test() throws Exception {
|
||||||
// Detect ODT files
|
// Detect ODT files
|
||||||
Path path = Paths.get(ClassLoader.getSystemResource("file/document.odt").toURI());
|
Path path = Paths.get(ClassLoader.getSystemResource("file/document.odt").toURI());
|
||||||
Assert.assertEquals(MimeType.OPEN_DOCUMENT_TEXT, MimeTypeUtil.guessMimeType(path, "document.odt"));
|
Assert.assertEquals(MimeType.OPEN_DOCUMENT_TEXT, MimeTypeUtil.guessMimeType(path, "document.odt"));
|
||||||
@ -28,7 +28,44 @@ public class TestMimeTypeUtil {
|
|||||||
path = Paths.get(ClassLoader.getSystemResource("file/apache.pptx").toURI());
|
path = Paths.get(ClassLoader.getSystemResource("file/apache.pptx").toURI());
|
||||||
Assert.assertEquals(MimeType.OFFICE_PRESENTATION, MimeTypeUtil.guessMimeType(path, "apache.pptx"));
|
Assert.assertEquals(MimeType.OFFICE_PRESENTATION, MimeTypeUtil.guessMimeType(path, "apache.pptx"));
|
||||||
|
|
||||||
|
// Detect XLSX files
|
||||||
|
path = Paths.get(ClassLoader.getSystemResource("file/document.xlsx").toURI());
|
||||||
|
Assert.assertEquals(MimeType.OFFICE_SHEET, MimeTypeUtil.guessMimeType(path, "document.xlsx"));
|
||||||
|
|
||||||
// Detect TXT files
|
// Detect TXT files
|
||||||
Assert.assertEquals(MimeType.TEXT_PLAIN, MimeTypeUtil.guessMimeType(path, "file.txt"));
|
path = Paths.get(ClassLoader.getSystemResource("file/document.txt").toURI());
|
||||||
|
Assert.assertEquals(MimeType.TEXT_PLAIN, MimeTypeUtil.guessMimeType(path, "document.txt"));
|
||||||
|
|
||||||
|
// Detect CSV files
|
||||||
|
path = Paths.get(ClassLoader.getSystemResource("file/document.csv").toURI());
|
||||||
|
Assert.assertEquals(MimeType.TEXT_CSV, MimeTypeUtil.guessMimeType(path, "document.csv"));
|
||||||
|
|
||||||
|
// Detect PDF files
|
||||||
|
path = Paths.get(ClassLoader.getSystemResource("file/udhr.pdf").toURI());
|
||||||
|
Assert.assertEquals(MimeType.APPLICATION_PDF, MimeTypeUtil.guessMimeType(path, "udhr.pdf"));
|
||||||
|
|
||||||
|
// Detect JPEG files
|
||||||
|
path = Paths.get(ClassLoader.getSystemResource("file/apollo_portrait.jpg").toURI());
|
||||||
|
Assert.assertEquals(MimeType.IMAGE_JPEG, MimeTypeUtil.guessMimeType(path, "apollo_portrait.jpg"));
|
||||||
|
|
||||||
|
// Detect GIF files
|
||||||
|
path = Paths.get(ClassLoader.getSystemResource("file/image.gif").toURI());
|
||||||
|
Assert.assertEquals(MimeType.IMAGE_GIF, MimeTypeUtil.guessMimeType(path, "image.gif"));
|
||||||
|
|
||||||
|
// Detect PNG files
|
||||||
|
path = Paths.get(ClassLoader.getSystemResource("file/image.png").toURI());
|
||||||
|
Assert.assertEquals(MimeType.IMAGE_PNG, MimeTypeUtil.guessMimeType(path, "image.png"));
|
||||||
|
|
||||||
|
// Detect ZIP files
|
||||||
|
path = Paths.get(ClassLoader.getSystemResource("file/document.zip").toURI());
|
||||||
|
Assert.assertEquals(MimeType.APPLICATION_ZIP, MimeTypeUtil.guessMimeType(path, "document.zip"));
|
||||||
|
|
||||||
|
// Detect WEBM files
|
||||||
|
path = Paths.get(ClassLoader.getSystemResource("file/video.webm").toURI());
|
||||||
|
Assert.assertEquals(MimeType.VIDEO_WEBM, MimeTypeUtil.guessMimeType(path, "video.webm"));
|
||||||
|
|
||||||
|
// Detect MP4 files
|
||||||
|
path = Paths.get(ClassLoader.getSystemResource("file/video.mp4").toURI());
|
||||||
|
Assert.assertEquals(MimeType.VIDEO_MP4, MimeTypeUtil.guessMimeType(path, "video.mp4"));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
2
docs-core/src/test/resources/file/document.csv
Normal file
2
docs-core/src/test/resources/file/document.csv
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
col1,col2
|
||||||
|
test,me
|
|
1
docs-core/src/test/resources/file/document.txt
Normal file
1
docs-core/src/test/resources/file/document.txt
Normal file
@ -0,0 +1 @@
|
|||||||
|
test me.
|
BIN
docs-core/src/test/resources/file/document.xlsx
Normal file
BIN
docs-core/src/test/resources/file/document.xlsx
Normal file
Binary file not shown.
BIN
docs-core/src/test/resources/file/document.zip
Normal file
BIN
docs-core/src/test/resources/file/document.zip
Normal file
Binary file not shown.
BIN
docs-core/src/test/resources/file/image.gif
Normal file
BIN
docs-core/src/test/resources/file/image.gif
Normal file
Binary file not shown.
After Width: | Height: | Size: 2.6 KiB |
BIN
docs-core/src/test/resources/file/image.png
Normal file
BIN
docs-core/src/test/resources/file/image.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 4.4 KiB |
BIN
docs-core/src/test/resources/file/video.mp4
Normal file
BIN
docs-core/src/test/resources/file/video.mp4
Normal file
Binary file not shown.
BIN
docs-core/src/test/resources/file/video.webm
Normal file
BIN
docs-core/src/test/resources/file/video.webm
Normal file
Binary file not shown.
Loading…
Reference in New Issue
Block a user