// Source: docs/docs-core/src/main/java/com/sismics/util/mime/MimeTypeUtil.java (160 lines, 6.3 KiB, Java)

package com.sismics.util.mime;
import com.google.common.base.Charsets;
import com.sismics.docs.core.model.jpa.File;
import org.apache.commons.compress.utils.IOUtils;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
/**
 * Utility to check MIME types.
 *
 * <p>All methods are static and keep no state.
 *
 * @author jtremeaux
 */
public class MimeTypeUtil {
    /** Number of leading bytes read from a file for magic-number detection. */
    private static final int HEADER_SIZE = 64;

    /**
     * Try to guess the MIME type of a file by its magic number (header).
     *
     * <p>Reads up to the first {@value #HEADER_SIZE} bytes of the file and delegates to
     * {@link #guessMimeType(byte[], String)}. Files shorter than that are zero-padded.
     *
     * @param file File to inspect
     * @param name File name, used for extension-based detection when the header is not recognized (may be null)
     * @return MIME type
     * @throws IOException if the file cannot be opened or read
     */
    public static String guessMimeType(Path file, String name) throws IOException {
        try (InputStream is = Files.newInputStream(file)) {
            byte[] headerBytes = new byte[HEADER_SIZE];
            // A single read() may legally return fewer bytes than requested, so keep
            // reading until the buffer is full or the end of the stream is reached.
            int filled = 0;
            while (filled < headerBytes.length) {
                int read = is.read(headerBytes, filled, headerBytes.length - filled);
                if (read < 0) {
                    break;
                }
                filled += read;
            }
            return guessMimeType(headerBytes, name);
        }
    }

    /**
     * Try to guess the MIME type of a file by its magic number (header).
     *
     * <p>The header is compared against known signatures first; if none matches, the file
     * name extension is checked (case-insensitively) for {@code .txt} and {@code .csv}.
     * A header that is null, empty or shorter than a signature simply does not match it.
     *
     * @param headerBytes File header (first bytes), may be null or shorter than any signature
     * @param name File name (may be null)
     * @return MIME type, or {@code MimeType.DEFAULT} when nothing matches
     * @throws UnsupportedEncodingException never thrown by this implementation; the clause is
     *         retained so that existing callers keep compiling
     */
    public static String guessMimeType(byte[] headerBytes, String name) throws UnsupportedEncodingException {
        // Detect by header bytes
        if (headerBytes != null) {
            if (hasPrefix(headerBytes, 0x50, 0x4b)) {
                // "PK"
                return MimeType.APPLICATION_ZIP;
            } else if (hasPrefix(headerBytes, 0x47, 0x49, 0x46, 0x38, 0x37, 0x61)
                    || hasPrefix(headerBytes, 0x47, 0x49, 0x46, 0x38, 0x39, 0x61)) {
                // "GIF87a" or "GIF89a"
                return MimeType.IMAGE_GIF;
            } else if (hasPrefix(headerBytes, 0xff, 0xd8)) {
                return MimeType.IMAGE_JPEG;
            } else if (hasPrefix(headerBytes, 0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a)) {
                return MimeType.IMAGE_PNG;
            } else if (hasPrefix(headerBytes, 0x25, 0x50, 0x44, 0x46)) {
                // "%PDF"
                return MimeType.APPLICATION_PDF;
            } else if (hasPrefix(headerBytes, 0x00, 0x00, 0x00, 0x14, 0x66, 0x74, 0x79, 0x70)
                    || hasPrefix(headerBytes, 0x00, 0x00, 0x00, 0x18, 0x66, 0x74, 0x79, 0x70)) {
                // Three zero bytes, then 0x14 or 0x18, followed by "ftyp"
                return MimeType.VIDEO_MP4;
            } else if (hasPrefix(headerBytes, 0x1a, 0x45, 0xdf, 0xa3)) {
                return MimeType.VIDEO_WEBM;
            }
        }

        // Detect by file extension
        if (name != null) {
            if (hasExtension(name, ".txt")) {
                return MimeType.TEXT_PLAIN;
            } else if (hasExtension(name, ".csv")) {
                return MimeType.TEXT_CSV;
            }
        }

        return MimeType.DEFAULT;
    }

    /**
     * Get a file extension linked to a MIME type.
     *
     * @param mimeType MIME type (may be null)
     * @return File extension without the leading dot, {@code "bin"} for null or unknown types
     */
    public static String getFileExtension(String mimeType) {
        if (mimeType == null) {
            // Switching on a null String would throw; treat it like any unknown type
            return "bin";
        }
        switch (mimeType) {
            case MimeType.APPLICATION_ZIP:
                return "zip";
            case MimeType.IMAGE_GIF:
                return "gif";
            case MimeType.IMAGE_JPEG:
                return "jpg";
            case MimeType.IMAGE_PNG:
                return "png";
            case MimeType.APPLICATION_PDF:
                return "pdf";
            case MimeType.OPEN_DOCUMENT_TEXT:
                return "odt";
            case MimeType.OFFICE_DOCUMENT:
                return "docx";
            case MimeType.TEXT_PLAIN:
                return "txt";
            case MimeType.TEXT_CSV:
                return "csv";
            case MimeType.VIDEO_MP4:
                return "mp4";
            case MimeType.VIDEO_WEBM:
                return "webm";
            default:
                return "bin";
        }
    }

    /**
     * Guess the MIME type of open document formats (docx and odt).
     * It's more costly than the simple header check, but needed because these formats
     * look like plain ZIP archives from their header; only the archive entries tell them apart.
     *
     * <p>The archive is scanned entry by entry: a {@code mimetype} entry whose trimmed content
     * equals the ODT MIME type marks an ODT file, and a {@code [Content_Types].xml} entry
     * containing the DOCX MIME type marks a DOCX file. The scan stops at the first match.
     *
     * @param file File whose currently stored MIME type is read
     * @param unencryptedFile File on disk
     * @return Detected MIME type, or the MIME type already set on {@code file} if it is not a ZIP,
     *         nothing matches, or any error occurs while reading the archive
     */
    public static String guessOpenDocumentFormat(File file, Path unencryptedFile) {
        if (!MimeType.APPLICATION_ZIP.equals(file.getMimeType())) {
            // open document formats are ZIP files
            return file.getMimeType();
        }

        String mimeType = file.getMimeType();
        try (InputStream inputStream = Files.newInputStream(unencryptedFile);
             ZipInputStream zipInputStream = new ZipInputStream(inputStream, Charsets.ISO_8859_1)) {
            ZipEntry archiveEntry = zipInputStream.getNextEntry();
            while (archiveEntry != null) {
                String entryName = archiveEntry.getName();
                if (entryName.equals("mimetype")) {
                    // Maybe it's an ODT file
                    String content = new String(IOUtils.toByteArray(zipInputStream), Charsets.ISO_8859_1);
                    if (MimeType.OPEN_DOCUMENT_TEXT.equals(content.trim())) {
                        mimeType = MimeType.OPEN_DOCUMENT_TEXT;
                        break;
                    }
                } else if (entryName.equals("[Content_Types].xml")) {
                    // Maybe it's a DOCX file
                    String content = new String(IOUtils.toByteArray(zipInputStream), Charsets.ISO_8859_1);
                    if (content.contains(MimeType.OFFICE_DOCUMENT)) {
                        mimeType = MimeType.OFFICE_DOCUMENT;
                        break;
                    }
                }
                archiveEntry = zipInputStream.getNextEntry();
            }
        } catch (Exception e) {
            // Deliberate best effort: in case of any error, just give up and keep the ZIP MIME type
            return file.getMimeType();
        }

        return mimeType;
    }

    /**
     * Check whether a byte array starts with the given signature, without ever reading
     * past the end of the array.
     *
     * @param data Bytes to inspect
     * @param signature Expected leading bytes, each given as an unsigned value (0x00-0xff)
     * @return true if data is at least as long as the signature and starts with it
     */
    private static boolean hasPrefix(byte[] data, int... signature) {
        if (data.length < signature.length) {
            return false;
        }
        for (int i = 0; i < signature.length; i++) {
            if (data[i] != (byte) signature[i]) {
                return false;
            }
        }
        return true;
    }

    /**
     * Check whether a file name ends with the given extension, ignoring case.
     *
     * @param name File name
     * @param extension Extension including the leading dot
     * @return true if the name ends with the extension
     */
    private static boolean hasExtension(String name, String extension) {
        // regionMatches returns false for a negative offset, so names shorter than the extension are safe
        return name.regionMatches(true, name.length() - extension.length(), extension, 0, extension.length());
    }
}