mirror of
https://github.com/sismics/docs.git
synced 2024-11-22 14:07:55 +01:00
Closes #141: Never keep full file content in memory
This commit is contained in:
parent
4d161aea07
commit
244ddc7ce2
@ -1,6 +1,7 @@
|
||||
package com.sismics.docs.core.event;
|
||||
|
||||
import java.io.InputStream;
|
||||
import java.nio.file.Path;
|
||||
|
||||
import com.google.common.base.MoreObjects;
|
||||
import com.sismics.docs.core.model.jpa.File;
|
||||
@ -22,16 +23,16 @@ public class FileCreatedAsyncEvent extends UserEvent {
|
||||
private String language;
|
||||
|
||||
/**
|
||||
* Unencrypted input stream containing the file.
|
||||
* Unencrypted original file.
|
||||
*/
|
||||
private InputStream inputStream;
|
||||
private Path unencryptedFile;
|
||||
|
||||
/**
|
||||
* Unencrypted input stream containing a PDF representation
|
||||
* of the file. May be null if the PDF conversion is not
|
||||
* Unencrypted file containing PDF representation
|
||||
* of the original file. May be null if the PDF conversion is not
|
||||
* necessary or not possible.
|
||||
*/
|
||||
private InputStream pdfInputStream;
|
||||
private Path unencryptedPdfFile;
|
||||
|
||||
public File getFile() {
|
||||
return file;
|
||||
@ -48,21 +49,23 @@ public class FileCreatedAsyncEvent extends UserEvent {
|
||||
public void setLanguage(String language) {
|
||||
this.language = language;
|
||||
}
|
||||
|
||||
public InputStream getInputStream() {
|
||||
return inputStream;
|
||||
|
||||
public Path getUnencryptedFile() {
|
||||
return unencryptedFile;
|
||||
}
|
||||
|
||||
public void setInputStream(InputStream inputStream) {
|
||||
this.inputStream = inputStream;
|
||||
}
|
||||
|
||||
public InputStream getPdfInputStream() {
|
||||
return pdfInputStream;
|
||||
public FileCreatedAsyncEvent setUnencryptedFile(Path unencryptedFile) {
|
||||
this.unencryptedFile = unencryptedFile;
|
||||
return this;
|
||||
}
|
||||
|
||||
public void setPdfInputStream(InputStream pdfInputStream) {
|
||||
this.pdfInputStream = pdfInputStream;
|
||||
public Path getUnencryptedPdfFile() {
|
||||
return unencryptedPdfFile;
|
||||
}
|
||||
|
||||
public FileCreatedAsyncEvent setUnencryptedPdfFile(Path unencryptedPdfFile) {
|
||||
this.unencryptedPdfFile = unencryptedPdfFile;
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -0,0 +1,35 @@
|
||||
package com.sismics.docs.core.event;
|
||||
|
||||
import com.google.common.base.MoreObjects;
|
||||
import com.sismics.docs.core.model.jpa.File;
|
||||
|
||||
import java.io.InputStream;
|
||||
import java.nio.file.Path;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Cleanup temporary files event.
|
||||
*
|
||||
* @author bgamard
|
||||
*/
|
||||
public class TemporaryFileCleanupAsyncEvent {
|
||||
/**
|
||||
* Temporary files.
|
||||
*/
|
||||
private List<Path> fileList;
|
||||
|
||||
public TemporaryFileCleanupAsyncEvent(List<Path> fileList) {
|
||||
this.fileList = fileList;
|
||||
}
|
||||
|
||||
public List<Path> getFileList() {
|
||||
return fileList;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return MoreObjects.toStringHelper(this)
|
||||
.add("files", fileList)
|
||||
.toString();
|
||||
}
|
||||
}
|
@ -1,10 +1,5 @@
|
||||
package com.sismics.docs.core.listener.async;
|
||||
|
||||
import java.text.MessageFormat;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.google.common.eventbus.Subscribe;
|
||||
import com.sismics.docs.core.dao.jpa.FileDao;
|
||||
import com.sismics.docs.core.dao.lucene.LuceneDao;
|
||||
@ -12,6 +7,10 @@ import com.sismics.docs.core.event.FileCreatedAsyncEvent;
|
||||
import com.sismics.docs.core.model.jpa.File;
|
||||
import com.sismics.docs.core.util.FileUtil;
|
||||
import com.sismics.docs.core.util.TransactionUtil;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.text.MessageFormat;
|
||||
|
||||
/**
|
||||
* Listener on file created.
|
||||
@ -28,7 +27,7 @@ public class FileCreatedAsyncListener {
|
||||
* File created.
|
||||
*
|
||||
* @param fileCreatedAsyncEvent File created event
|
||||
* @throws Exception
|
||||
* @throws Exception e
|
||||
*/
|
||||
@Subscribe
|
||||
public void on(final FileCreatedAsyncEvent fileCreatedAsyncEvent) throws Exception {
|
||||
@ -42,11 +41,7 @@ public class FileCreatedAsyncListener {
|
||||
// Extract text content from the file
|
||||
long startTime = System.currentTimeMillis();
|
||||
final String content = FileUtil.extractContent(fileCreatedAsyncEvent.getLanguage(), file,
|
||||
fileCreatedAsyncEvent.getInputStream(), fileCreatedAsyncEvent.getPdfInputStream());
|
||||
fileCreatedAsyncEvent.getInputStream().close();
|
||||
if (fileCreatedAsyncEvent.getPdfInputStream() != null) {
|
||||
fileCreatedAsyncEvent.getPdfInputStream().close();
|
||||
}
|
||||
fileCreatedAsyncEvent.getUnencryptedFile(), fileCreatedAsyncEvent.getUnencryptedPdfFile());
|
||||
log.info(MessageFormat.format("File content extracted in {0}ms", System.currentTimeMillis() - startTime));
|
||||
|
||||
// Store the text content in the database
|
||||
|
@ -0,0 +1,38 @@
|
||||
package com.sismics.docs.core.listener.async;
|
||||
|
||||
import com.google.common.eventbus.Subscribe;
|
||||
import com.sismics.docs.core.event.TemporaryFileCleanupAsyncEvent;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
|
||||
/**
|
||||
* Listener to cleanup temporary files created during a request.
|
||||
*
|
||||
* @author bgamard
|
||||
*/
|
||||
public class TemporaryFileCleanupAsyncListener {
|
||||
/**
|
||||
* Logger.
|
||||
*/
|
||||
private static final Logger log = LoggerFactory.getLogger(FileCreatedAsyncListener.class);
|
||||
|
||||
/**
|
||||
* Cleanup temporary files.
|
||||
*
|
||||
* @param event Temporary file cleanup event
|
||||
* @throws Exception
|
||||
*/
|
||||
@Subscribe
|
||||
public void on(final TemporaryFileCleanupAsyncEvent event) throws Exception {
|
||||
if (log.isInfoEnabled()) {
|
||||
log.info("Cleanup temporary files event: " + event.toString());
|
||||
}
|
||||
|
||||
for (Path file : event.getFileList()) {
|
||||
Files.delete(file);
|
||||
}
|
||||
}
|
||||
}
|
@ -12,12 +12,8 @@ import com.google.common.eventbus.EventBus;
|
||||
import com.lowagie.text.FontFactory;
|
||||
import com.sismics.docs.core.constant.ConfigType;
|
||||
import com.sismics.docs.core.dao.jpa.ConfigDao;
|
||||
import com.sismics.docs.core.listener.async.DocumentCreatedAsyncListener;
|
||||
import com.sismics.docs.core.listener.async.DocumentDeletedAsyncListener;
|
||||
import com.sismics.docs.core.listener.async.DocumentUpdatedAsyncListener;
|
||||
import com.sismics.docs.core.listener.async.FileCreatedAsyncListener;
|
||||
import com.sismics.docs.core.listener.async.FileDeletedAsyncListener;
|
||||
import com.sismics.docs.core.listener.async.RebuildIndexAsyncListener;
|
||||
import com.sismics.docs.core.event.TemporaryFileCleanupAsyncEvent;
|
||||
import com.sismics.docs.core.listener.async.*;
|
||||
import com.sismics.docs.core.listener.sync.DeadEventListener;
|
||||
import com.sismics.docs.core.model.jpa.Config;
|
||||
import com.sismics.docs.core.service.IndexingService;
|
||||
@ -86,6 +82,7 @@ public class AppContext {
|
||||
asyncEventBus.register(new DocumentUpdatedAsyncListener());
|
||||
asyncEventBus.register(new DocumentDeletedAsyncListener());
|
||||
asyncEventBus.register(new RebuildIndexAsyncListener());
|
||||
asyncEventBus.register(new TemporaryFileCleanupAsyncListener());
|
||||
}
|
||||
|
||||
/**
|
||||
@ -132,6 +129,7 @@ public class AppContext {
|
||||
if (EnvironmentUtil.isUnitTest()) {
|
||||
return new EventBus();
|
||||
} else {
|
||||
// /!\ Don't add more threads because a cleanup event is fired at the end of each request
|
||||
ThreadPoolExecutor executor = new ThreadPoolExecutor(1, 1,
|
||||
0L, TimeUnit.MILLISECONDS,
|
||||
new LinkedBlockingQueue<Runnable>());
|
||||
|
@ -1,20 +1,22 @@
|
||||
package com.sismics.docs.core.util;
|
||||
|
||||
import java.io.InputStream;
|
||||
import java.math.BigInteger;
|
||||
import java.security.NoSuchAlgorithmException;
|
||||
import java.security.SecureRandom;
|
||||
import java.security.Security;
|
||||
import com.google.common.base.Strings;
|
||||
import com.sismics.util.context.ThreadLocalContext;
|
||||
import org.bouncycastle.jce.provider.BouncyCastleProvider;
|
||||
|
||||
import javax.crypto.Cipher;
|
||||
import javax.crypto.CipherInputStream;
|
||||
import javax.crypto.SecretKey;
|
||||
import javax.crypto.SecretKeyFactory;
|
||||
import javax.crypto.spec.PBEKeySpec;
|
||||
|
||||
import org.bouncycastle.jce.provider.BouncyCastleProvider;
|
||||
|
||||
import com.google.common.base.Strings;
|
||||
import java.io.InputStream;
|
||||
import java.math.BigInteger;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.StandardCopyOption;
|
||||
import java.security.NoSuchAlgorithmException;
|
||||
import java.security.SecureRandom;
|
||||
import java.security.Security;
|
||||
|
||||
/**
|
||||
* Encryption utilities.
|
||||
@ -55,7 +57,28 @@ public class EncryptionUtil {
|
||||
public static InputStream decryptInputStream(InputStream is, String privateKey) throws Exception {
|
||||
return new CipherInputStream(is, getCipher(privateKey, Cipher.DECRYPT_MODE));
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Decrypt a file to a temporary file using the specified private key.
|
||||
*
|
||||
* @param file Encrypted file
|
||||
* @param privateKey Private key
|
||||
* @return Decrypted temporary file
|
||||
* @throws Exception
|
||||
*/
|
||||
public static Path decryptFile(Path file, String privateKey) throws Exception {
|
||||
if (privateKey == null) {
|
||||
// For unit testing
|
||||
return file;
|
||||
}
|
||||
|
||||
Path tmpFile = ThreadLocalContext.get().createTemporaryFile();
|
||||
try (InputStream is = Files.newInputStream(file)) {
|
||||
Files.copy(new CipherInputStream(is, getCipher(privateKey, Cipher.DECRYPT_MODE)), tmpFile, StandardCopyOption.REPLACE_EXISTING);
|
||||
}
|
||||
return tmpFile;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return an encryption cipher.
|
||||
*
|
||||
|
@ -36,34 +36,34 @@ public class FileUtil {
|
||||
*
|
||||
* @param language Language to extract
|
||||
* @param file File to extract
|
||||
* @param inputStream Unencrypted input stream
|
||||
* @param pdfInputStream Unencrypted PDF input stream
|
||||
* @param unencryptedFile Unencrypted file
|
||||
* @param unencryptedPdfFile Unencrypted PDF file
|
||||
* @return Content extract
|
||||
*/
|
||||
public static String extractContent(String language, File file, InputStream inputStream, InputStream pdfInputStream) {
|
||||
public static String extractContent(String language, File file, Path unencryptedFile, Path unencryptedPdfFile) {
|
||||
String content = null;
|
||||
|
||||
if (ImageUtil.isImage(file.getMimeType())) {
|
||||
content = ocrFile(inputStream, language);
|
||||
} else if (pdfInputStream != null) {
|
||||
content = PdfUtil.extractPdf(pdfInputStream);
|
||||
content = ocrFile(unencryptedFile, language);
|
||||
} else if (unencryptedPdfFile != null) {
|
||||
content = PdfUtil.extractPdf(unencryptedPdfFile);
|
||||
}
|
||||
|
||||
return content;
|
||||
}
|
||||
|
||||
/**
|
||||
* Optical character recognition on a stream.
|
||||
* Optical character recognition on a file.
|
||||
*
|
||||
* @param inputStream Unencrypted input stream
|
||||
* @param unecryptedFile Unencrypted file
|
||||
* @param language Language to OCR
|
||||
* @return Content extracted
|
||||
*/
|
||||
private static String ocrFile(InputStream inputStream, String language) {
|
||||
private static String ocrFile(Path unecryptedFile, String language) {
|
||||
Tesseract instance = Tesseract.getInstance();
|
||||
String content = null;
|
||||
BufferedImage image;
|
||||
try {
|
||||
try (InputStream inputStream = Files.newInputStream(unecryptedFile)) {
|
||||
image = ImageIO.read(inputStream);
|
||||
} catch (IOException e) {
|
||||
log.error("Error reading the image", e);
|
||||
@ -90,38 +90,39 @@ public class FileUtil {
|
||||
/**
|
||||
* Save a file on the storage filesystem.
|
||||
*
|
||||
* @param inputStream Unencrypted input stream
|
||||
* @param pdfInputStream PDF input stream
|
||||
* @param unencryptedFile Unencrypted file
|
||||
* @param unencryptedPdfFile Unencrypted PDF file
|
||||
* @param file File to save
|
||||
* @param privateKey Private key used for encryption
|
||||
*/
|
||||
public static void save(InputStream inputStream, InputStream pdfInputStream, File file, String privateKey) throws Exception {
|
||||
public static void save(Path unencryptedFile, Path unencryptedPdfFile, File file, String privateKey) throws Exception {
|
||||
Cipher cipher = EncryptionUtil.getEncryptionCipher(privateKey);
|
||||
Path path = DirectoryUtil.getStorageDirectory().resolve(file.getId());
|
||||
Files.copy(new CipherInputStream(inputStream, cipher), path);
|
||||
inputStream.reset();
|
||||
|
||||
try (InputStream inputStream = Files.newInputStream(unencryptedFile)) {
|
||||
Files.copy(new CipherInputStream(inputStream, cipher), path);
|
||||
}
|
||||
|
||||
// Generate file variations
|
||||
saveVariations(file, inputStream, pdfInputStream, cipher);
|
||||
saveVariations(file, unencryptedFile, unencryptedPdfFile, cipher);
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate file variations.
|
||||
*
|
||||
* @param file File from database
|
||||
* @param inputStream Unencrypted input stream
|
||||
* @param pdfInputStream Unencrypted PDF input stream
|
||||
* @param unencryptedFile Unencrypted file
|
||||
* @param unencryptedPdfFile Unencrypted PDF file
|
||||
* @param cipher Cipher to use for encryption
|
||||
*/
|
||||
private static void saveVariations(File file, InputStream inputStream, InputStream pdfInputStream, Cipher cipher) throws Exception {
|
||||
private static void saveVariations(File file, Path unencryptedFile, Path unencryptedPdfFile, Cipher cipher) throws Exception {
|
||||
BufferedImage image = null;
|
||||
if (ImageUtil.isImage(file.getMimeType())) {
|
||||
image = ImageIO.read(inputStream);
|
||||
inputStream.reset();
|
||||
} else if(pdfInputStream != null) {
|
||||
try (InputStream inputStream = Files.newInputStream(unencryptedFile)) {
|
||||
image = ImageIO.read(inputStream);
|
||||
}
|
||||
} else if (unencryptedPdfFile != null) {
|
||||
// Generate preview from the first page of the PDF
|
||||
image = PdfUtil.renderFirstPage(pdfInputStream);
|
||||
pdfInputStream.reset();
|
||||
image = PdfUtil.renderFirstPage(unencryptedPdfFile);
|
||||
}
|
||||
|
||||
if (image != null) {
|
||||
|
@ -3,7 +3,6 @@ package com.sismics.docs.core.util;
|
||||
import com.google.common.base.Charsets;
|
||||
import com.google.common.base.Strings;
|
||||
import com.google.common.io.ByteStreams;
|
||||
import com.google.common.io.CharStreams;
|
||||
import com.google.common.io.Closer;
|
||||
import com.google.common.io.Resources;
|
||||
import com.lowagie.text.*;
|
||||
@ -12,6 +11,7 @@ import com.sismics.docs.core.dao.jpa.dto.DocumentDto;
|
||||
import com.sismics.docs.core.model.jpa.File;
|
||||
import com.sismics.docs.core.util.pdf.PdfPage;
|
||||
import com.sismics.util.ImageUtil;
|
||||
import com.sismics.util.context.ThreadLocalContext;
|
||||
import com.sismics.util.mime.MimeType;
|
||||
import org.apache.pdfbox.io.MemoryUsageSetting;
|
||||
import org.apache.pdfbox.multipdf.PDFMergerUtility;
|
||||
@ -34,7 +34,9 @@ import org.slf4j.LoggerFactory;
|
||||
|
||||
import javax.imageio.ImageIO;
|
||||
import java.awt.image.BufferedImage;
|
||||
import java.io.*;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStream;
|
||||
import java.net.URL;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
@ -56,13 +58,13 @@ public class PdfUtil {
|
||||
/**
|
||||
* Extract text from a PDF.
|
||||
*
|
||||
* @param inputStream Unencrypted input stream
|
||||
* @param unencryptedPdfFile Unencrypted PDF file
|
||||
* @return Content extracted
|
||||
*/
|
||||
public static String extractPdf(InputStream inputStream) {
|
||||
public static String extractPdf(Path unencryptedPdfFile) {
|
||||
String content = null;
|
||||
PDDocument pdfDocument = null;
|
||||
try {
|
||||
try (InputStream inputStream = Files.newInputStream(unencryptedPdfFile)) {
|
||||
PDFTextStripper stripper = new PDFTextStripper();
|
||||
pdfDocument = PDDocument.load(inputStream);
|
||||
content = stripper.getText(pdfDocument);
|
||||
@ -85,26 +87,25 @@ public class PdfUtil {
|
||||
* Convert a file to PDF if necessary.
|
||||
*
|
||||
* @param file File
|
||||
* @param inputStream InputStream
|
||||
* @param reset Reset the stream after usage
|
||||
* @return PDF input stream
|
||||
* @param unencryptedFile Unencrypted file
|
||||
* @return PDF temporary file
|
||||
*/
|
||||
public static InputStream convertToPdf(File file, InputStream inputStream, boolean reset) throws Exception {
|
||||
public static Path convertToPdf(File file, Path unencryptedFile) throws Exception {
|
||||
if (file.getMimeType().equals(MimeType.APPLICATION_PDF)) {
|
||||
// It's already PDF, just return the input
|
||||
return inputStream;
|
||||
// It's already PDF, just return the file
|
||||
return unencryptedFile;
|
||||
}
|
||||
|
||||
if (file.getMimeType().equals(MimeType.OFFICE_DOCUMENT)) {
|
||||
return convertOfficeDocument(inputStream, reset);
|
||||
return convertOfficeDocument(unencryptedFile);
|
||||
}
|
||||
|
||||
if (file.getMimeType().equals(MimeType.OPEN_DOCUMENT_TEXT)) {
|
||||
return convertOpenDocumentText(inputStream, reset);
|
||||
return convertOpenDocumentText(unencryptedFile);
|
||||
}
|
||||
|
||||
if (file.getMimeType().equals(MimeType.TEXT_PLAIN) || file.getMimeType().equals(MimeType.TEXT_CSV)) {
|
||||
return convertTextPlain(inputStream, reset);
|
||||
return convertTextPlain(unencryptedFile);
|
||||
}
|
||||
|
||||
// PDF conversion not necessary/possible
|
||||
@ -114,64 +115,58 @@ public class PdfUtil {
|
||||
/**
|
||||
* Convert a text plain document to PDF.
|
||||
*
|
||||
* @param inputStream Unencrypted input stream
|
||||
* @param reset Reset the stream after usage
|
||||
* @return PDF input stream
|
||||
* @param unencryptedFile Unencrypted file
|
||||
* @return PDF file
|
||||
*/
|
||||
private static InputStream convertTextPlain(InputStream inputStream, boolean reset) throws Exception {
|
||||
private static Path convertTextPlain(Path unencryptedFile) throws Exception {
|
||||
Document output = new Document(PageSize.A4, 40, 40, 40, 40);
|
||||
ByteArrayOutputStream pdfOutputStream = new ByteArrayOutputStream();
|
||||
Path tempFile = ThreadLocalContext.get().createTemporaryFile();
|
||||
OutputStream pdfOutputStream = Files.newOutputStream(tempFile);
|
||||
PdfWriter.getInstance(output, pdfOutputStream);
|
||||
|
||||
output.open();
|
||||
String content = CharStreams.toString(new InputStreamReader(inputStream, Charsets.UTF_8));
|
||||
String content = new String(Files.readAllBytes(unencryptedFile), Charsets.UTF_8);
|
||||
Font font = FontFactory.getFont("LiberationMono-Regular");
|
||||
Paragraph paragraph = new Paragraph(content, font);
|
||||
paragraph.setAlignment(Element.ALIGN_LEFT);
|
||||
output.add(paragraph);
|
||||
output.close();
|
||||
|
||||
if (reset) {
|
||||
inputStream.reset();
|
||||
}
|
||||
|
||||
return new ByteArrayInputStream(pdfOutputStream.toByteArray());
|
||||
return tempFile;
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert an open document text file to PDF.
|
||||
*
|
||||
* @param inputStream Unencrypted input stream
|
||||
* @param reset Reset the stream after usage
|
||||
* @return PDF input stream
|
||||
* @param unencryptedFile Unencrypted file
|
||||
* @return PDF file
|
||||
*/
|
||||
private static InputStream convertOpenDocumentText(InputStream inputStream, boolean reset) throws Exception {
|
||||
ByteArrayOutputStream pdfOutputStream = new ByteArrayOutputStream();
|
||||
OdfTextDocument document = OdfTextDocument.loadDocument(inputStream);
|
||||
PdfOptions options = PdfOptions.create();
|
||||
PdfConverter.getInstance().convert(document, pdfOutputStream, options);
|
||||
if (reset) {
|
||||
inputStream.reset();
|
||||
private static Path convertOpenDocumentText(Path unencryptedFile) throws Exception {
|
||||
Path tempFile = ThreadLocalContext.get().createTemporaryFile();
|
||||
try (InputStream inputStream = Files.newInputStream(unencryptedFile);
|
||||
OutputStream outputStream = Files.newOutputStream(tempFile)) {
|
||||
OdfTextDocument document = OdfTextDocument.loadDocument(inputStream);
|
||||
PdfOptions options = PdfOptions.create();
|
||||
PdfConverter.getInstance().convert(document, outputStream, options);
|
||||
}
|
||||
return new ByteArrayInputStream(pdfOutputStream.toByteArray());
|
||||
return tempFile;
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert an Office document to PDF.
|
||||
*
|
||||
* @param inputStream Unencrypted input stream
|
||||
* @param reset Reset the stream after usage
|
||||
* @return PDF input stream
|
||||
* @param unencryptedFile Unencrypted file
|
||||
* @return PDF file
|
||||
*/
|
||||
private static InputStream convertOfficeDocument(InputStream inputStream, boolean reset) throws Exception {
|
||||
ByteArrayOutputStream pdfOutputStream = new ByteArrayOutputStream();
|
||||
XWPFDocument document = new XWPFDocument(inputStream);
|
||||
org.apache.poi.xwpf.converter.pdf.PdfOptions options = org.apache.poi.xwpf.converter.pdf.PdfOptions.create();
|
||||
org.apache.poi.xwpf.converter.pdf.PdfConverter.getInstance().convert(document, pdfOutputStream, options);
|
||||
if (reset) {
|
||||
inputStream.reset();
|
||||
private static Path convertOfficeDocument(Path unencryptedFile) throws Exception {
|
||||
Path tempFile = ThreadLocalContext.get().createTemporaryFile();
|
||||
try (InputStream inputStream = Files.newInputStream(unencryptedFile);
|
||||
OutputStream outputStream = Files.newOutputStream(tempFile)) {
|
||||
XWPFDocument document = new XWPFDocument(inputStream);
|
||||
org.apache.poi.xwpf.converter.pdf.PdfOptions options = org.apache.poi.xwpf.converter.pdf.PdfOptions.create();
|
||||
org.apache.poi.xwpf.converter.pdf.PdfConverter.getInstance().convert(document, outputStream, options);
|
||||
}
|
||||
return new ByteArrayInputStream(pdfOutputStream.toByteArray());
|
||||
return tempFile;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -182,10 +177,10 @@ public class PdfUtil {
|
||||
* @param fitImageToPage Fit images to the page
|
||||
* @param metadata Add a page with metadata
|
||||
* @param margin Margins in millimeters
|
||||
* @return PDF input stream
|
||||
* @param outputStream Output stream to write to, will be closed
|
||||
*/
|
||||
public static InputStream convertToPdf(DocumentDto documentDto, List<File> fileList,
|
||||
boolean fitImageToPage, boolean metadata, int margin) throws Exception {
|
||||
public static void convertToPdf(DocumentDto documentDto, List<File> fileList,
|
||||
boolean fitImageToPage, boolean metadata, int margin, OutputStream outputStream) throws Exception {
|
||||
// Setup PDFBox
|
||||
Closer closer = Closer.create();
|
||||
MemoryUsageSetting memUsageSettings = MemoryUsageSetting.setupMixed(1000000); // 1MB max memory usage
|
||||
@ -240,80 +235,75 @@ public class PdfUtil {
|
||||
// Add files
|
||||
for (File file : fileList) {
|
||||
Path storedFile = DirectoryUtil.getStorageDirectory().resolve(file.getId());
|
||||
try (InputStream storedFileInputStream = file.getPrivateKey() == null ? // Try to decrypt the file if we have a private key available
|
||||
Files.newInputStream(storedFile) : EncryptionUtil.decryptInputStream(Files.newInputStream(storedFile), file.getPrivateKey())) {
|
||||
if (ImageUtil.isImage(file.getMimeType())) {
|
||||
PDPage page = new PDPage(PDRectangle.A4); // Images into A4 pages
|
||||
try (PDPageContentStream contentStream = new PDPageContentStream(doc, page)) {
|
||||
// Read the image using the correct handler. PDFBox can't do it because it relies wrongly on file extension
|
||||
PDImageXObject pdImage = null;
|
||||
if (file.getMimeType().equals(MimeType.IMAGE_JPEG)) {
|
||||
pdImage = JPEGFactory.createFromStream(doc, storedFileInputStream);
|
||||
} else if (file.getMimeType().equals(MimeType.IMAGE_GIF) || file.getMimeType().equals(MimeType.IMAGE_PNG)) {
|
||||
BufferedImage bim = ImageIO.read(storedFileInputStream);
|
||||
pdImage = LosslessFactory.createFromImage(doc, bim);
|
||||
}
|
||||
|
||||
// Do we want to fill the page with the image?
|
||||
if (fitImageToPage) {
|
||||
// Fill the page with the image
|
||||
float widthAvailable = page.getMediaBox().getWidth() - 2 * margin * mmPerInch;
|
||||
float heightAvailable = page.getMediaBox().getHeight() - 2 * margin * mmPerInch;
|
||||
|
||||
// Compare page format and image format
|
||||
if (widthAvailable / heightAvailable < (float) pdImage.getWidth() / (float) pdImage.getHeight()) {
|
||||
float imageHeight = widthAvailable / pdImage.getWidth() * pdImage.getHeight();
|
||||
contentStream.drawImage(pdImage, margin * mmPerInch, heightAvailable + margin * mmPerInch - imageHeight,
|
||||
widthAvailable, imageHeight);
|
||||
} else {
|
||||
float imageWidth = heightAvailable / pdImage.getHeight() * pdImage.getWidth();
|
||||
contentStream.drawImage(pdImage, margin * mmPerInch, margin * mmPerInch,
|
||||
imageWidth, heightAvailable);
|
||||
}
|
||||
|
||||
// Decrypt the file to a temporary file
|
||||
Path unencryptedFile = EncryptionUtil.decryptFile(storedFile, file.getPrivateKey());
|
||||
|
||||
if (ImageUtil.isImage(file.getMimeType())) {
|
||||
PDPage page = new PDPage(PDRectangle.A4); // Images into A4 pages
|
||||
try (PDPageContentStream contentStream = new PDPageContentStream(doc, page);
|
||||
InputStream storedFileInputStream = Files.newInputStream(unencryptedFile)) {
|
||||
// Read the image using the correct handler. PDFBox can't do it because it relies wrongly on file extension
|
||||
PDImageXObject pdImage = null;
|
||||
if (file.getMimeType().equals(MimeType.IMAGE_JPEG)) {
|
||||
pdImage = JPEGFactory.createFromStream(doc, storedFileInputStream);
|
||||
} else if (file.getMimeType().equals(MimeType.IMAGE_GIF) || file.getMimeType().equals(MimeType.IMAGE_PNG)) {
|
||||
BufferedImage bim = ImageIO.read(storedFileInputStream);
|
||||
pdImage = LosslessFactory.createFromImage(doc, bim);
|
||||
}
|
||||
|
||||
// Do we want to fill the page with the image?
|
||||
if (fitImageToPage) {
|
||||
// Fill the page with the image
|
||||
float widthAvailable = page.getMediaBox().getWidth() - 2 * margin * mmPerInch;
|
||||
float heightAvailable = page.getMediaBox().getHeight() - 2 * margin * mmPerInch;
|
||||
|
||||
// Compare page format and image format
|
||||
if (widthAvailable / heightAvailable < (float) pdImage.getWidth() / (float) pdImage.getHeight()) {
|
||||
float imageHeight = widthAvailable / pdImage.getWidth() * pdImage.getHeight();
|
||||
contentStream.drawImage(pdImage, margin * mmPerInch, heightAvailable + margin * mmPerInch - imageHeight,
|
||||
widthAvailable, imageHeight);
|
||||
} else {
|
||||
// Draw the image as is
|
||||
contentStream.drawImage(pdImage, margin * mmPerInch,
|
||||
page.getMediaBox().getHeight() - pdImage.getHeight() - margin * mmPerInch);
|
||||
float imageWidth = heightAvailable / pdImage.getHeight() * pdImage.getWidth();
|
||||
contentStream.drawImage(pdImage, margin * mmPerInch, margin * mmPerInch,
|
||||
imageWidth, heightAvailable);
|
||||
}
|
||||
} else {
|
||||
// Draw the image as is
|
||||
contentStream.drawImage(pdImage, margin * mmPerInch,
|
||||
page.getMediaBox().getHeight() - pdImage.getHeight() - margin * mmPerInch);
|
||||
}
|
||||
doc.addPage(page);
|
||||
} else {
|
||||
// Try to convert the file to PDF
|
||||
InputStream pdfInputStream = convertToPdf(file, storedFileInputStream, false);
|
||||
if (pdfInputStream != null) {
|
||||
// This file is convertible to PDF, just add it to the end
|
||||
try {
|
||||
PDDocument mergeDoc = PDDocument.load(pdfInputStream, memUsageSettings);
|
||||
closer.register(mergeDoc);
|
||||
PDFMergerUtility pdfMergerUtility = new PDFMergerUtility();
|
||||
pdfMergerUtility.appendDocument(doc, mergeDoc);
|
||||
} finally {
|
||||
pdfInputStream.close();
|
||||
}
|
||||
}
|
||||
|
||||
// All other non-PDF-convertible files are ignored
|
||||
}
|
||||
doc.addPage(page);
|
||||
} else {
|
||||
// Try to convert the file to PDF
|
||||
Path unencryptedPdfFile = convertToPdf(file, unencryptedFile);
|
||||
if (unencryptedPdfFile != null) {
|
||||
// This file is convertible to PDF, just add it to the end
|
||||
PDDocument mergeDoc = PDDocument.load(unencryptedPdfFile.toFile(), memUsageSettings);
|
||||
closer.register(mergeDoc);
|
||||
PDFMergerUtility pdfMergerUtility = new PDFMergerUtility();
|
||||
pdfMergerUtility.appendDocument(doc, mergeDoc);
|
||||
}
|
||||
|
||||
// All other non-PDF-convertible files are ignored
|
||||
}
|
||||
}
|
||||
|
||||
// Save to a temporary file
|
||||
try (TemporaryFileStream temporaryFileStream = new TemporaryFileStream()) {
|
||||
doc.save(temporaryFileStream.openWriteStream());
|
||||
closer.close(); // Close all remaining opened PDF
|
||||
return temporaryFileStream.openReadStream();
|
||||
}
|
||||
doc.save(outputStream); // Write to the output stream
|
||||
closer.close(); // Close all remaining opened PDF
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Render the first page of a PDF.
|
||||
*
|
||||
* @param inputStream PDF document
|
||||
* @param unencryptedFile PDF document
|
||||
* @return Render of the first page
|
||||
*/
|
||||
public static BufferedImage renderFirstPage(InputStream inputStream) throws IOException {
|
||||
try (PDDocument pdfDocument = PDDocument.load(inputStream)) {
|
||||
public static BufferedImage renderFirstPage(Path unencryptedFile) throws IOException {
|
||||
try (InputStream inputStream = Files.newInputStream(unencryptedFile);
|
||||
PDDocument pdfDocument = PDDocument.load(inputStream)) {
|
||||
PDFRenderer renderer = new PDFRenderer(pdfDocument);
|
||||
return renderer.renderImage(0);
|
||||
}
|
||||
|
@ -1,55 +0,0 @@
|
||||
package com.sismics.docs.core.util;
|
||||
|
||||
import java.io.Closeable;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStream;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.UUID;
|
||||
|
||||
/**
 * Utilities for writing and reading to a temporary file.
 *
 * The temporary file is created by the constructor and removed by {@link #close()}.
 *
 * @author bgamard
 */
public class TemporaryFileStream implements Closeable {
    /**
     * Temporary file.
     */
    private final Path tempFile;

    /**
     * Construct a temporary file.
     *
     * @throws IOException If the temporary file cannot be created
     */
    public TemporaryFileStream() throws IOException {
        tempFile = Files.createTempFile(UUID.randomUUID().toString(), ".tmp");
    }

    /**
     * Open a stream for writing.
     *
     * @return OutputStream on the temporary file, to be closed by the caller
     * @throws IOException If the stream cannot be opened
     */
    public OutputStream openWriteStream() throws IOException {
        return Files.newOutputStream(tempFile);
    }

    /**
     * Open a stream for reading.
     *
     * @return InputStream on the temporary file, to be closed by the caller
     * @throws IOException If the stream cannot be opened
     */
    public InputStream openReadStream() throws IOException {
        return Files.newInputStream(tempFile);
    }

    /**
     * Delete the temporary file.
     * Calling this method again once the file is gone has no effect.
     *
     * @throws IOException If the file exists but cannot be deleted
     */
    @Override
    public void close() throws IOException {
        // deleteIfExists keeps close() idempotent, as expected from a Closeable
        Files.deleteIfExists(tempFile);
    }
}
|
@ -1,9 +1,13 @@
|
||||
package com.sismics.util.context;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
import com.sismics.docs.core.event.TemporaryFileCleanupAsyncEvent;
|
||||
import com.sismics.docs.core.model.context.AppContext;
|
||||
|
||||
import javax.persistence.EntityManager;
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
@ -27,6 +31,12 @@ public class ThreadLocalContext {
|
||||
*/
|
||||
private List<Object> asyncEventList = Lists.newArrayList();
|
||||
|
||||
/**
|
||||
* List of temporary files created during this request.
|
||||
* They are deleted at the end of each request.
|
||||
*/
|
||||
private List<Path> temporaryFileList = Lists.newArrayList();
|
||||
|
||||
/**
|
||||
* Private constructor.
|
||||
*/
|
||||
@ -82,6 +92,17 @@ public class ThreadLocalContext {
|
||||
asyncEventList.add(asyncEvent);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a temporary file linked to the request.
|
||||
*
|
||||
* @return New temporary file
|
||||
*/
|
||||
public Path createTemporaryFile() throws IOException {
|
||||
Path path = Files.createTempFile("sismics_docs", null);
|
||||
temporaryFileList.add(path);
|
||||
return path;
|
||||
}
|
||||
|
||||
/**
|
||||
* Fire all pending async events.
|
||||
*/
|
||||
@ -89,5 +110,11 @@ public class ThreadLocalContext {
|
||||
for (Object asyncEvent : asyncEventList) {
|
||||
AppContext.getInstance().getAsyncEventBus().post(asyncEvent);
|
||||
}
|
||||
|
||||
if (!temporaryFileList.isEmpty()) {
|
||||
// Some files were created during this request, add a cleanup event to the queue
|
||||
// It works because we are using a single-threaded executor
|
||||
AppContext.getInstance().getAsyncEventBus().post(new TemporaryFileCleanupAsyncEvent(temporaryFileList));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -3,6 +3,10 @@ package com.sismics.util.mime;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.zip.ZipEntry;
|
||||
import java.util.zip.ZipInputStream;
|
||||
|
||||
import org.apache.commons.compress.archivers.ArchiveEntry;
|
||||
import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream;
|
||||
@ -20,17 +24,17 @@ public class MimeTypeUtil {
|
||||
/**
|
||||
* Try to guess the MIME type of a file by its magic number (header).
|
||||
*
|
||||
* @param is Stream to inspect
|
||||
* @param file File to inspect
|
||||
* @param name File name
|
||||
* @return MIME type
|
||||
* @throws IOException e
|
||||
*/
|
||||
public static String guessMimeType(InputStream is, String name) throws IOException {
|
||||
byte[] headerBytes = new byte[64];
|
||||
is.mark(headerBytes.length);
|
||||
is.read(headerBytes);
|
||||
is.reset();
|
||||
return guessMimeType(headerBytes, name);
|
||||
public static String guessMimeType(Path file, String name) throws IOException {
|
||||
try (InputStream is = Files.newInputStream(file)) {
|
||||
byte[] headerBytes = new byte[64];
|
||||
is.read(headerBytes);
|
||||
return guessMimeType(headerBytes, name);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@ -107,39 +111,38 @@ public class MimeTypeUtil {
|
||||
* are simple ZIP files on the outside and much bigger on the inside.
|
||||
*
|
||||
* @param file File
|
||||
* @param inputStream Input stream
|
||||
* @param unencryptedFile File on disk
|
||||
* @return MIME type
|
||||
*/
|
||||
public static String guessOpenDocumentFormat(File file, InputStream inputStream) {
|
||||
public static String guessOpenDocumentFormat(File file, Path unencryptedFile) {
|
||||
if (!MimeType.APPLICATION_ZIP.equals(file.getMimeType())) {
|
||||
// open document formats are ZIP files
|
||||
return file.getMimeType();
|
||||
}
|
||||
|
||||
String mimeType = file.getMimeType();
|
||||
try (ZipArchiveInputStream archiveInputStream = new ZipArchiveInputStream(inputStream, Charsets.ISO_8859_1.name())) {
|
||||
ArchiveEntry archiveEntry = archiveInputStream.getNextEntry();
|
||||
try (InputStream inputStream = Files.newInputStream(unencryptedFile);
|
||||
ZipInputStream zipInputStream = new ZipInputStream(inputStream, Charsets.ISO_8859_1)) {
|
||||
ZipEntry archiveEntry = zipInputStream.getNextEntry();
|
||||
while (archiveEntry != null) {
|
||||
if (archiveEntry.getName().equals("mimetype")) {
|
||||
// Maybe it's an ODT file
|
||||
String content = new String(IOUtils.toByteArray(archiveInputStream), Charsets.ISO_8859_1);
|
||||
String content = new String(IOUtils.toByteArray(zipInputStream), Charsets.ISO_8859_1);
|
||||
if (MimeType.OPEN_DOCUMENT_TEXT.equals(content.trim())) {
|
||||
mimeType = MimeType.OPEN_DOCUMENT_TEXT;
|
||||
break;
|
||||
}
|
||||
} else if (archiveEntry.getName().equals("[Content_Types].xml")) {
|
||||
// Maybe it's a DOCX file
|
||||
String content = new String(IOUtils.toByteArray(archiveInputStream), Charsets.ISO_8859_1);
|
||||
String content = new String(IOUtils.toByteArray(zipInputStream), Charsets.ISO_8859_1);
|
||||
if (content.contains(MimeType.OFFICE_DOCUMENT)) {
|
||||
mimeType = MimeType.OFFICE_DOCUMENT;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
archiveEntry = archiveInputStream.getNextEntry();
|
||||
archiveEntry = zipInputStream.getNextEntry();
|
||||
}
|
||||
|
||||
inputStream.reset();
|
||||
} catch (Exception e) {
|
||||
// In case of any error, just give up and keep the ZIP MIME type
|
||||
return file.getMimeType();
|
||||
|
@ -1,18 +1,20 @@
|
||||
package com.sismics.docs.core.util;
|
||||
|
||||
import java.io.InputStream;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.StandardCopyOption;
|
||||
import java.util.Date;
|
||||
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
import com.google.common.io.Resources;
|
||||
import com.sismics.docs.core.dao.jpa.dto.DocumentDto;
|
||||
import com.sismics.docs.core.model.jpa.File;
|
||||
import com.sismics.util.mime.MimeType;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.InputStream;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
import java.nio.file.StandardCopyOption;
|
||||
import java.util.Date;
|
||||
|
||||
/**
|
||||
* Test of the file entity utilities.
|
||||
@ -22,26 +24,22 @@ import com.sismics.util.mime.MimeType;
|
||||
public class TestFileUtil {
|
||||
@Test
|
||||
public void extractContentOpenDocumentTextTest() throws Exception {
|
||||
try (InputStream inputStream = Resources.getResource("file/document.odt").openStream()) {
|
||||
File file = new File();
|
||||
file.setMimeType(MimeType.OPEN_DOCUMENT_TEXT);
|
||||
try (InputStream pdfInputStream = PdfUtil.convertToPdf(file, inputStream, false)) {
|
||||
String content = FileUtil.extractContent(null, file, inputStream, pdfInputStream);
|
||||
Assert.assertTrue(content.contains("Lorem ipsum dolor sit amen."));
|
||||
}
|
||||
}
|
||||
Path path = Paths.get(ClassLoader.getSystemResource("file/document.odt").toURI());
|
||||
File file = new File();
|
||||
file.setMimeType(MimeType.OPEN_DOCUMENT_TEXT);
|
||||
Path pdfPath = PdfUtil.convertToPdf(file, path);
|
||||
String content = FileUtil.extractContent(null, file, path, pdfPath);
|
||||
Assert.assertTrue(content.contains("Lorem ipsum dolor sit amen."));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void extractContentOfficeDocumentTest() throws Exception {
|
||||
try (InputStream inputStream = Resources.getResource("file/document.docx").openStream()) {
|
||||
File file = new File();
|
||||
file.setMimeType(MimeType.OFFICE_DOCUMENT);
|
||||
try (InputStream pdfInputStream = PdfUtil.convertToPdf(file, inputStream, false)) {
|
||||
String content = FileUtil.extractContent(null, file, inputStream, pdfInputStream);
|
||||
Assert.assertTrue(content.contains("Lorem ipsum dolor sit amen."));
|
||||
}
|
||||
}
|
||||
Path path = Paths.get(ClassLoader.getSystemResource("file/document.docx").toURI());
|
||||
File file = new File();
|
||||
file.setMimeType(MimeType.OFFICE_DOCUMENT);
|
||||
Path pdfPath = PdfUtil.convertToPdf(file, path);
|
||||
String content = FileUtil.extractContent(null, file, path, pdfPath);
|
||||
Assert.assertTrue(content.contains("Lorem ipsum dolor sit amen."));
|
||||
}
|
||||
|
||||
@Test
|
||||
@ -97,8 +95,9 @@ public class TestFileUtil {
|
||||
file4.setId("document_odt");
|
||||
file4.setMimeType(MimeType.OPEN_DOCUMENT_TEXT);
|
||||
|
||||
InputStream is = PdfUtil.convertToPdf(documentDto, Lists.newArrayList(file0, file1, file2, file3, file4), true, true, 10);
|
||||
is.close();
|
||||
ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
|
||||
PdfUtil.convertToPdf(documentDto, Lists.newArrayList(file0, file1, file2, file3, file4), true, true, 10, outputStream);
|
||||
Assert.assertTrue(outputStream.toByteArray().length > 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1,16 +1,13 @@
|
||||
package com.sismics.util;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.InputStream;
|
||||
|
||||
import org.apache.commons.compress.utils.IOUtils;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
import com.google.common.io.Resources;
|
||||
import com.sismics.docs.core.model.jpa.File;
|
||||
import com.sismics.util.mime.MimeType;
|
||||
import com.sismics.util.mime.MimeTypeUtil;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
|
||||
/**
|
||||
* Test of the utilities to check MIME types.
|
||||
@ -18,23 +15,18 @@ import com.sismics.util.mime.MimeTypeUtil;
|
||||
* @author bgamard
|
||||
*/
|
||||
public class TestMimeTypeUtil {
|
||||
|
||||
@Test
|
||||
public void guessOpenDocumentFormatTest() throws Exception {
|
||||
// Detect ODT files
|
||||
try (InputStream inputStream = Resources.getResource("file/document.odt").openStream();
|
||||
InputStream byteArrayInputStream = new ByteArrayInputStream(IOUtils.toByteArray(inputStream))) {
|
||||
File file = new File();
|
||||
file.setMimeType(MimeType.APPLICATION_ZIP);
|
||||
Assert.assertEquals(MimeType.OPEN_DOCUMENT_TEXT, MimeTypeUtil.guessOpenDocumentFormat(file, byteArrayInputStream));
|
||||
}
|
||||
|
||||
Path path = Paths.get(ClassLoader.getSystemResource("file/document.odt").toURI());
|
||||
File file = new File();
|
||||
file.setMimeType(MimeType.APPLICATION_ZIP);
|
||||
Assert.assertEquals(MimeType.OPEN_DOCUMENT_TEXT, MimeTypeUtil.guessOpenDocumentFormat(file, path));
|
||||
|
||||
// Detect DOCX files
|
||||
try (InputStream inputStream = Resources.getResource("file/document.docx").openStream();
|
||||
InputStream byteArrayInputStream = new ByteArrayInputStream(IOUtils.toByteArray(inputStream))) {
|
||||
File file = new File();
|
||||
file.setMimeType(MimeType.APPLICATION_ZIP);
|
||||
Assert.assertEquals(MimeType.OFFICE_DOCUMENT, MimeTypeUtil.guessOpenDocumentFormat(file, byteArrayInputStream));
|
||||
}
|
||||
path = Paths.get(ClassLoader.getSystemResource("file/document.docx").toURI());
|
||||
file = new File();
|
||||
file.setMimeType(MimeType.APPLICATION_ZIP);
|
||||
Assert.assertEquals(MimeType.OFFICE_DOCUMENT, MimeTypeUtil.guessOpenDocumentFormat(file, path));
|
||||
}
|
||||
}
|
||||
|
@ -275,16 +275,10 @@ public class DocumentResource extends BaseResource {
|
||||
StreamingOutput stream = new StreamingOutput() {
|
||||
@Override
|
||||
public void write(OutputStream outputStream) throws IOException, WebApplicationException {
|
||||
try (InputStream inputStream = PdfUtil.convertToPdf(documentDto, fileList, fitImageToPage, metadata, margin)) {
|
||||
ByteStreams.copy(inputStream, outputStream);
|
||||
try {
|
||||
PdfUtil.convertToPdf(documentDto, fileList, fitImageToPage, metadata, margin, outputStream);
|
||||
} catch (Exception e) {
|
||||
throw new IOException(e);
|
||||
} finally {
|
||||
try {
|
||||
outputStream.close();
|
||||
} catch (IOException e) {
|
||||
// Ignore
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
@ -14,10 +14,7 @@ import com.sismics.docs.core.event.FileCreatedAsyncEvent;
|
||||
import com.sismics.docs.core.event.FileDeletedAsyncEvent;
|
||||
import com.sismics.docs.core.model.jpa.File;
|
||||
import com.sismics.docs.core.model.jpa.User;
|
||||
import com.sismics.docs.core.util.DirectoryUtil;
|
||||
import com.sismics.docs.core.util.EncryptionUtil;
|
||||
import com.sismics.docs.core.util.FileUtil;
|
||||
import com.sismics.docs.core.util.PdfUtil;
|
||||
import com.sismics.docs.core.util.*;
|
||||
import com.sismics.rest.exception.ClientException;
|
||||
import com.sismics.rest.exception.ForbiddenClientException;
|
||||
import com.sismics.rest.exception.ServerException;
|
||||
@ -37,13 +34,11 @@ import javax.ws.rs.core.MediaType;
|
||||
import javax.ws.rs.core.Response;
|
||||
import javax.ws.rs.core.Response.Status;
|
||||
import javax.ws.rs.core.StreamingOutput;
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStream;
|
||||
import java.io.*;
|
||||
import java.net.URISyntaxException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Paths;
|
||||
import java.nio.file.StandardCopyOption;
|
||||
import java.text.MessageFormat;
|
||||
import java.text.SimpleDateFormat;
|
||||
import java.util.Date;
|
||||
@ -114,27 +109,29 @@ public class FileResource extends BaseResource {
|
||||
}
|
||||
}
|
||||
|
||||
// Keep unencrypted data in memory, because we will need it two times
|
||||
byte[] fileData;
|
||||
// Keep the unencrypted data temporarily on disk, because we will need it twice
|
||||
java.nio.file.Path unencryptedFile;
|
||||
long fileSize;
|
||||
try {
|
||||
fileData = ByteStreams.toByteArray(fileBodyPart.getValueAs(InputStream.class));
|
||||
unencryptedFile = ThreadLocalContext.get().createTemporaryFile();
|
||||
Files.copy(fileBodyPart.getValueAs(InputStream.class), unencryptedFile, StandardCopyOption.REPLACE_EXISTING);
|
||||
fileSize = Files.size(unencryptedFile);
|
||||
} catch (IOException e) {
|
||||
throw new ServerException("StreamError", "Error reading the input file", e);
|
||||
}
|
||||
InputStream fileInputStream = new ByteArrayInputStream(fileData);
|
||||
|
||||
|
||||
// Validate mime type
|
||||
String name = fileBodyPart.getContentDisposition() != null ?
|
||||
fileBodyPart.getContentDisposition().getFileName() : null;
|
||||
String mimeType;
|
||||
try {
|
||||
mimeType = MimeTypeUtil.guessMimeType(fileInputStream, name);
|
||||
mimeType = MimeTypeUtil.guessMimeType(unencryptedFile, name);
|
||||
} catch (IOException e) {
|
||||
throw new ServerException("ErrorGuessMime", "Error guessing mime type", e);
|
||||
}
|
||||
|
||||
// Validate quota
|
||||
if (user.getStorageCurrent() + fileData.length > user.getStorageQuota()) {
|
||||
if (user.getStorageCurrent() + fileSize > user.getStorageQuota()) {
|
||||
throw new ClientException("QuotaReached", "Quota limit reached");
|
||||
}
|
||||
|
||||
@ -158,16 +155,16 @@ public class FileResource extends BaseResource {
|
||||
String fileId = fileDao.create(file, principal.getId());
|
||||
|
||||
// Guess the mime type a second time, for open document format (first detected as simple ZIP file)
|
||||
file.setMimeType(MimeTypeUtil.guessOpenDocumentFormat(file, fileInputStream));
|
||||
|
||||
file.setMimeType(MimeTypeUtil.guessOpenDocumentFormat(file, unencryptedFile));
|
||||
|
||||
// Convert to PDF if necessary (for thumbnail and text extraction)
|
||||
InputStream pdfIntputStream = PdfUtil.convertToPdf(file, fileInputStream, true);
|
||||
|
||||
java.nio.file.Path unencryptedPdfFile = PdfUtil.convertToPdf(file, unencryptedFile);
|
||||
|
||||
// Save the file
|
||||
FileUtil.save(fileInputStream, pdfIntputStream, file, user.getPrivateKey());
|
||||
FileUtil.save(unencryptedFile, unencryptedPdfFile, file, user.getPrivateKey());
|
||||
|
||||
// Update the user quota
|
||||
user.setStorageCurrent(user.getStorageCurrent() + fileData.length);
|
||||
user.setStorageCurrent(user.getStorageCurrent() + fileSize);
|
||||
userDao.updateQuota(user);
|
||||
|
||||
// Raise a new file created event and document updated event if we have a document
|
||||
@ -176,8 +173,8 @@ public class FileResource extends BaseResource {
|
||||
fileCreatedAsyncEvent.setUserId(principal.getId());
|
||||
fileCreatedAsyncEvent.setLanguage(documentDto.getLanguage());
|
||||
fileCreatedAsyncEvent.setFile(file);
|
||||
fileCreatedAsyncEvent.setInputStream(fileInputStream);
|
||||
fileCreatedAsyncEvent.setPdfInputStream(pdfIntputStream);
|
||||
fileCreatedAsyncEvent.setUnencryptedFile(unencryptedFile);
|
||||
fileCreatedAsyncEvent.setUnencryptedPdfFile(unencryptedPdfFile);
|
||||
ThreadLocalContext.get().addAsyncEvent(fileCreatedAsyncEvent);
|
||||
|
||||
DocumentUpdatedAsyncEvent documentUpdatedAsyncEvent = new DocumentUpdatedAsyncEvent();
|
||||
@ -190,7 +187,7 @@ public class FileResource extends BaseResource {
|
||||
JsonObjectBuilder response = Json.createObjectBuilder()
|
||||
.add("status", "ok")
|
||||
.add("id", fileId)
|
||||
.add("size", fileData.length);
|
||||
.add("size", fileSize);
|
||||
return Response.ok().entity(response.build()).build();
|
||||
} catch (Exception e) {
|
||||
throw new ServerException("FileError", "Error adding a file", e);
|
||||
@ -254,13 +251,13 @@ public class FileResource extends BaseResource {
|
||||
// Raise a new file created event and document updated event (it wasn't sent during file creation)
|
||||
try {
|
||||
java.nio.file.Path storedFile = DirectoryUtil.getStorageDirectory().resolve(id);
|
||||
InputStream fileInputStream = Files.newInputStream(storedFile);
|
||||
final InputStream responseInputStream = EncryptionUtil.decryptInputStream(fileInputStream, user.getPrivateKey());
|
||||
java.nio.file.Path unencryptedFile = EncryptionUtil.decryptFile(storedFile, user.getPrivateKey());
|
||||
FileCreatedAsyncEvent fileCreatedAsyncEvent = new FileCreatedAsyncEvent();
|
||||
fileCreatedAsyncEvent.setUserId(principal.getId());
|
||||
fileCreatedAsyncEvent.setLanguage(documentDto.getLanguage());
|
||||
fileCreatedAsyncEvent.setFile(file);
|
||||
fileCreatedAsyncEvent.setInputStream(responseInputStream);
|
||||
fileCreatedAsyncEvent.setUnencryptedFile(unencryptedFile);
|
||||
fileCreatedAsyncEvent.setUnencryptedPdfFile(PdfUtil.convertToPdf(file, unencryptedFile));
|
||||
ThreadLocalContext.get().addAsyncEvent(fileCreatedAsyncEvent);
|
||||
|
||||
DocumentUpdatedAsyncEvent documentUpdatedAsyncEvent = new DocumentUpdatedAsyncEvent();
|
||||
|
@ -332,7 +332,7 @@
|
||||
"filter": {
|
||||
"filesize": {
|
||||
"mb": "Mo",
|
||||
"kb": "Ko"
|
||||
"kb": "ko"
|
||||
}
|
||||
},
|
||||
"acl": {
|
||||
|
@ -1,10 +1,16 @@
|
||||
package com.sismics.docs.rest;
|
||||
|
||||
import java.io.BufferedInputStream;
|
||||
import java.io.InputStream;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.Date;
|
||||
import com.google.common.io.ByteStreams;
|
||||
import com.google.common.io.Resources;
|
||||
import com.sismics.docs.core.util.DirectoryUtil;
|
||||
import com.sismics.util.filter.TokenBasedSecurityFilter;
|
||||
import com.sismics.util.mime.MimeType;
|
||||
import com.sismics.util.mime.MimeTypeUtil;
|
||||
import org.glassfish.jersey.media.multipart.FormDataMultiPart;
|
||||
import org.glassfish.jersey.media.multipart.MultiPartFeature;
|
||||
import org.glassfish.jersey.media.multipart.file.StreamDataBodyPart;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
import javax.json.JsonArray;
|
||||
import javax.json.JsonObject;
|
||||
@ -13,19 +19,10 @@ import javax.ws.rs.core.Form;
|
||||
import javax.ws.rs.core.MediaType;
|
||||
import javax.ws.rs.core.Response;
|
||||
import javax.ws.rs.core.Response.Status;
|
||||
|
||||
import org.glassfish.jersey.media.multipart.FormDataMultiPart;
|
||||
import org.glassfish.jersey.media.multipart.MultiPartFeature;
|
||||
import org.glassfish.jersey.media.multipart.file.StreamDataBodyPart;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
import com.google.common.io.ByteStreams;
|
||||
import com.google.common.io.Resources;
|
||||
import com.sismics.docs.core.util.DirectoryUtil;
|
||||
import com.sismics.util.filter.TokenBasedSecurityFilter;
|
||||
import com.sismics.util.mime.MimeType;
|
||||
import com.sismics.util.mime.MimeTypeUtil;
|
||||
import java.io.InputStream;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.Date;
|
||||
|
||||
/**
|
||||
* Exhaustive test of the file resource.
|
||||
@ -123,10 +120,8 @@ public class TestFileResource extends BaseJerseyTest {
|
||||
|
||||
// Check that the files are not readable directly from FS
|
||||
Path storedFile = DirectoryUtil.getStorageDirectory().resolve(file1Id);
|
||||
try (InputStream storedFileInputStream = new BufferedInputStream(Files.newInputStream(storedFile))) {
|
||||
Assert.assertEquals(MimeType.DEFAULT, MimeTypeUtil.guessMimeType(storedFileInputStream, null));
|
||||
}
|
||||
|
||||
Assert.assertEquals(MimeType.DEFAULT, MimeTypeUtil.guessMimeType(storedFile, null));
|
||||
|
||||
// Get all files from a document
|
||||
json = target().path("/file/list")
|
||||
.queryParam("id", document1Id)
|
||||
|
Loading…
Reference in New Issue
Block a user