Closes #141: Never load full file content in memory

This commit is contained in:
Benjamin Gamard 2017-11-06 16:45:47 +01:00
parent 4d161aea07
commit 244ddc7ce2
17 changed files with 389 additions and 354 deletions

View File

@ -1,6 +1,7 @@
package com.sismics.docs.core.event; package com.sismics.docs.core.event;
import java.io.InputStream; import java.io.InputStream;
import java.nio.file.Path;
import com.google.common.base.MoreObjects; import com.google.common.base.MoreObjects;
import com.sismics.docs.core.model.jpa.File; import com.sismics.docs.core.model.jpa.File;
@ -22,16 +23,16 @@ public class FileCreatedAsyncEvent extends UserEvent {
private String language; private String language;
/** /**
* Unencrypted input stream containing the file. * Unencrypted original file.
*/ */
private InputStream inputStream; private Path unencryptedFile;
/** /**
* Unencrypted input stream containing a PDF representation * Unencrypted file containing PDF representation
* of the file. May be null if the PDF conversion is not * of the original file. May be null if the PDF conversion is not
* necessary or not possible. * necessary or not possible.
*/ */
private InputStream pdfInputStream; private Path unencryptedPdfFile;
public File getFile() { public File getFile() {
return file; return file;
@ -48,21 +49,23 @@ public class FileCreatedAsyncEvent extends UserEvent {
public void setLanguage(String language) { public void setLanguage(String language) {
this.language = language; this.language = language;
} }
public InputStream getInputStream() { public Path getUnencryptedFile() {
return inputStream; return unencryptedFile;
} }
public void setInputStream(InputStream inputStream) { public FileCreatedAsyncEvent setUnencryptedFile(Path unencryptedFile) {
this.inputStream = inputStream; this.unencryptedFile = unencryptedFile;
} return this;
public InputStream getPdfInputStream() {
return pdfInputStream;
} }
public void setPdfInputStream(InputStream pdfInputStream) { public Path getUnencryptedPdfFile() {
this.pdfInputStream = pdfInputStream; return unencryptedPdfFile;
}
public FileCreatedAsyncEvent setUnencryptedPdfFile(Path unencryptedPdfFile) {
this.unencryptedPdfFile = unencryptedPdfFile;
return this;
} }
@Override @Override

View File

@ -0,0 +1,35 @@
package com.sismics.docs.core.event;
import com.google.common.base.MoreObjects;
import com.sismics.docs.core.model.jpa.File;
import java.io.InputStream;
import java.nio.file.Path;
import java.util.List;
/**
 * Cleanup temporary files event.
 * Carries the paths of the temporary files that a listener has to delete.
 *
 * @author bgamard
 */
public class TemporaryFileCleanupAsyncEvent {
    /**
     * Temporary files.
     * Assigned once at construction; the list instance itself is the one given by the caller (no copy is made).
     */
    private final List<Path> fileList;

    /**
     * Create a cleanup event.
     *
     * @param fileList Temporary files to delete
     */
    public TemporaryFileCleanupAsyncEvent(List<Path> fileList) {
        this.fileList = fileList;
    }

    /**
     * Getter of fileList.
     *
     * @return Temporary files to delete, as passed to the constructor
     */
    public List<Path> getFileList() {
        return fileList;
    }

    @Override
    public String toString() {
        return MoreObjects.toStringHelper(this)
                .add("files", fileList)
                .toString();
    }
}

View File

@ -1,10 +1,5 @@
package com.sismics.docs.core.listener.async; package com.sismics.docs.core.listener.async;
import java.text.MessageFormat;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.eventbus.Subscribe; import com.google.common.eventbus.Subscribe;
import com.sismics.docs.core.dao.jpa.FileDao; import com.sismics.docs.core.dao.jpa.FileDao;
import com.sismics.docs.core.dao.lucene.LuceneDao; import com.sismics.docs.core.dao.lucene.LuceneDao;
@ -12,6 +7,10 @@ import com.sismics.docs.core.event.FileCreatedAsyncEvent;
import com.sismics.docs.core.model.jpa.File; import com.sismics.docs.core.model.jpa.File;
import com.sismics.docs.core.util.FileUtil; import com.sismics.docs.core.util.FileUtil;
import com.sismics.docs.core.util.TransactionUtil; import com.sismics.docs.core.util.TransactionUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.text.MessageFormat;
/** /**
* Listener on file created. * Listener on file created.
@ -28,7 +27,7 @@ public class FileCreatedAsyncListener {
* File created. * File created.
* *
* @param fileCreatedAsyncEvent File created event * @param fileCreatedAsyncEvent File created event
* @throws Exception * @throws Exception e
*/ */
@Subscribe @Subscribe
public void on(final FileCreatedAsyncEvent fileCreatedAsyncEvent) throws Exception { public void on(final FileCreatedAsyncEvent fileCreatedAsyncEvent) throws Exception {
@ -42,11 +41,7 @@ public class FileCreatedAsyncListener {
// Extract text content from the file // Extract text content from the file
long startTime = System.currentTimeMillis(); long startTime = System.currentTimeMillis();
final String content = FileUtil.extractContent(fileCreatedAsyncEvent.getLanguage(), file, final String content = FileUtil.extractContent(fileCreatedAsyncEvent.getLanguage(), file,
fileCreatedAsyncEvent.getInputStream(), fileCreatedAsyncEvent.getPdfInputStream()); fileCreatedAsyncEvent.getUnencryptedFile(), fileCreatedAsyncEvent.getUnencryptedPdfFile());
fileCreatedAsyncEvent.getInputStream().close();
if (fileCreatedAsyncEvent.getPdfInputStream() != null) {
fileCreatedAsyncEvent.getPdfInputStream().close();
}
log.info(MessageFormat.format("File content extracted in {0}ms", System.currentTimeMillis() - startTime)); log.info(MessageFormat.format("File content extracted in {0}ms", System.currentTimeMillis() - startTime));
// Store the text content in the database // Store the text content in the database

View File

@ -0,0 +1,38 @@
package com.sismics.docs.core.listener.async;
import com.google.common.eventbus.Subscribe;
import com.sismics.docs.core.event.TemporaryFileCleanupAsyncEvent;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
/**
 * Listener to cleanup temporary files created during a request.
 *
 * @author bgamard
 */
public class TemporaryFileCleanupAsyncListener {
    /**
     * Logger.
     */
    private static final Logger log = LoggerFactory.getLogger(TemporaryFileCleanupAsyncListener.class);

    /**
     * Cleanup temporary files.
     * Each file is deleted independently: a file that is already gone is ignored,
     * and a file that cannot be deleted is logged without preventing the deletion
     * of the remaining ones.
     *
     * @param event Temporary file cleanup event
     * @throws Exception e
     */
    @Subscribe
    public void on(final TemporaryFileCleanupAsyncEvent event) throws Exception {
        log.info("Cleanup temporary files event: {}", event);

        if (event.getFileList() == null) {
            // Nothing to cleanup
            return;
        }

        for (Path file : event.getFileList()) {
            try {
                // deleteIfExists: the file may have been moved or removed during the request
                Files.deleteIfExists(file);
            } catch (IOException e) {
                // Keep going, otherwise one undeletable file would leak all the following ones
                log.error("Unable to delete temporary file: " + file, e);
            }
        }
    }
}

View File

@ -12,12 +12,8 @@ import com.google.common.eventbus.EventBus;
import com.lowagie.text.FontFactory; import com.lowagie.text.FontFactory;
import com.sismics.docs.core.constant.ConfigType; import com.sismics.docs.core.constant.ConfigType;
import com.sismics.docs.core.dao.jpa.ConfigDao; import com.sismics.docs.core.dao.jpa.ConfigDao;
import com.sismics.docs.core.listener.async.DocumentCreatedAsyncListener; import com.sismics.docs.core.event.TemporaryFileCleanupAsyncEvent;
import com.sismics.docs.core.listener.async.DocumentDeletedAsyncListener; import com.sismics.docs.core.listener.async.*;
import com.sismics.docs.core.listener.async.DocumentUpdatedAsyncListener;
import com.sismics.docs.core.listener.async.FileCreatedAsyncListener;
import com.sismics.docs.core.listener.async.FileDeletedAsyncListener;
import com.sismics.docs.core.listener.async.RebuildIndexAsyncListener;
import com.sismics.docs.core.listener.sync.DeadEventListener; import com.sismics.docs.core.listener.sync.DeadEventListener;
import com.sismics.docs.core.model.jpa.Config; import com.sismics.docs.core.model.jpa.Config;
import com.sismics.docs.core.service.IndexingService; import com.sismics.docs.core.service.IndexingService;
@ -86,6 +82,7 @@ public class AppContext {
asyncEventBus.register(new DocumentUpdatedAsyncListener()); asyncEventBus.register(new DocumentUpdatedAsyncListener());
asyncEventBus.register(new DocumentDeletedAsyncListener()); asyncEventBus.register(new DocumentDeletedAsyncListener());
asyncEventBus.register(new RebuildIndexAsyncListener()); asyncEventBus.register(new RebuildIndexAsyncListener());
asyncEventBus.register(new TemporaryFileCleanupAsyncListener());
} }
/** /**
@ -132,6 +129,7 @@ public class AppContext {
if (EnvironmentUtil.isUnitTest()) { if (EnvironmentUtil.isUnitTest()) {
return new EventBus(); return new EventBus();
} else { } else {
// /!\ Don't add more threads because a cleanup event is fired at the end of each request
ThreadPoolExecutor executor = new ThreadPoolExecutor(1, 1, ThreadPoolExecutor executor = new ThreadPoolExecutor(1, 1,
0L, TimeUnit.MILLISECONDS, 0L, TimeUnit.MILLISECONDS,
new LinkedBlockingQueue<Runnable>()); new LinkedBlockingQueue<Runnable>());

View File

@ -1,20 +1,22 @@
package com.sismics.docs.core.util; package com.sismics.docs.core.util;
import java.io.InputStream; import com.google.common.base.Strings;
import java.math.BigInteger; import com.sismics.util.context.ThreadLocalContext;
import java.security.NoSuchAlgorithmException; import org.bouncycastle.jce.provider.BouncyCastleProvider;
import java.security.SecureRandom;
import java.security.Security;
import javax.crypto.Cipher; import javax.crypto.Cipher;
import javax.crypto.CipherInputStream; import javax.crypto.CipherInputStream;
import javax.crypto.SecretKey; import javax.crypto.SecretKey;
import javax.crypto.SecretKeyFactory; import javax.crypto.SecretKeyFactory;
import javax.crypto.spec.PBEKeySpec; import javax.crypto.spec.PBEKeySpec;
import java.io.InputStream;
import org.bouncycastle.jce.provider.BouncyCastleProvider; import java.math.BigInteger;
import java.nio.file.Files;
import com.google.common.base.Strings; import java.nio.file.Path;
import java.nio.file.StandardCopyOption;
import java.security.NoSuchAlgorithmException;
import java.security.SecureRandom;
import java.security.Security;
/** /**
* Encryption utilities. * Encryption utilities.
@ -55,7 +57,28 @@ public class EncryptionUtil {
public static InputStream decryptInputStream(InputStream is, String privateKey) throws Exception { public static InputStream decryptInputStream(InputStream is, String privateKey) throws Exception {
return new CipherInputStream(is, getCipher(privateKey, Cipher.DECRYPT_MODE)); return new CipherInputStream(is, getCipher(privateKey, Cipher.DECRYPT_MODE));
} }
/**
 * Decrypt a file into a new temporary file with the given private key.
 * The temporary file is obtained from the current ThreadLocalContext.
 * Without a private key, no decryption happens and the input path itself is returned.
 *
 * @param file Encrypted file
 * @param privateKey Private key, or null to skip decryption
 * @return Decrypted temporary file, or the input file if the private key is null
 * @throws Exception On cipher setup or I/O failure
 */
public static Path decryptFile(Path file, String privateKey) throws Exception {
    if (privateKey != null) {
        Path decryptedFile = ThreadLocalContext.get().createTemporaryFile();
        try (InputStream encryptedStream = Files.newInputStream(file)) {
            Cipher decryptCipher = getCipher(privateKey, Cipher.DECRYPT_MODE);
            InputStream clearStream = new CipherInputStream(encryptedStream, decryptCipher);
            // The temporary file already exists on disk, so it has to be overwritten
            Files.copy(clearStream, decryptedFile, StandardCopyOption.REPLACE_EXISTING);
        }
        return decryptedFile;
    }

    // For unit testing
    return file;
}
/** /**
* Return an encryption cipher. * Return an encryption cipher.
* *

View File

@ -36,34 +36,34 @@ public class FileUtil {
* *
* @param language Language to extract * @param language Language to extract
* @param file File to extract * @param file File to extract
* @param inputStream Unencrypted input stream * @param unencryptedFile Unencrypted file
* @param pdfInputStream Unencrypted PDF input stream * @param unencryptedPdfFile Unencrypted PDF file
* @return Content extract * @return Content extract
*/ */
public static String extractContent(String language, File file, InputStream inputStream, InputStream pdfInputStream) { public static String extractContent(String language, File file, Path unencryptedFile, Path unencryptedPdfFile) {
String content = null; String content = null;
if (ImageUtil.isImage(file.getMimeType())) { if (ImageUtil.isImage(file.getMimeType())) {
content = ocrFile(inputStream, language); content = ocrFile(unencryptedFile, language);
} else if (pdfInputStream != null) { } else if (unencryptedPdfFile != null) {
content = PdfUtil.extractPdf(pdfInputStream); content = PdfUtil.extractPdf(unencryptedPdfFile);
} }
return content; return content;
} }
/** /**
* Optical character recognition on a stream. * Optical character recognition on a file.
* *
* @param inputStream Unencrypted input stream * @param unecryptedFile Unencrypted file
* @param language Language to OCR * @param language Language to OCR
* @return Content extracted * @return Content extracted
*/ */
private static String ocrFile(InputStream inputStream, String language) { private static String ocrFile(Path unecryptedFile, String language) {
Tesseract instance = Tesseract.getInstance(); Tesseract instance = Tesseract.getInstance();
String content = null; String content = null;
BufferedImage image; BufferedImage image;
try { try (InputStream inputStream = Files.newInputStream(unecryptedFile)) {
image = ImageIO.read(inputStream); image = ImageIO.read(inputStream);
} catch (IOException e) { } catch (IOException e) {
log.error("Error reading the image", e); log.error("Error reading the image", e);
@ -90,38 +90,39 @@ public class FileUtil {
/** /**
* Save a file on the storage filesystem. * Save a file on the storage filesystem.
* *
* @param inputStream Unencrypted input stream * @param unencryptedFile Unencrypted file
* @param pdfInputStream PDF input stream * @param unencryptedPdfFile Unencrypted PDF file
* @param file File to save * @param file File to save
* @param privateKey Private key used for encryption * @param privateKey Private key used for encryption
*/ */
public static void save(InputStream inputStream, InputStream pdfInputStream, File file, String privateKey) throws Exception { public static void save(Path unencryptedFile, Path unencryptedPdfFile, File file, String privateKey) throws Exception {
Cipher cipher = EncryptionUtil.getEncryptionCipher(privateKey); Cipher cipher = EncryptionUtil.getEncryptionCipher(privateKey);
Path path = DirectoryUtil.getStorageDirectory().resolve(file.getId()); Path path = DirectoryUtil.getStorageDirectory().resolve(file.getId());
Files.copy(new CipherInputStream(inputStream, cipher), path); try (InputStream inputStream = Files.newInputStream(unencryptedFile)) {
inputStream.reset(); Files.copy(new CipherInputStream(inputStream, cipher), path);
}
// Generate file variations // Generate file variations
saveVariations(file, inputStream, pdfInputStream, cipher); saveVariations(file, unencryptedFile, unencryptedPdfFile, cipher);
} }
/** /**
* Generate file variations. * Generate file variations.
* *
* @param file File from database * @param file File from database
* @param inputStream Unencrypted input stream * @param unencryptedFile Unencrypted file
* @param pdfInputStream Unencrypted PDF input stream * @param unencryptedPdfFile Unencrypted PDF file
* @param cipher Cipher to use for encryption * @param cipher Cipher to use for encryption
*/ */
private static void saveVariations(File file, InputStream inputStream, InputStream pdfInputStream, Cipher cipher) throws Exception { private static void saveVariations(File file, Path unencryptedFile, Path unencryptedPdfFile, Cipher cipher) throws Exception {
BufferedImage image = null; BufferedImage image = null;
if (ImageUtil.isImage(file.getMimeType())) { if (ImageUtil.isImage(file.getMimeType())) {
image = ImageIO.read(inputStream); try (InputStream inputStream = Files.newInputStream(unencryptedFile)) {
inputStream.reset(); image = ImageIO.read(inputStream);
} else if(pdfInputStream != null) { }
} else if (unencryptedPdfFile != null) {
// Generate preview from the first page of the PDF // Generate preview from the first page of the PDF
image = PdfUtil.renderFirstPage(pdfInputStream); image = PdfUtil.renderFirstPage(unencryptedPdfFile);
pdfInputStream.reset();
} }
if (image != null) { if (image != null) {

View File

@ -3,7 +3,6 @@ package com.sismics.docs.core.util;
import com.google.common.base.Charsets; import com.google.common.base.Charsets;
import com.google.common.base.Strings; import com.google.common.base.Strings;
import com.google.common.io.ByteStreams; import com.google.common.io.ByteStreams;
import com.google.common.io.CharStreams;
import com.google.common.io.Closer; import com.google.common.io.Closer;
import com.google.common.io.Resources; import com.google.common.io.Resources;
import com.lowagie.text.*; import com.lowagie.text.*;
@ -12,6 +11,7 @@ import com.sismics.docs.core.dao.jpa.dto.DocumentDto;
import com.sismics.docs.core.model.jpa.File; import com.sismics.docs.core.model.jpa.File;
import com.sismics.docs.core.util.pdf.PdfPage; import com.sismics.docs.core.util.pdf.PdfPage;
import com.sismics.util.ImageUtil; import com.sismics.util.ImageUtil;
import com.sismics.util.context.ThreadLocalContext;
import com.sismics.util.mime.MimeType; import com.sismics.util.mime.MimeType;
import org.apache.pdfbox.io.MemoryUsageSetting; import org.apache.pdfbox.io.MemoryUsageSetting;
import org.apache.pdfbox.multipdf.PDFMergerUtility; import org.apache.pdfbox.multipdf.PDFMergerUtility;
@ -34,7 +34,9 @@ import org.slf4j.LoggerFactory;
import javax.imageio.ImageIO; import javax.imageio.ImageIO;
import java.awt.image.BufferedImage; import java.awt.image.BufferedImage;
import java.io.*; import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URL; import java.net.URL;
import java.nio.file.Files; import java.nio.file.Files;
import java.nio.file.Path; import java.nio.file.Path;
@ -56,13 +58,13 @@ public class PdfUtil {
/** /**
* Extract text from a PDF. * Extract text from a PDF.
* *
* @param inputStream Unencrypted input stream * @param unencryptedPdfFile Unencrypted PDF file
* @return Content extracted * @return Content extracted
*/ */
public static String extractPdf(InputStream inputStream) { public static String extractPdf(Path unencryptedPdfFile) {
String content = null; String content = null;
PDDocument pdfDocument = null; PDDocument pdfDocument = null;
try { try (InputStream inputStream = Files.newInputStream(unencryptedPdfFile)) {
PDFTextStripper stripper = new PDFTextStripper(); PDFTextStripper stripper = new PDFTextStripper();
pdfDocument = PDDocument.load(inputStream); pdfDocument = PDDocument.load(inputStream);
content = stripper.getText(pdfDocument); content = stripper.getText(pdfDocument);
@ -85,26 +87,25 @@ public class PdfUtil {
* Convert a file to PDF if necessary. * Convert a file to PDF if necessary.
* *
* @param file File * @param file File
* @param inputStream InputStream * @param unencryptedFile Unencrypted file
* @param reset Reset the stream after usage * @return PDF temporary file
* @return PDF input stream
*/ */
public static InputStream convertToPdf(File file, InputStream inputStream, boolean reset) throws Exception { public static Path convertToPdf(File file, Path unencryptedFile) throws Exception {
if (file.getMimeType().equals(MimeType.APPLICATION_PDF)) { if (file.getMimeType().equals(MimeType.APPLICATION_PDF)) {
// It's already PDF, just return the input // It's already PDF, just return the file
return inputStream; return unencryptedFile;
} }
if (file.getMimeType().equals(MimeType.OFFICE_DOCUMENT)) { if (file.getMimeType().equals(MimeType.OFFICE_DOCUMENT)) {
return convertOfficeDocument(inputStream, reset); return convertOfficeDocument(unencryptedFile);
} }
if (file.getMimeType().equals(MimeType.OPEN_DOCUMENT_TEXT)) { if (file.getMimeType().equals(MimeType.OPEN_DOCUMENT_TEXT)) {
return convertOpenDocumentText(inputStream, reset); return convertOpenDocumentText(unencryptedFile);
} }
if (file.getMimeType().equals(MimeType.TEXT_PLAIN) || file.getMimeType().equals(MimeType.TEXT_CSV)) { if (file.getMimeType().equals(MimeType.TEXT_PLAIN) || file.getMimeType().equals(MimeType.TEXT_CSV)) {
return convertTextPlain(inputStream, reset); return convertTextPlain(unencryptedFile);
} }
// PDF conversion not necessary/possible // PDF conversion not necessary/possible
@ -114,64 +115,58 @@ public class PdfUtil {
/** /**
* Convert a text plain document to PDF. * Convert a text plain document to PDF.
* *
* @param inputStream Unecnrypted input stream * @param unencryptedFile Unencrypted file
* @param reset Reset the stream after usage * @return PDF file
* @return PDF input stream
*/ */
private static InputStream convertTextPlain(InputStream inputStream, boolean reset) throws Exception { private static Path convertTextPlain(Path unencryptedFile) throws Exception {
Document output = new Document(PageSize.A4, 40, 40, 40, 40); Document output = new Document(PageSize.A4, 40, 40, 40, 40);
ByteArrayOutputStream pdfOutputStream = new ByteArrayOutputStream(); Path tempFile = ThreadLocalContext.get().createTemporaryFile();
OutputStream pdfOutputStream = Files.newOutputStream(tempFile);
PdfWriter.getInstance(output, pdfOutputStream); PdfWriter.getInstance(output, pdfOutputStream);
output.open(); output.open();
String content = CharStreams.toString(new InputStreamReader(inputStream, Charsets.UTF_8)); String content = new String(Files.readAllBytes(unencryptedFile), Charsets.UTF_8);
Font font = FontFactory.getFont("LiberationMono-Regular"); Font font = FontFactory.getFont("LiberationMono-Regular");
Paragraph paragraph = new Paragraph(content, font); Paragraph paragraph = new Paragraph(content, font);
paragraph.setAlignment(Element.ALIGN_LEFT); paragraph.setAlignment(Element.ALIGN_LEFT);
output.add(paragraph); output.add(paragraph);
output.close(); output.close();
if (reset) { return tempFile;
inputStream.reset();
}
return new ByteArrayInputStream(pdfOutputStream.toByteArray());
} }
/** /**
* Convert an open document text file to PDF. * Convert an open document text file to PDF.
* *
* @param inputStream Unencrypted input stream * @param unencryptedFile Unencrypted file
* @param reset Reset the stream after usage * @return PDF file
* @return PDF input stream
*/ */
private static InputStream convertOpenDocumentText(InputStream inputStream, boolean reset) throws Exception { private static Path convertOpenDocumentText(Path unencryptedFile) throws Exception {
ByteArrayOutputStream pdfOutputStream = new ByteArrayOutputStream(); Path tempFile = ThreadLocalContext.get().createTemporaryFile();
OdfTextDocument document = OdfTextDocument.loadDocument(inputStream); try (InputStream inputStream = Files.newInputStream(unencryptedFile);
PdfOptions options = PdfOptions.create(); OutputStream outputStream = Files.newOutputStream(tempFile)) {
PdfConverter.getInstance().convert(document, pdfOutputStream, options); OdfTextDocument document = OdfTextDocument.loadDocument(inputStream);
if (reset) { PdfOptions options = PdfOptions.create();
inputStream.reset(); PdfConverter.getInstance().convert(document, outputStream, options);
} }
return new ByteArrayInputStream(pdfOutputStream.toByteArray()); return tempFile;
} }
/** /**
* Convert an Office document to PDF. * Convert an Office document to PDF.
* *
* @param inputStream Unencrypted input stream * @param unencryptedFile Unencrypted file
* @param reset Reset the stream after usage * @return PDF file
* @return PDF input stream
*/ */
private static InputStream convertOfficeDocument(InputStream inputStream, boolean reset) throws Exception { private static Path convertOfficeDocument(Path unencryptedFile) throws Exception {
ByteArrayOutputStream pdfOutputStream = new ByteArrayOutputStream(); Path tempFile = ThreadLocalContext.get().createTemporaryFile();
XWPFDocument document = new XWPFDocument(inputStream); try (InputStream inputStream = Files.newInputStream(unencryptedFile);
org.apache.poi.xwpf.converter.pdf.PdfOptions options = org.apache.poi.xwpf.converter.pdf.PdfOptions.create(); OutputStream outputStream = Files.newOutputStream(tempFile)) {
org.apache.poi.xwpf.converter.pdf.PdfConverter.getInstance().convert(document, pdfOutputStream, options); XWPFDocument document = new XWPFDocument(inputStream);
if (reset) { org.apache.poi.xwpf.converter.pdf.PdfOptions options = org.apache.poi.xwpf.converter.pdf.PdfOptions.create();
inputStream.reset(); org.apache.poi.xwpf.converter.pdf.PdfConverter.getInstance().convert(document, outputStream, options);
} }
return new ByteArrayInputStream(pdfOutputStream.toByteArray()); return tempFile;
} }
/** /**
@ -182,10 +177,10 @@ public class PdfUtil {
* @param fitImageToPage Fit images to the page * @param fitImageToPage Fit images to the page
* @param metadata Add a page with metadata * @param metadata Add a page with metadata
* @param margin Margins in millimeters * @param margin Margins in millimeters
* @return PDF input stream * @param outputStream Output stream to write to, will be closed
*/ */
public static InputStream convertToPdf(DocumentDto documentDto, List<File> fileList, public static void convertToPdf(DocumentDto documentDto, List<File> fileList,
boolean fitImageToPage, boolean metadata, int margin) throws Exception { boolean fitImageToPage, boolean metadata, int margin, OutputStream outputStream) throws Exception {
// Setup PDFBox // Setup PDFBox
Closer closer = Closer.create(); Closer closer = Closer.create();
MemoryUsageSetting memUsageSettings = MemoryUsageSetting.setupMixed(1000000); // 1MB max memory usage MemoryUsageSetting memUsageSettings = MemoryUsageSetting.setupMixed(1000000); // 1MB max memory usage
@ -240,80 +235,75 @@ public class PdfUtil {
// Add files // Add files
for (File file : fileList) { for (File file : fileList) {
Path storedFile = DirectoryUtil.getStorageDirectory().resolve(file.getId()); Path storedFile = DirectoryUtil.getStorageDirectory().resolve(file.getId());
try (InputStream storedFileInputStream = file.getPrivateKey() == null ? // Try to decrypt the file if we have a private key available
Files.newInputStream(storedFile) : EncryptionUtil.decryptInputStream(Files.newInputStream(storedFile), file.getPrivateKey())) { // Decrypt the file to a temporary file
if (ImageUtil.isImage(file.getMimeType())) { Path unencryptedFile = EncryptionUtil.decryptFile(storedFile, file.getPrivateKey());
PDPage page = new PDPage(PDRectangle.A4); // Images into A4 pages
try (PDPageContentStream contentStream = new PDPageContentStream(doc, page)) { if (ImageUtil.isImage(file.getMimeType())) {
// Read the image using the correct handler. PDFBox can't do it because it relies wrongly on file extension PDPage page = new PDPage(PDRectangle.A4); // Images into A4 pages
PDImageXObject pdImage = null; try (PDPageContentStream contentStream = new PDPageContentStream(doc, page);
if (file.getMimeType().equals(MimeType.IMAGE_JPEG)) { InputStream storedFileInputStream = Files.newInputStream(unencryptedFile)) {
pdImage = JPEGFactory.createFromStream(doc, storedFileInputStream); // Read the image using the correct handler. PDFBox can't do it because it relies wrongly on file extension
} else if (file.getMimeType().equals(MimeType.IMAGE_GIF) || file.getMimeType().equals(MimeType.IMAGE_PNG)) { PDImageXObject pdImage = null;
BufferedImage bim = ImageIO.read(storedFileInputStream); if (file.getMimeType().equals(MimeType.IMAGE_JPEG)) {
pdImage = LosslessFactory.createFromImage(doc, bim); pdImage = JPEGFactory.createFromStream(doc, storedFileInputStream);
} } else if (file.getMimeType().equals(MimeType.IMAGE_GIF) || file.getMimeType().equals(MimeType.IMAGE_PNG)) {
BufferedImage bim = ImageIO.read(storedFileInputStream);
// Do we want to fill the page with the image? pdImage = LosslessFactory.createFromImage(doc, bim);
if (fitImageToPage) { }
// Fill the page with the image
float widthAvailable = page.getMediaBox().getWidth() - 2 * margin * mmPerInch; // Do we want to fill the page with the image?
float heightAvailable = page.getMediaBox().getHeight() - 2 * margin * mmPerInch; if (fitImageToPage) {
// Fill the page with the image
// Compare page format and image format float widthAvailable = page.getMediaBox().getWidth() - 2 * margin * mmPerInch;
if (widthAvailable / heightAvailable < (float) pdImage.getWidth() / (float) pdImage.getHeight()) { float heightAvailable = page.getMediaBox().getHeight() - 2 * margin * mmPerInch;
float imageHeight = widthAvailable / pdImage.getWidth() * pdImage.getHeight();
contentStream.drawImage(pdImage, margin * mmPerInch, heightAvailable + margin * mmPerInch - imageHeight, // Compare page format and image format
widthAvailable, imageHeight); if (widthAvailable / heightAvailable < (float) pdImage.getWidth() / (float) pdImage.getHeight()) {
} else { float imageHeight = widthAvailable / pdImage.getWidth() * pdImage.getHeight();
float imageWidth = heightAvailable / pdImage.getHeight() * pdImage.getWidth(); contentStream.drawImage(pdImage, margin * mmPerInch, heightAvailable + margin * mmPerInch - imageHeight,
contentStream.drawImage(pdImage, margin * mmPerInch, margin * mmPerInch, widthAvailable, imageHeight);
imageWidth, heightAvailable);
}
} else { } else {
// Draw the image as is float imageWidth = heightAvailable / pdImage.getHeight() * pdImage.getWidth();
contentStream.drawImage(pdImage, margin * mmPerInch, contentStream.drawImage(pdImage, margin * mmPerInch, margin * mmPerInch,
page.getMediaBox().getHeight() - pdImage.getHeight() - margin * mmPerInch); imageWidth, heightAvailable);
} }
} else {
// Draw the image as is
contentStream.drawImage(pdImage, margin * mmPerInch,
page.getMediaBox().getHeight() - pdImage.getHeight() - margin * mmPerInch);
} }
doc.addPage(page);
} else {
// Try to convert the file to PDF
InputStream pdfInputStream = convertToPdf(file, storedFileInputStream, false);
if (pdfInputStream != null) {
// This file is convertible to PDF, just add it to the end
try {
PDDocument mergeDoc = PDDocument.load(pdfInputStream, memUsageSettings);
closer.register(mergeDoc);
PDFMergerUtility pdfMergerUtility = new PDFMergerUtility();
pdfMergerUtility.appendDocument(doc, mergeDoc);
} finally {
pdfInputStream.close();
}
}
// All other non-PDF-convertible files are ignored
} }
doc.addPage(page);
} else {
// Try to convert the file to PDF
Path unencryptedPdfFile = convertToPdf(file, unencryptedFile);
if (unencryptedPdfFile != null) {
// This file is convertible to PDF, just add it to the end
PDDocument mergeDoc = PDDocument.load(unencryptedPdfFile.toFile(), memUsageSettings);
closer.register(mergeDoc);
PDFMergerUtility pdfMergerUtility = new PDFMergerUtility();
pdfMergerUtility.appendDocument(doc, mergeDoc);
}
// All other non-PDF-convertible files are ignored
} }
} }
// Save to a temporary file doc.save(outputStream); // Write to the output stream
try (TemporaryFileStream temporaryFileStream = new TemporaryFileStream()) { closer.close(); // Close all remaining opened PDF
doc.save(temporaryFileStream.openWriteStream());
closer.close(); // Close all remaining opened PDF
return temporaryFileStream.openReadStream();
}
} }
} }
/** /**
* Render the first page of a PDF. * Render the first page of a PDF.
* *
* @param inputStream PDF document * @param unencryptedFile PDF document
* @return Render of the first page * @return Render of the first page
*/ */
public static BufferedImage renderFirstPage(InputStream inputStream) throws IOException { public static BufferedImage renderFirstPage(Path unencryptedFile) throws IOException {
try (PDDocument pdfDocument = PDDocument.load(inputStream)) { try (InputStream inputStream = Files.newInputStream(unencryptedFile);
PDDocument pdfDocument = PDDocument.load(inputStream)) {
PDFRenderer renderer = new PDFRenderer(pdfDocument); PDFRenderer renderer = new PDFRenderer(pdfDocument);
return renderer.renderImage(0); return renderer.renderImage(0);
} }

View File

@ -1,55 +0,0 @@
package com.sismics.docs.core.util;
import java.io.Closeable;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.UUID;
/**
 * Utilities for writing and reading to a temporary file.
 * The temporary file is created by the constructor and deleted by {@link #close()}.
 * Streams returned by this class are not tracked: the caller has to close them.
 *
 * @author bgamard
 */
public class TemporaryFileStream implements Closeable {
    /**
     * Temporary file.
     */
    private final Path tempFile;

    /**
     * Construct a temporary file.
     *
     * @throws IOException If the temporary file cannot be created
     */
    public TemporaryFileStream() throws IOException {
        tempFile = Files.createTempFile(UUID.randomUUID().toString(), ".tmp");
    }

    /**
     * Open a stream for writing.
     *
     * @return OutputStream on the temporary file, to be closed by the caller
     * @throws IOException If the file cannot be opened for writing
     */
    public OutputStream openWriteStream() throws IOException {
        return Files.newOutputStream(tempFile);
    }

    /**
     * Open a stream for reading.
     *
     * @return InputStream on the temporary file, to be closed by the caller
     * @throws IOException If the file cannot be opened for reading
     */
    public InputStream openReadStream() throws IOException {
        return Files.newInputStream(tempFile);
    }

    /**
     * Delete the temporary file.
     * Calling this method more than once has no effect, as required by {@link Closeable}.
     *
     * @throws IOException If the file exists but cannot be deleted
     */
    @Override
    public void close() throws IOException {
        Files.deleteIfExists(tempFile);
    }
}

View File

@ -1,9 +1,13 @@
package com.sismics.util.context; package com.sismics.util.context;
import com.google.common.collect.Lists; import com.google.common.collect.Lists;
import com.sismics.docs.core.event.TemporaryFileCleanupAsyncEvent;
import com.sismics.docs.core.model.context.AppContext; import com.sismics.docs.core.model.context.AppContext;
import javax.persistence.EntityManager; import javax.persistence.EntityManager;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List; import java.util.List;
/** /**
@ -27,6 +31,12 @@ public class ThreadLocalContext {
*/ */
private List<Object> asyncEventList = Lists.newArrayList(); private List<Object> asyncEventList = Lists.newArrayList();
/**
* List of temporary files created during this request.
* They are deleted at the end of each request.
*/
private List<Path> temporaryFileList = Lists.newArrayList();
/** /**
* Private constructor. * Private constructor.
*/ */
@ -82,6 +92,17 @@ public class ThreadLocalContext {
asyncEventList.add(asyncEvent); asyncEventList.add(asyncEvent);
} }
/**
 * Create a new temporary file and register it with this context.
 * The file is recorded in the list of temporary files created during
 * the current request, so it can be cleaned up with the others.
 *
 * @return Path of the newly created temporary file
 * @throws IOException If the temporary file cannot be created
 */
public Path createTemporaryFile() throws IOException {
    final Path temporaryFile = Files.createTempFile("sismics_docs", null);
    // Keep track of the file so it is part of the end-of-request cleanup
    temporaryFileList.add(temporaryFile);
    return temporaryFile;
}
/** /**
* Fire all pending async events. * Fire all pending async events.
*/ */
@ -89,5 +110,11 @@ public class ThreadLocalContext {
for (Object asyncEvent : asyncEventList) { for (Object asyncEvent : asyncEventList) {
AppContext.getInstance().getAsyncEventBus().post(asyncEvent); AppContext.getInstance().getAsyncEventBus().post(asyncEvent);
} }
if (!temporaryFileList.isEmpty()) {
// Some files were created during this request, add a cleanup event to the queue
// It works because we are using a one thread executor
AppContext.getInstance().getAsyncEventBus().post(new TemporaryFileCleanupAsyncEvent(temporaryFileList));
}
} }
} }

View File

@ -3,6 +3,10 @@ package com.sismics.util.mime;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.io.UnsupportedEncodingException; import java.io.UnsupportedEncodingException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import org.apache.commons.compress.archivers.ArchiveEntry; import org.apache.commons.compress.archivers.ArchiveEntry;
import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream; import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream;
@ -20,17 +24,17 @@ public class MimeTypeUtil {
/** /**
* Try to guess the MIME type of a file by its magic number (header). * Try to guess the MIME type of a file by its magic number (header).
* *
* @param is Stream to inspect * @param file File to inspect
* @param name File name * @param name File name
* @return MIME type * @return MIME type
* @throws IOException e * @throws IOException e
*/ */
public static String guessMimeType(InputStream is, String name) throws IOException { public static String guessMimeType(Path file, String name) throws IOException {
byte[] headerBytes = new byte[64]; try (InputStream is = Files.newInputStream(file)) {
is.mark(headerBytes.length); byte[] headerBytes = new byte[64];
is.read(headerBytes); is.read(headerBytes);
is.reset(); return guessMimeType(headerBytes, name);
return guessMimeType(headerBytes, name); }
} }
/** /**
@ -107,39 +111,38 @@ public class MimeTypeUtil {
* are simple ZIP files on the outside and much bigger on the inside. * are simple ZIP files on the outside and much bigger on the inside.
* *
* @param file File * @param file File
* @param inputStream Input stream * @param unencryptedFile File on disk
* @return MIME type * @return MIME type
*/ */
public static String guessOpenDocumentFormat(File file, InputStream inputStream) { public static String guessOpenDocumentFormat(File file, Path unencryptedFile) {
if (!MimeType.APPLICATION_ZIP.equals(file.getMimeType())) { if (!MimeType.APPLICATION_ZIP.equals(file.getMimeType())) {
// open document formats are ZIP files // open document formats are ZIP files
return file.getMimeType(); return file.getMimeType();
} }
String mimeType = file.getMimeType(); String mimeType = file.getMimeType();
try (ZipArchiveInputStream archiveInputStream = new ZipArchiveInputStream(inputStream, Charsets.ISO_8859_1.name())) { try (InputStream inputStream = Files.newInputStream(unencryptedFile);
ArchiveEntry archiveEntry = archiveInputStream.getNextEntry(); ZipInputStream zipInputStream = new ZipInputStream(inputStream, Charsets.ISO_8859_1)) {
ZipEntry archiveEntry = zipInputStream.getNextEntry();
while (archiveEntry != null) { while (archiveEntry != null) {
if (archiveEntry.getName().equals("mimetype")) { if (archiveEntry.getName().equals("mimetype")) {
// Maybe it's an ODT file // Maybe it's an ODT file
String content = new String(IOUtils.toByteArray(archiveInputStream), Charsets.ISO_8859_1); String content = new String(IOUtils.toByteArray(zipInputStream), Charsets.ISO_8859_1);
if (MimeType.OPEN_DOCUMENT_TEXT.equals(content.trim())) { if (MimeType.OPEN_DOCUMENT_TEXT.equals(content.trim())) {
mimeType = MimeType.OPEN_DOCUMENT_TEXT; mimeType = MimeType.OPEN_DOCUMENT_TEXT;
break; break;
} }
} else if (archiveEntry.getName().equals("[Content_Types].xml")) { } else if (archiveEntry.getName().equals("[Content_Types].xml")) {
// Maybe it's a DOCX file // Maybe it's a DOCX file
String content = new String(IOUtils.toByteArray(archiveInputStream), Charsets.ISO_8859_1); String content = new String(IOUtils.toByteArray(zipInputStream), Charsets.ISO_8859_1);
if (content.contains(MimeType.OFFICE_DOCUMENT)) { if (content.contains(MimeType.OFFICE_DOCUMENT)) {
mimeType = MimeType.OFFICE_DOCUMENT; mimeType = MimeType.OFFICE_DOCUMENT;
break; break;
} }
} }
archiveEntry = archiveInputStream.getNextEntry(); archiveEntry = zipInputStream.getNextEntry();
} }
inputStream.reset();
} catch (Exception e) { } catch (Exception e) {
// In case of any error, just give up and keep the ZIP MIME type // In case of any error, just give up and keep the ZIP MIME type
return file.getMimeType(); return file.getMimeType();

View File

@ -1,18 +1,20 @@
package com.sismics.docs.core.util; package com.sismics.docs.core.util;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.StandardCopyOption;
import java.util.Date;
import org.junit.Assert;
import org.junit.Test;
import com.google.common.collect.Lists; import com.google.common.collect.Lists;
import com.google.common.io.Resources; import com.google.common.io.Resources;
import com.sismics.docs.core.dao.jpa.dto.DocumentDto; import com.sismics.docs.core.dao.jpa.dto.DocumentDto;
import com.sismics.docs.core.model.jpa.File; import com.sismics.docs.core.model.jpa.File;
import com.sismics.util.mime.MimeType; import com.sismics.util.mime.MimeType;
import org.junit.Assert;
import org.junit.Test;
import java.io.ByteArrayOutputStream;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardCopyOption;
import java.util.Date;
/** /**
* Test of the file entity utilities. * Test of the file entity utilities.
@ -22,26 +24,22 @@ import com.sismics.util.mime.MimeType;
public class TestFileUtil { public class TestFileUtil {
@Test @Test
public void extractContentOpenDocumentTextTest() throws Exception { public void extractContentOpenDocumentTextTest() throws Exception {
try (InputStream inputStream = Resources.getResource("file/document.odt").openStream()) { Path path = Paths.get(ClassLoader.getSystemResource("file/document.odt").toURI());
File file = new File(); File file = new File();
file.setMimeType(MimeType.OPEN_DOCUMENT_TEXT); file.setMimeType(MimeType.OPEN_DOCUMENT_TEXT);
try (InputStream pdfInputStream = PdfUtil.convertToPdf(file, inputStream, false)) { Path pdfPath = PdfUtil.convertToPdf(file, path);
String content = FileUtil.extractContent(null, file, inputStream, pdfInputStream); String content = FileUtil.extractContent(null, file, path, pdfPath);
Assert.assertTrue(content.contains("Lorem ipsum dolor sit amen.")); Assert.assertTrue(content.contains("Lorem ipsum dolor sit amen."));
}
}
} }
@Test @Test
public void extractContentOfficeDocumentTest() throws Exception { public void extractContentOfficeDocumentTest() throws Exception {
try (InputStream inputStream = Resources.getResource("file/document.docx").openStream()) { Path path = Paths.get(ClassLoader.getSystemResource("file/document.docx").toURI());
File file = new File(); File file = new File();
file.setMimeType(MimeType.OFFICE_DOCUMENT); file.setMimeType(MimeType.OFFICE_DOCUMENT);
try (InputStream pdfInputStream = PdfUtil.convertToPdf(file, inputStream, false)) { Path pdfPath = PdfUtil.convertToPdf(file, path);
String content = FileUtil.extractContent(null, file, inputStream, pdfInputStream); String content = FileUtil.extractContent(null, file, path, pdfPath);
Assert.assertTrue(content.contains("Lorem ipsum dolor sit amen.")); Assert.assertTrue(content.contains("Lorem ipsum dolor sit amen."));
}
}
} }
@Test @Test
@ -97,8 +95,9 @@ public class TestFileUtil {
file4.setId("document_odt"); file4.setId("document_odt");
file4.setMimeType(MimeType.OPEN_DOCUMENT_TEXT); file4.setMimeType(MimeType.OPEN_DOCUMENT_TEXT);
InputStream is = PdfUtil.convertToPdf(documentDto, Lists.newArrayList(file0, file1, file2, file3, file4), true, true, 10); ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
is.close(); PdfUtil.convertToPdf(documentDto, Lists.newArrayList(file0, file1, file2, file3, file4), true, true, 10, outputStream);
Assert.assertTrue(outputStream.toByteArray().length > 0);
} }
} }
} }

View File

@ -1,16 +1,13 @@
package com.sismics.util; package com.sismics.util;
import java.io.ByteArrayInputStream;
import java.io.InputStream;
import org.apache.commons.compress.utils.IOUtils;
import org.junit.Assert;
import org.junit.Test;
import com.google.common.io.Resources;
import com.sismics.docs.core.model.jpa.File; import com.sismics.docs.core.model.jpa.File;
import com.sismics.util.mime.MimeType; import com.sismics.util.mime.MimeType;
import com.sismics.util.mime.MimeTypeUtil; import com.sismics.util.mime.MimeTypeUtil;
import org.junit.Assert;
import org.junit.Test;
import java.nio.file.Path;
import java.nio.file.Paths;
/** /**
* Test of the utilities to check MIME types. * Test of the utilities to check MIME types.
@ -18,23 +15,18 @@ import com.sismics.util.mime.MimeTypeUtil;
* @author bgamard * @author bgamard
*/ */
public class TestMimeTypeUtil { public class TestMimeTypeUtil {
@Test @Test
public void guessOpenDocumentFormatTest() throws Exception { public void guessOpenDocumentFormatTest() throws Exception {
// Detect ODT files // Detect ODT files
try (InputStream inputStream = Resources.getResource("file/document.odt").openStream(); Path path = Paths.get(ClassLoader.getSystemResource("file/document.odt").toURI());
InputStream byteArrayInputStream = new ByteArrayInputStream(IOUtils.toByteArray(inputStream))) { File file = new File();
File file = new File(); file.setMimeType(MimeType.APPLICATION_ZIP);
file.setMimeType(MimeType.APPLICATION_ZIP); Assert.assertEquals(MimeType.OPEN_DOCUMENT_TEXT, MimeTypeUtil.guessOpenDocumentFormat(file, path));
Assert.assertEquals(MimeType.OPEN_DOCUMENT_TEXT, MimeTypeUtil.guessOpenDocumentFormat(file, byteArrayInputStream));
}
// Detect DOCX files // Detect DOCX files
try (InputStream inputStream = Resources.getResource("file/document.docx").openStream(); path = Paths.get(ClassLoader.getSystemResource("file/document.docx").toURI());
InputStream byteArrayInputStream = new ByteArrayInputStream(IOUtils.toByteArray(inputStream))) { file = new File();
File file = new File(); file.setMimeType(MimeType.APPLICATION_ZIP);
file.setMimeType(MimeType.APPLICATION_ZIP); Assert.assertEquals(MimeType.OFFICE_DOCUMENT, MimeTypeUtil.guessOpenDocumentFormat(file, path));
Assert.assertEquals(MimeType.OFFICE_DOCUMENT, MimeTypeUtil.guessOpenDocumentFormat(file, byteArrayInputStream));
}
} }
} }

View File

@ -275,16 +275,10 @@ public class DocumentResource extends BaseResource {
StreamingOutput stream = new StreamingOutput() { StreamingOutput stream = new StreamingOutput() {
@Override @Override
public void write(OutputStream outputStream) throws IOException, WebApplicationException { public void write(OutputStream outputStream) throws IOException, WebApplicationException {
try (InputStream inputStream = PdfUtil.convertToPdf(documentDto, fileList, fitImageToPage, metadata, margin)) { try {
ByteStreams.copy(inputStream, outputStream); PdfUtil.convertToPdf(documentDto, fileList, fitImageToPage, metadata, margin, outputStream);
} catch (Exception e) { } catch (Exception e) {
throw new IOException(e); throw new IOException(e);
} finally {
try {
outputStream.close();
} catch (IOException e) {
// Ignore
}
} }
} }
}; };

View File

@ -14,10 +14,7 @@ import com.sismics.docs.core.event.FileCreatedAsyncEvent;
import com.sismics.docs.core.event.FileDeletedAsyncEvent; import com.sismics.docs.core.event.FileDeletedAsyncEvent;
import com.sismics.docs.core.model.jpa.File; import com.sismics.docs.core.model.jpa.File;
import com.sismics.docs.core.model.jpa.User; import com.sismics.docs.core.model.jpa.User;
import com.sismics.docs.core.util.DirectoryUtil; import com.sismics.docs.core.util.*;
import com.sismics.docs.core.util.EncryptionUtil;
import com.sismics.docs.core.util.FileUtil;
import com.sismics.docs.core.util.PdfUtil;
import com.sismics.rest.exception.ClientException; import com.sismics.rest.exception.ClientException;
import com.sismics.rest.exception.ForbiddenClientException; import com.sismics.rest.exception.ForbiddenClientException;
import com.sismics.rest.exception.ServerException; import com.sismics.rest.exception.ServerException;
@ -37,13 +34,11 @@ import javax.ws.rs.core.MediaType;
import javax.ws.rs.core.Response; import javax.ws.rs.core.Response;
import javax.ws.rs.core.Response.Status; import javax.ws.rs.core.Response.Status;
import javax.ws.rs.core.StreamingOutput; import javax.ws.rs.core.StreamingOutput;
import java.io.ByteArrayInputStream; import java.io.*;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URISyntaxException; import java.net.URISyntaxException;
import java.nio.file.Files; import java.nio.file.Files;
import java.nio.file.Paths; import java.nio.file.Paths;
import java.nio.file.StandardCopyOption;
import java.text.MessageFormat; import java.text.MessageFormat;
import java.text.SimpleDateFormat; import java.text.SimpleDateFormat;
import java.util.Date; import java.util.Date;
@ -114,27 +109,29 @@ public class FileResource extends BaseResource {
} }
} }
// Keep unencrypted data in memory, because we will need it two times // Keep unencrypted data temporarily on disk, because we will need it two times
byte[] fileData; java.nio.file.Path unencryptedFile;
long fileSize;
try { try {
fileData = ByteStreams.toByteArray(fileBodyPart.getValueAs(InputStream.class)); unencryptedFile = ThreadLocalContext.get().createTemporaryFile();
Files.copy(fileBodyPart.getValueAs(InputStream.class), unencryptedFile, StandardCopyOption.REPLACE_EXISTING);
fileSize = Files.size(unencryptedFile);
} catch (IOException e) { } catch (IOException e) {
throw new ServerException("StreamError", "Error reading the input file", e); throw new ServerException("StreamError", "Error reading the input file", e);
} }
InputStream fileInputStream = new ByteArrayInputStream(fileData);
// Validate mime type // Validate mime type
String name = fileBodyPart.getContentDisposition() != null ? String name = fileBodyPart.getContentDisposition() != null ?
fileBodyPart.getContentDisposition().getFileName() : null; fileBodyPart.getContentDisposition().getFileName() : null;
String mimeType; String mimeType;
try { try {
mimeType = MimeTypeUtil.guessMimeType(fileInputStream, name); mimeType = MimeTypeUtil.guessMimeType(unencryptedFile, name);
} catch (IOException e) { } catch (IOException e) {
throw new ServerException("ErrorGuessMime", "Error guessing mime type", e); throw new ServerException("ErrorGuessMime", "Error guessing mime type", e);
} }
// Validate quota // Validate quota
if (user.getStorageCurrent() + fileData.length > user.getStorageQuota()) { if (user.getStorageCurrent() + fileSize > user.getStorageQuota()) {
throw new ClientException("QuotaReached", "Quota limit reached"); throw new ClientException("QuotaReached", "Quota limit reached");
} }
@ -158,16 +155,16 @@ public class FileResource extends BaseResource {
String fileId = fileDao.create(file, principal.getId()); String fileId = fileDao.create(file, principal.getId());
// Guess the mime type a second time, for open document format (first detected as simple ZIP file) // Guess the mime type a second time, for open document format (first detected as simple ZIP file)
file.setMimeType(MimeTypeUtil.guessOpenDocumentFormat(file, fileInputStream)); file.setMimeType(MimeTypeUtil.guessOpenDocumentFormat(file, unencryptedFile));
// Convert to PDF if necessary (for thumbnail and text extraction) // Convert to PDF if necessary (for thumbnail and text extraction)
InputStream pdfIntputStream = PdfUtil.convertToPdf(file, fileInputStream, true); java.nio.file.Path unencryptedPdfFile = PdfUtil.convertToPdf(file, unencryptedFile);
// Save the file // Save the file
FileUtil.save(fileInputStream, pdfIntputStream, file, user.getPrivateKey()); FileUtil.save(unencryptedFile, unencryptedPdfFile, file, user.getPrivateKey());
// Update the user quota // Update the user quota
user.setStorageCurrent(user.getStorageCurrent() + fileData.length); user.setStorageCurrent(user.getStorageCurrent() + fileSize);
userDao.updateQuota(user); userDao.updateQuota(user);
// Raise a new file created event and document updated event if we have a document // Raise a new file created event and document updated event if we have a document
@ -176,8 +173,8 @@ public class FileResource extends BaseResource {
fileCreatedAsyncEvent.setUserId(principal.getId()); fileCreatedAsyncEvent.setUserId(principal.getId());
fileCreatedAsyncEvent.setLanguage(documentDto.getLanguage()); fileCreatedAsyncEvent.setLanguage(documentDto.getLanguage());
fileCreatedAsyncEvent.setFile(file); fileCreatedAsyncEvent.setFile(file);
fileCreatedAsyncEvent.setInputStream(fileInputStream); fileCreatedAsyncEvent.setUnencryptedFile(unencryptedFile);
fileCreatedAsyncEvent.setPdfInputStream(pdfIntputStream); fileCreatedAsyncEvent.setUnencryptedPdfFile(unencryptedPdfFile);
ThreadLocalContext.get().addAsyncEvent(fileCreatedAsyncEvent); ThreadLocalContext.get().addAsyncEvent(fileCreatedAsyncEvent);
DocumentUpdatedAsyncEvent documentUpdatedAsyncEvent = new DocumentUpdatedAsyncEvent(); DocumentUpdatedAsyncEvent documentUpdatedAsyncEvent = new DocumentUpdatedAsyncEvent();
@ -190,7 +187,7 @@ public class FileResource extends BaseResource {
JsonObjectBuilder response = Json.createObjectBuilder() JsonObjectBuilder response = Json.createObjectBuilder()
.add("status", "ok") .add("status", "ok")
.add("id", fileId) .add("id", fileId)
.add("size", fileData.length); .add("size", fileSize);
return Response.ok().entity(response.build()).build(); return Response.ok().entity(response.build()).build();
} catch (Exception e) { } catch (Exception e) {
throw new ServerException("FileError", "Error adding a file", e); throw new ServerException("FileError", "Error adding a file", e);
@ -254,13 +251,13 @@ public class FileResource extends BaseResource {
// Raise a new file created event and document updated event (it wasn't sent during file creation) // Raise a new file created event and document updated event (it wasn't sent during file creation)
try { try {
java.nio.file.Path storedFile = DirectoryUtil.getStorageDirectory().resolve(id); java.nio.file.Path storedFile = DirectoryUtil.getStorageDirectory().resolve(id);
InputStream fileInputStream = Files.newInputStream(storedFile); java.nio.file.Path unencryptedFile = EncryptionUtil.decryptFile(storedFile, user.getPrivateKey());
final InputStream responseInputStream = EncryptionUtil.decryptInputStream(fileInputStream, user.getPrivateKey());
FileCreatedAsyncEvent fileCreatedAsyncEvent = new FileCreatedAsyncEvent(); FileCreatedAsyncEvent fileCreatedAsyncEvent = new FileCreatedAsyncEvent();
fileCreatedAsyncEvent.setUserId(principal.getId()); fileCreatedAsyncEvent.setUserId(principal.getId());
fileCreatedAsyncEvent.setLanguage(documentDto.getLanguage()); fileCreatedAsyncEvent.setLanguage(documentDto.getLanguage());
fileCreatedAsyncEvent.setFile(file); fileCreatedAsyncEvent.setFile(file);
fileCreatedAsyncEvent.setInputStream(responseInputStream); fileCreatedAsyncEvent.setUnencryptedFile(unencryptedFile);
fileCreatedAsyncEvent.setUnencryptedPdfFile(PdfUtil.convertToPdf(file, unencryptedFile));
ThreadLocalContext.get().addAsyncEvent(fileCreatedAsyncEvent); ThreadLocalContext.get().addAsyncEvent(fileCreatedAsyncEvent);
DocumentUpdatedAsyncEvent documentUpdatedAsyncEvent = new DocumentUpdatedAsyncEvent(); DocumentUpdatedAsyncEvent documentUpdatedAsyncEvent = new DocumentUpdatedAsyncEvent();

View File

@ -332,7 +332,7 @@
"filter": { "filter": {
"filesize": { "filesize": {
"mb": "Mo", "mb": "Mo",
"kb": "Ko" "kb": "ko"
} }
}, },
"acl": { "acl": {

View File

@ -1,10 +1,16 @@
package com.sismics.docs.rest; package com.sismics.docs.rest;
import java.io.BufferedInputStream; import com.google.common.io.ByteStreams;
import java.io.InputStream; import com.google.common.io.Resources;
import java.nio.file.Files; import com.sismics.docs.core.util.DirectoryUtil;
import java.nio.file.Path; import com.sismics.util.filter.TokenBasedSecurityFilter;
import java.util.Date; import com.sismics.util.mime.MimeType;
import com.sismics.util.mime.MimeTypeUtil;
import org.glassfish.jersey.media.multipart.FormDataMultiPart;
import org.glassfish.jersey.media.multipart.MultiPartFeature;
import org.glassfish.jersey.media.multipart.file.StreamDataBodyPart;
import org.junit.Assert;
import org.junit.Test;
import javax.json.JsonArray; import javax.json.JsonArray;
import javax.json.JsonObject; import javax.json.JsonObject;
@ -13,19 +19,10 @@ import javax.ws.rs.core.Form;
import javax.ws.rs.core.MediaType; import javax.ws.rs.core.MediaType;
import javax.ws.rs.core.Response; import javax.ws.rs.core.Response;
import javax.ws.rs.core.Response.Status; import javax.ws.rs.core.Response.Status;
import java.io.InputStream;
import org.glassfish.jersey.media.multipart.FormDataMultiPart; import java.nio.file.Files;
import org.glassfish.jersey.media.multipart.MultiPartFeature; import java.nio.file.Path;
import org.glassfish.jersey.media.multipart.file.StreamDataBodyPart; import java.util.Date;
import org.junit.Assert;
import org.junit.Test;
import com.google.common.io.ByteStreams;
import com.google.common.io.Resources;
import com.sismics.docs.core.util.DirectoryUtil;
import com.sismics.util.filter.TokenBasedSecurityFilter;
import com.sismics.util.mime.MimeType;
import com.sismics.util.mime.MimeTypeUtil;
/** /**
* Exhaustive test of the file resource. * Exhaustive test of the file resource.
@ -123,10 +120,8 @@ public class TestFileResource extends BaseJerseyTest {
// Check that the files are not readable directly from FS // Check that the files are not readable directly from FS
Path storedFile = DirectoryUtil.getStorageDirectory().resolve(file1Id); Path storedFile = DirectoryUtil.getStorageDirectory().resolve(file1Id);
try (InputStream storedFileInputStream = new BufferedInputStream(Files.newInputStream(storedFile))) { Assert.assertEquals(MimeType.DEFAULT, MimeTypeUtil.guessMimeType(storedFile, null));
Assert.assertEquals(MimeType.DEFAULT, MimeTypeUtil.guessMimeType(storedFileInputStream, null));
}
// Get all files from a document // Get all files from a document
json = target().path("/file/list") json = target().path("/file/list")
.queryParam("id", document1Id) .queryParam("id", document1Id)