diff --git a/docs-core/src/main/java/com/sismics/docs/core/model/jpa/File.java b/docs-core/src/main/java/com/sismics/docs/core/model/jpa/File.java index 401e1f7e..64682bc6 100644 --- a/docs-core/src/main/java/com/sismics/docs/core/model/jpa/File.java +++ b/docs-core/src/main/java/com/sismics/docs/core/model/jpa/File.java @@ -7,6 +7,7 @@ import javax.persistence.Entity; import javax.persistence.Id; import javax.persistence.Lob; import javax.persistence.Table; +import javax.persistence.Transient; import com.google.common.base.MoreObjects; @@ -69,150 +70,85 @@ public class File implements Loggable { private Integer order; /** - * Getter of id. - * - * @return the id + * Private key to decrypt the file. + * Not saved to database, of course. */ + @Transient + private String privateKey; + public String getId() { return id; } - /** - * Setter of id. - * - * @param id id - */ public void setId(String id) { this.id = id; } - /** - * Getter of documentId. - * - * @return the documentId - */ public String getDocumentId() { return documentId; } - /** - * Setter of documentId. - * - * @param documentId documentId - */ public void setDocumentId(String documentId) { this.documentId = documentId; } - - /** - * Getter of mimeType. - * - * @return the mimeType - */ + public String getMimeType() { return mimeType; } - /** - * Setter of mimeType. - * - * @param mimeType mimeType - */ public void setMimeType(String mimeType) { this.mimeType = mimeType; } - /** - * Getter of createDate. - * - * @return the createDate - */ public Date getCreateDate() { return createDate; } - /** - * Setter of createDate. - * - * @param createDate createDate - */ public void setCreateDate(Date createDate) { this.createDate = createDate; } - /** - * Getter of deleteDate. - * - * @return the deleteDate - */ @Override public Date getDeleteDate() { return deleteDate; } - /** - * Setter of deleteDate. - * - * @param deleteDate deleteDate - */ public void setDeleteDate(Date deleteDate) { this.deleteDate = deleteDate; } - /** - * Getter of content. - * - * @return the content - */ public String getContent() { return content; } - /** - * Setter of content. - * - * @param content content - */ public void setContent(String content) { this.content = content; } - /** - * Getter of order. - * - * @return the order - */ public Integer getOrder() { return order; } - /** - * Setter of order. - * - * @param order order - */ public void setOrder(Integer order) { this.order = order; } - /** - * Getter of userId. - * - * @return the userId - */ public String getUserId() { return userId; } - /** - * Setter of userId. - * - * @param userId userId - */ public void setUserId(String userId) { this.userId = userId; } + public String getPrivateKey() { + return privateKey; + } + + public void setPrivateKey(String privateKey) { + this.privateKey = privateKey; + } + @Override public String toString() { return MoreObjects.toStringHelper(this) diff --git a/docs-core/src/main/java/com/sismics/docs/core/util/FileUtil.java b/docs-core/src/main/java/com/sismics/docs/core/util/FileUtil.java index 59b30bec..82f9de31 100644 --- a/docs-core/src/main/java/com/sismics/docs/core/util/FileUtil.java +++ b/docs-core/src/main/java/com/sismics/docs/core/util/FileUtil.java @@ -8,13 +8,22 @@ import java.io.InputStream; import java.io.OutputStream; import java.nio.file.Files; import java.nio.file.Path; +import java.util.List; import javax.crypto.Cipher; import javax.crypto.CipherInputStream; import javax.crypto.CipherOutputStream; import javax.imageio.ImageIO; +import org.apache.pdfbox.io.MemoryUsageSetting; +import org.apache.pdfbox.multipdf.PDFMergerUtility; import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.PDPage; +import org.apache.pdfbox.pdmodel.PDPageContentStream; +import org.apache.pdfbox.pdmodel.common.PDRectangle; +import org.apache.pdfbox.pdmodel.graphics.image.JPEGFactory; +import org.apache.pdfbox.pdmodel.graphics.image.LosslessFactory; +import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject; import org.apache.pdfbox.rendering.PDFRenderer; import org.apache.pdfbox.text.PDFTextStripper; import org.apache.poi.xwpf.usermodel.XWPFDocument; @@ -27,6 +36,7 @@ import org.odftoolkit.odfdom.doc.OdfTextDocument; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.google.common.io.Closer; import com.sismics.docs.core.model.jpa.Document; import com.sismics.docs.core.model.jpa.File; import com.sismics.tess4j.Tesseract; @@ -130,23 +140,24 @@ public class FileUtil { /** * Convert a file to PDF if necessary. * - * @param inputStream InputStream * @param file File + * @param inputStream InputStream + * @param reset Reset the stream after usage * @return PDF input stream * @throws Exception */ - public static InputStream convertToPdf(InputStream inputStream, File file) throws Exception { + public static InputStream convertToPdf(File file, InputStream inputStream, boolean reset) throws Exception { if (file.getMimeType().equals(MimeType.APPLICATION_PDF)) { // It's already PDF, just return the input return inputStream; } if (file.getMimeType().equals(MimeType.OFFICE_DOCUMENT)) { - return convertOfficeDocument(inputStream); + return convertOfficeDocument(inputStream, reset); } if (file.getMimeType().equals(MimeType.OPEN_DOCUMENT_TEXT)) { - return convertOpenDocumentText(inputStream); + return convertOpenDocumentText(inputStream, reset); } // PDF conversion not necessary/possible @@ -157,15 +168,18 @@ public class FileUtil { * Convert an open document text file to PDF. * * @param inputStream Unencrypted input stream + * @param reset Reset the stream after usage * @return PDF input stream * @throws Exception */ - private static InputStream convertOpenDocumentText(InputStream inputStream) throws Exception { + private static InputStream convertOpenDocumentText(InputStream inputStream, boolean reset) throws Exception { ByteArrayOutputStream pdfOutputStream = new ByteArrayOutputStream(); OdfTextDocument document = OdfTextDocument.loadDocument(inputStream); PdfOptions options = PdfOptions.create(); PdfConverter.getInstance().convert(document, pdfOutputStream, options); - inputStream.reset(); + if (reset) { + inputStream.reset(); + } return new ByteArrayInputStream(pdfOutputStream.toByteArray()); } @@ -173,15 +187,18 @@ public class FileUtil { * Convert an Office document to PDF. * * @param inputStream Unencrypted input stream + * @param reset Reset the stream after usage * @return PDF input stream * @throws Exception */ - private static InputStream convertOfficeDocument(InputStream inputStream) throws Exception { + private static InputStream convertOfficeDocument(InputStream inputStream, boolean reset) throws Exception { ByteArrayOutputStream pdfOutputStream = new ByteArrayOutputStream(); XWPFDocument document = new XWPFDocument(inputStream); org.apache.poi.xwpf.converter.pdf.PdfOptions options = org.apache.poi.xwpf.converter.pdf.PdfOptions.create(); org.apache.poi.xwpf.converter.pdf.PdfConverter.getInstance().convert(document, pdfOutputStream, options); - inputStream.reset(); + if (reset) { + inputStream.reset(); + } return new ByteArrayInputStream(pdfOutputStream.toByteArray()); } @@ -272,4 +289,94 @@ public class FileUtil { Files.delete(thumbnailFile); } } + + /** + * Convert a document and its files to a merged PDF file. + * + * @param fileList List of files + * @param fitImageToPage Fill images to the page + * @param margin Margins in millimeters + * @return PDF input stream + * @throws IOException + */ + public static InputStream convertToPdf(List fileList, boolean fitImageToPage, int margin) throws Exception { + // TODO PDF Export: Option to add a front page with: + // document title, document description, creator, date created, language, + // list of all files (and information if it is in this document or not) + // TODO PDF Export: Option to add the comments + + // Create a blank PDF + Closer closer = Closer.create(); + MemoryUsageSetting memUsageSettings = MemoryUsageSetting.setupMixed(1000000); // 1MB max memory usage + memUsageSettings.setTempDir(new java.io.File(System.getProperty("java.io.tmpdir"))); // To OS temp + float mmPerInch = 1 / (10 * 2.54f) * 72f; + + try (PDDocument doc = new PDDocument(memUsageSettings)) { + // Add files + for (File file : fileList) { + Path storedFile = DirectoryUtil.getStorageDirectory().resolve(file.getId()); + try (InputStream storedFileInputStream = file.getPrivateKey() == null ? // Try to decrypt the file if we have a private key available + Files.newInputStream(storedFile) : EncryptionUtil.decryptInputStream(Files.newInputStream(storedFile), file.getPrivateKey())) { + if (ImageUtil.isImage(file.getMimeType())) { + PDPage page = new PDPage(PDRectangle.A4); // Images into A4 pages + try (PDPageContentStream contentStream = new PDPageContentStream(doc, page)) { + // Read the image using the correct handler. PDFBox can't do it because it relies wrongly on file extension + PDImageXObject pdImage = null; + if (file.getMimeType().equals(MimeType.IMAGE_JPEG)) { + pdImage = JPEGFactory.createFromStream(doc, storedFileInputStream); + } else if (file.getMimeType().equals(MimeType.IMAGE_GIF) || file.getMimeType().equals(MimeType.IMAGE_PNG)) { + BufferedImage bim = ImageIO.read(storedFileInputStream); + pdImage = LosslessFactory.createFromImage(doc, bim); + } + + if (fitImageToPage) { + // Fill the page with the image + float widthAvailable = page.getMediaBox().getWidth() - 2 * margin * mmPerInch; + float heightAvailable = page.getMediaBox().getHeight() - 2 * margin * mmPerInch; + + // Compare page format and image format + if (widthAvailable / heightAvailable < (float) pdImage.getWidth() / (float) pdImage.getHeight()) { + float imageHeight = widthAvailable / pdImage.getWidth() * pdImage.getHeight(); + contentStream.drawImage(pdImage, margin * mmPerInch, heightAvailable + margin * mmPerInch - imageHeight, + widthAvailable, imageHeight); + } else { + float imageWidth = heightAvailable / pdImage.getHeight() * pdImage.getWidth(); + contentStream.drawImage(pdImage, margin * mmPerInch, margin * mmPerInch, + imageWidth, heightAvailable); + } + } else { + // Draw the image as is + contentStream.drawImage(pdImage, margin * mmPerInch, + page.getMediaBox().getHeight() - pdImage.getHeight() - margin * mmPerInch); + } + } + doc.addPage(page); + } else { + // Try to convert the file to PDF + InputStream pdfInputStream = convertToPdf(file, storedFileInputStream, false); + if (pdfInputStream != null) { + // This file is convertible to PDF, just add it to the end + try { + PDDocument mergeDoc = PDDocument.load(pdfInputStream, memUsageSettings); + closer.register(mergeDoc); + PDFMergerUtility pdfMergerUtility = new PDFMergerUtility(); + pdfMergerUtility.appendDocument(doc, mergeDoc); + } finally { + pdfInputStream.close(); + } + } + + // All other non-PDF-convertible files are ignored + } + } + } + + // Save to a temporary file + try (TemporaryFileStream temporaryFileStream = new TemporaryFileStream()) { + doc.save(temporaryFileStream.openWriteStream()); + closer.close(); // Close all remaining opened PDF + return temporaryFileStream.openReadStream(); + } + } + } } diff --git a/docs-core/src/main/java/com/sismics/docs/core/util/TemporaryFileStream.java b/docs-core/src/main/java/com/sismics/docs/core/util/TemporaryFileStream.java new file mode 100644 index 00000000..9b357f05 --- /dev/null +++ b/docs-core/src/main/java/com/sismics/docs/core/util/TemporaryFileStream.java @@ -0,0 +1,55 @@ +package com.sismics.docs.core.util; + +import java.io.Closeable; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.UUID; + +/** + * Utilities for writing and reading to a temporary file. + * + * @author bgamard + */ +public class TemporaryFileStream implements Closeable { + /** + * Temporary file. + */ + private Path tempFile; + + /** + * Construct a temporary file. + * + * @throws IOException + */ + public TemporaryFileStream() throws IOException { + tempFile = Files.createTempFile(UUID.randomUUID().toString(), ".tmp"); + } + + /** + * Open a stream for writing. + * + * @return OutputStream + * @throws IOException + */ + public OutputStream openWriteStream() throws IOException { + return Files.newOutputStream(tempFile); + } + + /** + * Open a stream for reading. + * + * @return InputStream + * @throws IOException + */ + public InputStream openReadStream() throws IOException { + return Files.newInputStream(tempFile); + } + + @Override + public void close() throws IOException { + Files.delete(tempFile); + } +} \ No newline at end of file diff --git a/docs-core/src/main/java/com/sismics/util/mime/MimeType.java b/docs-core/src/main/java/com/sismics/util/mime/MimeType.java index e5821a62..b85517ef 100644 --- a/docs-core/src/main/java/com/sismics/util/mime/MimeType.java +++ b/docs-core/src/main/java/com/sismics/util/mime/MimeType.java @@ -6,9 +6,6 @@ package com.sismics.util.mime; * @author jtremeaux */ public class MimeType { - - public static final String IMAGE_X_ICON = "image/x-icon"; - public static final String IMAGE_PNG = "image/png"; public static final String IMAGE_JPEG = "image/jpeg"; diff --git a/docs-core/src/main/java/com/sismics/util/mime/MimeTypeUtil.java b/docs-core/src/main/java/com/sismics/util/mime/MimeTypeUtil.java index 058f7f7d..fcb3d2ec 100644 --- a/docs-core/src/main/java/com/sismics/util/mime/MimeTypeUtil.java +++ b/docs-core/src/main/java/com/sismics/util/mime/MimeTypeUtil.java @@ -55,8 +55,6 @@ public class MimeTypeUtil { } else if (headerBytes[0] == ((byte) 0x89) && headerBytes[1] == ((byte) 0x50) && headerBytes[2] == ((byte) 0x4e) && headerBytes[3] == ((byte) 0x47) && headerBytes[4] == ((byte) 0x0d) && headerBytes[5] == ((byte) 0x0a) && headerBytes[6] == ((byte) 0x1a) && headerBytes[7] == ((byte) 0x0a)) { return MimeType.IMAGE_PNG; - } else if (headerBytes[0] == ((byte) 0x00) && headerBytes[1] == ((byte) 0x00) && headerBytes[2] == ((byte) 0x01) && headerBytes[3] == ((byte) 0x00)) { - return MimeType.IMAGE_X_ICON; } else if (headerBytes[0] == ((byte) 0x25) && headerBytes[1] == ((byte) 0x50) && headerBytes[2] == ((byte) 0x44) && headerBytes[3] == ((byte) 0x46)) { return MimeType.APPLICATION_PDF; } @@ -80,8 +78,6 @@ public class MimeTypeUtil { return "jpg"; case MimeType.IMAGE_PNG: return "png"; - case MimeType.IMAGE_X_ICON: - return "ico"; case MimeType.APPLICATION_PDF: return "pdf"; case MimeType.OPEN_DOCUMENT_TEXT: diff --git a/docs-core/src/test/java/com/sismics/docs/core/util/TestEncryptUtil.java b/docs-core/src/test/java/com/sismics/docs/core/util/TestEncryptUtil.java index b6f54964..39477e5f 100644 --- a/docs-core/src/test/java/com/sismics/docs/core/util/TestEncryptUtil.java +++ b/docs-core/src/test/java/com/sismics/docs/core/util/TestEncryptUtil.java @@ -18,11 +18,6 @@ import com.google.common.io.ByteStreams; * @author bgamard */ public class TestEncryptUtil { - /** - * Test private key. - */ - String pk = "OnceUponATime"; - @Test public void generatePrivateKeyTest() throws Exception { String key = EncryptionUtil.generatePrivateKey(); @@ -38,7 +33,7 @@ public class TestEncryptUtil { } catch (IllegalArgumentException e) { // NOP } - Cipher cipher = EncryptionUtil.getEncryptionCipher(pk); + Cipher cipher = EncryptionUtil.getEncryptionCipher("OnceUponATime"); InputStream inputStream = new CipherInputStream(this.getClass().getResourceAsStream("/file/udhr.pdf"), cipher); byte[] encryptedData = ByteStreams.toByteArray(inputStream); byte[] assertData = ByteStreams.toByteArray(this.getClass().getResourceAsStream("/file/udhr_encrypted.pdf")); @@ -48,7 +43,8 @@ public class TestEncryptUtil { @Test public void decryptStreamTest() throws Exception { - InputStream inputStream = EncryptionUtil.decryptInputStream(this.getClass().getResourceAsStream("/file/udhr_encrypted.pdf"), pk); + InputStream inputStream = EncryptionUtil.decryptInputStream( + this.getClass().getResourceAsStream("/file/udhr_encrypted.pdf"), "OnceUponATime"); byte[] encryptedData = ByteStreams.toByteArray(inputStream); byte[] assertData = ByteStreams.toByteArray(this.getClass().getResourceAsStream("/file/udhr.pdf")); diff --git a/docs-core/src/test/java/com/sismics/docs/core/util/TestFileUtil.java b/docs-core/src/test/java/com/sismics/docs/core/util/TestFileUtil.java index 1482d88c..e8dff58a 100644 --- a/docs-core/src/test/java/com/sismics/docs/core/util/TestFileUtil.java +++ b/docs-core/src/test/java/com/sismics/docs/core/util/TestFileUtil.java @@ -1,13 +1,14 @@ package com.sismics.docs.core.util; -import java.io.ByteArrayInputStream; import java.io.InputStream; +import java.nio.file.Files; +import java.nio.file.StandardCopyOption; import junit.framework.Assert; -import org.apache.pdfbox.io.IOUtils; import org.junit.Test; +import com.google.common.collect.Lists; import com.google.common.io.Resources; import com.sismics.docs.core.model.jpa.File; import com.sismics.util.mime.MimeType; @@ -20,11 +21,10 @@ import com.sismics.util.mime.MimeType; public class TestFileUtil { @Test public void extractContentOpenDocumentTextTest() throws Exception { - try (InputStream inputStream = Resources.getResource("file/document.odt").openStream(); - InputStream bytesInputStream = new ByteArrayInputStream(IOUtils.toByteArray(inputStream))) { + try (InputStream inputStream = Resources.getResource("file/document.odt").openStream()) { File file = new File(); file.setMimeType(MimeType.OPEN_DOCUMENT_TEXT); - try (InputStream pdfInputStream = FileUtil.convertToPdf(bytesInputStream, file)) { + try (InputStream pdfInputStream = FileUtil.convertToPdf(file, inputStream, false)) { String content = FileUtil.extractContent(null, file, inputStream, pdfInputStream); Assert.assertTrue(content.contains("Lorem ipsum dolor sit amen.")); } @@ -33,14 +33,55 @@ public class TestFileUtil { @Test public void extractContentOfficeDocumentTest() throws Exception { - try (InputStream inputStream = Resources.getResource("file/document.docx").openStream(); - InputStream bytesInputStream = new ByteArrayInputStream(IOUtils.toByteArray(inputStream))) { + try (InputStream inputStream = Resources.getResource("file/document.docx").openStream()) { File file = new File(); file.setMimeType(MimeType.OFFICE_DOCUMENT); - try (InputStream pdfInputStream = FileUtil.convertToPdf(bytesInputStream, file)) { + try (InputStream pdfInputStream = FileUtil.convertToPdf(file, inputStream, false)) { String content = FileUtil.extractContent(null, file, inputStream, pdfInputStream); Assert.assertTrue(content.contains("Lorem ipsum dolor sit amen.")); } } } + + @Test + public void convertToPdfTest() throws Exception { + try (InputStream inputStream0 = Resources.getResource("file/apollo_landscape.jpg").openStream(); + InputStream inputStream1 = Resources.getResource("file/apollo_portrait.jpg").openStream(); + InputStream inputStream2 = Resources.getResource("file/udhr_encrypted.pdf").openStream(); + InputStream inputStream3 = Resources.getResource("file/document.docx").openStream(); + InputStream inputStream4 = Resources.getResource("file/document.odt").openStream()) { + // First file + Files.copy(inputStream0, DirectoryUtil.getStorageDirectory().resolve("apollo_landscape"), StandardCopyOption.REPLACE_EXISTING); + File file0 = new File(); + file0.setId("apollo_landscape"); + file0.setMimeType(MimeType.IMAGE_JPEG); + + // Second file + Files.copy(inputStream1, DirectoryUtil.getStorageDirectory().resolve("apollo_portrait"), StandardCopyOption.REPLACE_EXISTING); + File file1 = new File(); + file1.setId("apollo_portrait"); + file1.setMimeType(MimeType.IMAGE_JPEG); + + // Third file + Files.copy(inputStream2, DirectoryUtil.getStorageDirectory().resolve("udhr"), StandardCopyOption.REPLACE_EXISTING); + File file2 = new File(); + file2.setId("udhr"); + file2.setPrivateKey("OnceUponATime"); + file2.setMimeType(MimeType.APPLICATION_PDF); + + // Fourth file + Files.copy(inputStream3, DirectoryUtil.getStorageDirectory().resolve("document_docx"), StandardCopyOption.REPLACE_EXISTING); + File file3 = new File(); + file3.setId("document_docx"); + file3.setMimeType(MimeType.OFFICE_DOCUMENT); + + // Fifth file + Files.copy(inputStream4, DirectoryUtil.getStorageDirectory().resolve("document_odt"), StandardCopyOption.REPLACE_EXISTING); + File file4 = new File(); + file4.setId("document_odt"); + file4.setMimeType(MimeType.OPEN_DOCUMENT_TEXT); + + FileUtil.convertToPdf(Lists.newArrayList(file0, file1, file2, file3, file4), true, 10).close(); + } + } } diff --git a/docs-core/src/test/resources/file/apollo_landscape.jpg b/docs-core/src/test/resources/file/apollo_landscape.jpg new file mode 100644 index 00000000..aae2daf8 Binary files /dev/null and b/docs-core/src/test/resources/file/apollo_landscape.jpg differ diff --git a/docs-core/src/test/resources/file/apollo_portrait.jpg b/docs-core/src/test/resources/file/apollo_portrait.jpg new file mode 100644 index 00000000..51b0193b Binary files /dev/null and b/docs-core/src/test/resources/file/apollo_portrait.jpg differ diff --git a/docs-parent/pom.xml b/docs-parent/pom.xml index 40da3594..cf977d0b 100644 --- a/docs-parent/pom.xml +++ b/docs-parent/pom.xml @@ -30,7 +30,7 @@ 0.3m 4.2.0 4.2 - 2.0.0-RC1 + 2.0.0-RC2 1.53 2.9.1 4.1.0.Final diff --git a/docs-web/src/main/java/com/sismics/docs/rest/resource/FileResource.java b/docs-web/src/main/java/com/sismics/docs/rest/resource/FileResource.java index 4b37e080..539f79e2 100644 --- a/docs-web/src/main/java/com/sismics/docs/rest/resource/FileResource.java +++ b/docs-web/src/main/java/com/sismics/docs/rest/resource/FileResource.java @@ -150,7 +150,7 @@ public class FileResource extends BaseResource { file.setMimeType(MimeTypeUtil.guessOpenDocumentFormat(file, fileInputStream)); // Convert to PDF if necessary (for thumbnail and text extraction) - InputStream pdfIntputStream = FileUtil.convertToPdf(fileInputStream, file); + InputStream pdfIntputStream = FileUtil.convertToPdf(file, fileInputStream, true); // Save the file FileUtil.save(fileInputStream, pdfIntputStream, file, user.getPrivateKey());