mirror of
https://github.com/sismics/docs.git
synced 2024-11-22 14:07:55 +01:00
#55: Refactoring
This commit is contained in:
parent
eb61b06784
commit
0591f8a39f
@ -1,47 +1,27 @@
|
|||||||
package com.sismics.docs.core.util;
|
package com.sismics.docs.core.util;
|
||||||
|
|
||||||
import java.awt.image.BufferedImage;
|
import java.awt.image.BufferedImage;
|
||||||
import java.io.ByteArrayInputStream;
|
|
||||||
import java.io.ByteArrayOutputStream;
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
import java.io.OutputStream;
|
import java.io.OutputStream;
|
||||||
import java.nio.file.Files;
|
import java.nio.file.Files;
|
||||||
import java.nio.file.Path;
|
import java.nio.file.Path;
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
import javax.crypto.Cipher;
|
import javax.crypto.Cipher;
|
||||||
import javax.crypto.CipherInputStream;
|
import javax.crypto.CipherInputStream;
|
||||||
import javax.crypto.CipherOutputStream;
|
import javax.crypto.CipherOutputStream;
|
||||||
import javax.imageio.ImageIO;
|
import javax.imageio.ImageIO;
|
||||||
|
|
||||||
import org.apache.pdfbox.io.MemoryUsageSetting;
|
|
||||||
import org.apache.pdfbox.multipdf.PDFMergerUtility;
|
|
||||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
|
||||||
import org.apache.pdfbox.pdmodel.PDPage;
|
|
||||||
import org.apache.pdfbox.pdmodel.PDPageContentStream;
|
|
||||||
import org.apache.pdfbox.pdmodel.common.PDRectangle;
|
|
||||||
import org.apache.pdfbox.pdmodel.graphics.image.JPEGFactory;
|
|
||||||
import org.apache.pdfbox.pdmodel.graphics.image.LosslessFactory;
|
|
||||||
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
|
|
||||||
import org.apache.pdfbox.rendering.PDFRenderer;
|
|
||||||
import org.apache.pdfbox.text.PDFTextStripper;
|
|
||||||
import org.apache.poi.xwpf.usermodel.XWPFDocument;
|
|
||||||
import org.imgscalr.Scalr;
|
import org.imgscalr.Scalr;
|
||||||
import org.imgscalr.Scalr.Method;
|
import org.imgscalr.Scalr.Method;
|
||||||
import org.imgscalr.Scalr.Mode;
|
import org.imgscalr.Scalr.Mode;
|
||||||
import org.odftoolkit.odfdom.converter.pdf.PdfConverter;
|
|
||||||
import org.odftoolkit.odfdom.converter.pdf.PdfOptions;
|
|
||||||
import org.odftoolkit.odfdom.doc.OdfTextDocument;
|
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import com.google.common.io.Closer;
|
|
||||||
import com.sismics.docs.core.model.jpa.Document;
|
import com.sismics.docs.core.model.jpa.Document;
|
||||||
import com.sismics.docs.core.model.jpa.File;
|
import com.sismics.docs.core.model.jpa.File;
|
||||||
import com.sismics.tess4j.Tesseract;
|
import com.sismics.tess4j.Tesseract;
|
||||||
import com.sismics.util.ImageUtil;
|
import com.sismics.util.ImageUtil;
|
||||||
import com.sismics.util.mime.MimeType;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* File entity utilities.
|
* File entity utilities.
|
||||||
@ -69,7 +49,7 @@ public class FileUtil {
|
|||||||
if (ImageUtil.isImage(file.getMimeType())) {
|
if (ImageUtil.isImage(file.getMimeType())) {
|
||||||
content = ocrFile(inputStream, document);
|
content = ocrFile(inputStream, document);
|
||||||
} else if (pdfInputStream != null) {
|
} else if (pdfInputStream != null) {
|
||||||
content = extractPdf(pdfInputStream);
|
content = PdfUtil.extractPdf(pdfInputStream);
|
||||||
}
|
}
|
||||||
|
|
||||||
return content;
|
return content;
|
||||||
@ -109,99 +89,6 @@ public class FileUtil {
|
|||||||
return content;
|
return content;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Extract text from a PDF.
|
|
||||||
*
|
|
||||||
* @param inputStream Unencrypted input stream
|
|
||||||
* @return Content extracted
|
|
||||||
*/
|
|
||||||
private static String extractPdf(InputStream inputStream) {
|
|
||||||
String content = null;
|
|
||||||
PDDocument pdfDocument = null;
|
|
||||||
try {
|
|
||||||
PDFTextStripper stripper = new PDFTextStripper();
|
|
||||||
pdfDocument = PDDocument.load(inputStream);
|
|
||||||
content = stripper.getText(pdfDocument);
|
|
||||||
} catch (IOException e) {
|
|
||||||
log.error("Error while extracting text from the PDF", e);
|
|
||||||
} finally {
|
|
||||||
if (pdfDocument != null) {
|
|
||||||
try {
|
|
||||||
pdfDocument.close();
|
|
||||||
} catch (IOException e) {
|
|
||||||
// NOP
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return content;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Convert a file to PDF if necessary.
|
|
||||||
*
|
|
||||||
* @param file File
|
|
||||||
* @param inputStream InputStream
|
|
||||||
* @param reset Reset the stream after usage
|
|
||||||
* @return PDF input stream
|
|
||||||
* @throws Exception
|
|
||||||
*/
|
|
||||||
public static InputStream convertToPdf(File file, InputStream inputStream, boolean reset) throws Exception {
|
|
||||||
if (file.getMimeType().equals(MimeType.APPLICATION_PDF)) {
|
|
||||||
// It's already PDF, just return the input
|
|
||||||
return inputStream;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (file.getMimeType().equals(MimeType.OFFICE_DOCUMENT)) {
|
|
||||||
return convertOfficeDocument(inputStream, reset);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (file.getMimeType().equals(MimeType.OPEN_DOCUMENT_TEXT)) {
|
|
||||||
return convertOpenDocumentText(inputStream, reset);
|
|
||||||
}
|
|
||||||
|
|
||||||
// PDF conversion not necessary/possible
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Convert an open document text file to PDF.
|
|
||||||
*
|
|
||||||
* @param inputStream Unencrypted input stream
|
|
||||||
* @param reset Reset the stream after usage
|
|
||||||
* @return PDF input stream
|
|
||||||
* @throws Exception
|
|
||||||
*/
|
|
||||||
private static InputStream convertOpenDocumentText(InputStream inputStream, boolean reset) throws Exception {
|
|
||||||
ByteArrayOutputStream pdfOutputStream = new ByteArrayOutputStream();
|
|
||||||
OdfTextDocument document = OdfTextDocument.loadDocument(inputStream);
|
|
||||||
PdfOptions options = PdfOptions.create();
|
|
||||||
PdfConverter.getInstance().convert(document, pdfOutputStream, options);
|
|
||||||
if (reset) {
|
|
||||||
inputStream.reset();
|
|
||||||
}
|
|
||||||
return new ByteArrayInputStream(pdfOutputStream.toByteArray());
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Convert an Office document to PDF.
|
|
||||||
*
|
|
||||||
* @param inputStream Unencrypted input stream
|
|
||||||
* @param reset Reset the stream after usage
|
|
||||||
* @return PDF input stream
|
|
||||||
* @throws Exception
|
|
||||||
*/
|
|
||||||
private static InputStream convertOfficeDocument(InputStream inputStream, boolean reset) throws Exception {
|
|
||||||
ByteArrayOutputStream pdfOutputStream = new ByteArrayOutputStream();
|
|
||||||
XWPFDocument document = new XWPFDocument(inputStream);
|
|
||||||
org.apache.poi.xwpf.converter.pdf.PdfOptions options = org.apache.poi.xwpf.converter.pdf.PdfOptions.create();
|
|
||||||
org.apache.poi.xwpf.converter.pdf.PdfConverter.getInstance().convert(document, pdfOutputStream, options);
|
|
||||||
if (reset) {
|
|
||||||
inputStream.reset();
|
|
||||||
}
|
|
||||||
return new ByteArrayInputStream(pdfOutputStream.toByteArray());
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Save a file on the storage filesystem.
|
* Save a file on the storage filesystem.
|
||||||
*
|
*
|
||||||
@ -237,15 +124,8 @@ public class FileUtil {
|
|||||||
inputStream.reset();
|
inputStream.reset();
|
||||||
} else if(pdfInputStream != null) {
|
} else if(pdfInputStream != null) {
|
||||||
// Generate preview from the first page of the PDF
|
// Generate preview from the first page of the PDF
|
||||||
PDDocument pdfDocument = null;
|
image = PdfUtil.renderFirstPage(pdfInputStream);
|
||||||
try {
|
pdfInputStream.reset();
|
||||||
pdfDocument = PDDocument.load(pdfInputStream);
|
|
||||||
PDFRenderer renderer = new PDFRenderer(pdfDocument);
|
|
||||||
image = renderer.renderImage(0);
|
|
||||||
pdfInputStream.reset();
|
|
||||||
} finally {
|
|
||||||
pdfDocument.close();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (image != null) {
|
if (image != null) {
|
||||||
@ -289,94 +169,4 @@ public class FileUtil {
|
|||||||
Files.delete(thumbnailFile);
|
Files.delete(thumbnailFile);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Convert a document and its files to a merged PDF file.
|
|
||||||
*
|
|
||||||
* @param fileList List of files
|
|
||||||
* @param fitImageToPage Fill images to the page
|
|
||||||
* @param margin Margins in millimeters
|
|
||||||
* @return PDF input stream
|
|
||||||
* @throws IOException
|
|
||||||
*/
|
|
||||||
public static InputStream convertToPdf(List<File> fileList, boolean fitImageToPage, int margin) throws Exception {
|
|
||||||
// TODO PDF Export: Option to add a front page with:
|
|
||||||
// document title, document description, creator, date created, language,
|
|
||||||
// list of all files (and information if it is in this document or not)
|
|
||||||
// TODO PDF Export: Option to add the comments
|
|
||||||
|
|
||||||
// Create a blank PDF
|
|
||||||
Closer closer = Closer.create();
|
|
||||||
MemoryUsageSetting memUsageSettings = MemoryUsageSetting.setupMixed(1000000); // 1MB max memory usage
|
|
||||||
memUsageSettings.setTempDir(new java.io.File(System.getProperty("java.io.tmpdir"))); // To OS temp
|
|
||||||
float mmPerInch = 1 / (10 * 2.54f) * 72f;
|
|
||||||
|
|
||||||
try (PDDocument doc = new PDDocument(memUsageSettings)) {
|
|
||||||
// Add files
|
|
||||||
for (File file : fileList) {
|
|
||||||
Path storedFile = DirectoryUtil.getStorageDirectory().resolve(file.getId());
|
|
||||||
try (InputStream storedFileInputStream = file.getPrivateKey() == null ? // Try to decrypt the file if we have a private key available
|
|
||||||
Files.newInputStream(storedFile) : EncryptionUtil.decryptInputStream(Files.newInputStream(storedFile), file.getPrivateKey())) {
|
|
||||||
if (ImageUtil.isImage(file.getMimeType())) {
|
|
||||||
PDPage page = new PDPage(PDRectangle.A4); // Images into A4 pages
|
|
||||||
try (PDPageContentStream contentStream = new PDPageContentStream(doc, page)) {
|
|
||||||
// Read the image using the correct handler. PDFBox can't do it because it relies wrongly on file extension
|
|
||||||
PDImageXObject pdImage = null;
|
|
||||||
if (file.getMimeType().equals(MimeType.IMAGE_JPEG)) {
|
|
||||||
pdImage = JPEGFactory.createFromStream(doc, storedFileInputStream);
|
|
||||||
} else if (file.getMimeType().equals(MimeType.IMAGE_GIF) || file.getMimeType().equals(MimeType.IMAGE_PNG)) {
|
|
||||||
BufferedImage bim = ImageIO.read(storedFileInputStream);
|
|
||||||
pdImage = LosslessFactory.createFromImage(doc, bim);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (fitImageToPage) {
|
|
||||||
// Fill the page with the image
|
|
||||||
float widthAvailable = page.getMediaBox().getWidth() - 2 * margin * mmPerInch;
|
|
||||||
float heightAvailable = page.getMediaBox().getHeight() - 2 * margin * mmPerInch;
|
|
||||||
|
|
||||||
// Compare page format and image format
|
|
||||||
if (widthAvailable / heightAvailable < (float) pdImage.getWidth() / (float) pdImage.getHeight()) {
|
|
||||||
float imageHeight = widthAvailable / pdImage.getWidth() * pdImage.getHeight();
|
|
||||||
contentStream.drawImage(pdImage, margin * mmPerInch, heightAvailable + margin * mmPerInch - imageHeight,
|
|
||||||
widthAvailable, imageHeight);
|
|
||||||
} else {
|
|
||||||
float imageWidth = heightAvailable / pdImage.getHeight() * pdImage.getWidth();
|
|
||||||
contentStream.drawImage(pdImage, margin * mmPerInch, margin * mmPerInch,
|
|
||||||
imageWidth, heightAvailable);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// Draw the image as is
|
|
||||||
contentStream.drawImage(pdImage, margin * mmPerInch,
|
|
||||||
page.getMediaBox().getHeight() - pdImage.getHeight() - margin * mmPerInch);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
doc.addPage(page);
|
|
||||||
} else {
|
|
||||||
// Try to convert the file to PDF
|
|
||||||
InputStream pdfInputStream = convertToPdf(file, storedFileInputStream, false);
|
|
||||||
if (pdfInputStream != null) {
|
|
||||||
// This file is convertible to PDF, just add it to the end
|
|
||||||
try {
|
|
||||||
PDDocument mergeDoc = PDDocument.load(pdfInputStream, memUsageSettings);
|
|
||||||
closer.register(mergeDoc);
|
|
||||||
PDFMergerUtility pdfMergerUtility = new PDFMergerUtility();
|
|
||||||
pdfMergerUtility.appendDocument(doc, mergeDoc);
|
|
||||||
} finally {
|
|
||||||
pdfInputStream.close();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// All other non-PDF-convertible files are ignored
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Save to a temporary file
|
|
||||||
try (TemporaryFileStream temporaryFileStream = new TemporaryFileStream()) {
|
|
||||||
doc.save(temporaryFileStream.openWriteStream());
|
|
||||||
closer.close(); // Close all remaining opened PDF
|
|
||||||
return temporaryFileStream.openReadStream();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
245
docs-core/src/main/java/com/sismics/docs/core/util/PdfUtil.java
Normal file
245
docs-core/src/main/java/com/sismics/docs/core/util/PdfUtil.java
Normal file
@ -0,0 +1,245 @@
|
|||||||
|
package com.sismics.docs.core.util;
|
||||||
|
|
||||||
|
import java.awt.image.BufferedImage;
|
||||||
|
import java.io.ByteArrayInputStream;
|
||||||
|
import java.io.ByteArrayOutputStream;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
import java.nio.file.Files;
|
||||||
|
import java.nio.file.Path;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import javax.imageio.ImageIO;
|
||||||
|
|
||||||
|
import org.apache.pdfbox.io.MemoryUsageSetting;
|
||||||
|
import org.apache.pdfbox.multipdf.PDFMergerUtility;
|
||||||
|
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||||
|
import org.apache.pdfbox.pdmodel.PDPage;
|
||||||
|
import org.apache.pdfbox.pdmodel.PDPageContentStream;
|
||||||
|
import org.apache.pdfbox.pdmodel.common.PDRectangle;
|
||||||
|
import org.apache.pdfbox.pdmodel.graphics.image.JPEGFactory;
|
||||||
|
import org.apache.pdfbox.pdmodel.graphics.image.LosslessFactory;
|
||||||
|
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
|
||||||
|
import org.apache.pdfbox.rendering.PDFRenderer;
|
||||||
|
import org.apache.pdfbox.text.PDFTextStripper;
|
||||||
|
import org.apache.poi.xwpf.usermodel.XWPFDocument;
|
||||||
|
import org.odftoolkit.odfdom.converter.pdf.PdfConverter;
|
||||||
|
import org.odftoolkit.odfdom.converter.pdf.PdfOptions;
|
||||||
|
import org.odftoolkit.odfdom.doc.OdfTextDocument;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import com.google.common.io.Closer;
|
||||||
|
import com.sismics.docs.core.model.jpa.File;
|
||||||
|
import com.sismics.util.ImageUtil;
|
||||||
|
import com.sismics.util.mime.MimeType;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* PDF utilities.
|
||||||
|
*
|
||||||
|
* @author bgamard
|
||||||
|
*/
|
||||||
|
public class PdfUtil {
|
||||||
|
/**
|
||||||
|
* Logger.
|
||||||
|
*/
|
||||||
|
private static final Logger log = LoggerFactory.getLogger(PdfUtil.class);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Extract text from a PDF.
|
||||||
|
*
|
||||||
|
* @param inputStream Unencrypted input stream
|
||||||
|
* @return Content extracted
|
||||||
|
*/
|
||||||
|
public static String extractPdf(InputStream inputStream) {
|
||||||
|
String content = null;
|
||||||
|
PDDocument pdfDocument = null;
|
||||||
|
try {
|
||||||
|
PDFTextStripper stripper = new PDFTextStripper();
|
||||||
|
pdfDocument = PDDocument.load(inputStream);
|
||||||
|
content = stripper.getText(pdfDocument);
|
||||||
|
} catch (IOException e) {
|
||||||
|
log.error("Error while extracting text from the PDF", e);
|
||||||
|
} finally {
|
||||||
|
if (pdfDocument != null) {
|
||||||
|
try {
|
||||||
|
pdfDocument.close();
|
||||||
|
} catch (IOException e) {
|
||||||
|
// NOP
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return content;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Convert a file to PDF if necessary.
|
||||||
|
*
|
||||||
|
* @param file File
|
||||||
|
* @param inputStream InputStream
|
||||||
|
* @param reset Reset the stream after usage
|
||||||
|
* @return PDF input stream
|
||||||
|
* @throws Exception
|
||||||
|
*/
|
||||||
|
public static InputStream convertToPdf(File file, InputStream inputStream, boolean reset) throws Exception {
|
||||||
|
if (file.getMimeType().equals(MimeType.APPLICATION_PDF)) {
|
||||||
|
// It's already PDF, just return the input
|
||||||
|
return inputStream;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (file.getMimeType().equals(MimeType.OFFICE_DOCUMENT)) {
|
||||||
|
return convertOfficeDocument(inputStream, reset);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (file.getMimeType().equals(MimeType.OPEN_DOCUMENT_TEXT)) {
|
||||||
|
return convertOpenDocumentText(inputStream, reset);
|
||||||
|
}
|
||||||
|
|
||||||
|
// PDF conversion not necessary/possible
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Convert an open document text file to PDF.
|
||||||
|
*
|
||||||
|
* @param inputStream Unencrypted input stream
|
||||||
|
* @param reset Reset the stream after usage
|
||||||
|
* @return PDF input stream
|
||||||
|
* @throws Exception
|
||||||
|
*/
|
||||||
|
private static InputStream convertOpenDocumentText(InputStream inputStream, boolean reset) throws Exception {
|
||||||
|
ByteArrayOutputStream pdfOutputStream = new ByteArrayOutputStream();
|
||||||
|
OdfTextDocument document = OdfTextDocument.loadDocument(inputStream);
|
||||||
|
PdfOptions options = PdfOptions.create();
|
||||||
|
PdfConverter.getInstance().convert(document, pdfOutputStream, options);
|
||||||
|
if (reset) {
|
||||||
|
inputStream.reset();
|
||||||
|
}
|
||||||
|
return new ByteArrayInputStream(pdfOutputStream.toByteArray());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Convert an Office document to PDF.
|
||||||
|
*
|
||||||
|
* @param inputStream Unencrypted input stream
|
||||||
|
* @param reset Reset the stream after usage
|
||||||
|
* @return PDF input stream
|
||||||
|
* @throws Exception
|
||||||
|
*/
|
||||||
|
private static InputStream convertOfficeDocument(InputStream inputStream, boolean reset) throws Exception {
|
||||||
|
ByteArrayOutputStream pdfOutputStream = new ByteArrayOutputStream();
|
||||||
|
XWPFDocument document = new XWPFDocument(inputStream);
|
||||||
|
org.apache.poi.xwpf.converter.pdf.PdfOptions options = org.apache.poi.xwpf.converter.pdf.PdfOptions.create();
|
||||||
|
org.apache.poi.xwpf.converter.pdf.PdfConverter.getInstance().convert(document, pdfOutputStream, options);
|
||||||
|
if (reset) {
|
||||||
|
inputStream.reset();
|
||||||
|
}
|
||||||
|
return new ByteArrayInputStream(pdfOutputStream.toByteArray());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Convert a document and its files to a merged PDF file.
|
||||||
|
*
|
||||||
|
* @param fileList List of files
|
||||||
|
* @param fitImageToPage Fit images to the page
|
||||||
|
* @param margin Margins in millimeters
|
||||||
|
* @return PDF input stream
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
public static InputStream convertToPdf(List<File> fileList, boolean fitImageToPage, int margin) throws Exception {
|
||||||
|
// TODO PDF Export: Option to add a front page with:
|
||||||
|
// document title, document description, creator, date created, language,
|
||||||
|
// list of all files (and information if it is in this document or not)
|
||||||
|
// TODO PDF Export: Option to add the comments
|
||||||
|
|
||||||
|
// Create a blank PDF
|
||||||
|
Closer closer = Closer.create();
|
||||||
|
MemoryUsageSetting memUsageSettings = MemoryUsageSetting.setupMixed(1000000); // 1MB max memory usage
|
||||||
|
memUsageSettings.setTempDir(new java.io.File(System.getProperty("java.io.tmpdir"))); // To OS temp
|
||||||
|
float mmPerInch = 1 / (10 * 2.54f) * 72f;
|
||||||
|
|
||||||
|
try (PDDocument doc = new PDDocument(memUsageSettings)) {
|
||||||
|
// Add files
|
||||||
|
for (File file : fileList) {
|
||||||
|
Path storedFile = DirectoryUtil.getStorageDirectory().resolve(file.getId());
|
||||||
|
try (InputStream storedFileInputStream = file.getPrivateKey() == null ? // Try to decrypt the file if we have a private key available
|
||||||
|
Files.newInputStream(storedFile) : EncryptionUtil.decryptInputStream(Files.newInputStream(storedFile), file.getPrivateKey())) {
|
||||||
|
if (ImageUtil.isImage(file.getMimeType())) {
|
||||||
|
PDPage page = new PDPage(PDRectangle.A4); // Images into A4 pages
|
||||||
|
try (PDPageContentStream contentStream = new PDPageContentStream(doc, page)) {
|
||||||
|
// Read the image using the correct handler. PDFBox can't do it because it relies wrongly on file extension
|
||||||
|
PDImageXObject pdImage = null;
|
||||||
|
if (file.getMimeType().equals(MimeType.IMAGE_JPEG)) {
|
||||||
|
pdImage = JPEGFactory.createFromStream(doc, storedFileInputStream);
|
||||||
|
} else if (file.getMimeType().equals(MimeType.IMAGE_GIF) || file.getMimeType().equals(MimeType.IMAGE_PNG)) {
|
||||||
|
BufferedImage bim = ImageIO.read(storedFileInputStream);
|
||||||
|
pdImage = LosslessFactory.createFromImage(doc, bim);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Do we want to fill the page with the image?
|
||||||
|
if (fitImageToPage) {
|
||||||
|
// Fill the page with the image
|
||||||
|
float widthAvailable = page.getMediaBox().getWidth() - 2 * margin * mmPerInch;
|
||||||
|
float heightAvailable = page.getMediaBox().getHeight() - 2 * margin * mmPerInch;
|
||||||
|
|
||||||
|
// Compare page format and image format
|
||||||
|
if (widthAvailable / heightAvailable < (float) pdImage.getWidth() / (float) pdImage.getHeight()) {
|
||||||
|
float imageHeight = widthAvailable / pdImage.getWidth() * pdImage.getHeight();
|
||||||
|
contentStream.drawImage(pdImage, margin * mmPerInch, heightAvailable + margin * mmPerInch - imageHeight,
|
||||||
|
widthAvailable, imageHeight);
|
||||||
|
} else {
|
||||||
|
float imageWidth = heightAvailable / pdImage.getHeight() * pdImage.getWidth();
|
||||||
|
contentStream.drawImage(pdImage, margin * mmPerInch, margin * mmPerInch,
|
||||||
|
imageWidth, heightAvailable);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Draw the image as is
|
||||||
|
contentStream.drawImage(pdImage, margin * mmPerInch,
|
||||||
|
page.getMediaBox().getHeight() - pdImage.getHeight() - margin * mmPerInch);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
doc.addPage(page);
|
||||||
|
} else {
|
||||||
|
// Try to convert the file to PDF
|
||||||
|
InputStream pdfInputStream = convertToPdf(file, storedFileInputStream, false);
|
||||||
|
if (pdfInputStream != null) {
|
||||||
|
// This file is convertible to PDF, just add it to the end
|
||||||
|
try {
|
||||||
|
PDDocument mergeDoc = PDDocument.load(pdfInputStream, memUsageSettings);
|
||||||
|
closer.register(mergeDoc);
|
||||||
|
PDFMergerUtility pdfMergerUtility = new PDFMergerUtility();
|
||||||
|
pdfMergerUtility.appendDocument(doc, mergeDoc);
|
||||||
|
} finally {
|
||||||
|
pdfInputStream.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// All other non-PDF-convertible files are ignored
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Save to a temporary file
|
||||||
|
try (TemporaryFileStream temporaryFileStream = new TemporaryFileStream()) {
|
||||||
|
doc.save(temporaryFileStream.openWriteStream());
|
||||||
|
closer.close(); // Close all remaining opened PDF
|
||||||
|
return temporaryFileStream.openReadStream();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Render the first page of a PDF.
|
||||||
|
*
|
||||||
|
* @param inputStream PDF document
|
||||||
|
* @return Render of the first page
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
public static BufferedImage renderFirstPage(InputStream inputStream) throws IOException {
|
||||||
|
try (PDDocument pdfDocument = PDDocument.load(inputStream)) {
|
||||||
|
PDFRenderer renderer = new PDFRenderer(pdfDocument);
|
||||||
|
return renderer.renderImage(0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -4,8 +4,6 @@ import java.io.InputStream;
|
|||||||
import java.nio.file.Files;
|
import java.nio.file.Files;
|
||||||
import java.nio.file.StandardCopyOption;
|
import java.nio.file.StandardCopyOption;
|
||||||
|
|
||||||
import junit.framework.Assert;
|
|
||||||
|
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
import com.google.common.collect.Lists;
|
import com.google.common.collect.Lists;
|
||||||
@ -13,6 +11,8 @@ import com.google.common.io.Resources;
|
|||||||
import com.sismics.docs.core.model.jpa.File;
|
import com.sismics.docs.core.model.jpa.File;
|
||||||
import com.sismics.util.mime.MimeType;
|
import com.sismics.util.mime.MimeType;
|
||||||
|
|
||||||
|
import junit.framework.Assert;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Test of the file entity utilities.
|
* Test of the file entity utilities.
|
||||||
*
|
*
|
||||||
@ -24,7 +24,7 @@ public class TestFileUtil {
|
|||||||
try (InputStream inputStream = Resources.getResource("file/document.odt").openStream()) {
|
try (InputStream inputStream = Resources.getResource("file/document.odt").openStream()) {
|
||||||
File file = new File();
|
File file = new File();
|
||||||
file.setMimeType(MimeType.OPEN_DOCUMENT_TEXT);
|
file.setMimeType(MimeType.OPEN_DOCUMENT_TEXT);
|
||||||
try (InputStream pdfInputStream = FileUtil.convertToPdf(file, inputStream, false)) {
|
try (InputStream pdfInputStream = PdfUtil.convertToPdf(file, inputStream, false)) {
|
||||||
String content = FileUtil.extractContent(null, file, inputStream, pdfInputStream);
|
String content = FileUtil.extractContent(null, file, inputStream, pdfInputStream);
|
||||||
Assert.assertTrue(content.contains("Lorem ipsum dolor sit amen."));
|
Assert.assertTrue(content.contains("Lorem ipsum dolor sit amen."));
|
||||||
}
|
}
|
||||||
@ -36,7 +36,7 @@ public class TestFileUtil {
|
|||||||
try (InputStream inputStream = Resources.getResource("file/document.docx").openStream()) {
|
try (InputStream inputStream = Resources.getResource("file/document.docx").openStream()) {
|
||||||
File file = new File();
|
File file = new File();
|
||||||
file.setMimeType(MimeType.OFFICE_DOCUMENT);
|
file.setMimeType(MimeType.OFFICE_DOCUMENT);
|
||||||
try (InputStream pdfInputStream = FileUtil.convertToPdf(file, inputStream, false)) {
|
try (InputStream pdfInputStream = PdfUtil.convertToPdf(file, inputStream, false)) {
|
||||||
String content = FileUtil.extractContent(null, file, inputStream, pdfInputStream);
|
String content = FileUtil.extractContent(null, file, inputStream, pdfInputStream);
|
||||||
Assert.assertTrue(content.contains("Lorem ipsum dolor sit amen."));
|
Assert.assertTrue(content.contains("Lorem ipsum dolor sit amen."));
|
||||||
}
|
}
|
||||||
@ -81,7 +81,7 @@ public class TestFileUtil {
|
|||||||
file4.setId("document_odt");
|
file4.setId("document_odt");
|
||||||
file4.setMimeType(MimeType.OPEN_DOCUMENT_TEXT);
|
file4.setMimeType(MimeType.OPEN_DOCUMENT_TEXT);
|
||||||
|
|
||||||
FileUtil.convertToPdf(Lists.newArrayList(file0, file1, file2, file3, file4), true, 10).close();
|
PdfUtil.convertToPdf(Lists.newArrayList(file0, file1, file2, file3, file4), true, 10).close();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -53,6 +53,7 @@ import com.sismics.docs.core.model.jpa.User;
|
|||||||
import com.sismics.docs.core.util.DirectoryUtil;
|
import com.sismics.docs.core.util.DirectoryUtil;
|
||||||
import com.sismics.docs.core.util.EncryptionUtil;
|
import com.sismics.docs.core.util.EncryptionUtil;
|
||||||
import com.sismics.docs.core.util.FileUtil;
|
import com.sismics.docs.core.util.FileUtil;
|
||||||
|
import com.sismics.docs.core.util.PdfUtil;
|
||||||
import com.sismics.rest.exception.ClientException;
|
import com.sismics.rest.exception.ClientException;
|
||||||
import com.sismics.rest.exception.ForbiddenClientException;
|
import com.sismics.rest.exception.ForbiddenClientException;
|
||||||
import com.sismics.rest.exception.ServerException;
|
import com.sismics.rest.exception.ServerException;
|
||||||
@ -150,7 +151,7 @@ public class FileResource extends BaseResource {
|
|||||||
file.setMimeType(MimeTypeUtil.guessOpenDocumentFormat(file, fileInputStream));
|
file.setMimeType(MimeTypeUtil.guessOpenDocumentFormat(file, fileInputStream));
|
||||||
|
|
||||||
// Convert to PDF if necessary (for thumbnail and text extraction)
|
// Convert to PDF if necessary (for thumbnail and text extraction)
|
||||||
InputStream pdfIntputStream = FileUtil.convertToPdf(file, fileInputStream, true);
|
InputStream pdfIntputStream = PdfUtil.convertToPdf(file, fileInputStream, true);
|
||||||
|
|
||||||
// Save the file
|
// Save the file
|
||||||
FileUtil.save(fileInputStream, pdfIntputStream, file, user.getPrivateKey());
|
FileUtil.save(fileInputStream, pdfIntputStream, file, user.getPrivateKey());
|
||||||
|
Loading…
Reference in New Issue
Block a user