mirror of
https://github.com/sismics/docs.git
synced 2024-11-22 22:07:56 +01:00
commit
737b3299ff
@ -21,7 +21,7 @@ Features
|
|||||||
|
|
||||||
- Responsive user interface
|
- Responsive user interface
|
||||||
- Optical character recognition
|
- Optical character recognition
|
||||||
- Support image and PDF files
|
- Support image, PDF, ODT and DOCX files
|
||||||
- Flexible search engine
|
- Flexible search engine
|
||||||
- Full text search in image and PDF
|
- Full text search in image and PDF
|
||||||
- 256-bit AES encryption
|
- 256-bit AES encryption
|
||||||
|
@ -117,6 +117,16 @@
|
|||||||
<groupId>com.levigo.jbig2</groupId>
|
<groupId>com.levigo.jbig2</groupId>
|
||||||
<artifactId>levigo-jbig2-imageio</artifactId>
|
<artifactId>levigo-jbig2-imageio</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>fr.opensagres.xdocreport</groupId>
|
||||||
|
<artifactId>org.odftoolkit.odfdom.converter.pdf</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>fr.opensagres.xdocreport</groupId>
|
||||||
|
<artifactId>org.apache.poi.xwpf.converter.pdf</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
<!-- OCR dependencies -->
|
<!-- OCR dependencies -->
|
||||||
<dependency>
|
<dependency>
|
||||||
|
@ -142,6 +142,7 @@ public class FileDao {
|
|||||||
fileFromDb.setDocumentId(file.getDocumentId());
|
fileFromDb.setDocumentId(file.getDocumentId());
|
||||||
fileFromDb.setContent(file.getContent());
|
fileFromDb.setContent(file.getContent());
|
||||||
fileFromDb.setOrder(file.getOrder());
|
fileFromDb.setOrder(file.getOrder());
|
||||||
|
fileFromDb.setMimeType(file.getMimeType());
|
||||||
|
|
||||||
return file;
|
return file;
|
||||||
}
|
}
|
||||||
|
@ -28,58 +28,43 @@ public class FileCreatedAsyncEvent {
|
|||||||
private InputStream inputStream;
|
private InputStream inputStream;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Getter of file.
|
* Unencrypted input stream containing a PDF representation
|
||||||
*
|
* of the file. May be null if the PDF conversion is not
|
||||||
* @return the file
|
* necessary or not possible.
|
||||||
*/
|
*/
|
||||||
|
private InputStream pdfInputStream;
|
||||||
|
|
||||||
public File getFile() {
|
public File getFile() {
|
||||||
return file;
|
return file;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Setter of file.
|
|
||||||
*
|
|
||||||
* @param file file
|
|
||||||
*/
|
|
||||||
public void setFile(File file) {
|
public void setFile(File file) {
|
||||||
this.file = file;
|
this.file = file;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Getter of document.
|
|
||||||
*
|
|
||||||
* @return the document
|
|
||||||
*/
|
|
||||||
public Document getDocument() {
|
public Document getDocument() {
|
||||||
return document;
|
return document;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Setter of document.
|
|
||||||
*
|
|
||||||
* @param document document
|
|
||||||
*/
|
|
||||||
public void setDocument(Document document) {
|
public void setDocument(Document document) {
|
||||||
this.document = document;
|
this.document = document;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Getter of inputStream.
|
|
||||||
*
|
|
||||||
* @return the inputStream
|
|
||||||
*/
|
|
||||||
public InputStream getInputStream() {
|
public InputStream getInputStream() {
|
||||||
return inputStream;
|
return inputStream;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Setter de inputStream.
|
|
||||||
*
|
|
||||||
* @param inputStream inputStream
|
|
||||||
*/
|
|
||||||
public void setInputStream(InputStream inputStream) {
|
public void setInputStream(InputStream inputStream) {
|
||||||
this.inputStream = inputStream;
|
this.inputStream = inputStream;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public InputStream getPdfInputStream() {
|
||||||
|
return pdfInputStream;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setPdfInputStream(InputStream pdfInputStream) {
|
||||||
|
this.pdfInputStream = pdfInputStream;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
|
@ -36,20 +36,26 @@ public class FileCreatedAsyncListener {
|
|||||||
log.info("File created event: " + fileCreatedAsyncEvent.toString());
|
log.info("File created event: " + fileCreatedAsyncEvent.toString());
|
||||||
}
|
}
|
||||||
|
|
||||||
// OCR the file
|
// Retrieve the file attached to the event
|
||||||
final File file = fileCreatedAsyncEvent.getFile();
|
final File file = fileCreatedAsyncEvent.getFile();
|
||||||
|
|
||||||
|
// Extract text content from the file
|
||||||
long startTime = System.currentTimeMillis();
|
long startTime = System.currentTimeMillis();
|
||||||
final String content = FileUtil.extractContent(fileCreatedAsyncEvent.getDocument(), file, fileCreatedAsyncEvent.getInputStream());
|
final String content = FileUtil.extractContent(fileCreatedAsyncEvent.getDocument(), file,
|
||||||
|
fileCreatedAsyncEvent.getInputStream(), fileCreatedAsyncEvent.getPdfInputStream());
|
||||||
fileCreatedAsyncEvent.getInputStream().close();
|
fileCreatedAsyncEvent.getInputStream().close();
|
||||||
|
if (fileCreatedAsyncEvent.getPdfInputStream() != null) {
|
||||||
|
fileCreatedAsyncEvent.getPdfInputStream().close();
|
||||||
|
}
|
||||||
log.info(MessageFormat.format("File content extracted in {0}ms", System.currentTimeMillis() - startTime));
|
log.info(MessageFormat.format("File content extracted in {0}ms", System.currentTimeMillis() - startTime));
|
||||||
|
|
||||||
// Store the OCR-ization result in the database
|
// Store the text content in the database
|
||||||
TransactionUtil.handle(new Runnable() {
|
TransactionUtil.handle(new Runnable() {
|
||||||
@Override
|
@Override
|
||||||
public void run() {
|
public void run() {
|
||||||
FileDao fileDao = new FileDao();
|
FileDao fileDao = new FileDao();
|
||||||
if (fileDao.getById(file.getId()) == null) {
|
if (fileDao.getById(file.getId()) == null) {
|
||||||
// The file has been deleted since the OCR-ization started, ignore the result
|
// The file has been deleted since the text extraction started, ignore the result
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1,6 +1,8 @@
|
|||||||
package com.sismics.docs.core.util;
|
package com.sismics.docs.core.util;
|
||||||
|
|
||||||
import java.awt.image.BufferedImage;
|
import java.awt.image.BufferedImage;
|
||||||
|
import java.io.ByteArrayInputStream;
|
||||||
|
import java.io.ByteArrayOutputStream;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
import java.io.OutputStream;
|
import java.io.OutputStream;
|
||||||
@ -15,9 +17,13 @@ import javax.imageio.ImageIO;
|
|||||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||||
import org.apache.pdfbox.rendering.PDFRenderer;
|
import org.apache.pdfbox.rendering.PDFRenderer;
|
||||||
import org.apache.pdfbox.text.PDFTextStripper;
|
import org.apache.pdfbox.text.PDFTextStripper;
|
||||||
|
import org.apache.poi.xwpf.usermodel.XWPFDocument;
|
||||||
import org.imgscalr.Scalr;
|
import org.imgscalr.Scalr;
|
||||||
import org.imgscalr.Scalr.Method;
|
import org.imgscalr.Scalr.Method;
|
||||||
import org.imgscalr.Scalr.Mode;
|
import org.imgscalr.Scalr.Mode;
|
||||||
|
import org.odftoolkit.odfdom.converter.pdf.PdfConverter;
|
||||||
|
import org.odftoolkit.odfdom.converter.pdf.PdfOptions;
|
||||||
|
import org.odftoolkit.odfdom.doc.OdfTextDocument;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
@ -44,15 +50,16 @@ public class FileUtil {
|
|||||||
* @param document Document linked to the file
|
* @param document Document linked to the file
|
||||||
* @param file File to extract
|
* @param file File to extract
|
||||||
* @param inputStream Unencrypted input stream
|
* @param inputStream Unencrypted input stream
|
||||||
|
* @param pdfInputStream Unencrypted PDF input stream
|
||||||
* @return Content extract
|
* @return Content extract
|
||||||
*/
|
*/
|
||||||
public static String extractContent(Document document, File file, InputStream inputStream) {
|
public static String extractContent(Document document, File file, InputStream inputStream, InputStream pdfInputStream) {
|
||||||
String content = null;
|
String content = null;
|
||||||
|
|
||||||
if (ImageUtil.isImage(file.getMimeType())) {
|
if (ImageUtil.isImage(file.getMimeType())) {
|
||||||
content = ocrFile(inputStream, document);
|
content = ocrFile(inputStream, document);
|
||||||
} else if (file.getMimeType().equals(MimeType.APPLICATION_PDF)) {
|
} else if (pdfInputStream != null) {
|
||||||
content = extractPdf(inputStream);
|
content = extractPdf(pdfInputStream);
|
||||||
}
|
}
|
||||||
|
|
||||||
return content;
|
return content;
|
||||||
@ -120,23 +127,81 @@ public class FileUtil {
|
|||||||
return content;
|
return content;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Convert a file to PDF if necessary.
|
||||||
|
*
|
||||||
|
* @param inputStream InputStream
|
||||||
|
* @param file File
|
||||||
|
* @return PDF input stream
|
||||||
|
* @throws Exception
|
||||||
|
*/
|
||||||
|
public static InputStream convertToPdf(InputStream inputStream, File file) throws Exception {
|
||||||
|
if (file.getMimeType().equals(MimeType.APPLICATION_PDF)) {
|
||||||
|
// It's already PDF, just return the input
|
||||||
|
return inputStream;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (file.getMimeType().equals(MimeType.OFFICE_DOCUMENT)) {
|
||||||
|
return convertOfficeDocument(inputStream);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (file.getMimeType().equals(MimeType.OPEN_DOCUMENT_TEXT)) {
|
||||||
|
return convertOpenDocumentText(inputStream);
|
||||||
|
}
|
||||||
|
|
||||||
|
// PDF conversion not necessary/possible
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Convert an open document text file to PDF.
|
||||||
|
*
|
||||||
|
* @param inputStream Unencrypted input stream
|
||||||
|
* @return PDF input stream
|
||||||
|
* @throws Exception
|
||||||
|
*/
|
||||||
|
private static InputStream convertOpenDocumentText(InputStream inputStream) throws Exception {
|
||||||
|
ByteArrayOutputStream pdfOutputStream = new ByteArrayOutputStream();
|
||||||
|
OdfTextDocument document = OdfTextDocument.loadDocument(inputStream);
|
||||||
|
PdfOptions options = PdfOptions.create();
|
||||||
|
PdfConverter.getInstance().convert(document, pdfOutputStream, options);
|
||||||
|
inputStream.reset();
|
||||||
|
return new ByteArrayInputStream(pdfOutputStream.toByteArray());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Convert an Office document to PDF.
|
||||||
|
*
|
||||||
|
* @param inputStream Unencrypted input stream
|
||||||
|
* @return PDF input stream
|
||||||
|
* @throws Exception
|
||||||
|
*/
|
||||||
|
private static InputStream convertOfficeDocument(InputStream inputStream) throws Exception {
|
||||||
|
ByteArrayOutputStream pdfOutputStream = new ByteArrayOutputStream();
|
||||||
|
XWPFDocument document = new XWPFDocument(inputStream);
|
||||||
|
org.apache.poi.xwpf.converter.pdf.PdfOptions options = org.apache.poi.xwpf.converter.pdf.PdfOptions.create();
|
||||||
|
org.apache.poi.xwpf.converter.pdf.PdfConverter.getInstance().convert(document, pdfOutputStream, options);
|
||||||
|
inputStream.reset();
|
||||||
|
return new ByteArrayInputStream(pdfOutputStream.toByteArray());
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Save a file on the storage filesystem.
|
* Save a file on the storage filesystem.
|
||||||
*
|
*
|
||||||
* @param inputStream Unencrypted input stream
|
* @param inputStream Unencrypted input stream
|
||||||
|
* @param pdfInputStream Unencrypted PDF input stream, or null if no PDF representation is available
|
||||||
* @param file File to save
|
* @param file File to save
|
||||||
* @param privateKey Private key used for encryption
|
* @param privateKey Private key used for encryption
|
||||||
* @throws Exception
|
* @throws Exception
|
||||||
*/
|
*/
|
||||||
public static void save(InputStream inputStream, File file, String privateKey) throws Exception {
|
public static void save(InputStream inputStream, InputStream pdfInputStream, File file, String privateKey) throws Exception {
|
||||||
Cipher cipher = EncryptionUtil.getEncryptionCipher(privateKey);
|
Cipher cipher = EncryptionUtil.getEncryptionCipher(privateKey);
|
||||||
Path path = DirectoryUtil.getStorageDirectory().resolve(file.getId());
|
Path path = DirectoryUtil.getStorageDirectory().resolve(file.getId());
|
||||||
Files.copy(new CipherInputStream(inputStream, cipher), path);
|
Files.copy(new CipherInputStream(inputStream, cipher), path);
|
||||||
|
inputStream.reset();
|
||||||
|
|
||||||
// Generate file variations
|
// Generate file variations
|
||||||
inputStream.reset();
|
saveVariations(file, inputStream, pdfInputStream, cipher);
|
||||||
saveVariations(file, inputStream, cipher);
|
|
||||||
inputStream.reset();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -144,20 +209,23 @@ public class FileUtil {
|
|||||||
*
|
*
|
||||||
* @param file File from database
|
* @param file File from database
|
||||||
* @param inputStream Unencrypted input stream
|
* @param inputStream Unencrypted input stream
|
||||||
|
* @param pdfInputStream Unencrypted PDF input stream
|
||||||
* @param cipher Cipher to use for encryption
|
* @param cipher Cipher to use for encryption
|
||||||
* @throws Exception
|
* @throws Exception
|
||||||
*/
|
*/
|
||||||
public static void saveVariations(File file, InputStream inputStream, Cipher cipher) throws Exception {
|
public static void saveVariations(File file, InputStream inputStream, InputStream pdfInputStream, Cipher cipher) throws Exception {
|
||||||
BufferedImage image = null;
|
BufferedImage image = null;
|
||||||
if (ImageUtil.isImage(file.getMimeType())) {
|
if (ImageUtil.isImage(file.getMimeType())) {
|
||||||
image = ImageIO.read(inputStream);
|
image = ImageIO.read(inputStream);
|
||||||
} else if(file.getMimeType().equals(MimeType.APPLICATION_PDF)) {
|
inputStream.reset();
|
||||||
|
} else if(pdfInputStream != null) {
|
||||||
// Generate preview from the first page of the PDF
|
// Generate preview from the first page of the PDF
|
||||||
PDDocument pdfDocument = null;
|
PDDocument pdfDocument = null;
|
||||||
try {
|
try {
|
||||||
pdfDocument = PDDocument.load(inputStream);
|
pdfDocument = PDDocument.load(pdfInputStream);
|
||||||
PDFRenderer renderer = new PDFRenderer(pdfDocument);
|
PDFRenderer renderer = new PDFRenderer(pdfDocument);
|
||||||
image = renderer.renderImage(0);
|
image = renderer.renderImage(0);
|
||||||
|
pdfInputStream.reset();
|
||||||
} finally {
|
} finally {
|
||||||
pdfDocument.close();
|
pdfDocument.close();
|
||||||
}
|
}
|
||||||
|
@ -18,4 +18,8 @@ public class MimeType {
|
|||||||
public static final String APPLICATION_ZIP = "application/zip";
|
public static final String APPLICATION_ZIP = "application/zip";
|
||||||
|
|
||||||
public static final String APPLICATION_PDF = "application/pdf";
|
public static final String APPLICATION_PDF = "application/pdf";
|
||||||
|
|
||||||
|
public static final String OPEN_DOCUMENT_TEXT = "application/vnd.oasis.opendocument.text";
|
||||||
|
|
||||||
|
public static final String OFFICE_DOCUMENT = "application/vnd.openxmlformats-officedocument.wordprocessingml.document";
|
||||||
}
|
}
|
||||||
|
@ -3,6 +3,13 @@ package com.sismics.util.mime;
|
|||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
import java.io.UnsupportedEncodingException;
|
import java.io.UnsupportedEncodingException;
|
||||||
|
|
||||||
|
import org.apache.commons.compress.archivers.ArchiveEntry;
|
||||||
|
import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream;
|
||||||
|
import org.apache.commons.compress.utils.IOUtils;
|
||||||
|
|
||||||
|
import com.google.common.base.Charsets;
|
||||||
|
import com.sismics.docs.core.model.jpa.File;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Utility to check MIME types.
|
* Utility to check MIME types.
|
||||||
*
|
*
|
||||||
@ -77,8 +84,59 @@ public class MimeTypeUtil {
|
|||||||
return "ico";
|
return "ico";
|
||||||
case MimeType.APPLICATION_PDF:
|
case MimeType.APPLICATION_PDF:
|
||||||
return "pdf";
|
return "pdf";
|
||||||
|
case MimeType.OPEN_DOCUMENT_TEXT:
|
||||||
|
return "odt";
|
||||||
|
case MimeType.OFFICE_DOCUMENT:
|
||||||
|
return "docx";
|
||||||
default:
|
default:
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Guess the MIME type of open document formats (docx and odt).
|
||||||
|
* It's more costly than the simple header check, but needed because open document formats
|
||||||
|
* are simple ZIP files on the outside and much bigger on the inside.
|
||||||
|
*
|
||||||
|
* @param file File
|
||||||
|
* @param inputStream Input stream
|
||||||
|
* @return MIME type
|
||||||
|
*/
|
||||||
|
public static String guessOpenDocumentFormat(File file, InputStream inputStream) {
|
||||||
|
if (!MimeType.APPLICATION_ZIP.equals(file.getMimeType())) {
|
||||||
|
// open document formats are ZIP files
|
||||||
|
return file.getMimeType();
|
||||||
|
}
|
||||||
|
|
||||||
|
String mimeType = file.getMimeType();
|
||||||
|
try (ZipArchiveInputStream archiveInputStream = new ZipArchiveInputStream(inputStream, Charsets.ISO_8859_1.name())) {
|
||||||
|
ArchiveEntry archiveEntry = archiveInputStream.getNextEntry();
|
||||||
|
while (archiveEntry != null) {
|
||||||
|
if (archiveEntry.getName().equals("mimetype")) {
|
||||||
|
// Maybe it's an ODT file
|
||||||
|
String content = new String(IOUtils.toByteArray(archiveInputStream), Charsets.ISO_8859_1);
|
||||||
|
if (MimeType.OPEN_DOCUMENT_TEXT.equals(content.trim())) {
|
||||||
|
mimeType = MimeType.OPEN_DOCUMENT_TEXT;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
} else if (archiveEntry.getName().equals("[Content_Types].xml")) {
|
||||||
|
// Maybe it's a DOCX file
|
||||||
|
String content = new String(IOUtils.toByteArray(archiveInputStream), Charsets.ISO_8859_1);
|
||||||
|
if (content.contains(MimeType.OFFICE_DOCUMENT)) {
|
||||||
|
mimeType = MimeType.OFFICE_DOCUMENT;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
archiveEntry = archiveInputStream.getNextEntry();
|
||||||
|
}
|
||||||
|
|
||||||
|
inputStream.reset();
|
||||||
|
} catch (Exception e) {
|
||||||
|
// In case of any error, just give up and keep the ZIP MIME type
|
||||||
|
return file.getMimeType();
|
||||||
|
}
|
||||||
|
|
||||||
|
return mimeType;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -18,7 +18,6 @@ import com.google.common.io.ByteStreams;
|
|||||||
* @author bgamard
|
* @author bgamard
|
||||||
*/
|
*/
|
||||||
public class TestEncryptUtil {
|
public class TestEncryptUtil {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Test private key.
|
* Test private key.
|
||||||
*/
|
*/
|
||||||
|
@ -0,0 +1,46 @@
|
|||||||
|
package com.sismics.docs.core.util;
|
||||||
|
|
||||||
|
import java.io.ByteArrayInputStream;
|
||||||
|
import java.io.InputStream;
|
||||||
|
|
||||||
|
import junit.framework.Assert;
|
||||||
|
|
||||||
|
import org.apache.pdfbox.io.IOUtils;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
import com.google.common.io.Resources;
|
||||||
|
import com.sismics.docs.core.model.jpa.File;
|
||||||
|
import com.sismics.util.mime.MimeType;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test of the file entity utilities.
|
||||||
|
*
|
||||||
|
* @author bgamard
|
||||||
|
*/
|
||||||
|
public class TestFileUtil {
|
||||||
|
@Test
|
||||||
|
public void extractContentOpenDocumentTextTest() throws Exception {
|
||||||
|
try (InputStream inputStream = Resources.getResource("file/document.odt").openStream();
|
||||||
|
InputStream bytesInputStream = new ByteArrayInputStream(IOUtils.toByteArray(inputStream))) {
|
||||||
|
File file = new File();
|
||||||
|
file.setMimeType(MimeType.OPEN_DOCUMENT_TEXT);
|
||||||
|
try (InputStream pdfInputStream = FileUtil.convertToPdf(bytesInputStream, file)) {
|
||||||
|
String content = FileUtil.extractContent(null, file, inputStream, pdfInputStream);
|
||||||
|
Assert.assertTrue(content.contains("Lorem ipsum dolor sit amen."));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void extractContentOfficeDocumentTest() throws Exception {
|
||||||
|
try (InputStream inputStream = Resources.getResource("file/document.docx").openStream();
|
||||||
|
InputStream bytesInputStream = new ByteArrayInputStream(IOUtils.toByteArray(inputStream))) {
|
||||||
|
File file = new File();
|
||||||
|
file.setMimeType(MimeType.OFFICE_DOCUMENT);
|
||||||
|
try (InputStream pdfInputStream = FileUtil.convertToPdf(bytesInputStream, file)) {
|
||||||
|
String content = FileUtil.extractContent(null, file, inputStream, pdfInputStream);
|
||||||
|
Assert.assertTrue(content.contains("Lorem ipsum dolor sit amen."));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,40 @@
|
|||||||
|
package com.sismics.util;
|
||||||
|
|
||||||
|
import java.io.ByteArrayInputStream;
|
||||||
|
import java.io.InputStream;
|
||||||
|
|
||||||
|
import org.apache.commons.compress.utils.IOUtils;
|
||||||
|
import org.junit.Assert;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
import com.google.common.io.Resources;
|
||||||
|
import com.sismics.docs.core.model.jpa.File;
|
||||||
|
import com.sismics.util.mime.MimeType;
|
||||||
|
import com.sismics.util.mime.MimeTypeUtil;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test of the utilities to check MIME types.
|
||||||
|
*
|
||||||
|
* @author bgamard
|
||||||
|
*/
|
||||||
|
public class TestMimeTypeUtil {
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void guessOpenDocumentFormatTest() throws Exception {
|
||||||
|
// Detect ODT files
|
||||||
|
try (InputStream inputStream = Resources.getResource("file/document.odt").openStream();
|
||||||
|
InputStream byteArrayInputStream = new ByteArrayInputStream(IOUtils.toByteArray(inputStream))) {
|
||||||
|
File file = new File();
|
||||||
|
file.setMimeType(MimeType.APPLICATION_ZIP);
|
||||||
|
Assert.assertEquals(MimeType.OPEN_DOCUMENT_TEXT, MimeTypeUtil.guessOpenDocumentFormat(file, byteArrayInputStream));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Detect DOCX files
|
||||||
|
try (InputStream inputStream = Resources.getResource("file/document.docx").openStream();
|
||||||
|
InputStream byteArrayInputStream = new ByteArrayInputStream(IOUtils.toByteArray(inputStream))) {
|
||||||
|
File file = new File();
|
||||||
|
file.setMimeType(MimeType.APPLICATION_ZIP);
|
||||||
|
Assert.assertEquals(MimeType.OFFICE_DOCUMENT, MimeTypeUtil.guessOpenDocumentFormat(file, byteArrayInputStream));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
BIN
docs-core/src/test/resources/file/document.docx
Normal file
BIN
docs-core/src/test/resources/file/document.docx
Normal file
Binary file not shown.
BIN
docs-core/src/test/resources/file/document.odt
Normal file
BIN
docs-core/src/test/resources/file/document.odt
Normal file
Binary file not shown.
@ -36,6 +36,7 @@
|
|||||||
<org.hibernate.hibernate.version>4.1.0.Final</org.hibernate.hibernate.version>
|
<org.hibernate.hibernate.version>4.1.0.Final</org.hibernate.hibernate.version>
|
||||||
<javax.servlet.javax.servlet-api.version>3.1.0</javax.servlet.javax.servlet-api.version>
|
<javax.servlet.javax.servlet-api.version>3.1.0</javax.servlet.javax.servlet-api.version>
|
||||||
<com.levigo.jbig2.levigo-jbig2-imageio.version>1.6.3</com.levigo.jbig2.levigo-jbig2-imageio.version>
|
<com.levigo.jbig2.levigo-jbig2-imageio.version>1.6.3</com.levigo.jbig2.levigo-jbig2-imageio.version>
|
||||||
|
<fr.opensagres.xdocreport.version>1.0.5</fr.opensagres.xdocreport.version>
|
||||||
|
|
||||||
<org.eclipse.jetty.jetty-server.version>9.2.13.v20150730</org.eclipse.jetty.jetty-server.version>
|
<org.eclipse.jetty.jetty-server.version>9.2.13.v20150730</org.eclipse.jetty.jetty-server.version>
|
||||||
<org.eclipse.jetty.jetty-webapp.version>9.2.13.v20150730</org.eclipse.jetty.jetty-webapp.version>
|
<org.eclipse.jetty.jetty-webapp.version>9.2.13.v20150730</org.eclipse.jetty.jetty-webapp.version>
|
||||||
@ -367,6 +368,18 @@
|
|||||||
<version>${org.bouncycastle.bcprov-jdk15on.version}</version>
|
<version>${org.bouncycastle.bcprov-jdk15on.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>fr.opensagres.xdocreport</groupId>
|
||||||
|
<artifactId>org.odftoolkit.odfdom.converter.pdf</artifactId>
|
||||||
|
<version>${fr.opensagres.xdocreport.version}</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>fr.opensagres.xdocreport</groupId>
|
||||||
|
<artifactId>org.apache.poi.xwpf.converter.pdf</artifactId>
|
||||||
|
<version>${fr.opensagres.xdocreport.version}</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
<!-- Used to read JBIG2 images. See https://github.com/sismics/docs/issues/38 -->
|
<!-- Used to read JBIG2 images. See https://github.com/sismics/docs/issues/38 -->
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>com.levigo.jbig2</groupId>
|
<groupId>com.levigo.jbig2</groupId>
|
||||||
|
@ -146,8 +146,14 @@ public class FileResource extends BaseResource {
|
|||||||
file.setUserId(principal.getId());
|
file.setUserId(principal.getId());
|
||||||
String fileId = fileDao.create(file);
|
String fileId = fileDao.create(file);
|
||||||
|
|
||||||
|
// Guess the mime type a second time, for open document format (first detected as simple ZIP file)
|
||||||
|
file.setMimeType(MimeTypeUtil.guessOpenDocumentFormat(file, fileInputStream));
|
||||||
|
|
||||||
|
// Convert to PDF if necessary (for thumbnail and text extraction)
|
||||||
|
InputStream pdfIntputStream = FileUtil.convertToPdf(fileInputStream, file);
|
||||||
|
|
||||||
// Save the file
|
// Save the file
|
||||||
FileUtil.save(fileInputStream, file, user.getPrivateKey());
|
FileUtil.save(fileInputStream, pdfIntputStream, file, user.getPrivateKey());
|
||||||
|
|
||||||
// Update the user quota
|
// Update the user quota
|
||||||
user.setStorageCurrent(user.getStorageCurrent() + fileData.length);
|
user.setStorageCurrent(user.getStorageCurrent() + fileData.length);
|
||||||
@ -159,6 +165,7 @@ public class FileResource extends BaseResource {
|
|||||||
fileCreatedAsyncEvent.setDocument(document);
|
fileCreatedAsyncEvent.setDocument(document);
|
||||||
fileCreatedAsyncEvent.setFile(file);
|
fileCreatedAsyncEvent.setFile(file);
|
||||||
fileCreatedAsyncEvent.setInputStream(fileInputStream);
|
fileCreatedAsyncEvent.setInputStream(fileInputStream);
|
||||||
|
fileCreatedAsyncEvent.setPdfInputStream(pdfIntputStream);
|
||||||
AppContext.getInstance().getAsyncEventBus().post(fileCreatedAsyncEvent);
|
AppContext.getInstance().getAsyncEventBus().post(fileCreatedAsyncEvent);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
18
docs-web/src/main/webapp/src/app/docs/filter/Filesize.js
Normal file
18
docs-web/src/main/webapp/src/app/docs/filter/Filesize.js
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
'use strict';
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Format file sizes.
|
||||||
|
*/
|
||||||
|
angular.module('docs').filter('filesize', function() {
|
||||||
|
return function(text) {
|
||||||
|
if (!text) {
|
||||||
|
return '';
|
||||||
|
}
|
||||||
|
|
||||||
|
var size = parseInt(text);
|
||||||
|
if (size > 1000000) { // 1MB
|
||||||
|
return Math.round(size / 1000000) + 'MB';
|
||||||
|
}
|
||||||
|
return Math.round(size / 1000) + 'kB';
|
||||||
|
}
|
||||||
|
});
|
@ -1,7 +1,7 @@
|
|||||||
'use strict';
|
'use strict';
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Filter converting new lines in <br />
|
* Filter converting new lines into <br />.
|
||||||
*/
|
*/
|
||||||
angular.module('docs').filter('newline', function() {
|
angular.module('docs').filter('newline', function() {
|
||||||
return function(text) {
|
return function(text) {
|
||||||
@ -10,4 +10,4 @@ angular.module('docs').filter('newline', function() {
|
|||||||
}
|
}
|
||||||
return text.replace(/\n/g, '<br/>');
|
return text.replace(/\n/g, '<br/>');
|
||||||
}
|
}
|
||||||
})
|
});
|
@ -10,4 +10,4 @@ angular.module('docs').filter('shorten', function() {
|
|||||||
}
|
}
|
||||||
return text.substring(0, 1).toUpperCase();
|
return text.substring(0, 1).toUpperCase();
|
||||||
}
|
}
|
||||||
})
|
});
|
18
docs-web/src/main/webapp/src/app/share/filter/Filesize.js
Normal file
18
docs-web/src/main/webapp/src/app/share/filter/Filesize.js
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
'use strict';
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Format file sizes.
|
||||||
|
*/
|
||||||
|
angular.module('share').filter('filesize', function() {
|
||||||
|
return function(text) {
|
||||||
|
if (!text) {
|
||||||
|
return '';
|
||||||
|
}
|
||||||
|
|
||||||
|
var size = parseInt(text);
|
||||||
|
if (size > 1000000) { // 1MB
|
||||||
|
return Math.round(size / 1000000) + 'MB';
|
||||||
|
}
|
||||||
|
return Math.round(size / 1000) + 'kB';
|
||||||
|
}
|
||||||
|
});
|
@ -1,7 +1,7 @@
|
|||||||
'use strict';
|
'use strict';
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Filter converting new lines in <br />
|
* Filter converting new lines into <br />.
|
||||||
*/
|
*/
|
||||||
angular.module('share').filter('newline', function() {
|
angular.module('share').filter('newline', function() {
|
||||||
return function(text) {
|
return function(text) {
|
||||||
@ -10,4 +10,4 @@ angular.module('share').filter('newline', function() {
|
|||||||
}
|
}
|
||||||
return text.replace(/\n/g, '<br/>');
|
return text.replace(/\n/g, '<br/>');
|
||||||
}
|
}
|
||||||
})
|
});
|
@ -63,6 +63,7 @@
|
|||||||
<script src="app/docs/service/Tag.js" type="text/javascript"></script>
|
<script src="app/docs/service/Tag.js" type="text/javascript"></script>
|
||||||
<script src="app/docs/filter/Newline.js" type="text/javascript"></script>
|
<script src="app/docs/filter/Newline.js" type="text/javascript"></script>
|
||||||
<script src="app/docs/filter/Shorten.js" type="text/javascript"></script>
|
<script src="app/docs/filter/Shorten.js" type="text/javascript"></script>
|
||||||
|
<script src="app/docs/filter/Filesize.js" type="text/javascript"></script>
|
||||||
<script src="app/docs/directive/File.js" type="text/javascript"></script>
|
<script src="app/docs/directive/File.js" type="text/javascript"></script>
|
||||||
<script src="app/docs/directive/SelectTag.js" type="text/javascript"></script>
|
<script src="app/docs/directive/SelectTag.js" type="text/javascript"></script>
|
||||||
<script src="app/docs/directive/AuditLog.js" type="text/javascript"></script>
|
<script src="app/docs/directive/AuditLog.js" type="text/javascript"></script>
|
||||||
|
@ -8,7 +8,7 @@
|
|||||||
<div class="col-xs-6 col-sm-4 col-md-3 col-lg-2 text-center" ng-repeat="file in files">
|
<div class="col-xs-6 col-sm-4 col-md-3 col-lg-2 text-center" ng-repeat="file in files">
|
||||||
<div class="thumbnail" ng-class="{ 'thumbnail-checked': file.checked }" ng-if="file.id">
|
<div class="thumbnail" ng-class="{ 'thumbnail-checked': file.checked }" ng-if="file.id">
|
||||||
<a ng-click="openFile(file)">
|
<a ng-click="openFile(file)">
|
||||||
<img class="thumbnail-file" ng-src="../api/file/{{ file.id }}/data?size=thumb" tooltip="{{ file.mimetype }}" tooltip-placement="top" />
|
<img class="thumbnail-file" ng-src="../api/file/{{ file.id }}/data?size=thumb" tooltip="{{ file.mimetype }} | {{ file.size | filesize }}" tooltip-placement="top" />
|
||||||
</a>
|
</a>
|
||||||
<div class="caption pointer" ng-click="file.checked = !file.checked">
|
<div class="caption pointer" ng-click="file.checked = !file.checked">
|
||||||
<div class="pull-left">
|
<div class="pull-left">
|
||||||
|
@ -39,7 +39,8 @@
|
|||||||
<label class="col-sm-2 control-label" for="inputFiles">New files</label>
|
<label class="col-sm-2 control-label" for="inputFiles">New files</label>
|
||||||
<div class="col-sm-6">
|
<div class="col-sm-6">
|
||||||
<file class="form-control" id="inputFiles" multiple="multiple" ng-model="newFiles"
|
<file class="form-control" id="inputFiles" multiple="multiple" ng-model="newFiles"
|
||||||
accept="image/png,image/jpg,image/jpeg,image/gif,application/pdf" ng-disabled="fileIsUploading"></file>
|
accept="image/png,image/jpg,image/jpeg,image/gif,application/pdf,application/vnd.oasis.opendocument.text,application/vnd.openxmlformats-officedocument.wordprocessingml.document"
|
||||||
|
ng-disabled="fileIsUploading"></file>
|
||||||
</div>
|
</div>
|
||||||
<div class="col-sm-4" ng-if="orphanFiles.length > 0">
|
<div class="col-sm-4" ng-if="orphanFiles.length > 0">
|
||||||
+ {{ orphanFiles.length }} file{{ orphanFiles.length > 1 ? 's' : '' }}
|
+ {{ orphanFiles.length }} file{{ orphanFiles.length > 1 ? 's' : '' }}
|
||||||
|
@ -6,7 +6,7 @@
|
|||||||
<div class="col-xs-6 col-sm-4 col-md-4 col-lg-3 text-center" ng-repeat="file in files">
|
<div class="col-xs-6 col-sm-4 col-md-4 col-lg-3 text-center" ng-repeat="file in files">
|
||||||
<div class="thumbnail" ng-if="file.id">
|
<div class="thumbnail" ng-if="file.id">
|
||||||
<a ng-click="openFile(file)">
|
<a ng-click="openFile(file)">
|
||||||
<img class="thumbnail-file" ng-src="../api/file/{{ file.id }}/data?size=thumb" tooltip="{{ file.mimetype }}" tooltip-placement="top" />
|
<img class="thumbnail-file" ng-src="../api/file/{{ file.id }}/data?size=thumb" tooltip="{{ file.mimetype }} | {{ file.size | filesize }}" tooltip-placement="top" />
|
||||||
</a>
|
</a>
|
||||||
<div class="caption" ng-show="document.writable">
|
<div class="caption" ng-show="document.writable">
|
||||||
<div class="pull-left">
|
<div class="pull-left">
|
||||||
|
@ -18,7 +18,9 @@
|
|||||||
<div class="col-xs-6 col-sm-4 col-md-3 col-lg-2 text-center" ng-repeat="file in files">
|
<div class="col-xs-6 col-sm-4 col-md-3 col-lg-2 text-center" ng-repeat="file in files">
|
||||||
<div class="thumbnail">
|
<div class="thumbnail">
|
||||||
<a ng-click="openFile(file)">
|
<a ng-click="openFile(file)">
|
||||||
<img class="thumbnail-file" ng-src="../api/file/{{ file.id }}/data?size=thumb&share={{ $stateParams.shareId }}" tooltip="{{ file.mimetype }}" tooltip-placement="top" />
|
<img class="thumbnail-file"
|
||||||
|
ng-src="../api/file/{{ file.id }}/data?size=thumb&share={{ $stateParams.shareId }}"
|
||||||
|
tooltip="{{ file.mimetype }} | {{ file.size | filesize }}" tooltip-placement="top" />
|
||||||
</a>
|
</a>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
@ -37,6 +37,7 @@
|
|||||||
<script src="app/share/controller/FileView.js" type="text/javascript"></script>
|
<script src="app/share/controller/FileView.js" type="text/javascript"></script>
|
||||||
<script src="app/share/controller/FileModalView.js" type="text/javascript"></script>
|
<script src="app/share/controller/FileModalView.js" type="text/javascript"></script>
|
||||||
<script src="app/share/filter/Newline.js" type="text/javascript"></script>
|
<script src="app/share/filter/Newline.js" type="text/javascript"></script>
|
||||||
|
<script src="app/share/filter/Filesize.js" type="text/javascript"></script>
|
||||||
<!-- endref -->
|
<!-- endref -->
|
||||||
</head>
|
</head>
|
||||||
<body>
|
<body>
|
||||||
|
@ -267,6 +267,124 @@ public class TestDocumentResource extends BaseJerseyTest {
|
|||||||
return json.getJsonArray("documents").size();
|
return json.getJsonArray("documents").size();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test ODT extraction.
|
||||||
|
*
|
||||||
|
* @throws Exception
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testOdtExtraction() throws Exception {
|
||||||
|
// Login document_odt
|
||||||
|
clientUtil.createUser("document_odt");
|
||||||
|
String documentOdtToken = clientUtil.login("document_odt");
|
||||||
|
|
||||||
|
// Create a document
|
||||||
|
long create1Date = new Date().getTime();
|
||||||
|
JsonObject json = target().path("/document").request()
|
||||||
|
.cookie(TokenBasedSecurityFilter.COOKIE_NAME, documentOdtToken)
|
||||||
|
.put(Entity.form(new Form()
|
||||||
|
.param("title", "My super title document 1")
|
||||||
|
.param("description", "My super description for document 1")
|
||||||
|
.param("language", "eng")
|
||||||
|
.param("create_date", Long.toString(create1Date))), JsonObject.class);
|
||||||
|
String document1Id = json.getString("id");
|
||||||
|
Assert.assertNotNull(document1Id);
|
||||||
|
|
||||||
|
// Add an ODT file
|
||||||
|
String file1Id = null;
|
||||||
|
try (InputStream is = Resources.getResource("file/document.odt").openStream()) {
|
||||||
|
StreamDataBodyPart streamDataBodyPart = new StreamDataBodyPart("file", is, "document.odt");
|
||||||
|
try (FormDataMultiPart multiPart = new FormDataMultiPart()) {
|
||||||
|
json = target()
|
||||||
|
.register(MultiPartFeature.class)
|
||||||
|
.path("/file").request()
|
||||||
|
.cookie(TokenBasedSecurityFilter.COOKIE_NAME, documentOdtToken)
|
||||||
|
.put(Entity.entity(multiPart.field("id", document1Id).bodyPart(streamDataBodyPart),
|
||||||
|
MediaType.MULTIPART_FORM_DATA_TYPE), JsonObject.class);
|
||||||
|
file1Id = json.getString("id");
|
||||||
|
Assert.assertNotNull(file1Id);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Search documents by query in full content
|
||||||
|
json = target().path("/document/list")
|
||||||
|
.queryParam("search", "full:ipsum")
|
||||||
|
.request()
|
||||||
|
.cookie(TokenBasedSecurityFilter.COOKIE_NAME, documentOdtToken)
|
||||||
|
.get(JsonObject.class);
|
||||||
|
Assert.assertTrue(json.getJsonArray("documents").size() == 1);
|
||||||
|
|
||||||
|
// Get the file thumbnail data
|
||||||
|
Response response = target().path("/file/" + file1Id + "/data")
|
||||||
|
.queryParam("size", "thumb")
|
||||||
|
.request()
|
||||||
|
.cookie(TokenBasedSecurityFilter.COOKIE_NAME, documentOdtToken)
|
||||||
|
.get();
|
||||||
|
InputStream is = (InputStream) response.getEntity();
|
||||||
|
byte[] fileBytes = ByteStreams.toByteArray(is);
|
||||||
|
Assert.assertTrue(fileBytes.length > 0); // Images rendered from PDF differ in size from OS to OS due to font issues
|
||||||
|
Assert.assertEquals(MimeType.IMAGE_JPEG, MimeTypeUtil.guessMimeType(fileBytes));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test DOCX extraction.
|
||||||
|
*
|
||||||
|
* @throws Exception
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testDocxExtraction() throws Exception {
|
||||||
|
// Login document_docx
|
||||||
|
clientUtil.createUser("document_docx");
|
||||||
|
String documentDocxToken = clientUtil.login("document_docx");
|
||||||
|
|
||||||
|
// Create a document
|
||||||
|
long create1Date = new Date().getTime();
|
||||||
|
JsonObject json = target().path("/document").request()
|
||||||
|
.cookie(TokenBasedSecurityFilter.COOKIE_NAME, documentDocxToken)
|
||||||
|
.put(Entity.form(new Form()
|
||||||
|
.param("title", "My super title document 1")
|
||||||
|
.param("description", "My super description for document 1")
|
||||||
|
.param("language", "eng")
|
||||||
|
.param("create_date", Long.toString(create1Date))), JsonObject.class);
|
||||||
|
String document1Id = json.getString("id");
|
||||||
|
Assert.assertNotNull(document1Id);
|
||||||
|
|
||||||
|
// Add a DOCX file
|
||||||
|
String file1Id = null;
|
||||||
|
try (InputStream is = Resources.getResource("file/document.docx").openStream()) {
|
||||||
|
StreamDataBodyPart streamDataBodyPart = new StreamDataBodyPart("file", is, "document.docx");
|
||||||
|
try (FormDataMultiPart multiPart = new FormDataMultiPart()) {
|
||||||
|
json = target()
|
||||||
|
.register(MultiPartFeature.class)
|
||||||
|
.path("/file").request()
|
||||||
|
.cookie(TokenBasedSecurityFilter.COOKIE_NAME, documentDocxToken)
|
||||||
|
.put(Entity.entity(multiPart.field("id", document1Id).bodyPart(streamDataBodyPart),
|
||||||
|
MediaType.MULTIPART_FORM_DATA_TYPE), JsonObject.class);
|
||||||
|
file1Id = json.getString("id");
|
||||||
|
Assert.assertNotNull(file1Id);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Search documents by query in full content
|
||||||
|
json = target().path("/document/list")
|
||||||
|
.queryParam("search", "full:dolor")
|
||||||
|
.request()
|
||||||
|
.cookie(TokenBasedSecurityFilter.COOKIE_NAME, documentDocxToken)
|
||||||
|
.get(JsonObject.class);
|
||||||
|
Assert.assertTrue(json.getJsonArray("documents").size() == 1);
|
||||||
|
|
||||||
|
// Get the file thumbnail data
|
||||||
|
Response response = target().path("/file/" + file1Id + "/data")
|
||||||
|
.queryParam("size", "thumb")
|
||||||
|
.request()
|
||||||
|
.cookie(TokenBasedSecurityFilter.COOKIE_NAME, documentDocxToken)
|
||||||
|
.get();
|
||||||
|
InputStream is = (InputStream) response.getEntity();
|
||||||
|
byte[] fileBytes = ByteStreams.toByteArray(is);
|
||||||
|
Assert.assertTrue(fileBytes.length > 0); // Images rendered from PDF differ in size from OS to OS due to font issues
|
||||||
|
Assert.assertEquals(MimeType.IMAGE_JPEG, MimeTypeUtil.guessMimeType(fileBytes));
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Test PDF extraction.
|
* Test PDF extraction.
|
||||||
*
|
*
|
||||||
@ -274,14 +392,14 @@ public class TestDocumentResource extends BaseJerseyTest {
|
|||||||
*/
|
*/
|
||||||
@Test
|
@Test
|
||||||
public void testPdfExtraction() throws Exception {
|
public void testPdfExtraction() throws Exception {
|
||||||
// Login document2
|
// Login document_pdf
|
||||||
clientUtil.createUser("document2");
|
clientUtil.createUser("document_pdf");
|
||||||
String document2Token = clientUtil.login("document2");
|
String documentPdfToken = clientUtil.login("document_pdf");
|
||||||
|
|
||||||
// Create a document
|
// Create a document
|
||||||
long create1Date = new Date().getTime();
|
long create1Date = new Date().getTime();
|
||||||
JsonObject json = target().path("/document").request()
|
JsonObject json = target().path("/document").request()
|
||||||
.cookie(TokenBasedSecurityFilter.COOKIE_NAME, document2Token)
|
.cookie(TokenBasedSecurityFilter.COOKIE_NAME, documentPdfToken)
|
||||||
.put(Entity.form(new Form()
|
.put(Entity.form(new Form()
|
||||||
.param("title", "My super title document 1")
|
.param("title", "My super title document 1")
|
||||||
.param("description", "My super description for document 1")
|
.param("description", "My super description for document 1")
|
||||||
@ -298,7 +416,7 @@ public class TestDocumentResource extends BaseJerseyTest {
|
|||||||
json = target()
|
json = target()
|
||||||
.register(MultiPartFeature.class)
|
.register(MultiPartFeature.class)
|
||||||
.path("/file").request()
|
.path("/file").request()
|
||||||
.cookie(TokenBasedSecurityFilter.COOKIE_NAME, document2Token)
|
.cookie(TokenBasedSecurityFilter.COOKIE_NAME, documentPdfToken)
|
||||||
.put(Entity.entity(multiPart.field("id", document1Id).bodyPart(streamDataBodyPart),
|
.put(Entity.entity(multiPart.field("id", document1Id).bodyPart(streamDataBodyPart),
|
||||||
MediaType.MULTIPART_FORM_DATA_TYPE), JsonObject.class);
|
MediaType.MULTIPART_FORM_DATA_TYPE), JsonObject.class);
|
||||||
file1Id = json.getString("id");
|
file1Id = json.getString("id");
|
||||||
@ -310,7 +428,7 @@ public class TestDocumentResource extends BaseJerseyTest {
|
|||||||
json = target().path("/document/list")
|
json = target().path("/document/list")
|
||||||
.queryParam("search", "full:vrandecic")
|
.queryParam("search", "full:vrandecic")
|
||||||
.request()
|
.request()
|
||||||
.cookie(TokenBasedSecurityFilter.COOKIE_NAME, document2Token)
|
.cookie(TokenBasedSecurityFilter.COOKIE_NAME, documentPdfToken)
|
||||||
.get(JsonObject.class);
|
.get(JsonObject.class);
|
||||||
Assert.assertTrue(json.getJsonArray("documents").size() == 1);
|
Assert.assertTrue(json.getJsonArray("documents").size() == 1);
|
||||||
|
|
||||||
@ -318,7 +436,7 @@ public class TestDocumentResource extends BaseJerseyTest {
|
|||||||
Response response = target().path("/file/" + file1Id + "/data")
|
Response response = target().path("/file/" + file1Id + "/data")
|
||||||
.queryParam("size", "thumb")
|
.queryParam("size", "thumb")
|
||||||
.request()
|
.request()
|
||||||
.cookie(TokenBasedSecurityFilter.COOKIE_NAME, document2Token)
|
.cookie(TokenBasedSecurityFilter.COOKIE_NAME, documentPdfToken)
|
||||||
.get();
|
.get();
|
||||||
InputStream is = (InputStream) response.getEntity();
|
InputStream is = (InputStream) response.getEntity();
|
||||||
byte[] fileBytes = ByteStreams.toByteArray(is);
|
byte[] fileBytes = ByteStreams.toByteArray(is);
|
||||||
|
BIN
docs-web/src/test/resources/file/document.docx
Normal file
BIN
docs-web/src/test/resources/file/document.docx
Normal file
Binary file not shown.
BIN
docs-web/src/test/resources/file/document.odt
Normal file
BIN
docs-web/src/test/resources/file/document.odt
Normal file
Binary file not shown.
Loading…
Reference in New Issue
Block a user