mirror of
https://github.com/sismics/docs.git
synced 2024-11-22 05:57:57 +01:00
Closes #53: Build thumbnails for DOCX and ODT files
This commit is contained in:
parent
1a37d97a61
commit
7708f61343
@ -21,7 +21,7 @@ Features
|
||||
|
||||
- Responsive user interface
|
||||
- Optical character recognition
|
||||
- Support image and PDF files
|
||||
- Support image, PDF, ODT and DOCX files
|
||||
- Flexible search engine
|
||||
- Full text search in image and PDF
|
||||
- 256-bit AES encryption
|
||||
|
@ -28,58 +28,43 @@ public class FileCreatedAsyncEvent {
|
||||
private InputStream inputStream;
|
||||
|
||||
/**
|
||||
* Getter of file.
|
||||
*
|
||||
* @return the file
|
||||
* Unencrypted input stream containing a PDF representation
|
||||
* of the file. May be null if the PDF conversion is not
|
||||
* necessary or not possible.
|
||||
*/
|
||||
private InputStream pdfInputStream;
|
||||
|
||||
public File getFile() {
|
||||
return file;
|
||||
}
|
||||
|
||||
/**
|
||||
* Setter of file.
|
||||
*
|
||||
* @param file file
|
||||
*/
|
||||
public void setFile(File file) {
|
||||
this.file = file;
|
||||
}
|
||||
|
||||
/**
|
||||
* Getter of document.
|
||||
*
|
||||
* @return the document
|
||||
*/
|
||||
public Document getDocument() {
|
||||
return document;
|
||||
}
|
||||
|
||||
/**
|
||||
* Setter of document.
|
||||
*
|
||||
* @param document document
|
||||
*/
|
||||
public void setDocument(Document document) {
|
||||
this.document = document;
|
||||
}
|
||||
|
||||
/**
|
||||
* Getter of inputStream.
|
||||
*
|
||||
* @return the inputStream
|
||||
*/
|
||||
public InputStream getInputStream() {
|
||||
return inputStream;
|
||||
}
|
||||
|
||||
/**
|
||||
* Setter of inputStream.
|
||||
*
|
||||
* @param inputStream inputStream
|
||||
*/
|
||||
public void setInputStream(InputStream inputStream) {
|
||||
this.inputStream = inputStream;
|
||||
}
|
||||
|
||||
public InputStream getPdfInputStream() {
|
||||
return pdfInputStream;
|
||||
}
|
||||
|
||||
public void setPdfInputStream(InputStream pdfInputStream) {
|
||||
this.pdfInputStream = pdfInputStream;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
|
@ -12,7 +12,6 @@ import com.sismics.docs.core.event.FileCreatedAsyncEvent;
|
||||
import com.sismics.docs.core.model.jpa.File;
|
||||
import com.sismics.docs.core.util.FileUtil;
|
||||
import com.sismics.docs.core.util.TransactionUtil;
|
||||
import com.sismics.util.mime.MimeTypeUtil;
|
||||
|
||||
/**
|
||||
* Listener on file created.
|
||||
@ -39,12 +38,15 @@ public class FileCreatedAsyncListener {
|
||||
|
||||
// Guess the mime type a second time, for open document format (first detected as simple ZIP file)
|
||||
final File file = fileCreatedAsyncEvent.getFile();
|
||||
file.setMimeType(MimeTypeUtil.guessOpenDocumentFormat(file, fileCreatedAsyncEvent.getInputStream()));
|
||||
|
||||
// Extract text content from the file
|
||||
long startTime = System.currentTimeMillis();
|
||||
final String content = FileUtil.extractContent(fileCreatedAsyncEvent.getDocument(), file, fileCreatedAsyncEvent.getInputStream());
|
||||
final String content = FileUtil.extractContent(fileCreatedAsyncEvent.getDocument(), file,
|
||||
fileCreatedAsyncEvent.getInputStream(), fileCreatedAsyncEvent.getPdfInputStream());
|
||||
fileCreatedAsyncEvent.getInputStream().close();
|
||||
if (fileCreatedAsyncEvent.getPdfInputStream() != null) {
|
||||
fileCreatedAsyncEvent.getPdfInputStream().close();
|
||||
}
|
||||
log.info(MessageFormat.format("File content extracted in {0}ms", System.currentTimeMillis() - startTime));
|
||||
|
||||
// Store the text content in the database
|
||||
|
@ -1,6 +1,8 @@
|
||||
package com.sismics.docs.core.util;
|
||||
|
||||
import java.awt.image.BufferedImage;
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStream;
|
||||
@ -48,19 +50,16 @@ public class FileUtil {
|
||||
* @param document Document linked to the file
|
||||
* @param file File to extract
|
||||
* @param inputStream Unencrypted input stream
|
||||
* @param pdfInputStream Unencrypted PDF input stream
|
||||
* @return Content extract
|
||||
*/
|
||||
public static String extractContent(Document document, File file, InputStream inputStream) {
|
||||
public static String extractContent(Document document, File file, InputStream inputStream, InputStream pdfInputStream) {
|
||||
String content = null;
|
||||
|
||||
if (ImageUtil.isImage(file.getMimeType())) {
|
||||
content = ocrFile(inputStream, document);
|
||||
} else if (file.getMimeType().equals(MimeType.APPLICATION_PDF)) {
|
||||
content = extractPdf(inputStream);
|
||||
} else if (file.getMimeType().equals(MimeType.OPEN_DOCUMENT_TEXT)) {
|
||||
content = extractOpenDocumentText(inputStream);
|
||||
} else if (file.getMimeType().equals(MimeType.OFFICE_DOCUMENT)) {
|
||||
content = extractOfficeDocument(inputStream);
|
||||
} else if (pdfInputStream != null) {
|
||||
content = extractPdf(pdfInputStream);
|
||||
}
|
||||
|
||||
return content;
|
||||
@ -129,92 +128,80 @@ public class FileUtil {
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract text from an open document text file.
|
||||
* Convert a file to PDF if necessary.
|
||||
*
|
||||
* @param inputStream Unencrypted input stream
|
||||
* @return Content extracted
|
||||
* @param inputStream InputStream
|
||||
* @param file File
|
||||
* @return PDF input stream
|
||||
* @throws Exception
|
||||
*/
|
||||
private static String extractOpenDocumentText(InputStream inputStream) {
|
||||
String content = null;
|
||||
Path tempFile = null;
|
||||
try {
|
||||
// Convert the ODT file to a temporary PDF file
|
||||
tempFile = Files.createTempFile("sismicsdocs_", ".pdf");
|
||||
try (OutputStream out = Files.newOutputStream(tempFile)) {
|
||||
OdfTextDocument document = OdfTextDocument.loadDocument(inputStream);
|
||||
PdfOptions options = PdfOptions.create();
|
||||
PdfConverter.getInstance().convert(document, out, options);
|
||||
}
|
||||
|
||||
// Extract content from the PDF file
|
||||
try (InputStream pdfInputStream = Files.newInputStream(tempFile)) {
|
||||
content = extractPdf(pdfInputStream);
|
||||
}
|
||||
|
||||
} catch (Exception e) {
|
||||
log.error("Error while extracting text from the ODT", e);
|
||||
} finally {
|
||||
try {
|
||||
Files.delete(tempFile); // Delete the temporary PDF file
|
||||
} catch (IOException e) {
|
||||
// Should not happen
|
||||
}
|
||||
public static InputStream convertToPdf(InputStream inputStream, File file) throws Exception {
|
||||
if (file.getMimeType().equals(MimeType.APPLICATION_PDF)) {
|
||||
// It's already PDF, just return the input
|
||||
return inputStream;
|
||||
}
|
||||
return content;
|
||||
|
||||
if (file.getMimeType().equals(MimeType.OFFICE_DOCUMENT)) {
|
||||
return convertOfficeDocument(inputStream);
|
||||
}
|
||||
|
||||
if (file.getMimeType().equals(MimeType.OPEN_DOCUMENT_TEXT)) {
|
||||
return convertOpenDocumentText(inputStream);
|
||||
}
|
||||
|
||||
// PDF conversion not necessary/possible
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract text from an Office document.
|
||||
* Convert an open document text file to PDF.
|
||||
*
|
||||
* @param inputStream Unencrypted input stream
|
||||
* @return Content extracted
|
||||
* @return PDF input stream
|
||||
* @throws Exception
|
||||
*/
|
||||
private static String extractOfficeDocument(InputStream inputStream) {
|
||||
String content = null;
|
||||
Path tempFile = null;
|
||||
try {
|
||||
// Convert the DOCX file to a temporary PDF file
|
||||
tempFile = Files.createTempFile("sismicsdocs_", ".pdf");
|
||||
try (OutputStream out = Files.newOutputStream(tempFile)) {
|
||||
XWPFDocument document = new XWPFDocument(inputStream);
|
||||
org.apache.poi.xwpf.converter.pdf.PdfOptions options = org.apache.poi.xwpf.converter.pdf.PdfOptions.create();
|
||||
org.apache.poi.xwpf.converter.pdf.PdfConverter.getInstance().convert(document, out, options);
|
||||
}
|
||||
|
||||
// Extract content from the PDF file
|
||||
try (InputStream pdfInputStream = Files.newInputStream(tempFile)) {
|
||||
content = extractPdf(pdfInputStream);
|
||||
}
|
||||
|
||||
} catch (Exception e) {
|
||||
log.error("Error while extracting text from the DOCX", e);
|
||||
} finally {
|
||||
try {
|
||||
Files.delete(tempFile); // Delete the temporary PDF file
|
||||
} catch (IOException e) {
|
||||
// Should not happen
|
||||
}
|
||||
}
|
||||
return content;
|
||||
private static InputStream convertOpenDocumentText(InputStream inputStream) throws Exception {
|
||||
ByteArrayOutputStream pdfOutputStream = new ByteArrayOutputStream();
|
||||
OdfTextDocument document = OdfTextDocument.loadDocument(inputStream);
|
||||
PdfOptions options = PdfOptions.create();
|
||||
PdfConverter.getInstance().convert(document, pdfOutputStream, options);
|
||||
inputStream.reset();
|
||||
return new ByteArrayInputStream(pdfOutputStream.toByteArray());
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert an Office document to PDF.
|
||||
*
|
||||
* @param inputStream Unencrypted input stream
|
||||
* @return PDF input stream
|
||||
* @throws Exception
|
||||
*/
|
||||
private static InputStream convertOfficeDocument(InputStream inputStream) throws Exception {
|
||||
ByteArrayOutputStream pdfOutputStream = new ByteArrayOutputStream();
|
||||
XWPFDocument document = new XWPFDocument(inputStream);
|
||||
org.apache.poi.xwpf.converter.pdf.PdfOptions options = org.apache.poi.xwpf.converter.pdf.PdfOptions.create();
|
||||
org.apache.poi.xwpf.converter.pdf.PdfConverter.getInstance().convert(document, pdfOutputStream, options);
|
||||
inputStream.reset();
|
||||
return new ByteArrayInputStream(pdfOutputStream.toByteArray());
|
||||
}
|
||||
|
||||
/**
|
||||
* Save a file on the storage filesystem.
|
||||
*
|
||||
* @param inputStream Unencrypted input stream
|
||||
* @param pdfInputStream Unencrypted PDF input stream
|
||||
* @param file File to save
|
||||
* @param privateKey Private key used for encryption
|
||||
* @throws Exception
|
||||
*/
|
||||
public static void save(InputStream inputStream, File file, String privateKey) throws Exception {
|
||||
public static void save(InputStream inputStream, InputStream pdfInputStream, File file, String privateKey) throws Exception {
|
||||
Cipher cipher = EncryptionUtil.getEncryptionCipher(privateKey);
|
||||
Path path = DirectoryUtil.getStorageDirectory().resolve(file.getId());
|
||||
Files.copy(new CipherInputStream(inputStream, cipher), path);
|
||||
inputStream.reset();
|
||||
|
||||
// Generate file variations
|
||||
inputStream.reset();
|
||||
saveVariations(file, inputStream, cipher);
|
||||
inputStream.reset();
|
||||
saveVariations(file, inputStream, pdfInputStream, cipher);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -222,25 +209,27 @@ public class FileUtil {
|
||||
*
|
||||
* @param file File from database
|
||||
* @param inputStream Unencrypted input stream
|
||||
* @param pdfInputStream Unencrypted PDF input stream
|
||||
* @param cipher Cipher to use for encryption
|
||||
* @throws Exception
|
||||
*/
|
||||
public static void saveVariations(File file, InputStream inputStream, Cipher cipher) throws Exception {
|
||||
public static void saveVariations(File file, InputStream inputStream, InputStream pdfInputStream, Cipher cipher) throws Exception {
|
||||
BufferedImage image = null;
|
||||
if (ImageUtil.isImage(file.getMimeType())) {
|
||||
image = ImageIO.read(inputStream);
|
||||
} else if(file.getMimeType().equals(MimeType.APPLICATION_PDF)) {
|
||||
inputStream.reset();
|
||||
} else if(pdfInputStream != null) {
|
||||
// Generate preview from the first page of the PDF
|
||||
PDDocument pdfDocument = null;
|
||||
try {
|
||||
pdfDocument = PDDocument.load(inputStream);
|
||||
pdfDocument = PDDocument.load(pdfInputStream);
|
||||
PDFRenderer renderer = new PDFRenderer(pdfDocument);
|
||||
image = renderer.renderImage(0);
|
||||
pdfInputStream.reset();
|
||||
} finally {
|
||||
pdfDocument.close();
|
||||
}
|
||||
}
|
||||
// TODO Generate thumbnails for DOCX/ODT documents (guess the MIME type earlier and build a PDF version now?)
|
||||
|
||||
if (image != null) {
|
||||
// Generate thumbnails from image
|
||||
|
@ -1,9 +1,11 @@
|
||||
package com.sismics.docs.core.util;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.InputStream;
|
||||
|
||||
import junit.framework.Assert;
|
||||
|
||||
import org.apache.pdfbox.io.IOUtils;
|
||||
import org.junit.Test;
|
||||
|
||||
import com.google.common.io.Resources;
|
||||
@ -18,19 +20,25 @@ import com.sismics.util.mime.MimeType;
|
||||
public class TestFileUtil {
|
||||
@Test
|
||||
public void extractContentOpenDocumentTextTest() throws Exception {
|
||||
try (InputStream inputStream = Resources.getResource("file/document.odt").openStream()) {
|
||||
try (InputStream inputStream = Resources.getResource("file/document.odt").openStream();
|
||||
InputStream bytesInputStream = new ByteArrayInputStream(IOUtils.toByteArray(inputStream))) {
|
||||
File file = new File();
|
||||
file.setMimeType(MimeType.OPEN_DOCUMENT_TEXT);
|
||||
Assert.assertEquals("Lorem ipsum dolor sit amen.\r\n", FileUtil.extractContent(null, file, inputStream));
|
||||
try (InputStream pdfInputStream = FileUtil.convertToPdf(bytesInputStream, file)) {
|
||||
Assert.assertEquals("Lorem ipsum dolor sit amen.\r\n", FileUtil.extractContent(null, file, inputStream, pdfInputStream));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void extractContentOfficeDocumentTest() throws Exception {
|
||||
try (InputStream inputStream = Resources.getResource("file/document.docx").openStream()) {
|
||||
try (InputStream inputStream = Resources.getResource("file/document.docx").openStream();
|
||||
InputStream bytesInputStream = new ByteArrayInputStream(IOUtils.toByteArray(inputStream))) {
|
||||
File file = new File();
|
||||
file.setMimeType(MimeType.OFFICE_DOCUMENT);
|
||||
Assert.assertEquals("Lorem ipsum dolor sit amen.\r\n", FileUtil.extractContent(null, file, inputStream));
|
||||
try (InputStream pdfInputStream = FileUtil.convertToPdf(bytesInputStream, file)) {
|
||||
Assert.assertEquals("Lorem ipsum dolor sit amen.\r\n", FileUtil.extractContent(null, file, inputStream, pdfInputStream));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -146,8 +146,14 @@ public class FileResource extends BaseResource {
|
||||
file.setUserId(principal.getId());
|
||||
String fileId = fileDao.create(file);
|
||||
|
||||
// Guess the mime type a second time, for open document format (first detected as simple ZIP file)
|
||||
file.setMimeType(MimeTypeUtil.guessOpenDocumentFormat(file, fileInputStream));
|
||||
|
||||
// Convert to PDF if necessary (for thumbnail and text extraction)
|
||||
InputStream pdfIntputStream = FileUtil.convertToPdf(fileInputStream, file);
|
||||
|
||||
// Save the file
|
||||
FileUtil.save(fileInputStream, file, user.getPrivateKey());
|
||||
FileUtil.save(fileInputStream, pdfIntputStream, file, user.getPrivateKey());
|
||||
|
||||
// Update the user quota
|
||||
user.setStorageCurrent(user.getStorageCurrent() + fileData.length);
|
||||
@ -159,6 +165,7 @@ public class FileResource extends BaseResource {
|
||||
fileCreatedAsyncEvent.setDocument(document);
|
||||
fileCreatedAsyncEvent.setFile(file);
|
||||
fileCreatedAsyncEvent.setInputStream(fileInputStream);
|
||||
fileCreatedAsyncEvent.setPdfInputStream(pdfIntputStream);
|
||||
AppContext.getInstance().getAsyncEventBus().post(fileCreatedAsyncEvent);
|
||||
}
|
||||
|
||||
|
@ -39,7 +39,8 @@
|
||||
<label class="col-sm-2 control-label" for="inputFiles">New files</label>
|
||||
<div class="col-sm-6">
|
||||
<file class="form-control" id="inputFiles" multiple="multiple" ng-model="newFiles"
|
||||
accept="image/png,image/jpg,image/jpeg,image/gif,application/pdf" ng-disabled="fileIsUploading"></file>
|
||||
accept="image/png,image/jpg,image/jpeg,image/gif,application/pdf,application/vnd.oasis.opendocument.text,application/vnd.openxmlformats-officedocument.wordprocessingml.document"
|
||||
ng-disabled="fileIsUploading"></file>
|
||||
</div>
|
||||
<div class="col-sm-4" ng-if="orphanFiles.length > 0">
|
||||
+ {{ orphanFiles.length }} file{{ orphanFiles.length > 1 ? 's' : '' }}
|
||||
|
@ -267,6 +267,124 @@ public class TestDocumentResource extends BaseJerseyTest {
|
||||
return json.getJsonArray("documents").size();
|
||||
}
|
||||
|
||||
/**
|
||||
* Test ODT extraction.
|
||||
*
|
||||
* @throws Exception
|
||||
*/
|
||||
@Test
|
||||
public void testOdtExtraction() throws Exception {
|
||||
// Login document_odt
|
||||
clientUtil.createUser("document_odt");
|
||||
String documentOdtToken = clientUtil.login("document_odt");
|
||||
|
||||
// Create a document
|
||||
long create1Date = new Date().getTime();
|
||||
JsonObject json = target().path("/document").request()
|
||||
.cookie(TokenBasedSecurityFilter.COOKIE_NAME, documentOdtToken)
|
||||
.put(Entity.form(new Form()
|
||||
.param("title", "My super title document 1")
|
||||
.param("description", "My super description for document 1")
|
||||
.param("language", "eng")
|
||||
.param("create_date", Long.toString(create1Date))), JsonObject.class);
|
||||
String document1Id = json.getString("id");
|
||||
Assert.assertNotNull(document1Id);
|
||||
|
||||
// Add an ODT file
|
||||
String file1Id = null;
|
||||
try (InputStream is = Resources.getResource("file/document.odt").openStream()) {
|
||||
StreamDataBodyPart streamDataBodyPart = new StreamDataBodyPart("file", is, "document.odt");
|
||||
try (FormDataMultiPart multiPart = new FormDataMultiPart()) {
|
||||
json = target()
|
||||
.register(MultiPartFeature.class)
|
||||
.path("/file").request()
|
||||
.cookie(TokenBasedSecurityFilter.COOKIE_NAME, documentOdtToken)
|
||||
.put(Entity.entity(multiPart.field("id", document1Id).bodyPart(streamDataBodyPart),
|
||||
MediaType.MULTIPART_FORM_DATA_TYPE), JsonObject.class);
|
||||
file1Id = json.getString("id");
|
||||
Assert.assertNotNull(file1Id);
|
||||
}
|
||||
}
|
||||
|
||||
// Search documents by query in full content
|
||||
json = target().path("/document/list")
|
||||
.queryParam("search", "full:ipsum")
|
||||
.request()
|
||||
.cookie(TokenBasedSecurityFilter.COOKIE_NAME, documentOdtToken)
|
||||
.get(JsonObject.class);
|
||||
Assert.assertTrue(json.getJsonArray("documents").size() == 1);
|
||||
|
||||
// Get the file thumbnail data
|
||||
Response response = target().path("/file/" + file1Id + "/data")
|
||||
.queryParam("size", "thumb")
|
||||
.request()
|
||||
.cookie(TokenBasedSecurityFilter.COOKIE_NAME, documentOdtToken)
|
||||
.get();
|
||||
InputStream is = (InputStream) response.getEntity();
|
||||
byte[] fileBytes = ByteStreams.toByteArray(is);
|
||||
Assert.assertTrue(fileBytes.length > 0); // Images rendered from PDF differ in size from OS to OS due to font issues
|
||||
Assert.assertEquals(MimeType.IMAGE_JPEG, MimeTypeUtil.guessMimeType(fileBytes));
|
||||
}
|
||||
|
||||
/**
|
||||
* Test DOCX extraction.
|
||||
*
|
||||
* @throws Exception
|
||||
*/
|
||||
@Test
|
||||
public void testDocxExtraction() throws Exception {
|
||||
// Login document_docx
|
||||
clientUtil.createUser("document_docx");
|
||||
String documentDocxToken = clientUtil.login("document_docx");
|
||||
|
||||
// Create a document
|
||||
long create1Date = new Date().getTime();
|
||||
JsonObject json = target().path("/document").request()
|
||||
.cookie(TokenBasedSecurityFilter.COOKIE_NAME, documentDocxToken)
|
||||
.put(Entity.form(new Form()
|
||||
.param("title", "My super title document 1")
|
||||
.param("description", "My super description for document 1")
|
||||
.param("language", "eng")
|
||||
.param("create_date", Long.toString(create1Date))), JsonObject.class);
|
||||
String document1Id = json.getString("id");
|
||||
Assert.assertNotNull(document1Id);
|
||||
|
||||
// Add a DOCX file
|
||||
String file1Id = null;
|
||||
try (InputStream is = Resources.getResource("file/document.docx").openStream()) {
|
||||
StreamDataBodyPart streamDataBodyPart = new StreamDataBodyPart("file", is, "document.docx");
|
||||
try (FormDataMultiPart multiPart = new FormDataMultiPart()) {
|
||||
json = target()
|
||||
.register(MultiPartFeature.class)
|
||||
.path("/file").request()
|
||||
.cookie(TokenBasedSecurityFilter.COOKIE_NAME, documentDocxToken)
|
||||
.put(Entity.entity(multiPart.field("id", document1Id).bodyPart(streamDataBodyPart),
|
||||
MediaType.MULTIPART_FORM_DATA_TYPE), JsonObject.class);
|
||||
file1Id = json.getString("id");
|
||||
Assert.assertNotNull(file1Id);
|
||||
}
|
||||
}
|
||||
|
||||
// Search documents by query in full content
|
||||
json = target().path("/document/list")
|
||||
.queryParam("search", "full:dolor")
|
||||
.request()
|
||||
.cookie(TokenBasedSecurityFilter.COOKIE_NAME, documentDocxToken)
|
||||
.get(JsonObject.class);
|
||||
Assert.assertTrue(json.getJsonArray("documents").size() == 1);
|
||||
|
||||
// Get the file thumbnail data
|
||||
Response response = target().path("/file/" + file1Id + "/data")
|
||||
.queryParam("size", "thumb")
|
||||
.request()
|
||||
.cookie(TokenBasedSecurityFilter.COOKIE_NAME, documentDocxToken)
|
||||
.get();
|
||||
InputStream is = (InputStream) response.getEntity();
|
||||
byte[] fileBytes = ByteStreams.toByteArray(is);
|
||||
Assert.assertTrue(fileBytes.length > 0); // Images rendered from PDF differ in size from OS to OS due to font issues
|
||||
Assert.assertEquals(MimeType.IMAGE_JPEG, MimeTypeUtil.guessMimeType(fileBytes));
|
||||
}
|
||||
|
||||
/**
|
||||
* Test PDF extraction.
|
||||
*
|
||||
@ -274,14 +392,14 @@ public class TestDocumentResource extends BaseJerseyTest {
|
||||
*/
|
||||
@Test
|
||||
public void testPdfExtraction() throws Exception {
|
||||
// Login document2
|
||||
clientUtil.createUser("document2");
|
||||
String document2Token = clientUtil.login("document2");
|
||||
// Login document_pdf
|
||||
clientUtil.createUser("document_pdf");
|
||||
String documentPdfToken = clientUtil.login("document_pdf");
|
||||
|
||||
// Create a document
|
||||
long create1Date = new Date().getTime();
|
||||
JsonObject json = target().path("/document").request()
|
||||
.cookie(TokenBasedSecurityFilter.COOKIE_NAME, document2Token)
|
||||
.cookie(TokenBasedSecurityFilter.COOKIE_NAME, documentPdfToken)
|
||||
.put(Entity.form(new Form()
|
||||
.param("title", "My super title document 1")
|
||||
.param("description", "My super description for document 1")
|
||||
@ -298,7 +416,7 @@ public class TestDocumentResource extends BaseJerseyTest {
|
||||
json = target()
|
||||
.register(MultiPartFeature.class)
|
||||
.path("/file").request()
|
||||
.cookie(TokenBasedSecurityFilter.COOKIE_NAME, document2Token)
|
||||
.cookie(TokenBasedSecurityFilter.COOKIE_NAME, documentPdfToken)
|
||||
.put(Entity.entity(multiPart.field("id", document1Id).bodyPart(streamDataBodyPart),
|
||||
MediaType.MULTIPART_FORM_DATA_TYPE), JsonObject.class);
|
||||
file1Id = json.getString("id");
|
||||
@ -310,7 +428,7 @@ public class TestDocumentResource extends BaseJerseyTest {
|
||||
json = target().path("/document/list")
|
||||
.queryParam("search", "full:vrandecic")
|
||||
.request()
|
||||
.cookie(TokenBasedSecurityFilter.COOKIE_NAME, document2Token)
|
||||
.cookie(TokenBasedSecurityFilter.COOKIE_NAME, documentPdfToken)
|
||||
.get(JsonObject.class);
|
||||
Assert.assertTrue(json.getJsonArray("documents").size() == 1);
|
||||
|
||||
@ -318,7 +436,7 @@ public class TestDocumentResource extends BaseJerseyTest {
|
||||
Response response = target().path("/file/" + file1Id + "/data")
|
||||
.queryParam("size", "thumb")
|
||||
.request()
|
||||
.cookie(TokenBasedSecurityFilter.COOKIE_NAME, document2Token)
|
||||
.cookie(TokenBasedSecurityFilter.COOKIE_NAME, documentPdfToken)
|
||||
.get();
|
||||
InputStream is = (InputStream) response.getEntity();
|
||||
byte[] fileBytes = ByteStreams.toByteArray(is);
|
||||
|
BIN
docs-web/src/test/resources/file/document.docx
Normal file
BIN
docs-web/src/test/resources/file/document.docx
Normal file
Binary file not shown.
BIN
docs-web/src/test/resources/file/document.odt
Normal file
BIN
docs-web/src/test/resources/file/document.odt
Normal file
Binary file not shown.
Loading…
Reference in New Issue
Block a user