diff --git a/.travis.yml b/.travis.yml index 6d4f395f..75ab2199 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,7 +3,7 @@ dist: trusty language: java before_install: - sudo apt-get -qq update - - sudo apt-get -y -q install tesseract-ocr tesseract-ocr-fra tesseract-ocr-ita tesseract-ocr-kor tesseract-ocr-rus tesseract-ocr-ukr tesseract-ocr-spa tesseract-ocr-ara tesseract-ocr-hin tesseract-ocr-deu tesseract-ocr-pol tesseract-ocr-jpn tesseract-ocr-por tesseract-ocr-tha tesseract-ocr-jpn tesseract-ocr-chi-sim tesseract-ocr-chi-tra + - sudo apt-get -y -q install ffmpeg mediainfo tesseract-ocr tesseract-ocr-fra tesseract-ocr-ita tesseract-ocr-kor tesseract-ocr-rus tesseract-ocr-ukr tesseract-ocr-spa tesseract-ocr-ara tesseract-ocr-hin tesseract-ocr-deu tesseract-ocr-pol tesseract-ocr-jpn tesseract-ocr-por tesseract-ocr-tha tesseract-ocr-jpn tesseract-ocr-chi-sim tesseract-ocr-chi-tra - sudo apt-get -y -q install haveged && sudo service haveged start after_success: - mvn -Pprod -DskipTests clean install diff --git a/Dockerfile b/Dockerfile index d9c9a2d4..fef4e570 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,7 +1,7 @@ FROM sismics/jetty:9.2.20-jdk7 MAINTAINER b.gamard@sismics.com -RUN apt-get update && apt-get -y -q install tesseract-ocr tesseract-ocr-fra tesseract-ocr-ita tesseract-ocr-kor tesseract-ocr-rus tesseract-ocr-ukr tesseract-ocr-spa tesseract-ocr-ara tesseract-ocr-hin tesseract-ocr-deu tesseract-ocr-pol tesseract-ocr-jpn tesseract-ocr-por tesseract-ocr-tha tesseract-ocr-jpn tesseract-ocr-chi-sim tesseract-ocr-chi-tra && \ +RUN apt-get update && apt-get -y -q install ffmpeg mediainfo tesseract-ocr tesseract-ocr-fra tesseract-ocr-ita tesseract-ocr-kor tesseract-ocr-rus tesseract-ocr-ukr tesseract-ocr-spa tesseract-ocr-ara tesseract-ocr-hin tesseract-ocr-deu tesseract-ocr-pol tesseract-ocr-jpn tesseract-ocr-por tesseract-ocr-tha tesseract-ocr-jpn tesseract-ocr-chi-sim tesseract-ocr-chi-tra && \ apt-get clean && rm -rf /var/lib/apt/lists/* ENV TESSDATA_PREFIX 
/usr/share/tesseract-ocr diff --git a/README.md b/README.md index ea2c6987..64345a0b 100644 --- a/README.md +++ b/README.md @@ -30,6 +30,7 @@ Features - Responsive user interface - Optical character recognition - Support image, PDF, ODT and DOCX files +- Video file support ![New!](https://www.sismics.com/public/img/new.png) - Flexible search engine - Full text search in all supported files - All [Dublin Core](http://dublincore.org/) metadata @@ -47,7 +48,7 @@ Features - Document sharing by URL - RESTful Web API - Fully featured Android client -- [Mass files importer](https://github.com/sismics/docs/tree/master/docs-importer) (single or scan mode) ![New!](https://www.sismics.com/public/img/new.png) +- [Bulk files importer](https://github.com/sismics/docs/tree/master/docs-importer) (single or scan mode) ![New!](https://www.sismics.com/public/img/new.png) - Tested to 100k documents Download diff --git a/docs-core/src/main/java/com/sismics/docs/core/listener/async/FileCreatedAsyncListener.java b/docs-core/src/main/java/com/sismics/docs/core/listener/async/FileCreatedAsyncListener.java index b2992de4..b476d1c1 100644 --- a/docs-core/src/main/java/com/sismics/docs/core/listener/async/FileCreatedAsyncListener.java +++ b/docs-core/src/main/java/com/sismics/docs/core/listener/async/FileCreatedAsyncListener.java @@ -30,15 +30,13 @@ public class FileCreatedAsyncListener { * @throws Exception e */ @Subscribe - public void on(final FileCreatedAsyncEvent fileCreatedAsyncEvent) throws Exception { + public void on(final FileCreatedAsyncEvent fileCreatedAsyncEvent) { if (log.isInfoEnabled()) { log.info("File created event: " + fileCreatedAsyncEvent.toString()); } - // Guess the mime type a second time, for open document format (first detected as simple ZIP file) - final File file = fileCreatedAsyncEvent.getFile(); - // Extract text content from the file + final File file = fileCreatedAsyncEvent.getFile(); long startTime = System.currentTimeMillis(); final String content = 
FileUtil.extractContent(fileCreatedAsyncEvent.getLanguage(), file, fileCreatedAsyncEvent.getUnencryptedFile(), fileCreatedAsyncEvent.getUnencryptedPdfFile()); diff --git a/docs-core/src/main/java/com/sismics/docs/core/util/FileUtil.java b/docs-core/src/main/java/com/sismics/docs/core/util/FileUtil.java index 8d6db868..3da17017 100644 --- a/docs-core/src/main/java/com/sismics/docs/core/util/FileUtil.java +++ b/docs-core/src/main/java/com/sismics/docs/core/util/FileUtil.java @@ -12,6 +12,7 @@ import com.sismics.tess4j.Tesseract; import com.sismics.util.ImageDeskew; import com.sismics.util.ImageUtil; import com.sismics.util.Scalr; +import com.sismics.util.VideoUtil; import com.sismics.util.context.ThreadLocalContext; import com.sismics.util.mime.MimeTypeUtil; import org.apache.commons.lang.StringUtils; @@ -54,6 +55,8 @@ public class FileUtil { if (ImageUtil.isImage(file.getMimeType())) { content = ocrFile(unencryptedFile, language); + } else if (VideoUtil.isVideo(file.getMimeType())) { + content = VideoUtil.getMetadata(unencryptedFile); } else if (unencryptedPdfFile != null) { content = PdfUtil.extractPdf(unencryptedPdfFile); } @@ -114,8 +117,12 @@ public class FileUtil { Files.copy(new CipherInputStream(inputStream, cipher), path); } - // Generate file variations - saveVariations(file, unencryptedFile, unencryptedPdfFile, cipher); + // Generate file variations (errors non-blocking) + try { + saveVariations(file, unencryptedFile, unencryptedPdfFile, cipher); + } catch (Exception e) { + log.error("Unable to generate thumbnails", e); + } } /** @@ -132,6 +139,8 @@ public class FileUtil { try (InputStream inputStream = Files.newInputStream(unencryptedFile)) { image = ImageIO.read(inputStream); } + } else if (VideoUtil.isVideo(file.getMimeType())) { + image = VideoUtil.getThumbnail(unencryptedFile); } else if (unencryptedPdfFile != null) { // Generate preview from the first page of the PDF image = PdfUtil.renderFirstPage(unencryptedPdfFile); diff --git 
a/docs-core/src/main/java/com/sismics/util/ImageUtil.java b/docs-core/src/main/java/com/sismics/util/ImageUtil.java index edc4efa2..33a6ddee 100644 --- a/docs-core/src/main/java/com/sismics/util/ImageUtil.java +++ b/docs-core/src/main/java/com/sismics/util/ImageUtil.java @@ -22,13 +22,12 @@ import java.util.Iterator; * @author jtremeaux */ public class ImageUtil { - /** * Write a high quality JPEG. * - * @param image + * @param image Image * @param outputStream Output stream - * @throws IOException + * @throws IOException e */ public static void writeJpeg(BufferedImage image, OutputStream outputStream) throws IOException { Iterator iter = ImageIO.getImageWritersByFormatName("jpeg"); @@ -94,6 +93,14 @@ public class ImageUtil { .toString(); } + /** + * Return true if a pixel is black. + * + * @param image Image + * @param x X + * @param y Y + * @return True if black + */ public static boolean isBlack(BufferedImage image, int x, int y) { if (image.getType() == BufferedImage.TYPE_BYTE_BINARY) { WritableRaster raster = image.getRaster(); @@ -105,7 +112,16 @@ public class ImageUtil { return isBlack(image, x, y, luminanceValue); } - public static boolean isBlack(BufferedImage image, int x, int y, int luminanceCutOff) { + /** + * Return true if a pixel is black. + * + * @param image Image + * @param x X + * @param y Y + * @param luminanceCutOff Luminance cutoff + * @return True if black + */ + private static boolean isBlack(BufferedImage image, int x, int y, int luminanceCutOff) { int pixelRGBValue; int r; int g; @@ -124,7 +140,7 @@ public class ImageUtil { b = (pixelRGBValue) & 0xff; luminance = (r * 0.299) + (g * 0.587) + (b * 0.114); } catch (Exception e) { - // ignore. 
+ // NOP } return luminance < luminanceCutOff; diff --git a/docs-core/src/main/java/com/sismics/util/VideoUtil.java b/docs-core/src/main/java/com/sismics/util/VideoUtil.java new file mode 100644 index 00000000..b1cd34d2 --- /dev/null +++ b/docs-core/src/main/java/com/sismics/util/VideoUtil.java @@ -0,0 +1,84 @@ +package com.sismics.util; + +import com.google.common.base.Charsets; +import com.google.common.collect.Lists; +import com.google.common.io.ByteStreams; +import com.sismics.util.io.InputStreamReaderThread; +import com.sismics.util.mime.MimeType; + +import javax.imageio.ImageIO; +import java.awt.image.BufferedImage; +import java.io.IOException; +import java.io.InputStream; +import java.nio.file.Path; +import java.util.Arrays; +import java.util.List; + +/** + * Video processing utilities. + * + * @author bgamard + */ +public class VideoUtil { + /** + * Returns true if this MIME type is a video. + * @param mimeType MIME type + * @return True if video + */ + public static boolean isVideo(String mimeType) { + return mimeType.equals(MimeType.VIDEO_MP4) || mimeType.equals(MimeType.VIDEO_WEBM); + } + + /** + * Generate a thumbnail from a video file. + * + * @param file Video file + * @return Thumbnail + */ + public static BufferedImage getThumbnail(Path file) throws Exception { + List result = Lists.newLinkedList(Arrays.asList("ffmpeg", "-i")); + result.add(file.toAbsolutePath().toString()); + result.addAll(Arrays.asList("-vf", "\"thumbnail\"", "-frames:v", "1", "-f", "mjpeg", "-")); + ProcessBuilder pb = new ProcessBuilder(result); + Process process = pb.start(); + + // Consume the process error stream + final String commandName = pb.command().get(0); + new InputStreamReaderThread(process.getErrorStream(), commandName).start(); + + // Consume the data as an image + try (InputStream is = process.getInputStream()) { + return ImageIO.read(is); + } + } + + /** + * Extract metadata from a video file. 
+ * + * @param file Video file + * @return Metadata + */ + public static String getMetadata(Path file) { + List result = Lists.newLinkedList(); + result.add("mediainfo"); + result.add(file.toAbsolutePath().toString()); + ProcessBuilder pb = new ProcessBuilder(result); + Process process; + try { + process = pb.start(); + } catch (IOException e) { + return null; + } + + // Consume the process error stream + final String commandName = pb.command().get(0); + new InputStreamReaderThread(process.getErrorStream(), commandName).start(); + + // Consume the data as a string + try (InputStream is = process.getInputStream()) { + return new String(ByteStreams.toByteArray(is), Charsets.UTF_8); + } catch (Exception e) { + return null; + } + } +} diff --git a/docs-core/src/main/java/com/sismics/util/io/InputStreamReaderThread.java b/docs-core/src/main/java/com/sismics/util/io/InputStreamReaderThread.java new file mode 100644 index 00000000..23c3819b --- /dev/null +++ b/docs-core/src/main/java/com/sismics/util/io/InputStreamReaderThread.java @@ -0,0 +1,49 @@ +package com.sismics.util.io; + +import com.google.common.io.Closer; +import org.apache.log4j.Logger; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; + +/** + * Thread that consumes data from an input stream and logs it. 
+ * + * @author jtremeaux + */ +public class InputStreamReaderThread extends Thread { + + private static final Logger logger = Logger.getLogger(InputStreamReaderThread.class); + + private InputStream is; + + private String name; + + private Closer closer = Closer.create(); + + public InputStreamReaderThread(InputStream input, String name) { + super(name + " InputStreamReader thread"); + this.is = closer.register(input); + this.name = name; + } + + @Override + public void run() { + try { + BufferedReader reader = closer.register(new BufferedReader(new InputStreamReader(is))); + for (String line = reader.readLine(); line != null; line = reader.readLine()) { + logger.info(String.format(name + ": %s", line)); + } + } catch (IOException x) { + // NOP + } finally { + try { + closer.close(); + } catch (Exception e) { + // NOP + } + } + } +} diff --git a/docs-core/src/main/java/com/sismics/util/mime/MimeType.java b/docs-core/src/main/java/com/sismics/util/mime/MimeType.java index 8e77180a..a97f7807 100644 --- a/docs-core/src/main/java/com/sismics/util/mime/MimeType.java +++ b/docs-core/src/main/java/com/sismics/util/mime/MimeType.java @@ -24,5 +24,9 @@ public class MimeType { public static final String TEXT_CSV = "text/csv"; + public static final String VIDEO_WEBM = "video/webm"; + + public static final String VIDEO_MP4 = "video/mp4"; + public static final String DEFAULT = "application/octet-stream"; } diff --git a/docs-core/src/main/java/com/sismics/util/mime/MimeTypeUtil.java b/docs-core/src/main/java/com/sismics/util/mime/MimeTypeUtil.java index 52d5e1df..b08b471f 100644 --- a/docs-core/src/main/java/com/sismics/util/mime/MimeTypeUtil.java +++ b/docs-core/src/main/java/com/sismics/util/mime/MimeTypeUtil.java @@ -1,5 +1,9 @@ package com.sismics.util.mime; +import com.google.common.base.Charsets; +import com.sismics.docs.core.model.jpa.File; +import org.apache.commons.compress.utils.IOUtils; + import java.io.IOException; import java.io.InputStream; import 
java.io.UnsupportedEncodingException; @@ -8,13 +12,6 @@ import java.nio.file.Path; import java.util.zip.ZipEntry; import java.util.zip.ZipInputStream; -import org.apache.commons.compress.archivers.ArchiveEntry; -import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream; -import org.apache.commons.compress.utils.IOUtils; - -import com.google.common.base.Charsets; -import com.sismics.docs.core.model.jpa.File; - /** * Utility to check MIME types. * @@ -60,6 +57,11 @@ public class MimeTypeUtil { return MimeType.IMAGE_PNG; } else if (headerBytes[0] == ((byte) 0x25) && headerBytes[1] == ((byte) 0x50) && headerBytes[2] == ((byte) 0x44) && headerBytes[3] == ((byte) 0x46)) { return MimeType.APPLICATION_PDF; + } else if (headerBytes[0] == ((byte) 0x00) && headerBytes[1] == ((byte) 0x00) && headerBytes[2] == ((byte) 0x00) && (headerBytes[3] == ((byte) 0x14) || headerBytes[3] == ((byte) 0x18)) + && headerBytes[4] == ((byte) 0x66) && headerBytes[5] == ((byte) 0x74) && headerBytes[6] == ((byte) 0x79) && headerBytes[7] == ((byte) 0x70)) { + return MimeType.VIDEO_MP4; + } else if (headerBytes[0] == ((byte) 0x1a) && headerBytes[1] == ((byte) 0x45) && headerBytes[2] == ((byte) 0xdf) && headerBytes[3] == ((byte) 0xa3)) { + return MimeType.VIDEO_WEBM; } // Detect by file extension @@ -100,6 +102,10 @@ public class MimeTypeUtil { return "txt"; case MimeType.TEXT_CSV: return "csv"; + case MimeType.VIDEO_MP4: + return "mp4"; + case MimeType.VIDEO_WEBM: + return "webm"; default: return "bin"; } diff --git a/docs-core/src/main/resources/messages.properties.fr b/docs-core/src/main/resources/messages.properties.fr new file mode 100644 index 00000000..e80dd012 --- /dev/null +++ b/docs-core/src/main/resources/messages.properties.fr @@ -0,0 +1,10 @@ +email.template.password_recovery.subject=R\u00E9initialiser votre mot de passe +email.template.password_recovery.hello=Bonjour {0}. 
+email.template.password_recovery.instruction1=Nous avons re\u00E7u une demande de r\u00E9initialisation de mot de passe.
Si vous n'avez rien demand\u00E9, vous pouvez ignorer ce mail. +email.template.password_recovery.instruction2=Pour r\u00E9initialiser votre mot de passe, cliquez sur le lien ci-dessous : +email.template.password_recovery.click_here=Cliquez ici pour r\u00E9initialiser votre mot de passe. +email.template.route_step_validate.subject=Un document n\u00E9cessite votre attention +email.template.route_step_validate.hello=Bonjour {0}. +email.template.route_step_validate.instruction1=Une \u00E9tape de workflow vous a \u00E9t\u00E9 attribu\u00E9e et n\u00E9cessite votre attention. +email.template.route_step_validate.instruction2=Pour voir le document et valider le workflow, veuillez visiter le lien ci-dessous : +email.no_html.error=Votre client mail ne supporte pas les messages HTML diff --git a/docs-core/src/main/resources/messages.properties.zh_CN b/docs-core/src/main/resources/messages.properties.zh_CN new file mode 100644 index 00000000..a0627b0d --- /dev/null +++ b/docs-core/src/main/resources/messages.properties.zh_CN @@ -0,0 +1,10 @@ +email.template.password_recovery.subject=\u8BF7\u91CD\u7F6E\u60A8\u7684\u5BC6\u7801 +email.template.password_recovery.hello=\u60A8\u597D {0}. +email.template.password_recovery.instruction1=\u6211\u4EEC\u6536\u5230\u4E86\u4E00\u4E2A\u91CD\u7F6E\u60A8\u7684\u5BC6\u7801\u7684\u8BF7\u6C42\u3002
\u5982\u679C\u60A8\u6CA1\u6709\u53D1\u9001\u8BE5\u8BF7\u6C42\uFF0C\u8BF7\u5FFD\u7565\u6B64\u7535\u5B50\u90AE\u4EF6 +email.template.password_recovery.instruction2=\u8981\u91CD\u7F6E\u60A8\u7684\u5BC6\u7801\uFF0C\u8BF7\u8BBF\u95EE\u4EE5\u4E0B\u94FE\u63A5\uFF1A +email.template.password_recovery.click_here=\u8BF7\u70B9\u51FB\u6B64\u5904\u91CD\u7F6E\u60A8\u7684\u5BC6\u7801 +email.template.route_step_validate.subject=\u4E00\u4EFD\u6587\u4EF6\u9700\u8981\u4F60\u7684\u5173\u6CE8 +email.template.route_step_validate.hello={0}\uFF0C\u60A8\u597D. +email.template.route_step_validate.instruction1=\u5DE5\u4F5C\u6D41\u6B65\u9AA4\u5DF2\u7ECF\u5206\u914D\u7ED9\u60A8\uFF0C\u9700\u8981\u60A8\u7684\u5173\u6CE8\u3002 +email.template.route_step_validate.instruction2=\u8981\u67E5\u770B\u6587\u6863\u5E76\u9A8C\u8BC1\u5DE5\u4F5C\u6D41\u7A0B\uFF0C\u8BF7\u8BBF\u95EE\u4EE5\u4E0B\u94FE\u63A5\uFF1A +email.no_html.error=\u60A8\u7684\u7535\u5B50\u90AE\u4EF6\u5BA2\u6237\u7AEF\u4E0D\u652F\u6301HTML\u683C\u5F0F\u90AE\u4EF6 diff --git a/docs-core/src/main/resources/messages.properties.zh_TW b/docs-core/src/main/resources/messages.properties.zh_TW new file mode 100644 index 00000000..da961212 --- /dev/null +++ b/docs-core/src/main/resources/messages.properties.zh_TW @@ -0,0 +1,10 @@ +email.template.password_recovery.subject=\u8ACB\u91CD\u65B0\u8A2D\u7F6E\u60A8\u7684\u5BC6\u78BC +email.template.password_recovery.hello=\u60A8\u597D{0}\uFF01 +email.template.password_recovery.instruction1=\u6211\u5011\u6536\u5230\u4E86\u91CD\u7F6E\u5BC6\u78BC\u7684\u8ACB\u6C42\u3002
\u5982\u679C\u60A8\u6C92\u6709\u8ACB\u6C42\u5E6B\u52A9\uFF0C\u8ACB\u5FFD\u7565\u6B64\u96FB\u5B50\u90F5\u4EF6\u3002 +email.template.password_recovery.instruction2=\u8981\u91CD\u7F6E\u60A8\u7684\u5BC6\u78BC\uFF0C\u8ACB\u8A2A\u554F\u4EE5\u4E0B\u93C8\u63A5\uFF1A +email.template.password_recovery.click_here=\u9EDE\u64CA\u9019\u88E1\u91CD\u7F6E\u60A8\u7684\u5BC6\u78BC +email.template.route_step_validate.subject=\u4E00\u4EFD\u6587\u4EF6\u9700\u8981\u4F60\u7684\u95DC\u6CE8 +email.template.route_step_validate.hello={0}\uFF0C\u60A8\u597D. +email.template.route_step_validate.instruction1=\u5DE5\u4F5C\u6D41\u6B65\u9A5F\u5DF2\u7D93\u5206\u914D\u7D66\u60A8\uFF0C\u9700\u8981\u60A8\u7684\u95DC\u6CE8\u3002 +email.template.route_step_validate.instruction2=\u8981\u67E5\u770B\u6587\u6A94\u4E26\u9A57\u8B49\u5DE5\u4F5C\u6D41\u7A0B\uFF0C\u8ACB\u8A2A\u554F\u4EE5\u4E0B\u93C8\u63A5\uFF1A +email.no_html.error=\u60A8\u7684\u96FB\u5B50\u90F5\u4EF6\u5BA2\u6236\u7AEF\u4E0D\u652F\u6301HTML\u683C\u5F0F\u90F5\u4EF6 diff --git a/docs-core/src/test/java/com/sismics/util/TestImageUtil.java b/docs-core/src/test/java/com/sismics/util/TestImageUtil.java index 6d980aaa..ffe0928f 100644 --- a/docs-core/src/test/java/com/sismics/util/TestImageUtil.java +++ b/docs-core/src/test/java/com/sismics/util/TestImageUtil.java @@ -11,7 +11,7 @@ import org.junit.Test; public class TestImageUtil { @Test - public void computeGravatarTest() throws Exception { + public void computeGravatarTest() { Assert.assertEquals("0bc83cb571cd1c50ba6f3e8a78ef1346", ImageUtil.computeGravatar("MyEmailAddress@example.com ")); } } diff --git a/docs-web/src/main/webapp/src/partial/docs/file.view.html b/docs-web/src/main/webapp/src/partial/docs/file.view.html index c78062f7..c7f35feb 100644 --- a/docs-web/src/main/webapp/src/partial/docs/file.view.html +++ b/docs-web/src/main/webapp/src/partial/docs/file.view.html @@ -20,12 +20,17 @@ - -
+
+ + + +

{{ 'file.view.not_found' | translate }} diff --git a/docs-web/src/main/webapp/src/style/main.less b/docs-web/src/main/webapp/src/style/main.less index f86e92a1..06970606 100644 --- a/docs-web/src/main/webapp/src/style/main.less +++ b/docs-web/src/main/webapp/src/style/main.less @@ -392,6 +392,33 @@ input[readonly].share-link { } } +// Video player +.video-overlay { + display: block; + position: absolute; + top: 0; + bottom: 0; + left: 0; + right: 0; + font-size: 500%; + color: #242424; + + .glyphicon { + text-shadow: 0 0 20px #fff; + top: 50%; + transform: translateY(-50%); + } + + &:hover { + color: #444; + } + + video { + cursor: default; + width: 100%; + } +} + // Vertical alignment .vertical-center { min-height: 100vh; diff --git a/docs-web/src/test/java/com/sismics/docs/rest/TestDocumentResource.java b/docs-web/src/test/java/com/sismics/docs/rest/TestDocumentResource.java index a38cf032..8c61e591 100644 --- a/docs-web/src/test/java/com/sismics/docs/rest/TestDocumentResource.java +++ b/docs-web/src/test/java/com/sismics/docs/rest/TestDocumentResource.java @@ -603,6 +603,65 @@ public class TestDocumentResource extends BaseJerseyTest { Assert.assertEquals(MimeType.IMAGE_JPEG, MimeTypeUtil.guessMimeType(fileBytes, null)); } + /** + * Test video extraction. 
+ * + * @throws Exception e + */ + @Test + public void testVideoExtraction() throws Exception { + // Login document_video + clientUtil.createUser("document_video"); + String documentPlainToken = clientUtil.login("document_video"); + + // Create a document + long create1Date = new Date().getTime(); + JsonObject json = target().path("/document").request() + .cookie(TokenBasedSecurityFilter.COOKIE_NAME, documentPlainToken) + .put(Entity.form(new Form() + .param("title", "My super title document 1") + .param("description", "My super description for document 1") + .param("language", "eng") + .param("create_date", Long.toString(create1Date))), JsonObject.class); + String document1Id = json.getString("id"); + Assert.assertNotNull(document1Id); + + // Add a video file + String file1Id; + try (InputStream is = Resources.getResource("file/video.webm").openStream()) { + StreamDataBodyPart streamDataBodyPart = new StreamDataBodyPart("file", is, "video.webm"); + try (FormDataMultiPart multiPart = new FormDataMultiPart()) { + json = target() + .register(MultiPartFeature.class) + .path("/file").request() + .cookie(TokenBasedSecurityFilter.COOKIE_NAME, documentPlainToken) + .put(Entity.entity(multiPart.field("id", document1Id).bodyPart(streamDataBodyPart), + MediaType.MULTIPART_FORM_DATA_TYPE), JsonObject.class); + file1Id = json.getString("id"); + Assert.assertNotNull(file1Id); + } + } + + // Search documents by query in full content + json = target().path("/document/list") + .queryParam("search", "full:vp9") + .request() + .cookie(TokenBasedSecurityFilter.COOKIE_NAME, documentPlainToken) + .get(JsonObject.class); + Assert.assertTrue(json.getJsonArray("documents").size() == 1); + + // Get the file thumbnail data + Response response = target().path("/file/" + file1Id + "/data") + .queryParam("size", "thumb") + .request() + .cookie(TokenBasedSecurityFilter.COOKIE_NAME, documentPlainToken) + .get(); + InputStream is = (InputStream) response.getEntity(); + byte[] fileBytes = 
ByteStreams.toByteArray(is); + Assert.assertTrue(fileBytes.length > 0); // Only check that a thumbnail was returned; its exact size is not asserted + Assert.assertEquals(MimeType.IMAGE_JPEG, MimeTypeUtil.guessMimeType(fileBytes, null)); + } + /** * Test EML import. * diff --git a/docs-web/src/test/resources/file/video.webm b/docs-web/src/test/resources/file/video.webm new file mode 100644 index 00000000..0757a975 Binary files /dev/null and b/docs-web/src/test/resources/file/video.webm differ