diff --git a/docs-core/pom.xml b/docs-core/pom.xml
index 77ea42ed..d80e2d8d 100644
--- a/docs-core/pom.xml
+++ b/docs-core/pom.xml
@@ -189,7 +189,26 @@
org.postgresql
postgresql
-
+
+
+
+ javax.xml.bind
+ jaxb-api
+ 2.3.0
+
+
+
+ com.sun.xml.bind
+ jaxb-core
+ 2.3.0
+
+
+
+ com.sun.xml.bind
+ jaxb-impl
+ 2.3.0
+
+
junit
diff --git a/docs-core/src/main/java/com/sismics/docs/core/util/format/PdfFormatHandler.java b/docs-core/src/main/java/com/sismics/docs/core/util/format/PdfFormatHandler.java
index 08c698a1..670358b9 100644
--- a/docs-core/src/main/java/com/sismics/docs/core/util/format/PdfFormatHandler.java
+++ b/docs-core/src/main/java/com/sismics/docs/core/util/format/PdfFormatHandler.java
@@ -6,6 +6,7 @@ import com.sismics.util.mime.MimeType;
import org.apache.pdfbox.io.MemoryUsageSetting;
import org.apache.pdfbox.multipdf.PDFMergerUtility;
import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.rendering.ImageType;
import org.apache.pdfbox.rendering.PDFRenderer;
import org.apache.pdfbox.text.PDFTextStripper;
import org.slf4j.Logger;
@@ -60,7 +61,7 @@ public class PdfFormatHandler implements FormatHandler {
for (int pageIndex = 0; pageIndex < pdfDocument.getNumberOfPages(); pageIndex++) {
log.info("OCR page " + (pageIndex + 1) + "/" + pdfDocument.getNumberOfPages() + " of PDF file containing only images");
sb.append(" ");
- sb.append(FileUtil.ocrFile(language, renderer.renderImage(pageIndex)));
+ sb.append(FileUtil.ocrFile(language, renderer.renderImageWithDPI(pageIndex, 300, ImageType.GRAY)));
}
return sb.toString();
} catch (Exception e) {
diff --git a/docs-core/src/test/java/com/sismics/util/format/TestPdfFormatHandler.java b/docs-core/src/test/java/com/sismics/util/format/TestPdfFormatHandler.java
new file mode 100644
index 00000000..7b664df7
--- /dev/null
+++ b/docs-core/src/test/java/com/sismics/util/format/TestPdfFormatHandler.java
@@ -0,0 +1,19 @@
+package com.sismics.util.format;
+
+import com.sismics.docs.core.util.format.PdfFormatHandler;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.nio.file.Paths;
+
+/** Checks that content extracted from file/issue373.pdf with language "deu" contains the expected words. */
+public class TestPdfFormatHandler {
+    @Test
+    public void testIssue373() throws Exception {
+        String resource = "file/issue373.pdf";
+        String text = new PdfFormatHandler().extractContent("deu", Paths.get(ClassLoader.getSystemResource(resource).toURI()));
+        for (String word : new String[] {"Aufrechterhaltung", "Außentemperatur", "Grundumsatzmessungen", "ermitteln"}) {
+            Assert.assertTrue(text.contains(word));
+        }
+    }
+}
diff --git a/docs-core/src/test/resources/file/issue373.pdf b/docs-core/src/test/resources/file/issue373.pdf
new file mode 100644
index 00000000..180fc9b7
Binary files /dev/null and b/docs-core/src/test/resources/file/issue373.pdf differ
diff --git a/docs-web/pom.xml b/docs-web/pom.xml
index 9fd91074..e3abadea 100644
--- a/docs-web/pom.xml
+++ b/docs-web/pom.xml
@@ -26,25 +26,6 @@
docs-web-common
-
-
- javax.xml.bind
- jaxb-api
- 2.3.0
-
-
-
- com.sun.xml.bind
- jaxb-core
- 2.3.0
-
-
-
- com.sun.xml.bind
- jaxb-impl
- 2.3.0
-
-
org.glassfish.jersey.containers