mirror of
https://github.com/sismics/docs.git
synced 2024-11-22 14:07:55 +01:00
Closes #59: Use TwelveMonkeys' ImageIO plugin for JPEG
This commit is contained in:
parent
456fc5b991
commit
3172a5f216
@ -60,7 +60,6 @@ or download the sources from GitHub.
|
|||||||
|
|
||||||
From the `docs-parent` directory:
|
From the `docs-parent` directory:
|
||||||
|
|
||||||
mvn -Pinit validate -N
|
|
||||||
mvn clean -DskipTests install
|
mvn clean -DskipTests install
|
||||||
|
|
||||||
#### Run a stand-alone version
|
#### Run a stand-alone version
|
||||||
|
@ -113,11 +113,6 @@
|
|||||||
<artifactId>bcprov-jdk15on</artifactId>
|
<artifactId>bcprov-jdk15on</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>com.levigo.jbig2</groupId>
|
|
||||||
<artifactId>levigo-jbig2-imageio</artifactId>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>fr.opensagres.xdocreport</groupId>
|
<groupId>fr.opensagres.xdocreport</groupId>
|
||||||
<artifactId>org.odftoolkit.odfdom.converter.pdf</artifactId>
|
<artifactId>org.odftoolkit.odfdom.converter.pdf</artifactId>
|
||||||
@ -128,15 +123,25 @@
|
|||||||
<artifactId>org.apache.poi.xwpf.converter.pdf</artifactId>
|
<artifactId>org.apache.poi.xwpf.converter.pdf</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<!-- OCR dependencies -->
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>jna</groupId>
|
<groupId>net.java.dev.jna</groupId>
|
||||||
<artifactId>jna</artifactId>
|
<artifactId>jna</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
<!-- ImageIO plugins -->
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>jai</groupId>
|
<groupId>com.levigo.jbig2</groupId>
|
||||||
<artifactId>imageio</artifactId>
|
<artifactId>levigo-jbig2-imageio</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.twelvemonkeys.imageio</groupId>
|
||||||
|
<artifactId>imageio-jpeg</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.github.jai-imageio</groupId>
|
||||||
|
<artifactId>jai-imageio-core</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<!-- Test dependencies -->
|
<!-- Test dependencies -->
|
||||||
|
@ -38,9 +38,9 @@ import javax.imageio.stream.ImageOutputStream;
|
|||||||
|
|
||||||
import org.w3c.dom.NodeList;
|
import org.w3c.dom.NodeList;
|
||||||
|
|
||||||
import com.sun.media.imageio.plugins.tiff.TIFFImageWriteParam;
|
import com.github.jaiimageio.impl.plugins.tiff.TIFFImageReaderSpi;
|
||||||
import com.sun.media.imageioimpl.plugins.tiff.TIFFImageReaderSpi;
|
import com.github.jaiimageio.impl.plugins.tiff.TIFFImageWriterSpi;
|
||||||
import com.sun.media.imageioimpl.plugins.tiff.TIFFImageWriterSpi;
|
import com.github.jaiimageio.plugins.tiff.TIFFImageWriteParam;
|
||||||
|
|
||||||
public class ImageIOHelper {
|
public class ImageIOHelper {
|
||||||
|
|
||||||
@ -51,26 +51,26 @@ public class ImageIOHelper {
|
|||||||
* Gets pixel data of an
|
* Gets pixel data of an
|
||||||
* <code>IIOImage</code> object.
|
* <code>IIOImage</code> object.
|
||||||
*
|
*
|
||||||
* @param image an
|
* @param oimage an
|
||||||
* <code>IIOImage</code> object
|
* <code>IIOImage</code> object
|
||||||
* @return a byte buffer of pixel data
|
* @return a byte buffer of pixel data
|
||||||
* @throws IOException
|
* @throws IOException
|
||||||
*/
|
*/
|
||||||
public static ByteBuffer getImageByteBuffer(IIOImage image) throws IOException {
|
public static ByteBuffer getImageByteBuffer(BufferedImage oimage) throws IOException {
|
||||||
//Set up the writeParam
|
|
||||||
TIFFImageWriteParam tiffWriteParam = new TIFFImageWriteParam(Locale.US);
|
|
||||||
tiffWriteParam.setCompressionMode(ImageWriteParam.MODE_DISABLED);
|
|
||||||
|
|
||||||
// Get tif writer and set output to file
|
// Get tif writer and set output to file
|
||||||
ImageWriter writer = new TIFFImageWriterSpi().createWriterInstance();
|
ImageWriter writer = new TIFFImageWriterSpi().createWriterInstance();
|
||||||
|
|
||||||
|
// Set up the writeParam
|
||||||
|
// We are using the old JAI ImageIO plugin because, for some reason, OCR doesn't work with TwelveMonkeys' plugin
|
||||||
|
ImageWriteParam tiffWriteParam = new TIFFImageWriteParam(Locale.US);
|
||||||
|
tiffWriteParam.setCompressionMode(ImageWriteParam.MODE_DISABLED);
|
||||||
|
|
||||||
// Get the stream metadata
|
// Get the stream metadata
|
||||||
IIOMetadata streamMetadata = writer.getDefaultStreamMetadata(tiffWriteParam);
|
IIOMetadata streamMetadata = writer.getDefaultStreamMetadata(tiffWriteParam);
|
||||||
|
|
||||||
ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
|
ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
|
||||||
ImageOutputStream ios = ImageIO.createImageOutputStream(outputStream);
|
ImageOutputStream ios = ImageIO.createImageOutputStream(outputStream);
|
||||||
writer.setOutput(ios);
|
writer.setOutput(ios);
|
||||||
writer.write(streamMetadata, new IIOImage(image.getRenderedImage(), null, null), tiffWriteParam);
|
writer.write(streamMetadata, new IIOImage(oimage, null, null), tiffWriteParam);
|
||||||
writer.dispose();
|
writer.dispose();
|
||||||
|
|
||||||
// Read the written image
|
// Read the written image
|
||||||
|
@ -17,7 +17,6 @@ package com.sismics.tess4j;
|
|||||||
|
|
||||||
import java.awt.Rectangle;
|
import java.awt.Rectangle;
|
||||||
import java.awt.image.BufferedImage;
|
import java.awt.image.BufferedImage;
|
||||||
import java.awt.image.RenderedImage;
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.nio.ByteBuffer;
|
import java.nio.ByteBuffer;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
@ -25,8 +24,6 @@ import java.util.Enumeration;
|
|||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Properties;
|
import java.util.Properties;
|
||||||
|
|
||||||
import javax.imageio.IIOImage;
|
|
||||||
|
|
||||||
import com.sun.jna.Pointer;
|
import com.sun.jna.Pointer;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -169,9 +166,8 @@ public class Tesseract {
|
|||||||
* @throws TesseractException
|
* @throws TesseractException
|
||||||
*/
|
*/
|
||||||
public String doOCR(BufferedImage bi, Rectangle rect) throws TesseractException {
|
public String doOCR(BufferedImage bi, Rectangle rect) throws TesseractException {
|
||||||
IIOImage oimage = new IIOImage(bi, null, null);
|
List<BufferedImage> imageList = new ArrayList<BufferedImage>();
|
||||||
List<IIOImage> imageList = new ArrayList<IIOImage>();
|
imageList.add(bi);
|
||||||
imageList.add(oimage);
|
|
||||||
return doOCR(imageList, rect);
|
return doOCR(imageList, rect);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -179,23 +175,22 @@ public class Tesseract {
|
|||||||
* Performs OCR operation.
|
* Performs OCR operation.
|
||||||
*
|
*
|
||||||
* @param imageList a list of
|
* @param imageList a list of
|
||||||
* <code>IIOImage</code> objects
|
* <code>BufferedImage</code> objects
|
||||||
* @param rect the bounding rectangle defines the region of the image to be
|
* @param rect the bounding rectangle defines the region of the image to be
|
||||||
* recognized. A rectangle of zero dimension or
|
* recognized. A rectangle of zero dimension or
|
||||||
* <code>null</code> indicates the whole image.
|
* <code>null</code> indicates the whole image.
|
||||||
* @return the recognized text
|
* @return the recognized text
|
||||||
* @throws TesseractException
|
* @throws TesseractException
|
||||||
*/
|
*/
|
||||||
public String doOCR(List<IIOImage> imageList, Rectangle rect) throws TesseractException {
|
public String doOCR(List<BufferedImage> imageList, Rectangle rect) throws TesseractException {
|
||||||
StringBuilder sb = new StringBuilder();
|
StringBuilder sb = new StringBuilder();
|
||||||
pageNum = 0;
|
pageNum = 0;
|
||||||
|
|
||||||
for (IIOImage oimage : imageList) {
|
for (BufferedImage oimage : imageList) {
|
||||||
pageNum++;
|
pageNum++;
|
||||||
try {
|
try {
|
||||||
ByteBuffer buf = ImageIOHelper.getImageByteBuffer(oimage);
|
ByteBuffer buf = ImageIOHelper.getImageByteBuffer(oimage);
|
||||||
RenderedImage ri = oimage.getRenderedImage();
|
String pageText = doOCR(oimage.getWidth(), oimage.getHeight(), buf, rect, oimage.getColorModel().getPixelSize());
|
||||||
String pageText = doOCR(ri.getWidth(), ri.getHeight(), buf, rect, ri.getColorModel().getPixelSize());
|
|
||||||
sb.append(pageText);
|
sb.append(pageText);
|
||||||
} catch (IOException ioe) {
|
} catch (IOException ioe) {
|
||||||
//skip the problematic image
|
//skip the problematic image
|
||||||
|
Binary file not shown.
Binary file not shown.
@ -35,8 +35,11 @@
|
|||||||
<joda-time.joda-time.version>2.9.1</joda-time.joda-time.version>
|
<joda-time.joda-time.version>2.9.1</joda-time.joda-time.version>
|
||||||
<org.hibernate.hibernate.version>4.1.0.Final</org.hibernate.hibernate.version>
|
<org.hibernate.hibernate.version>4.1.0.Final</org.hibernate.hibernate.version>
|
||||||
<javax.servlet.javax.servlet-api.version>3.1.0</javax.servlet.javax.servlet-api.version>
|
<javax.servlet.javax.servlet-api.version>3.1.0</javax.servlet.javax.servlet-api.version>
|
||||||
<com.levigo.jbig2.levigo-jbig2-imageio.version>1.6.3</com.levigo.jbig2.levigo-jbig2-imageio.version>
|
|
||||||
<fr.opensagres.xdocreport.version>1.0.5</fr.opensagres.xdocreport.version>
|
<fr.opensagres.xdocreport.version>1.0.5</fr.opensagres.xdocreport.version>
|
||||||
|
<net.java.dev.jna.jna.version>4.2.1</net.java.dev.jna.jna.version>
|
||||||
|
<com.twelvemonkeys.imageio.version>3.2.1</com.twelvemonkeys.imageio.version>
|
||||||
|
<com.levigo.jbig2.levigo-jbig2-imageio.version>1.6.5</com.levigo.jbig2.levigo-jbig2-imageio.version>
|
||||||
|
<com.github.jai-imageio.jai-imageio-core.version>1.3.1</com.github.jai-imageio.jai-imageio-core.version>
|
||||||
|
|
||||||
<org.eclipse.jetty.jetty-server.version>9.2.13.v20150730</org.eclipse.jetty.jetty-server.version>
|
<org.eclipse.jetty.jetty-server.version>9.2.13.v20150730</org.eclipse.jetty.jetty-server.version>
|
||||||
<org.eclipse.jetty.jetty-webapp.version>9.2.13.v20150730</org.eclipse.jetty.jetty-webapp.version>
|
<org.eclipse.jetty.jetty-webapp.version>9.2.13.v20150730</org.eclipse.jetty.jetty-webapp.version>
|
||||||
@ -69,15 +72,8 @@
|
|||||||
<enabled>true</enabled>
|
<enabled>true</enabled>
|
||||||
</snapshots>
|
</snapshots>
|
||||||
</repository>
|
</repository>
|
||||||
|
|
||||||
<repository>
|
|
||||||
<id>jbig2.googlecode</id>
|
|
||||||
<name>JBIG2 ImageIO-Plugin repository at googlecode.com</name>
|
|
||||||
<url>http://jbig2-imageio.googlecode.com/svn/maven-repository</url>
|
|
||||||
</repository>
|
|
||||||
</repositories>
|
</repositories>
|
||||||
|
|
||||||
|
|
||||||
<build>
|
<build>
|
||||||
<plugins>
|
<plugins>
|
||||||
<plugin>
|
<plugin>
|
||||||
@ -380,79 +376,39 @@
|
|||||||
<version>${fr.opensagres.xdocreport.version}</version>
|
<version>${fr.opensagres.xdocreport.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<!-- Used to read JBIG2 images. See https://github.com/sismics/docs/issues/38 -->
|
<dependency> <!-- Servlet listener to register SPI ImageIO plugins -->
|
||||||
|
<groupId>com.twelvemonkeys.servlet</groupId>
|
||||||
|
<artifactId>servlet</artifactId>
|
||||||
|
<version>${com.twelvemonkeys.imageio.version}</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- JNA for Tesseract -->
|
||||||
<dependency>
|
<dependency>
|
||||||
|
<groupId>net.java.dev.jna</groupId>
|
||||||
|
<artifactId>jna</artifactId>
|
||||||
|
<version>${net.java.dev.jna.jna.version}</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- ImageIO plugins -->
|
||||||
|
<dependency> <!-- Permissive JPEG plugin -->
|
||||||
|
<groupId>com.twelvemonkeys.imageio</groupId>
|
||||||
|
<artifactId>imageio-jpeg</artifactId>
|
||||||
|
<version>${com.twelvemonkeys.imageio.version}</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<dependency><!-- Only JBIG2 -->
|
||||||
<groupId>com.levigo.jbig2</groupId>
|
<groupId>com.levigo.jbig2</groupId>
|
||||||
<artifactId>levigo-jbig2-imageio</artifactId>
|
<artifactId>levigo-jbig2-imageio</artifactId>
|
||||||
<version>${com.levigo.jbig2.levigo-jbig2-imageio.version}</version>
|
<version>${com.levigo.jbig2.levigo-jbig2-imageio.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<!-- OCR dependencies -->
|
<dependency><!-- Essentially TIFF (for OCR) -->
|
||||||
<dependency>
|
<groupId>com.github.jai-imageio</groupId>
|
||||||
<groupId>jna</groupId>
|
<artifactId>jai-imageio-core</artifactId>
|
||||||
<artifactId>jna</artifactId>
|
<version>${com.github.jai-imageio.jai-imageio-core.version}</version>
|
||||||
<version>1.0</version>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>jai</groupId>
|
|
||||||
<artifactId>imageio</artifactId>
|
|
||||||
<version>1.0</version>
|
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
</dependencies>
|
</dependencies>
|
||||||
</dependencyManagement>
|
</dependencyManagement>
|
||||||
|
|
||||||
<profiles>
|
|
||||||
<profile>
|
|
||||||
<id>init</id>
|
|
||||||
|
|
||||||
<build>
|
|
||||||
<plugins>
|
|
||||||
<plugin>
|
|
||||||
<groupId>org.apache.maven.plugins</groupId>
|
|
||||||
<artifactId>maven-install-plugin</artifactId>
|
|
||||||
<version>2.3.1</version>
|
|
||||||
<executions>
|
|
||||||
|
|
||||||
<execution>
|
|
||||||
<id>install-jna</id>
|
|
||||||
<phase>validate</phase>
|
|
||||||
<configuration>
|
|
||||||
<file>${project.basedir}/lib/jna.jar</file>
|
|
||||||
<repositoryLayout>default</repositoryLayout>
|
|
||||||
<groupId>jna</groupId>
|
|
||||||
<artifactId>jna</artifactId>
|
|
||||||
<version>1.0</version>
|
|
||||||
<packaging>jar</packaging>
|
|
||||||
<generatePom>true</generatePom>
|
|
||||||
</configuration>
|
|
||||||
<goals>
|
|
||||||
<goal>install-file</goal>
|
|
||||||
</goals>
|
|
||||||
</execution>
|
|
||||||
|
|
||||||
<execution>
|
|
||||||
<id>install-jai-imageio</id>
|
|
||||||
<phase>validate</phase>
|
|
||||||
<configuration>
|
|
||||||
<file>${project.basedir}/lib/jai_imageio.jar</file>
|
|
||||||
<repositoryLayout>default</repositoryLayout>
|
|
||||||
<groupId>jai</groupId>
|
|
||||||
<artifactId>imageio</artifactId>
|
|
||||||
<version>1.0</version>
|
|
||||||
<packaging>jar</packaging>
|
|
||||||
<generatePom>true</generatePom>
|
|
||||||
</configuration>
|
|
||||||
<goals>
|
|
||||||
<goal>install-file</goal>
|
|
||||||
</goals>
|
|
||||||
</execution>
|
|
||||||
|
|
||||||
</executions>
|
|
||||||
</plugin>
|
|
||||||
</plugins>
|
|
||||||
</build>
|
|
||||||
</profile>
|
|
||||||
</profiles>
|
|
||||||
</project>
|
</project>
|
||||||
|
@ -84,6 +84,11 @@
|
|||||||
<artifactId>h2</artifactId>
|
<artifactId>h2</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.twelvemonkeys.servlet</groupId>
|
||||||
|
<artifactId>servlet</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
<!-- Test dependencies -->
|
<!-- Test dependencies -->
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>com.sismics.docs</groupId>
|
<groupId>com.sismics.docs</groupId>
|
||||||
|
@ -7,6 +7,12 @@
|
|||||||
metadata-complete="true">
|
metadata-complete="true">
|
||||||
<display-name>Docs</display-name>
|
<display-name>Docs</display-name>
|
||||||
|
|
||||||
|
<!-- Proper loader/unloader of ImageIO plugins -->
|
||||||
|
<listener>
|
||||||
|
<display-name>ImageIO service provider loader/unloader</display-name>
|
||||||
|
<listener-class>com.twelvemonkeys.servlet.image.IIOProviderContextListener</listener-class>
|
||||||
|
</listener>
|
||||||
|
|
||||||
<!-- This filter is used to process a couple things in the request context -->
|
<!-- This filter is used to process a couple things in the request context -->
|
||||||
<filter>
|
<filter>
|
||||||
<filter-name>requestContextFilter</filter-name>
|
<filter-name>requestContextFilter</filter-name>
|
||||||
|
Loading…
Reference in New Issue
Block a user