diff --git a/README.md b/README.md index 6daf5871..dd5ff95c 100644 --- a/README.md +++ b/README.md @@ -60,7 +60,6 @@ or download the sources from GitHub. From the `docs-parent` directory: - mvn -Pinit validate -N mvn clean -DskipTests install #### Run a stand-alone version diff --git a/docs-core/pom.xml b/docs-core/pom.xml index 3ad8e390..d5f915ab 100644 --- a/docs-core/pom.xml +++ b/docs-core/pom.xml @@ -113,11 +113,6 @@ bcprov-jdk15on - - com.levigo.jbig2 - levigo-jbig2-imageio - - fr.opensagres.xdocreport org.odftoolkit.odfdom.converter.pdf @@ -127,16 +122,26 @@ fr.opensagres.xdocreport org.apache.poi.xwpf.converter.pdf - - + - jna + net.java.dev.jna jna + + + + com.levigo.jbig2 + levigo-jbig2-imageio + + + + com.twelvemonkeys.imageio + imageio-jpeg + - jai - imageio + com.github.jai-imageio + jai-imageio-core diff --git a/docs-core/src/main/java/com/sismics/tess4j/ImageIOHelper.java b/docs-core/src/main/java/com/sismics/tess4j/ImageIOHelper.java index 925f1c25..7c56a5ec 100644 --- a/docs-core/src/main/java/com/sismics/tess4j/ImageIOHelper.java +++ b/docs-core/src/main/java/com/sismics/tess4j/ImageIOHelper.java @@ -38,9 +38,9 @@ import javax.imageio.stream.ImageOutputStream; import org.w3c.dom.NodeList; -import com.sun.media.imageio.plugins.tiff.TIFFImageWriteParam; -import com.sun.media.imageioimpl.plugins.tiff.TIFFImageReaderSpi; -import com.sun.media.imageioimpl.plugins.tiff.TIFFImageWriterSpi; +import com.github.jaiimageio.impl.plugins.tiff.TIFFImageReaderSpi; +import com.github.jaiimageio.impl.plugins.tiff.TIFFImageWriterSpi; +import com.github.jaiimageio.plugins.tiff.TIFFImageWriteParam; public class ImageIOHelper { @@ -51,26 +51,26 @@ public class ImageIOHelper { * Gets pixel data of an * IIOImage object. * - * @param image an + * @param oimage an * IIOImage object * @return a byte buffer of pixel data * @throws Exception */ - public static ByteBuffer getImageByteBuffer(IIOImage image) throws IOException { - //Set up the writeParam - TIFFImageWriteParam tiffWriteParam = new TIFFImageWriteParam(Locale.US); - tiffWriteParam.setCompressionMode(ImageWriteParam.MODE_DISABLED); - - //Get tif writer and set output to file + public static ByteBuffer getImageByteBuffer(BufferedImage oimage) throws IOException { + // Get tif writer and set output to file ImageWriter writer = new TIFFImageWriterSpi().createWriterInstance(); - //Get the stream metadata + // Set up the writeParam + // We are using the old JAI ImageIO plugin, because for some reason, OCR don't work with TwelveMonkeys' plugin + ImageWriteParam tiffWriteParam = new TIFFImageWriteParam(Locale.US); + tiffWriteParam.setCompressionMode(ImageWriteParam.MODE_DISABLED); + + // Get the stream metadata IIOMetadata streamMetadata = writer.getDefaultStreamMetadata(tiffWriteParam); - ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); ImageOutputStream ios = ImageIO.createImageOutputStream(outputStream); writer.setOutput(ios); - writer.write(streamMetadata, new IIOImage(image.getRenderedImage(), null, null), tiffWriteParam); + writer.write(streamMetadata, new IIOImage(oimage, null, null), tiffWriteParam); writer.dispose(); // Read the writed image diff --git a/docs-core/src/main/java/com/sismics/tess4j/Tesseract.java b/docs-core/src/main/java/com/sismics/tess4j/Tesseract.java index 84719470..4111ce4c 100644 --- a/docs-core/src/main/java/com/sismics/tess4j/Tesseract.java +++ b/docs-core/src/main/java/com/sismics/tess4j/Tesseract.java @@ -17,7 +17,6 @@ package com.sismics.tess4j; import java.awt.Rectangle; import java.awt.image.BufferedImage; -import java.awt.image.RenderedImage; import java.io.IOException; import java.nio.ByteBuffer; import java.util.ArrayList; @@ -25,8 +24,6 @@ import java.util.Enumeration; import java.util.List; import java.util.Properties; -import javax.imageio.IIOImage; - import com.sun.jna.Pointer; /** @@ -169,9 +166,8 @@ public class Tesseract { * @throws TesseractException */ public String doOCR(BufferedImage bi, Rectangle rect) throws TesseractException { - IIOImage oimage = new IIOImage(bi, null, null); - List imageList = new ArrayList(); - imageList.add(oimage); + List imageList = new ArrayList(); + imageList.add(bi); return doOCR(imageList, rect); } @@ -179,23 +175,22 @@ public class Tesseract { * Performs OCR operation. * * @param imageList a list of - * IIOImage objects + * BufferedImage objects * @param rect the bounding rectangle defines the region of the image to be * recognized. A rectangle of zero dimension or * null indicates the whole image. * @return the recognized text * @throws TesseractException */ - public String doOCR(List imageList, Rectangle rect) throws TesseractException { + public String doOCR(List imageList, Rectangle rect) throws TesseractException { StringBuilder sb = new StringBuilder(); pageNum = 0; - for (IIOImage oimage : imageList) { + for (BufferedImage oimage : imageList) { pageNum++; try { ByteBuffer buf = ImageIOHelper.getImageByteBuffer(oimage); - RenderedImage ri = oimage.getRenderedImage(); - String pageText = doOCR(ri.getWidth(), ri.getHeight(), buf, rect, ri.getColorModel().getPixelSize()); + String pageText = doOCR(oimage.getWidth(), oimage.getHeight(), buf, rect, oimage.getColorModel().getPixelSize()); sb.append(pageText); } catch (IOException ioe) { //skip the problematic image diff --git a/docs-parent/lib/jai_imageio.jar b/docs-parent/lib/jai_imageio.jar deleted file mode 100644 index 571aa199..00000000 Binary files a/docs-parent/lib/jai_imageio.jar and /dev/null differ diff --git a/docs-parent/lib/jna.jar b/docs-parent/lib/jna.jar deleted file mode 100644 index 0827fcfa..00000000 Binary files a/docs-parent/lib/jna.jar and /dev/null differ diff --git a/docs-parent/pom.xml b/docs-parent/pom.xml index cf977d0b..7dbec5a4 100644 --- a/docs-parent/pom.xml +++ b/docs-parent/pom.xml @@ -35,8 +35,11 @@ 2.9.1 4.1.0.Final 3.1.0 - 1.6.3 1.0.5 + 4.2.1 + 3.2.1 + 1.6.5 + 1.3.1 9.2.13.v20150730 9.2.13.v20150730 @@ -69,14 +72,7 @@ true - - - jbig2.googlecode - JBIG2 ImageIO-Plugin repository at googlecode.com - http://jbig2-imageio.googlecode.com/svn/maven-repository - - @@ -369,90 +365,50 @@ - fr.opensagres.xdocreport - org.odftoolkit.odfdom.converter.pdf - ${fr.opensagres.xdocreport.version} - - - - fr.opensagres.xdocreport - org.apache.poi.xwpf.converter.pdf - ${fr.opensagres.xdocreport.version} - - - + fr.opensagres.xdocreport + org.odftoolkit.odfdom.converter.pdf + ${fr.opensagres.xdocreport.version} + + + fr.opensagres.xdocreport + org.apache.poi.xwpf.converter.pdf + ${fr.opensagres.xdocreport.version} + + + + com.twelvemonkeys.servlet + servlet + ${com.twelvemonkeys.imageio.version} + + + + + net.java.dev.jna + jna + ${net.java.dev.jna.jna.version} + + + + + com.twelvemonkeys.imageio + imageio-jpeg + ${com.twelvemonkeys.imageio.version} + + + com.levigo.jbig2 levigo-jbig2-imageio ${com.levigo.jbig2.levigo-jbig2-imageio.version} - - - jna - jna - 1.0 - - - - jai - imageio - 1.0 + + com.github.jai-imageio + jai-imageio-core + ${com.github.jai-imageio.jai-imageio-core.version} - - - init - - - - - org.apache.maven.plugins - maven-install-plugin - 2.3.1 - - - - install-jna - validate - - ${project.basedir}/lib/jna.jar - default - jna - jna - 1.0 - jar - true - - - install-file - - - - - install-jai-imageio - validate - - ${project.basedir}/lib/jai_imageio.jar - default - jai - imageio - 1.0 - jar - true - - - install-file - - - - - - - - - diff --git a/docs-web/pom.xml b/docs-web/pom.xml index 15a7a835..744c698e 100644 --- a/docs-web/pom.xml +++ b/docs-web/pom.xml @@ -83,6 +83,11 @@ com.h2database h2 + + + com.twelvemonkeys.servlet + servlet + diff --git a/docs-web/src/main/webapp/WEB-INF/web.xml b/docs-web/src/main/webapp/WEB-INF/web.xml index 0c5629fb..3b14ff94 100644 --- a/docs-web/src/main/webapp/WEB-INF/web.xml +++ b/docs-web/src/main/webapp/WEB-INF/web.xml @@ -7,6 +7,12 @@ metadata-complete="true"> Docs + + + ImageIO service provider loader/unloader + com.twelvemonkeys.servlet.image.IIOProviderContextListener + + requestContextFilter