/**
* Copyright @ 2012 Quan Nguyen
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package com.sismics.tess4j;
import java.awt.Rectangle;
import java.awt.image.BufferedImage;
import java.awt.image.RenderedImage;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.List;
import java.util.Properties;
import javax.imageio.IIOImage;
import com.sun.jna.Pointer;
/**
* An object layer on top of
* TessAPI
, provides character recognition support for common image
* formats, and multi-page TIFF images beyond the uncompressed, binary TIFF
* format supported by Tesseract OCR engine. The extended capabilities are
* provided by the
* Java Advanced Imaging Image I/O Tools
.
Support for
* PDF documents is available through
* Ghost4J
, a
* JNA
wrapper for
* GPL Ghostscript
, which should be installed and included in
* system path.
Any program that uses the library will need to
* ensure that the required libraries (the
* .jar
files for
* jna
,
* jai-imageio
, and
* ghost4j
) are in its compile and run-time
* classpath
.
*/
public class Tesseract {
private static Tesseract instance;
private final static Rectangle EMPTY_RECTANGLE = new Rectangle();
private String language = "eng";
private String datapath = null;
private int psm = TessAPI.TessPageSegMode.PSM_AUTO;
private boolean hocr;
private int pageNum;
private int ocrEngineMode = TessAPI.TessOcrEngineMode.OEM_DEFAULT;
private Properties prop = new Properties();
public final static String htmlBeginTag =
"\n"
+ "\n
tessedit_create_hocr
,
* tessedit_char_whitelist
, etc.
* @param value value for corresponding variable, e.g., "1", "0",
* "0123456789", etc.
*/
public void setTessVariable(String key, String value) {
prop.setProperty(key, value);
}
/**
* Performs OCR operation.
*
* @param bi a buffered image
* @return the recognized text
* @throws TesseractException
*/
public String doOCR(BufferedImage bi) throws TesseractException {
return doOCR(bi, null);
}
/**
* Performs OCR operation.
*
* @param bi a buffered image
* @param rect the bounding rectangle defines the region of the image to be
* recognized. A rectangle of zero dimension or
* null
indicates the whole image.
* @return the recognized text
* @throws TesseractException
*/
public String doOCR(BufferedImage bi, Rectangle rect) throws TesseractException {
IIOImage oimage = new IIOImage(bi, null, null);
ListIIOImage
objects
* @param rect the bounding rectangle defines the region of the image to be
* recognized. A rectangle of zero dimension or
* null
indicates the whole image.
* @return the recognized text
* @throws TesseractException
*/
public String doOCR(ListSetImage
, (optionally)
* SetRectangle
, and one or more of the
* Get*Text
functions.
*
* @param xsize width of image
* @param ysize height of image
* @param buf pixel data
* @param rect the bounding rectangle defines the region of the image to be
* recognized. A rectangle of zero dimension or
* null
indicates the whole image.
* @param bpp bits per pixel, represents the bit depth of the image, with 1
* for binary bitmap, 8 for gray, and 24 for color RGB.
* @return the recognized text
* @throws TesseractException
*/
public String doOCR(int xsize, int ysize, ByteBuffer buf, Rectangle rect, int bpp) throws TesseractException {
TessAPI api = TessAPI.INSTANCE;
TessAPI.TessBaseAPI handle = api.TessBaseAPICreate();
api.TessBaseAPIInit2(handle, datapath, language, ocrEngineMode);
api.TessBaseAPISetPageSegMode(handle, psm);
Enumeration> em = prop.propertyNames();
while (em.hasMoreElements()) {
String key = (String) em.nextElement();
api.TessBaseAPISetVariable(handle, key, prop.getProperty(key));
}
int bytespp = bpp / 8;
int bytespl = (int) Math.ceil(xsize * bpp / 8.0);
api.TessBaseAPISetImage(handle, buf, xsize, ysize, bytespp, bytespl);
if (rect != null && !rect.equals(EMPTY_RECTANGLE)) {
api.TessBaseAPISetRectangle(handle, rect.x, rect.y, rect.width, rect.height);
}
Pointer utf8Text = hocr ? api.TessBaseAPIGetHOCRText(handle, pageNum - 1) : api.TessBaseAPIGetUTF8Text(handle);
String str = utf8Text.getString(0);
api.TessDeleteText(utf8Text);
api.TessBaseAPIDelete(handle);
return str;
}
}