diff --git a/docs-core/pom.xml b/docs-core/pom.xml index 5777ab3f..f9d5932d 100644 --- a/docs-core/pom.xml +++ b/docs-core/pom.xml @@ -138,11 +138,6 @@ imageio - - tess4j - tess4j - - junit diff --git a/docs-core/src/main/java/net/sourceforge/tess4j/TessAPI.java b/docs-core/src/main/java/net/sourceforge/tess4j/TessAPI.java new file mode 100644 index 00000000..b48782da --- /dev/null +++ b/docs-core/src/main/java/net/sourceforge/tess4j/TessAPI.java @@ -0,0 +1,686 @@ +/** + * Copyright @ 2012 Quan Nguyen + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package net.sourceforge.tess4j; + +import com.sun.jna.*; +import com.sun.jna.ptr.*; +import java.nio.*; + +/** + * A Java wrapper for + * Tesseract OCR 3.02 API using + * JNA Interface Mapping. + */ +public interface TessAPI extends Library { + + static final boolean WINDOWS = System.getProperty("os.name").toLowerCase().startsWith("windows"); + /** + * Native library name. + */ + public static final String LIB_NAME = "libtesseract302"; + public static final String LIB_NAME_NON_WIN = "tesseract"; + /** + * An instance of the class library. + */ + public static final TessAPI INSTANCE = (TessAPI) Native.loadLibrary(WINDOWS ? LIB_NAME : LIB_NAME_NON_WIN, TessAPI.class); + + /** + * When Tesseract/Cube is initialized we can choose to instantiate/load/run + * only the Tesseract part, only the Cube part or both along with the + * combiner. 
The preference of which engine to use is stored in + * tessedit_ocr_engine_mode.

ATTENTION: When + * modifying this enum, please make sure to make the appropriate changes to + * all the enums mirroring it (e.g. OCREngine in + * cityblock/workflow/detection/detection_storage.proto). Such enums will + * mention the connection to OcrEngineMode in the comments. + */ + public static interface TessOcrEngineMode { + + public static final int OEM_TESSERACT_ONLY = (int) 0; + public static final int OEM_CUBE_ONLY = (int) 1; + public static final int OEM_TESSERACT_CUBE_COMBINED = (int) 2; + public static final int OEM_DEFAULT = (int) 3; + }; + + /** + * Possible modes for page layout analysis. These *must* be kept in order of + * decreasing amount of layout analysis to be done, except for + * OSD_ONLY, so that the inequality test macros below work. + */ + public static interface TessPageSegMode { + + public static final int PSM_OSD_ONLY = (int) 0; + public static final int PSM_AUTO_OSD = (int) 1; + public static final int PSM_AUTO_ONLY = (int) 2; + public static final int PSM_AUTO = (int) 3; + public static final int PSM_SINGLE_COLUMN = (int) 4; + public static final int PSM_SINGLE_BLOCK_VERT_TEXT = (int) 5; + public static final int PSM_SINGLE_BLOCK = (int) 6; + public static final int PSM_SINGLE_LINE = (int) 7; + public static final int PSM_SINGLE_WORD = (int) 8; + public static final int PSM_CIRCLE_WORD = (int) 9; + public static final int PSM_SINGLE_CHAR = (int) 10; + public static final int PSM_COUNT = (int) 11; + }; + + /** + * Enum of the elements of the page hierarchy, used in + * ResultIterator to provide functions that operate on each + * level without having to have 5x as many functions. 
+ */ + public static interface TessPageIteratorLevel { + + public static final int RIL_BLOCK = (int) 0; + public static final int RIL_PARA = (int) 1; + public static final int RIL_TEXTLINE = (int) 2; + public static final int RIL_WORD = (int) 3; + public static final int RIL_SYMBOL = (int) 4; + }; + + public static interface TessPolyBlockType { + + public static final int PT_UNKNOWN = (int) 0; + public static final int PT_FLOWING_TEXT = (int) 1; + public static final int PT_HEADING_TEXT = (int) 2; + public static final int PT_PULLOUT_TEXT = (int) 3; + public static final int PT_TABLE = (int) 4; + public static final int PT_VERTICAL_TEXT = (int) 5; + public static final int PT_CAPTION_TEXT = (int) 6; + public static final int PT_FLOWING_IMAGE = (int) 7; + public static final int PT_HEADING_IMAGE = (int) 8; + public static final int PT_PULLOUT_IMAGE = (int) 9; + public static final int PT_HORZ_LINE = (int) 10; + public static final int PT_VERT_LINE = (int) 11; + public static final int PT_NOISE = (int) 12; + public static final int PT_COUNT = (int) 13; + }; + + /** + *
+     *  +------------------+
+     *  | 1 Aaaa Aaaa Aaaa |
+     *  | Aaa aa aaa aa    |
+     *  | aaaaaa A aa aaa. |
+     *  |                2 |
+     *  |   #######  c c C |
+     *  |   #######  c c c |
+     *  | < #######  c c c |
+     *  | < #######  c   c |
+     *  | < #######  .   c |
+     *  | 3 #######      c |
+     *  +------------------+
+     * 
+ * Orientation Example:
+ * ====================
+ * Above is a + * diagram of some (1) English and (2) Chinese text and a (3) photo + * credit.
+ *
Upright Latin characters are represented as A and a. '<' represents + * a Latin character rotated anti-clockwise 90 degrees. Upright + * Chinese characters are represented as C and c.
+ *
+ * NOTA BENE: enum values here should match goodoc.proto
+ *
+ * If you orient your head so that "up" aligns with Orientation, then + * the characters will appear "right side up" and readable.
+ *
+ * In the example above, both the + * English and Chinese paragraphs are oriented so their "up" is the top of + * the page (page up). The photo credit is read with one's head turned + * leftward ("up" is to page left).
+ *
The values of this enum + * match the convention of Tesseract's osdetect.h + */ + public static interface TessOrientation { + + public static final int ORIENTATION_PAGE_UP = (int) 0; + public static final int ORIENTATION_PAGE_RIGHT = (int) 1; + public static final int ORIENTATION_PAGE_DOWN = (int) 2; + public static final int ORIENTATION_PAGE_LEFT = (int) 3; + }; + + /** + * The grapheme clusters within a line of text are laid out logically in + * this direction, judged when looking at the text line rotated so that its + * Orientation is "page up".

For English text, the writing + * direction is left-to-right. For the Chinese text in the above example, + * the writing direction is top-to-bottom. + */ + public static interface TessWritingDirection { + + public static final int WRITING_DIRECTION_LEFT_TO_RIGHT = (int) 0; + public static final int WRITING_DIRECTION_RIGHT_TO_LEFT = (int) 1; + public static final int WRITING_DIRECTION_TOP_TO_BOTTOM = (int) 2; + }; + + /** + * The text lines are read in the given sequence.

In English, + * the order is top-to-bottom. In Chinese, vertical text lines are read + * right-to-left. Mongolian is written in vertical columns top to bottom + * like Chinese, but the lines are ordered left-to-right.

Note that + * only some combinations make sense. For example, + * WRITING_DIRECTION_LEFT_TO_RIGHT implies + * TEXTLINE_ORDER_TOP_TO_BOTTOM. + */ + public static interface TessTextlineOrder { + + public static final int TEXTLINE_ORDER_LEFT_TO_RIGHT = (int) 0; + public static final int TEXTLINE_ORDER_RIGHT_TO_LEFT = (int) 1; + public static final int TEXTLINE_ORDER_TOP_TO_BOTTOM = (int) 2; + }; + public static final int TRUE = (int) 1; + public static final int FALSE = (int) 0; + + /** + * Returns the version identifier. + */ + String TessVersion(); + + void TessDeleteText(Pointer text); + + void TessDeleteTextArray(PointerByReference arr); + + void TessDeleteIntArray(IntBuffer arr); + + /** + * Creates an instance of the base class for all Tesseract APIs. + */ + TessAPI.TessBaseAPI TessBaseAPICreate(); + + /** + * Disposes the TesseractAPI instance. + */ + void TessBaseAPIDelete(TessAPI.TessBaseAPI handle); + + /** + * Set the name of the input file. Needed only for training and reading a + * UNLV zone file. + */ + void TessBaseAPISetInputName(TessAPI.TessBaseAPI handle, String name); + + /** + * Set the name of the bonus output files. Needed only for debugging. + */ + void TessBaseAPISetOutputName(TessAPI.TessBaseAPI handle, String name); + + /** + * Set the value of an internal "parameter." Supply the name of the + * parameter and the value as a string, just as you would in a config file. + * Returns false if the name lookup failed. E.g., + * SetVariable("tessedit_char_blacklist", "xyz"); to ignore x, + * y and z. Or + * SetVariable("classify_bln_numeric_mode", "1"); to set + * numeric-only mode. + * SetVariable may be used before + * Init, but settings will revert to defaults on + * End().

Note: Must be called after + * Init(). Only works for non-init variables (init variables + * should be passed to + * Init()). + */ + int TessBaseAPISetVariable(TessAPI.TessBaseAPI handle, String name, String value); + + /** + * Returns true (1) if the parameter was found among Tesseract parameters. + * Fills in value with the value of the parameter. + */ + int TessBaseAPIGetIntVariable(TessAPI.TessBaseAPI handle, String name, IntBuffer value); + + int TessBaseAPIGetBoolVariable(TessAPI.TessBaseAPI handle, String name, IntBuffer value); + + int TessBaseAPIGetDoubleVariable(TessAPI.TessBaseAPI handle, String name, DoubleBuffer value); + + String TessBaseAPIGetStringVariable(TessAPI.TessBaseAPI handle, String name); + + /** + * Print Tesseract parameters to the given file.

Note: Must not + * be the first method called after instance create. + */ + void TessBaseAPIPrintVariablesToFile(TessAPI.TessBaseAPI handle, String filename); + + /** + * Instances are now mostly thread-safe and totally independent, but some + * global parameters remain. Basically it is safe to use multiple + * TessBaseAPIs in different threads in parallel, UNLESS: you use + * SetVariable on some of the Params in classify and textord. + * If you do, then the effect will be to change it for all your + * instances.

Start tesseract. Returns zero on success and -1 + * on failure. NOTE that the only members that may be called before Init are + * those listed above here in the class definition.

The + * datapath must be the name of the parent directory of + * tessdata and must end in / . Any name after the last / will be stripped. + * The language is (usually) an + * ISO 639-3 string; + * NULL will default to eng. It is entirely safe (and + * eventually will be efficient too) to call Init multiple times on the same + * instance to change language, or just to reset the classifier. The + * language may be a string of the form [~]&lt;lang&gt;[+[~]&lt;lang&gt;]* indicating + * that multiple languages are to be loaded. E.g., hin+eng will load Hindi + * and English. Languages may specify internally that they want to be loaded + * with one or more other languages, so the ~ sign is available to override + * that. E.g., if hin were set to load eng by default, then hin+~eng would + * force loading only hin. The number of loaded languages is limited only by + * memory, with the caveat that loading additional languages will impact + * both speed and accuracy, as there is more work to do to decide on the + * applicable language, and there is more chance of hallucinating incorrect + * words. WARNING: On changing languages, all Tesseract parameters are reset + * back to their default values. (Which may vary between languages.) If you + * have a rare need to set a Variable that controls initialization for a + * second call to + * Init you should explicitly call + * End() and then use + * SetVariable before + * Init. This is only a very rare use case, since there are + * very few uses that require any parameters to be set before + * Init.

If + * set_only_non_debug_params is true, only params that do not + * contain "debug" in the name will be set. + */ + int TessBaseAPIInit1(TessAPI.TessBaseAPI handle, String datapath, String language, int oem, PointerByReference configs, int configs_size); + + int TessBaseAPIInit2(TessAPI.TessBaseAPI handle, String datapath, String language, int oem); + + int TessBaseAPIInit3(TessAPI.TessBaseAPI handle, String datapath, String language); + + /** + * Returns the languages string used in the last valid initialization. If + * the last initialization specified "deu+hin" then that will be returned. + * If hin loaded eng automatically as well, then that will not be included + * in this list. To find the languages actually loaded, use + * GetLoadedLanguagesAsVector. The returned string should NOT + * be deleted. + */ + String TessBaseAPIGetInitLanguagesAsString(TessAPI.TessBaseAPI handle); + + /** + * Returns the loaded languages in the vector of STRINGs. Includes all + * languages loaded by the last + * Init, including those loaded as dependencies of other loaded + * languages. + */ + PointerByReference TessBaseAPIGetLoadedLanguagesAsVector(TessAPI.TessBaseAPI handle); + + /** + * Returns the available languages in the vector of STRINGs. + */ + PointerByReference TessBaseAPIGetAvailableLanguagesAsVector(TessAPI.TessBaseAPI handle); + + /** + * Init only the lang model component of Tesseract. The only functions that + * work after this init are + * SetVariable and + * IsValidWord. WARNING: temporary! This function will be + * removed from here and placed in a separate API at some future time. + */ + int TessBaseAPIInitLangMod(TessAPI.TessBaseAPI handle, String datapath, String language); + + /** + * Init only for page layout analysis. Use only for calls to + * SetImage and + * AnalysePage. Calls that attempt recognition will generate an + * error. 
+ */ + void TessBaseAPIInitForAnalysePage(TessAPI.TessBaseAPI handle); + + /** + * Read a "config" file containing a set of param, value pairs. Searches the + * standard places: + * tessdata/configs, + * tessdata/tessconfigs and also accepts a relative or absolute + * path name. Note: only non-init params will be set (init params are set by + * Init()). + */ + void TessBaseAPIReadConfigFile(TessAPI.TessBaseAPI handle, String filename, int init_only); + + /** + * Set the current page segmentation mode. Defaults to PSM_SINGLE_BLOCK. The + * mode is stored as an IntParam so it can also be modified by + * ReadConfigFile or + * SetVariable("tessedit_pageseg_mode", mode as string). + */ + void TessBaseAPISetPageSegMode(TessAPI.TessBaseAPI handle, int mode); + + /** + * Return the current page segmentation mode. + */ + int TessBaseAPIGetPageSegMode(TessAPI.TessBaseAPI handle); + + /** + * Recognize a rectangle from an image and return the result as a string. + * May be called many times for a single + * Init. Currently has no error checking. Greyscale of 8 and + * color of 24 or 32 bits per pixel may be given. Palette color images will + * not work properly and must be converted to 24 bit. Binary images of 1 bit + * per pixel may also be given but they must be byte packed with the MSB of + * the first byte being the first pixel, and a 1 represents WHITE. For + * binary images set bytes_per_pixel=0. The recognized text is returned as a + * char* which is coded as UTF8 and must be freed with the delete [] + * operator.

Note that + * TesseractRect is the simplified convenience interface. For + * advanced uses, use + * SetImage, (optionally) + * SetRectangle, + * Recognize, and one or more of the + * Get*Text functions below. + */ + Pointer TessBaseAPIRect(TessAPI.TessBaseAPI handle, ByteBuffer imagedata, int bytes_per_pixel, int bytes_per_line, int left, int top, int width, int height); + + /** + * Call between pages or documents etc to free up memory and forget adaptive + * data. + */ + void TessBaseAPIClearAdaptiveClassifier(TessAPI.TessBaseAPI handle); + + /** + * Provide an image for Tesseract to recognize. Format is as TesseractRect + * above. Does not copy the image buffer, or take ownership. The source + * image may be destroyed after Recognize is called, either explicitly or + * implicitly via one of the + * Get*Text functions. + * SetImage clears all recognition results, and sets the + * rectangle to the full image, so it may be followed immediately by a + * GetUTF8Text, and it will automatically perform recognition. + */ + void TessBaseAPISetImage(TessAPI.TessBaseAPI handle, ByteBuffer imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line); + + /** + * Set the resolution of the source image in pixels per inch so font size + * information can be calculated in results. Call this after SetImage(). + */ + void TessBaseAPISetSourceResolution(TessAPI.TessBaseAPI handle, int ppi); + + /** + * Restrict recognition to a sub-rectangle of the image. Call after + * SetImage. Each + * SetRectangle clears the recognition results so multiple + * rectangles can be recognized with the same image. + */ + void TessBaseAPISetRectangle(TessAPI.TessBaseAPI handle, int left, int top, int width, int height); + + /** Scale factor from original image. */ + int TessBaseAPIGetThresholdedImageScaleFactor(TessAPI.TessBaseAPI handle); + + /** Dump the internal binary image to a PGM file. 
*/ + void TessBaseAPIDumpPGM(TessAPI.TessBaseAPI handle, String filename); + + /** + * Runs page layout analysis in the mode set by SetPageSegMode. May + * optionally be called prior to Recognize to get access to just the page + * layout results. Returns an iterator to the results. Returns NULL on + * error. The returned iterator must be deleted after use. WARNING! This + * class points to data held within the TessBaseAPI class, and therefore can + * only be used while the TessBaseAPI class still exists and has not been + * subjected to a call of + * Init, + * SetImage, + * Recognize, + * Clear, + * End, DetectOS, or anything else that changes the internal + * PAGE_RES. + */ + TessAPI.TessPageIterator TessBaseAPIAnalyseLayout(TessAPI.TessBaseAPI handle); + + /** + * Recognize the image from SetAndThresholdImage, generating Tesseract + * internal structures. Returns 0 on success. Optional. The + * Get*Text functions below will call + * Recognize if needed. After Recognize, the output is kept + * internally until the next + * SetImage. + */ + int TessBaseAPIRecognize(TessAPI.TessBaseAPI handle, TessAPI.ETEXT_DESC monitor); + + /** + * Variant on Recognize used for testing chopper. + */ + int TessBaseAPIRecognizeForChopTest(TessAPI.TessBaseAPI handle, TessAPI.ETEXT_DESC monitor); + + /** + * Get a reading-order iterator to the results of LayoutAnalysis and/or + * Recognize. The returned iterator must be deleted after use. WARNING! This + * class points to data held within the TessBaseAPI class, and therefore can + * only be used while the TessBaseAPI class still exists and has not been + * subjected to a call of + * Init, + * SetImage, + * Recognize, + * Clear, + * End, DetectOS, or anything else that changes the internal + * PAGE_RES. + */ + TessAPI.TessResultIterator TessBaseAPIGetIterator(TessAPI.TessBaseAPI handle); + + /** + * Get a mutable iterator to the results of LayoutAnalysis and/or Recognize. + * The returned iterator must be deleted after use. 
+ * WARNING! This class points to data held within the TessBaseAPI class, and + * therefore can only be used while the TessBaseAPI class still exists and + * has not been subjected to a call of Init, SetImage, Recognize, Clear, End + * DetectOS, or anything else that changes the internal PAGE_RES. + */ + TessAPI.TessMutableIterator TessBaseAPIGetMutableIterator(TessAPI.TessBaseAPI handle); + + /** + * Recognizes all the pages in the named file, as a multi-page tiff or list + * of filenames, or single image, and gets the appropriate kind of text + * according to parameters: + * tessedit_create_boxfile, + * tessedit_make_boxes_from_boxes, + * tessedit_write_unlv, + * tessedit_create_hocr. Calls ProcessPage on each page in the + * input file, which may be a multi-page tiff, single-page other file + * format, or a plain text list of images to read. If tessedit_page_number + * is non-negative, processing begins at that page of a multi-page tiff + * file, or filelist. The text is returned in text_out. Returns false on + * error. If non-zero timeout_millisec terminates processing after the + * timeout on a single page. If non-NULL and non-empty, and some page fails + * for some reason, the page is reprocessed with the retry_config config + * file. Useful for interactively debugging a bad page. + */ + Pointer TessBaseAPIProcessPages(TessAPI.TessBaseAPI handle, String filename, String retry_config, int timeout_millisec); + + /** + * The recognized text is returned as a char* which is coded as UTF-8 and + * must be freed with the delete [] operator. + */ + Pointer TessBaseAPIGetUTF8Text(TessAPI.TessBaseAPI handle); + + /** + * Make a HTML-formatted string with hOCR markup from the internal data + * structures. page_number is 0-based but will appear in the output as + * 1-based. 
+ */ + Pointer TessBaseAPIGetHOCRText(TessAPI.TessBaseAPI handle, int page_number); + + /** + * The recognized text is returned as a char* which is coded in the same + * format as a box file used in training. Returned string must be freed with + * the delete [] operator. Constructs coordinates in the original image - + * not just the rectangle. page_number is a 0-based page index that will + * appear in the box file. + */ + Pointer TessBaseAPIGetBoxText(TessAPI.TessBaseAPI handle, int page_number); + + /** + * The recognized text is returned as a char* which is coded as UNLV format + * Latin-1 with specific reject and suspect codes and must be freed with the + * delete [] operator. + */ + Pointer TessBaseAPIGetUNLVText(TessAPI.TessBaseAPI handle); + + /** + * Returns the (average) confidence value between 0 and 100. + */ + int TessBaseAPIMeanTextConf(TessAPI.TessBaseAPI handle); + + /** + * Returns all word confidences (between 0 and 100) in an array, terminated + * by -1. The calling function must delete [] after use. The number of + * confidences should correspond to the number of space-delimited words in + * GetUTF8Text. + */ + IntByReference TessBaseAPIAllWordConfidences(TessAPI.TessBaseAPI handle); + + /** + * Applies the given word to the adaptive classifier if possible. The word + * must be SPACE-DELIMITED UTF-8 - l i k e t h i s , so it can tell the + * boundaries of the graphemes. Assumes that SetImage/SetRectangle have been + * used to set the image to the given word. The mode arg should be + * PSM_SINGLE_WORD or PSM_CIRCLE_WORD, as that will be used to control + * layout analysis. The currently set PageSegMode is preserved. Returns + * false if adaption was not possible for some reason. + */ + int TessBaseAPIAdaptToWordStr(TessAPI.TessBaseAPI handle, int mode, String wordstr); + + /** + * Free up recognition results and any stored image data, without actually + * freeing any recognition data that would be time-consuming to reload. 
+ * Afterwards, you must call + * SetImage or + * TesseractRect before doing any + * Recognize or + * Get* operation. + */ + void TessBaseAPIClear(TessAPI.TessBaseAPI handle); + + /** + * Close down tesseract and free up all memory. + * End() is equivalent to destructing and reconstructing your + * TessBaseAPI. Once + * End() has been used, none of the other API functions may be + * used other than + * Init and anything declared above it in the class definition. + */ + void TessBaseAPIEnd(TessAPI.TessBaseAPI handle); + + /** + * Check whether a word is valid according to Tesseract's language model. + * + * @return 0 if the word is invalid, non-zero if valid. @warning temporary! + * This function will be removed from here and placed in a separate API at + * some future time. + */ + int TessBaseAPIIsValidWord(TessAPI.TessBaseAPI handle, String word); + + int TessBaseAPIGetTextDirection(TessAPI.TessBaseAPI handle, IntBuffer out_offset, FloatBuffer out_slope); + + /** + * This method returns the string form of the specified unichar. 
+ */ + String TessBaseAPIGetUnichar(TessAPI.TessBaseAPI handle, int unichar_id); + + /* Page iterator */ + void TessPageIteratorDelete(TessAPI.TessPageIterator handle); + + TessAPI.TessPageIterator TessPageIteratorCopy(TessAPI.TessPageIterator handle); + + void TessPageIteratorBegin(TessAPI.TessPageIterator handle); + + int TessPageIteratorNext(TessAPI.TessPageIterator handle, int level); + + int TessPageIteratorIsAtBeginningOf(TessAPI.TessPageIterator handle, int level); + + int TessPageIteratorIsAtFinalElement(TessAPI.TessPageIterator handle, int level, int element); + + int TessPageIteratorBoundingBox(TessAPI.TessPageIterator handle, int level, IntBuffer left, IntBuffer top, IntBuffer right, IntBuffer bottom); + + int TessPageIteratorBlockType(TessAPI.TessPageIterator handle); + + int TessPageIteratorBaseline(TessAPI.TessPageIterator handle, int level, IntBuffer x1, IntBuffer y1, IntBuffer x2, IntBuffer y2); + + void TessPageIteratorOrientation(TessAPI.TessPageIterator handle, IntBuffer orientation, IntBuffer writing_direction, IntBuffer textline_order, FloatBuffer deskew_angle); + + /* Result iterator */ + void TessResultIteratorDelete(TessAPI.TessResultIterator handle); + + TessAPI.TessResultIterator TessResultIteratorCopy(TessAPI.TessResultIterator handle); + + TessAPI.TessPageIterator TessResultIteratorGetPageIterator(TessAPI.TessResultIterator handle); + + TessAPI.TessPageIterator TessResultIteratorGetPageIteratorConst(TessAPI.TessResultIterator handle); + + Pointer TessResultIteratorGetUTF8Text(TessAPI.TessResultIterator handle, int level); + + float TessResultIteratorConfidence(TessAPI.TessResultIterator handle, int level); + + String TessResultIteratorWordFontAttributes(TessAPI.TessResultIterator handle, IntBuffer is_bold, IntBuffer is_italic, IntBuffer is_underlined, IntBuffer is_monospace, IntBuffer is_serif, IntBuffer is_smallcaps, IntBuffer pointsize, IntBuffer font_id); + + int TessResultIteratorWordIsFromDictionary(TessAPI.TessResultIterator 
handle); + + int TessResultIteratorWordIsNumeric(TessAPI.TessResultIterator handle); + + int TessResultIteratorSymbolIsSuperscript(TessAPI.TessResultIterator handle); + + int TessResultIteratorSymbolIsSubscript(TessAPI.TessResultIterator handle); + + int TessResultIteratorSymbolIsDropcap(TessAPI.TessResultIterator handle); + + public static class TessBaseAPI extends PointerType { + + public TessBaseAPI(Pointer address) { + super(address); + } + + public TessBaseAPI() { + super(); + } + }; + + public static class ETEXT_DESC extends PointerType { + + public ETEXT_DESC(Pointer address) { + super(address); + } + + public ETEXT_DESC() { + super(); + } + }; + + public static class TessPageIterator extends PointerType { + + public TessPageIterator(Pointer address) { + super(address); + } + + public TessPageIterator() { + super(); + } + }; + + public static class TessMutableIterator extends PointerType { + + public TessMutableIterator(Pointer address) { + super(address); + } + + public TessMutableIterator() { + super(); + } + }; + + public static class TessResultIterator extends PointerType { + + public TessResultIterator(Pointer address) { + super(address); + } + + public TessResultIterator() { + super(); + } + }; +} diff --git a/docs-core/src/main/java/net/sourceforge/tess4j/Tesseract.java b/docs-core/src/main/java/net/sourceforge/tess4j/Tesseract.java new file mode 100644 index 00000000..ab3bee49 --- /dev/null +++ b/docs-core/src/main/java/net/sourceforge/tess4j/Tesseract.java @@ -0,0 +1,258 @@ +/** + * Copyright @ 2012 Quan Nguyen + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. 
You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package net.sourceforge.tess4j; + +import java.awt.Rectangle; +import java.awt.image.BufferedImage; +import java.awt.image.RenderedImage; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Enumeration; +import java.util.List; +import java.util.Properties; + +import javax.imageio.IIOImage; + +import net.sourceforge.vietocr.ImageIOHelper; + +import com.sun.jna.Pointer; + +/** + * An object layer on top of + * TessAPI, provides character recognition support for common image + * formats, and multi-page TIFF images beyond the uncompressed, binary TIFF + * format supported by Tesseract OCR engine. The extended capabilities are + * provided by the + * Java Advanced Imaging Image I/O Tools.

Support for + * PDF documents is available through + * Ghost4J, a + * JNA wrapper for + * GPL Ghostscript, which should be installed and included in + * system path.

Any program that uses the library will need to + * ensure that the required libraries (the + * .jar files for + * jna, + * jai-imageio, and + * ghost4j) are in its compile and run-time + * classpath. + */ +public class Tesseract { + + private static Tesseract instance; + private final static Rectangle EMPTY_RECTANGLE = new Rectangle(); + private String language = "eng"; + private String datapath = "tessdata"; + private int psm = TessAPI.TessPageSegMode.PSM_AUTO; + private boolean hocr; + private int pageNum; + private int ocrEngineMode = TessAPI.TessOcrEngineMode.OEM_DEFAULT; + private Properties prop = new Properties(); + public final static String htmlBeginTag = + "\n" + + "\n\n\n" + + "\n\n" + + "\n\n"; + public final static String htmlEndTag = "\n\n"; + + /** + * Private constructor. + */ + private Tesseract() { + System.setProperty("jna.encoding", "UTF8"); + } + + /** + * Gets an instance of the class library. + * + * @return instance + */ + public static synchronized Tesseract getInstance() { + if (instance == null) { + instance = new Tesseract(); + } + + return instance; + } + + /** + * Sets tessdata path. + * + * @param datapath the tessdata path to set + */ + public void setDatapath(String datapath) { + this.datapath = datapath; + } + + /** + * Sets language for OCR. + * + * @param language the language code, which follows ISO 639-3 standard. + */ + public void setLanguage(String language) { + this.language = language; + } + + /** + * Sets OCR engine mode. + * + * @param ocrEngineMode the OcrEngineMode to set + */ + public void setOcrEngineMode(int ocrEngineMode) { + this.ocrEngineMode = ocrEngineMode; + } + + /** + * Sets page segmentation mode. + * + * @param mode the page segmentation mode to set + */ + public void setPageSegMode(int mode) { + this.psm = mode; + } + + /** + * Enables hocr output. 
+ * + * @param hocr to enable or disable hocr output + */ + public void setHocr(boolean hocr) { + this.hocr = hocr; + prop.setProperty("tessedit_create_hocr", hocr ? "1" : "0"); + } + + /** + * Set the value of Tesseract's internal parameter. + * + * @param key variable name, e.g., + * tessedit_create_hocr, + * tessedit_char_whitelist, etc. + * @param value value for corresponding variable, e.g., "1", "0", + * "0123456789", etc. + */ + public void setTessVariable(String key, String value) { + prop.setProperty(key, value); + } + + /** + * Performs OCR operation. + * + * @param bi a buffered image + * @return the recognized text + * @throws TesseractException + */ + public String doOCR(BufferedImage bi) throws TesseractException { + return doOCR(bi, null); + } + + /** + * Performs OCR operation. + * + * @param bi a buffered image + * @param rect the bounding rectangle defines the region of the image to be + * recognized. A rectangle of zero dimension or + * null indicates the whole image. + * @return the recognized text + * @throws TesseractException + */ + public String doOCR(BufferedImage bi, Rectangle rect) throws TesseractException { + IIOImage oimage = new IIOImage(bi, null, null); + List imageList = new ArrayList(); + imageList.add(oimage); + return doOCR(imageList, rect); + } + + /** + * Performs OCR operation. + * + * @param imageList a list of + * IIOImage objects + * @param rect the bounding rectangle defines the region of the image to be + * recognized. A rectangle of zero dimension or + * null indicates the whole image. 
+ * @return the recognized text + * @throws TesseractException + */ + public String doOCR(List imageList, Rectangle rect) throws TesseractException { + StringBuilder sb = new StringBuilder(); + pageNum = 0; + + for (IIOImage oimage : imageList) { + pageNum++; + try { + ByteBuffer buf = ImageIOHelper.getImageByteBuffer(oimage); + RenderedImage ri = oimage.getRenderedImage(); + String pageText = doOCR(ri.getWidth(), ri.getHeight(), buf, rect, ri.getColorModel().getPixelSize()); + sb.append(pageText); + } catch (IOException ioe) { + //skip the problematic image + System.err.println(ioe.getMessage()); + } + } + + if (hocr) { + sb.insert(0, htmlBeginTag).append(htmlEndTag); + } + return sb.toString(); + } + + /** + * Performs OCR operation. Use + * SetImage, (optionally) + * SetRectangle, and one or more of the + * Get*Text functions. + * + * @param xsize width of image + * @param ysize height of image + * @param buf pixel data + * @param rect the bounding rectangle defines the region of the image to be + * recognized. A rectangle of zero dimension or + * null indicates the whole image. + * @param bpp bits per pixel, represents the bit depth of the image, with 1 + * for binary bitmap, 8 for gray, and 24 for color RGB. 
+ * @return the recognized text + * @throws TesseractException + */ + public String doOCR(int xsize, int ysize, ByteBuffer buf, Rectangle rect, int bpp) throws TesseractException { + TessAPI api = TessAPI.INSTANCE; + TessAPI.TessBaseAPI handle = api.TessBaseAPICreate(); + api.TessBaseAPIInit2(handle, datapath, language, ocrEngineMode); + api.TessBaseAPISetPageSegMode(handle, psm); + + Enumeration em = prop.propertyNames(); + while (em.hasMoreElements()) { + String key = (String) em.nextElement(); + api.TessBaseAPISetVariable(handle, key, prop.getProperty(key)); + } + + int bytespp = bpp / 8; + int bytespl = (int) Math.ceil(xsize * bpp / 8.0); + api.TessBaseAPISetImage(handle, buf, xsize, ysize, bytespp, bytespl); + + if (rect != null && !rect.equals(EMPTY_RECTANGLE)) { + api.TessBaseAPISetRectangle(handle, rect.x, rect.y, rect.width, rect.height); + } + + Pointer utf8Text = hocr ? api.TessBaseAPIGetHOCRText(handle, pageNum - 1) : api.TessBaseAPIGetUTF8Text(handle); + String str = utf8Text.getString(0); + api.TessDeleteText(utf8Text); + api.TessBaseAPIDelete(handle); + + return str; + } +} diff --git a/docs-core/src/main/java/net/sourceforge/tess4j/TesseractException.java b/docs-core/src/main/java/net/sourceforge/tess4j/TesseractException.java new file mode 100644 index 00000000..1bda189f --- /dev/null +++ b/docs-core/src/main/java/net/sourceforge/tess4j/TesseractException.java @@ -0,0 +1,38 @@ +/** + * Copyright @ 2010 Quan Nguyen + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package net.sourceforge.tess4j; + +public class TesseractException extends Exception { + + private static final long serialVersionUID = 1L; + + public TesseractException() { + super(); + } + + public TesseractException(String message) { + super(message); + } + + public TesseractException(Throwable cause) { + super(cause); + } + + public TesseractException(String message, Throwable cause) { + super(message, cause); + } +} diff --git a/docs-core/src/main/java/net/sourceforge/vietocr/ImageHelper.java b/docs-core/src/main/java/net/sourceforge/vietocr/ImageHelper.java new file mode 100644 index 00000000..f8b7a126 --- /dev/null +++ b/docs-core/src/main/java/net/sourceforge/vietocr/ImageHelper.java @@ -0,0 +1,173 @@ +/** + * Copyright @ 2008 Quan Nguyen + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package net.sourceforge.vietocr; + +import java.awt.Graphics2D; +import java.awt.Image; +import java.awt.RenderingHints; +import java.awt.Toolkit; +import java.awt.Transparency; +import java.awt.datatransfer.Clipboard; +import java.awt.datatransfer.DataFlavor; +import java.awt.image.*; + +public class ImageHelper { + + /** + * Convenience method that returns a scaled instance of the provided + * {@code BufferedImage}. 
+ * + * @param image the original image to be scaled + * @param targetWidth the desired width of the scaled instance, in pixels + * @param targetHeight the desired height of the scaled instance, in pixels + * @return a scaled version of the original {@code BufferedImage} + */ + public static BufferedImage getScaledInstance(BufferedImage image, int targetWidth, int targetHeight) { + int type = (image.getTransparency() == Transparency.OPAQUE) + ? BufferedImage.TYPE_INT_RGB : BufferedImage.TYPE_INT_ARGB; + BufferedImage tmp = new BufferedImage(targetWidth, targetHeight, type); + Graphics2D g2 = tmp.createGraphics(); + g2.setRenderingHint(RenderingHints.KEY_INTERPOLATION, RenderingHints.VALUE_INTERPOLATION_BICUBIC); + g2.drawImage(image, 0, 0, targetWidth, targetHeight, null); + g2.dispose(); + return tmp; + } + + /** + * A replacement for the standard + * BufferedImage.getSubimage method. + * + * @param image + * @param x the X coordinate of the upper-left corner of the specified + * rectangular region + * @param y the Y coordinate of the upper-left corner of the specified + * rectangular region + * @param width the width of the specified rectangular region + * @param height the height of the specified rectangular region + * @return a BufferedImage that is the subimage of image. + */ + public static BufferedImage getSubImage(BufferedImage image, int x, int y, int width, int height) { + int type = (image.getTransparency() == Transparency.OPAQUE) + ? BufferedImage.TYPE_INT_RGB : BufferedImage.TYPE_INT_ARGB; + BufferedImage tmp = new BufferedImage(width, height, type); + Graphics2D g2 = tmp.createGraphics(); + g2.drawImage(image.getSubimage(x, y, width, height), 0, 0, null); + g2.dispose(); + return tmp; + } + + /** + * A simple method to convert an image to binary or B/W image. 
+ * + * @param image input image + * @return a monochrome image + */ + public static BufferedImage convertImageToBinary(BufferedImage image) { + BufferedImage tmp = new BufferedImage(image.getWidth(), image.getHeight(), BufferedImage.TYPE_BYTE_BINARY); + Graphics2D g2 = tmp.createGraphics(); + g2.drawImage(image, 0, 0, null); + g2.dispose(); + return tmp; + } + + /** + * A simple method to convert an image to binary or B/W image. + * + * @param image input image + * @return a monochrome image + * @deprecated As of release 1.1, renamed to {@link #convertImageToBinary(BufferedImage image)} + */ + @Deprecated + public static BufferedImage convertImage2Binary(BufferedImage image) { + return convertImageToBinary(image); + } + + /** + * A simple method to convert an image to gray scale. + * + * @param image input image + * @return a monochrome image + */ + public static BufferedImage convertImageToGrayscale(BufferedImage image) { + BufferedImage tmp = new BufferedImage(image.getWidth(), image.getHeight(), BufferedImage.TYPE_BYTE_GRAY); + Graphics2D g2 = tmp.createGraphics(); + g2.drawImage(image, 0, 0, null); + g2.dispose(); + return tmp; + } + + private static final short[] invertTable; + + static { + invertTable = new short[256]; + for (int i = 0; i < 256; i++) { + invertTable[i] = (short) (255 - i); + } + } + + /** + * Inverts image color. + * + * @param image input image + * @return an inverted-color image + */ + public static BufferedImage invertImageColor(BufferedImage image) { + BufferedImage tmp = new BufferedImage(image.getWidth(), image.getHeight(), image.getType()); + BufferedImageOp invertOp = new LookupOp(new ShortLookupTable(0, invertTable), null); + return invertOp.filter(image, tmp); + } + + /** + * Rotates an image. 
+ * + * @param image the original image + * @param angle the degree of rotation + * @return a rotated image + */ + public static BufferedImage rotateImage(BufferedImage image, double angle) { + double theta = Math.toRadians(angle); + double sin = Math.abs(Math.sin(theta)); + double cos = Math.abs(Math.cos(theta)); + int w = image.getWidth(); + int h = image.getHeight(); + int newW = (int) Math.floor(w * cos + h * sin); + int newH = (int) Math.floor(h * cos + w * sin); + + BufferedImage tmp = new BufferedImage(newW, newH, image.getType()); + Graphics2D g2d = tmp.createGraphics(); + g2d.setRenderingHint(RenderingHints.KEY_INTERPOLATION, + RenderingHints.VALUE_INTERPOLATION_BICUBIC); + g2d.translate((newW - w) / 2, (newH - h) / 2); + g2d.rotate(theta, w / 2, h / 2); + g2d.drawImage(image, 0, 0, null); + g2d.dispose(); + return tmp; + } + + /** + * Gets an image from Clipboard. + * + * @return image + */ + public static Image getClipboardImage() { + Clipboard clipboard = Toolkit.getDefaultToolkit().getSystemClipboard(); + try { + return (Image) clipboard.getData(DataFlavor.imageFlavor); + } catch (Exception e) { + return null; + } + } +} diff --git a/docs-core/src/main/java/net/sourceforge/vietocr/ImageIOHelper.java b/docs-core/src/main/java/net/sourceforge/vietocr/ImageIOHelper.java new file mode 100644 index 00000000..154ad5c0 --- /dev/null +++ b/docs-core/src/main/java/net/sourceforge/vietocr/ImageIOHelper.java @@ -0,0 +1,128 @@ +/** + * Copyright @ 2008 Quan Nguyen + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package net.sourceforge.vietocr; + +import java.awt.Toolkit; +import java.awt.image.BufferedImage; +import java.awt.image.DataBufferByte; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.util.HashMap; +import java.util.Locale; +import java.util.Map; + +import javax.imageio.IIOImage; +import javax.imageio.ImageIO; +import javax.imageio.ImageWriteParam; +import javax.imageio.ImageWriter; +import javax.imageio.metadata.IIOMetadata; +import javax.imageio.metadata.IIOMetadataNode; +import javax.imageio.stream.ImageOutputStream; + +import org.w3c.dom.NodeList; + +import com.sun.media.imageio.plugins.tiff.TIFFImageWriteParam; +import com.sun.media.imageioimpl.plugins.tiff.TIFFImageWriterSpi; + +public class ImageIOHelper { + + final static String TIFF_FORMAT = "tiff"; + + + /** + * Gets pixel data of an + * IIOImage object. 
+ * + * @param image an + * IIOImage object + * @return a byte buffer of pixel data + * @throws Exception + */ + public static ByteBuffer getImageByteBuffer(IIOImage image) throws IOException { + //Set up the writeParam + TIFFImageWriteParam tiffWriteParam = new TIFFImageWriteParam(Locale.US); + tiffWriteParam.setCompressionMode(ImageWriteParam.MODE_DISABLED); + + //Get tif writer and set output to file + ImageWriter writer = new TIFFImageWriterSpi().createWriterInstance(); + + //Get the stream metadata + IIOMetadata streamMetadata = writer.getDefaultStreamMetadata(tiffWriteParam); + + ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); + ImageOutputStream ios = ImageIO.createImageOutputStream(outputStream); + writer.setOutput(ios); + writer.write(streamMetadata, new IIOImage(image.getRenderedImage(), null, null), tiffWriteParam); + writer.dispose(); + ios.seek(0); + BufferedImage bi = ImageIO.read(ios); + return convertImageData(bi); + } + + /** + * Converts BufferedImage to ByteBuffer. + * + * @param bi Input image + * @return pixel data + */ + public static ByteBuffer convertImageData(BufferedImage bi) { + byte[] pixelData = ((DataBufferByte) bi.getRaster().getDataBuffer()).getData(); + // return ByteBuffer.wrap(pixelData); + ByteBuffer buf = ByteBuffer.allocateDirect(pixelData.length); + buf.order(ByteOrder.nativeOrder()); + buf.put(pixelData); + buf.flip(); + return buf; + } + + /** + * Reads image meta data. 
+ * + * @param oimage + * @return a map of meta data + */ + public static Map readImageData(IIOImage oimage) { + Map dict = new HashMap(); + + IIOMetadata imageMetadata = oimage.getMetadata(); + if (imageMetadata != null) { + IIOMetadataNode dimNode = (IIOMetadataNode) imageMetadata.getAsTree("javax_imageio_1.0"); + NodeList nodes = dimNode.getElementsByTagName("HorizontalPixelSize"); + int dpiX; + if (nodes.getLength() > 0) { + float dpcWidth = Float.parseFloat(nodes.item(0).getAttributes().item(0).getNodeValue()); + dpiX = (int) Math.round(25.4f / dpcWidth); + } else { + dpiX = Toolkit.getDefaultToolkit().getScreenResolution(); + } + dict.put("dpiX", String.valueOf(dpiX)); + + nodes = dimNode.getElementsByTagName("VerticalPixelSize"); + int dpiY; + if (nodes.getLength() > 0) { + float dpcHeight = Float.parseFloat(nodes.item(0).getAttributes().item(0).getNodeValue()); + dpiY = (int) Math.round(25.4f / dpcHeight); + } else { + dpiY = Toolkit.getDefaultToolkit().getScreenResolution(); + } + dict.put("dpiY", String.valueOf(dpiY)); + } + + return dict; + } +} diff --git a/docs-parent/TODO b/docs-parent/TODO index 93acc0cd..80fc035b 100644 --- a/docs-parent/TODO +++ b/docs-parent/TODO @@ -1,3 +1,3 @@ - Disable the whole document edit form while uploading files (client) - Change browser title while uploading (client) -- Automatic backup system using Quartz (server) \ No newline at end of file +- Automatic backup system using Quartz (server) diff --git a/docs-parent/lib/tess4j.jar b/docs-parent/lib/tess4j.jar deleted file mode 100644 index 2ab9b13b..00000000 Binary files a/docs-parent/lib/tess4j.jar and /dev/null differ diff --git a/docs-parent/pom.xml b/docs-parent/pom.xml index 0c651bd9..c1e82c23 100644 --- a/docs-parent/pom.xml +++ b/docs-parent/pom.xml @@ -463,11 +463,6 @@ 1.0
- - tess4j - tess4j - 1.0 - @@ -531,23 +526,6 @@ - - install-tess4j - validate - - ${project.basedir}/lib/tess4j.jar - default - tess4j - tess4j - 1.0 - jar - true - - - install-file - - -