mirror of
https://github.com/sismics/docs.git
synced 2024-11-25 23:27:57 +01:00
Closes #157: Deskew before OCR
This commit is contained in:
parent
46079393d5
commit
dca8c28b84
@ -2,10 +2,9 @@ package com.sismics.docs.core.util;
|
|||||||
|
|
||||||
import com.sismics.docs.core.model.jpa.File;
|
import com.sismics.docs.core.model.jpa.File;
|
||||||
import com.sismics.tess4j.Tesseract;
|
import com.sismics.tess4j.Tesseract;
|
||||||
|
import com.sismics.util.ImageDeskew;
|
||||||
import com.sismics.util.ImageUtil;
|
import com.sismics.util.ImageUtil;
|
||||||
import org.imgscalr.Scalr;
|
import com.sismics.util.Scalr;
|
||||||
import org.imgscalr.Scalr.Method;
|
|
||||||
import org.imgscalr.Scalr.Mode;
|
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
@ -70,10 +69,13 @@ public class FileUtil {
|
|||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Upscale and grayscale the image
|
// Upscale, grayscale and deskew the image
|
||||||
BufferedImage resizedImage = Scalr.resize(image, Method.AUTOMATIC, Mode.AUTOMATIC, 3500, Scalr.OP_ANTIALIAS, Scalr.OP_GRAYSCALE);
|
BufferedImage resizedImage = Scalr.resize(image, Scalr.Method.AUTOMATIC, Scalr.Mode.AUTOMATIC, 3500, Scalr.OP_ANTIALIAS, Scalr.OP_GRAYSCALE);
|
||||||
image.flush();
|
image.flush();
|
||||||
image = resizedImage;
|
ImageDeskew imageDeskew = new ImageDeskew(resizedImage);
|
||||||
|
BufferedImage deskewedImage = Scalr.rotate(resizedImage, - imageDeskew.getSkewAngle(), Scalr.OP_ANTIALIAS, Scalr.OP_GRAYSCALE);
|
||||||
|
resizedImage.flush();
|
||||||
|
image = deskewedImage;
|
||||||
|
|
||||||
// OCR the file
|
// OCR the file
|
||||||
try {
|
try {
|
||||||
|
167
docs-core/src/main/java/com/sismics/util/ImageDeskew.java
Normal file
167
docs-core/src/main/java/com/sismics/util/ImageDeskew.java
Normal file
@ -0,0 +1,167 @@
|
|||||||
|
package com.sismics.util;
|
||||||
|
|
||||||
|
import java.awt.image.BufferedImage;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* <a url=http://www.jdeskew.com/>JDeskew</a>
|
||||||
|
*/
|
||||||
|
public class ImageDeskew {
|
||||||
|
/**
|
||||||
|
* Representation of a line in the image.
|
||||||
|
*/
|
||||||
|
public class HoughLine {
|
||||||
|
|
||||||
|
// count of points in the line
|
||||||
|
public int count = 0;
|
||||||
|
// index in matrix.
|
||||||
|
public int index = 0;
|
||||||
|
// the line is represented as all x, y that solve y * cos(alpha) - x *
|
||||||
|
// sin(alpha) = d
|
||||||
|
public double alpha;
|
||||||
|
public double d;
|
||||||
|
}
|
||||||
|
|
||||||
|
// the source image
|
||||||
|
private BufferedImage cImage;
|
||||||
|
// the range of angles to search for lines
|
||||||
|
private double cAlphaStart = -20;
|
||||||
|
private double cAlphaStep = 0.2;
|
||||||
|
private int cSteps = 40 * 5;
|
||||||
|
// pre-calculation of sin and cos
|
||||||
|
private double[] cSinA;
|
||||||
|
private double[] cCosA;
|
||||||
|
// range of d
|
||||||
|
private double cDMin;
|
||||||
|
private double cDStep = 1.0;
|
||||||
|
private int cDCount;
|
||||||
|
// count of points that fit in a line
|
||||||
|
private int[] cHMatrix;
|
||||||
|
|
||||||
|
// constructor
|
||||||
|
public ImageDeskew(BufferedImage image) {
|
||||||
|
this.cImage = image;
|
||||||
|
}
|
||||||
|
|
||||||
|
// calculate the skew angle of the image cImage
|
||||||
|
public double getSkewAngle() {
|
||||||
|
ImageDeskew.HoughLine[] hl;
|
||||||
|
double sum = 0.0;
|
||||||
|
int count = 0;
|
||||||
|
|
||||||
|
// perform Hough Transformation
|
||||||
|
calc();
|
||||||
|
// top 20 of the detected lines in the image
|
||||||
|
hl = getTop(20);
|
||||||
|
|
||||||
|
if (hl.length >= 20) {
|
||||||
|
// average angle of the lines
|
||||||
|
for (int i = 0; i < 19; i++) {
|
||||||
|
sum += hl[i].alpha;
|
||||||
|
count++;
|
||||||
|
}
|
||||||
|
return (sum / count);
|
||||||
|
} else {
|
||||||
|
return 0.0d;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// calculate the count lines in the image with most points
|
||||||
|
private ImageDeskew.HoughLine[] getTop(int count) {
|
||||||
|
|
||||||
|
ImageDeskew.HoughLine[] hl = new ImageDeskew.HoughLine[count];
|
||||||
|
for (int i = 0; i < count; i++) {
|
||||||
|
hl[i] = new ImageDeskew.HoughLine();
|
||||||
|
}
|
||||||
|
|
||||||
|
ImageDeskew.HoughLine tmp;
|
||||||
|
|
||||||
|
for (int i = 0; i < (this.cHMatrix.length - 1); i++) {
|
||||||
|
if (this.cHMatrix[i] > hl[count - 1].count) {
|
||||||
|
hl[count - 1].count = this.cHMatrix[i];
|
||||||
|
hl[count - 1].index = i;
|
||||||
|
int j = count - 1;
|
||||||
|
while ((j > 0) && (hl[j].count > hl[j - 1].count)) {
|
||||||
|
tmp = hl[j];
|
||||||
|
hl[j] = hl[j - 1];
|
||||||
|
hl[j - 1] = tmp;
|
||||||
|
j--;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int alphaIndex;
|
||||||
|
int dIndex;
|
||||||
|
|
||||||
|
for (int i = 0; i < count; i++) {
|
||||||
|
dIndex = hl[i].index / cSteps; // integer division, no
|
||||||
|
// remainder
|
||||||
|
alphaIndex = hl[i].index - dIndex * cSteps;
|
||||||
|
hl[i].alpha = getAlpha(alphaIndex);
|
||||||
|
hl[i].d = dIndex + cDMin;
|
||||||
|
}
|
||||||
|
|
||||||
|
return hl;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Hough Transformation
|
||||||
|
private void calc() {
|
||||||
|
int hMin = (int) ((this.cImage.getHeight()) / 4.0);
|
||||||
|
int hMax = (int) ((this.cImage.getHeight()) * 3.0 / 4.0);
|
||||||
|
init();
|
||||||
|
|
||||||
|
for (int y = hMin; y < hMax; y++) {
|
||||||
|
for (int x = 1; x < (this.cImage.getWidth() - 2); x++) {
|
||||||
|
// only lower edges are considered
|
||||||
|
if (ImageUtil.isBlack(this.cImage, x, y)) {
|
||||||
|
if (!ImageUtil.isBlack(this.cImage, x, y + 1)) {
|
||||||
|
calc(x, y);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// calculate all lines through the point (x,y)
|
||||||
|
private void calc(int x, int y) {
|
||||||
|
double d;
|
||||||
|
int dIndex;
|
||||||
|
int index;
|
||||||
|
|
||||||
|
for (int alpha = 0; alpha < (this.cSteps - 1); alpha++) {
|
||||||
|
d = y * this.cCosA[alpha] - x * this.cSinA[alpha];
|
||||||
|
dIndex = (int) (d - this.cDMin);
|
||||||
|
index = dIndex * this.cSteps + alpha;
|
||||||
|
try {
|
||||||
|
this.cHMatrix[index] += 1;
|
||||||
|
} catch (Exception ex) {
|
||||||
|
System.out.println(ex.toString());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void init() {
|
||||||
|
|
||||||
|
double angle;
|
||||||
|
|
||||||
|
// pre-calculation of sin and cos
|
||||||
|
this.cSinA = new double[this.cSteps - 1];
|
||||||
|
this.cCosA = new double[this.cSteps - 1];
|
||||||
|
|
||||||
|
for (int i = 0; i < (this.cSteps - 1); i++) {
|
||||||
|
angle = getAlpha(i) * Math.PI / 180.0;
|
||||||
|
this.cSinA[i] = Math.sin(angle);
|
||||||
|
this.cCosA[i] = Math.cos(angle);
|
||||||
|
}
|
||||||
|
|
||||||
|
// range of d
|
||||||
|
this.cDMin = -this.cImage.getWidth();
|
||||||
|
this.cDCount = (int) (2.0 * ((this.cImage.getWidth() + this.cImage.getHeight())) / this.cDStep);
|
||||||
|
this.cHMatrix = new int[this.cDCount * this.cSteps];
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private double getAlpha(int index) {
|
||||||
|
return this.cAlphaStart + (index * this.cAlphaStep);
|
||||||
|
}
|
||||||
|
}
|
@ -1,19 +1,19 @@
|
|||||||
package com.sismics.util;
|
package com.sismics.util;
|
||||||
|
|
||||||
import java.awt.image.BufferedImage;
|
import com.google.common.base.Charsets;
|
||||||
import java.io.IOException;
|
import com.google.common.hash.Hashing;
|
||||||
import java.io.OutputStream;
|
import com.sismics.util.mime.MimeType;
|
||||||
import java.util.Iterator;
|
|
||||||
|
|
||||||
import javax.imageio.IIOImage;
|
import javax.imageio.IIOImage;
|
||||||
import javax.imageio.ImageIO;
|
import javax.imageio.ImageIO;
|
||||||
import javax.imageio.ImageWriteParam;
|
import javax.imageio.ImageWriteParam;
|
||||||
import javax.imageio.ImageWriter;
|
import javax.imageio.ImageWriter;
|
||||||
import javax.imageio.stream.ImageOutputStream;
|
import javax.imageio.stream.ImageOutputStream;
|
||||||
|
import java.awt.image.BufferedImage;
|
||||||
import com.google.common.base.Charsets;
|
import java.awt.image.WritableRaster;
|
||||||
import com.google.common.hash.Hashing;
|
import java.io.IOException;
|
||||||
import com.sismics.util.mime.MimeType;
|
import java.io.OutputStream;
|
||||||
|
import java.util.Iterator;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Image processing utilities.
|
* Image processing utilities.
|
||||||
@ -34,7 +34,7 @@ public class ImageUtil {
|
|||||||
ImageWriter writer = null;
|
ImageWriter writer = null;
|
||||||
ImageOutputStream imageOutputStream = null;
|
ImageOutputStream imageOutputStream = null;
|
||||||
try {
|
try {
|
||||||
writer = (ImageWriter) iter.next();
|
writer = iter.next();
|
||||||
ImageWriteParam iwp = writer.getDefaultWriteParam();
|
ImageWriteParam iwp = writer.getDefaultWriteParam();
|
||||||
iwp.setCompressionMode(ImageWriteParam.MODE_EXPLICIT);
|
iwp.setCompressionMode(ImageWriteParam.MODE_EXPLICIT);
|
||||||
iwp.setCompressionQuality(1.f);
|
iwp.setCompressionQuality(1.f);
|
||||||
@ -69,7 +69,7 @@ public class ImageUtil {
|
|||||||
* Compute Gravatar hash.
|
* Compute Gravatar hash.
|
||||||
* See https://en.gravatar.com/site/implement/hash/.
|
* See https://en.gravatar.com/site/implement/hash/.
|
||||||
*
|
*
|
||||||
* @param email
|
* @param email Email
|
||||||
* @return Gravatar hash
|
* @return Gravatar hash
|
||||||
*/
|
*/
|
||||||
public static String computeGravatar(String email) {
|
public static String computeGravatar(String email) {
|
||||||
@ -81,4 +81,40 @@ public class ImageUtil {
|
|||||||
email.trim().toLowerCase(), Charsets.UTF_8)
|
email.trim().toLowerCase(), Charsets.UTF_8)
|
||||||
.toString();
|
.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static boolean isBlack(BufferedImage image, int x, int y) {
|
||||||
|
if (image.getType() == BufferedImage.TYPE_BYTE_BINARY) {
|
||||||
|
WritableRaster raster = image.getRaster();
|
||||||
|
int pixelRGBValue = raster.getSample(x, y, 0);
|
||||||
|
return pixelRGBValue == 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int luminanceValue = 140;
|
||||||
|
return isBlack(image, x, y, luminanceValue);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static boolean isBlack(BufferedImage image, int x, int y, int luminanceCutOff) {
|
||||||
|
int pixelRGBValue;
|
||||||
|
int r;
|
||||||
|
int g;
|
||||||
|
int b;
|
||||||
|
double luminance = 0.0;
|
||||||
|
|
||||||
|
// return white on areas outside of image boundaries
|
||||||
|
if (x < 0 || y < 0 || x > image.getWidth() || y > image.getHeight()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
pixelRGBValue = image.getRGB(x, y);
|
||||||
|
r = (pixelRGBValue >> 16) & 0xff;
|
||||||
|
g = (pixelRGBValue >> 8) & 0xff;
|
||||||
|
b = (pixelRGBValue) & 0xff;
|
||||||
|
luminance = (r * 0.299) + (g * 0.587) + (b * 0.114);
|
||||||
|
} catch (Exception e) {
|
||||||
|
// ignore.
|
||||||
|
}
|
||||||
|
|
||||||
|
return luminance < luminanceCutOff;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
47
docs-core/src/main/java/com/sismics/util/Scalr.java
Normal file
47
docs-core/src/main/java/com/sismics/util/Scalr.java
Normal file
@ -0,0 +1,47 @@
|
|||||||
|
package com.sismics.util;
|
||||||
|
|
||||||
|
import java.awt.*;
|
||||||
|
import java.awt.geom.AffineTransform;
|
||||||
|
import java.awt.image.BufferedImage;
|
||||||
|
import java.awt.image.BufferedImageOp;
|
||||||
|
import java.awt.image.ImagingOpException;
|
||||||
|
|
||||||
|
public class Scalr extends org.imgscalr.Scalr {
|
||||||
|
/**
|
||||||
|
* Rotate an image by a specific amount.
|
||||||
|
*
|
||||||
|
* @param src Source image
|
||||||
|
* @param rotation Rotation angle
|
||||||
|
* @param ops Options
|
||||||
|
* @return Rotated image
|
||||||
|
* @throws IllegalArgumentException
|
||||||
|
* @throws ImagingOpException
|
||||||
|
*/
|
||||||
|
public static BufferedImage rotate(BufferedImage src, double rotation, BufferedImageOp... ops) throws IllegalArgumentException, ImagingOpException {
|
||||||
|
long t = System.currentTimeMillis();
|
||||||
|
if (src == null) {
|
||||||
|
throw new IllegalArgumentException("src cannot be null");
|
||||||
|
} else {
|
||||||
|
if (DEBUG) {
|
||||||
|
log(0, "Rotating Image [%s]...", rotation);
|
||||||
|
}
|
||||||
|
|
||||||
|
AffineTransform tx = new AffineTransform();
|
||||||
|
tx.rotate(Math.toRadians(rotation));
|
||||||
|
|
||||||
|
BufferedImage result = createOptimalImage(src, src.getWidth(), src.getHeight());
|
||||||
|
Graphics2D g2d = result.createGraphics();
|
||||||
|
g2d.drawImage(src, tx, null);
|
||||||
|
g2d.dispose();
|
||||||
|
if (DEBUG) {
|
||||||
|
log(0, "Rotation Applied in %d ms, result [width=%d, height=%d]", System.currentTimeMillis() - t, result.getWidth(), result.getHeight());
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ops != null && ops.length > 0) {
|
||||||
|
result = apply(result, ops);
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -1,11 +1,6 @@
|
|||||||
<img src="img/loader.gif" ng-show="!document && isEdit()" />
|
<img src="img/loader.gif" ng-show="!document && isEdit()" />
|
||||||
|
|
||||||
<div ng-show="document || !isEdit()">
|
<div ng-show="document || !isEdit()">
|
||||||
<div class="row" ng-show="fileIsUploading">
|
|
||||||
<h4>{{ 'document.edit.uploading_files' | translate }}</h4>
|
|
||||||
<div class="col-md-6"><uib-progressbar value="fileProgress" class="progress-info active"></uib-progressbar></div>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<form name="documentForm" class="form-horizontal">
|
<form name="documentForm" class="form-horizontal">
|
||||||
<div class="pull-right btn-group" ng-init="form = documentForm">
|
<div class="pull-right btn-group" ng-init="form = documentForm">
|
||||||
<button type="submit" class="btn btn-primary" ng-disabled="!documentForm.$valid || fileIsUploading" ng-click="edit()">
|
<button type="submit" class="btn btn-primary" ng-disabled="!documentForm.$valid || fileIsUploading" ng-click="edit()">
|
||||||
@ -17,6 +12,11 @@
|
|||||||
|
|
||||||
<div uib-alert ng-class="'alert-' + alert.type" ng-repeat="alert in alerts" type="alert.type" close="closeAlert($index)">{{ alert.msg }}</div>
|
<div uib-alert ng-class="'alert-' + alert.type" ng-repeat="alert in alerts" type="alert.type" close="closeAlert($index)">{{ alert.msg }}</div>
|
||||||
|
|
||||||
|
<div class="row" ng-show="fileIsUploading">
|
||||||
|
<h4>{{ 'document.edit.uploading_files' | translate }}</h4>
|
||||||
|
<div class="col-md-6"><uib-progressbar value="fileProgress" class="progress-info active"></uib-progressbar></div>
|
||||||
|
</div>
|
||||||
|
|
||||||
<fieldset>
|
<fieldset>
|
||||||
<legend>{{ 'document.edit.primary_metadata' | translate }}</legend>
|
<legend>{{ 'document.edit.primary_metadata' | translate }}</legend>
|
||||||
<div class="form-group" ng-class="{ 'has-error': !documentForm.title.$valid && documentForm.$dirty }">
|
<div class="form-group" ng-class="{ 'has-error': !documentForm.title.$valid && documentForm.$dirty }">
|
||||||
|
Loading…
Reference in New Issue
Block a user