mirror of
https://github.com/sismics/docs.git
synced 2024-06-03 00:54:32 +02:00
76 lines
2.1 KiB
Java
76 lines
2.1 KiB
Java
package com.sismics.docs.core.util;
|
|
|
|
import java.awt.image.BufferedImage;
|
|
import java.io.IOException;
|
|
import java.nio.file.Paths;
|
|
|
|
import javax.imageio.ImageIO;
|
|
|
|
import net.sourceforge.tess4j.Tesseract;
|
|
|
|
import org.imgscalr.Scalr;
|
|
import org.imgscalr.Scalr.Method;
|
|
import org.imgscalr.Scalr.Mode;
|
|
import org.slf4j.Logger;
|
|
import org.slf4j.LoggerFactory;
|
|
|
|
import com.sismics.docs.core.dao.jpa.FileDao;
|
|
import com.sismics.docs.core.model.jpa.Document;
|
|
import com.sismics.docs.core.model.jpa.File;
|
|
|
|
/**
|
|
* File entity utilities.
|
|
*
|
|
* @author bgamard
|
|
*/
|
|
public class FileUtil {
|
|
/**
|
|
* Logger.
|
|
*/
|
|
private static final Logger log = LoggerFactory.getLogger(FileUtil.class);
|
|
|
|
/**
|
|
* OCR a file.
|
|
*
|
|
* @param document Document linked to the file
|
|
* @param file File to OCR
|
|
*/
|
|
public static void ocrFile(Document document, final File file) {
|
|
Tesseract instance = Tesseract.getInstance();
|
|
java.io.File storedfile = Paths.get(DirectoryUtil.getStorageDirectory().getPath(), file.getId()).toFile();
|
|
String content = null;
|
|
BufferedImage image = null;
|
|
try {
|
|
image = ImageIO.read(storedfile);
|
|
} catch (IOException e) {
|
|
log.error("Error reading the image " + storedfile, e);
|
|
}
|
|
|
|
// Upscale the image if it is too small
|
|
if (image.getWidth() < 2500 || image.getHeight() < 2500) {
|
|
BufferedImage resizedImage = Scalr.resize(image, Method.AUTOMATIC, Mode.AUTOMATIC, 3500);
|
|
image.flush();
|
|
image = resizedImage;
|
|
}
|
|
|
|
// OCR the file
|
|
try {
|
|
instance.setLanguage(document.getLanguage());
|
|
content = instance.doOCR(image);
|
|
} catch (Exception e) {
|
|
log.error("Error while OCR-izing the file " + storedfile, e);
|
|
}
|
|
|
|
file.setContent(content);
|
|
|
|
// Store the OCR-ization result in the database
|
|
TransactionUtil.handle(new Runnable() {
|
|
@Override
|
|
public void run() {
|
|
FileDao fileDao = new FileDao();
|
|
fileDao.updateContent(file);
|
|
}
|
|
});
|
|
}
|
|
}
|