mirror of
https://github.com/sismics/docs.git
synced 2024-12-25 12:43:49 +01:00
Force loading of corrupted PDF
This commit is contained in:
parent
77c5a10aba
commit
2c7083aa43
@ -40,7 +40,7 @@ public class FileCreatedAsyncListener {
|
||||
final File file = fileCreatedAsyncEvent.getFile();
|
||||
long startTime = System.currentTimeMillis();
|
||||
final String content = FileUtil.extractContent(fileCreatedAsyncEvent.getDocument(), file);
|
||||
log.info(MessageFormat.format("File OCR-ized in {0}ms", System.currentTimeMillis() - startTime));
|
||||
log.info(MessageFormat.format("File content extracted in {0}ms", System.currentTimeMillis() - startTime));
|
||||
|
||||
// Store the extracted file content in the database
|
||||
TransactionUtil.handle(new Runnable() {
|
||||
|
@ -103,7 +103,7 @@ public class FileUtil {
|
||||
java.io.File storedfile = Paths.get(DirectoryUtil.getStorageDirectory().getPath(), file.getId()).toFile();
|
||||
try {
|
||||
PDFTextStripper stripper = new PDFTextStripper();
|
||||
pdfDocument = PDDocument.load(storedfile);
|
||||
pdfDocument = PDDocument.load(storedfile.getAbsolutePath(), true);
|
||||
content = stripper.getText(pdfDocument);
|
||||
} catch (IOException e) {
|
||||
log.error("Error while extracting text from the PDF " + storedfile, e);
|
||||
@ -153,7 +153,7 @@ public class FileUtil {
|
||||
image = ImageIO.read(originalFile);
|
||||
} else if(file.getMimeType().equals(MimeType.APPLICATION_PDF)) {
|
||||
// Generate preview from the first page of the PDF
|
||||
PDDocument pdfDocument = PDDocument.load(originalFile);
|
||||
PDDocument pdfDocument = PDDocument.load(originalFile.getAbsolutePath(), true);
|
||||
@SuppressWarnings("unchecked")
|
||||
List<PDPage> pageList = pdfDocument.getDocumentCatalog().getAllPages();
|
||||
if (pageList.size() > 0) {
|
||||
|
@ -148,21 +148,21 @@ public class AppResource extends BaseResource {
|
||||
}
|
||||
|
||||
/**
|
||||
* OCR-ize all files again.
|
||||
* Extract content from all files again.
|
||||
*
|
||||
* @return Response
|
||||
* @throws JSONException
|
||||
*/
|
||||
@POST
|
||||
@Path("batch/ocr")
|
||||
@Path("batch/extract")
|
||||
@Produces(MediaType.APPLICATION_JSON)
|
||||
public Response batchOcr() throws JSONException {
|
||||
public Response batchExtract() throws JSONException {
|
||||
if (!authenticate()) {
|
||||
throw new ForbiddenClientException();
|
||||
}
|
||||
checkBaseFunction(BaseFunction.ADMIN);
|
||||
|
||||
// Raise a OCR file event
|
||||
// Raise an extract file content event
|
||||
AppContext.getInstance().getAsyncEventBus().post(new ExtractFileAsyncEvent());
|
||||
|
||||
JSONObject response = new JSONObject();
|
||||
|
@ -33,7 +33,7 @@
|
||||
<div class="control-group">
|
||||
<label class="control-label" for="inputFiles">New files</label>
|
||||
<div class="controls">
|
||||
<file class="input-block-level" id="inputFiles" multiple="multiple" ng-model="newFiles" accept="image/png,image/jpg,image/jpeg,image/gif" />
|
||||
<file class="input-block-level" id="inputFiles" multiple="multiple" ng-model="newFiles" accept="image/png,image/jpg,image/jpeg,image/gif,application/pdf" />
|
||||
</div>
|
||||
</div>
|
||||
<div class="control-group">
|
||||
|
@ -44,7 +44,7 @@ public class TestAppResource extends BaseJerseyTest {
|
||||
Assert.assertEquals(0, json.getInt("document_count"));
|
||||
|
||||
// Extract content from all files again
|
||||
appResource = resource().path("/app/batch/ocr");
|
||||
appResource = resource().path("/app/batch/extract");
|
||||
appResource.addFilter(new CookieAuthenticationFilter(adminAuthenticationToken));
|
||||
response = appResource.post(ClientResponse.class);
|
||||
Assert.assertEquals(Status.OK, Status.fromStatusCode(response.getStatus()));
|
||||
|
Loading…
Reference in New Issue
Block a user