Closes #201: reprocess file manually

This commit is contained in:
Benjamin Gamard 2018-03-29 11:34:25 +02:00
parent 0409c2ef79
commit 899f13cb35
16 changed files with 243 additions and 90 deletions

View File

@ -77,8 +77,20 @@ public class LuceneDao {
} }
/** /**
* Update document index. * Update file index.
* *
* @param file Updated file
*/
public void updateFile(final File file) {
    LuceneUtil.handle(writer -> {
        // Build the fresh index entry first, then swap it in for whatever is indexed under this file ID
        final org.apache.lucene.document.Document indexedFile = getDocumentFromFile(file);
        final Term idTerm = new Term("id", file.getId());
        writer.updateDocument(idTerm, indexedFile);
    });
}
/**
* Update document index.
*
* @param document Updated document * @param document Updated document
*/ */
public void updateDocument(final Document document) { public void updateDocument(final Document document) {
@ -87,7 +99,7 @@ public class LuceneDao {
indexWriter.updateDocument(new Term("id", document.getId()), luceneDocument); indexWriter.updateDocument(new Term("id", document.getId()), luceneDocument);
}); });
} }
/** /**
* Delete document from the index. * Delete document from the index.
* *
@ -112,7 +124,7 @@ public class LuceneDao {
// Build search query // Build search query
StandardQueryParser qpHelper = new StandardQueryParser(new StandardAnalyzer()); StandardQueryParser qpHelper = new StandardQueryParser(new StandardAnalyzer());
qpHelper.setPhraseSlop(100000); // PhraseQuery add terms qpHelper.setPhraseSlop(100); // PhraseQuery add terms
// Search on documents and files // Search on documents and files
BooleanQuery query = new BooleanQuery.Builder() BooleanQuery query = new BooleanQuery.Builder()
@ -126,6 +138,7 @@ public class LuceneDao {
.add(qpHelper.parse(searchQuery, "type"), Occur.SHOULD) .add(qpHelper.parse(searchQuery, "type"), Occur.SHOULD)
.add(qpHelper.parse(searchQuery, "coverage"), Occur.SHOULD) .add(qpHelper.parse(searchQuery, "coverage"), Occur.SHOULD)
.add(qpHelper.parse(searchQuery, "rights"), Occur.SHOULD) .add(qpHelper.parse(searchQuery, "rights"), Occur.SHOULD)
.add(qpHelper.parse(searchQuery, "filename"), Occur.SHOULD)
.add(qpHelper.parse(fullSearchQuery, "content"), Occur.SHOULD) .add(qpHelper.parse(fullSearchQuery, "content"), Occur.SHOULD)
.build(); .build();
@ -150,7 +163,9 @@ public class LuceneDao {
} else if (type.equals("file")) { } else if (type.equals("file")) {
documentId = document.get("document_id"); documentId = document.get("document_id");
} }
documentIdList.add(documentId); if (documentId != null) {
documentIdList.add(documentId);
}
} }
return documentIdList; return documentIdList;
@ -208,7 +223,12 @@ public class LuceneDao {
org.apache.lucene.document.Document luceneDocument = new org.apache.lucene.document.Document(); org.apache.lucene.document.Document luceneDocument = new org.apache.lucene.document.Document();
luceneDocument.add(new StringField("id", file.getId(), Field.Store.YES)); luceneDocument.add(new StringField("id", file.getId(), Field.Store.YES));
luceneDocument.add(new StringField("doctype", "file", Field.Store.YES)); luceneDocument.add(new StringField("doctype", "file", Field.Store.YES));
luceneDocument.add(new StringField("document_id", file.getDocumentId(), Field.Store.YES)); if (file.getName() != null) {
luceneDocument.add(new TextField("filename", file.getName(), Field.Store.NO));
}
if (file.getDocumentId() != null) {
luceneDocument.add(new StringField("document_id", file.getDocumentId(), Field.Store.YES));
}
if (file.getContent() != null) { if (file.getContent() != null) {
luceneDocument.add(new TextField("content", file.getContent(), Field.Store.NO)); luceneDocument.add(new TextField("content", file.getContent(), Field.Store.NO));
} }

View File

@ -1,61 +1,9 @@
package com.sismics.docs.core.event; package com.sismics.docs.core.event;
import com.google.common.base.MoreObjects;
import com.sismics.docs.core.model.jpa.File;
import java.nio.file.Path;
/** /**
* New file created event. * New file created event.
* *
* @author bgamard * @author bgamard
*/ */
public class FileCreatedAsyncEvent extends UserEvent { public class FileCreatedAsyncEvent extends FileEvent {
/**
* Created file.
*/
private File file;
/**
* Language of the file.
*/
private String language;
/**
* Unencrypted original file.
*/
private Path unencryptedFile;
public File getFile() {
return file;
}
public void setFile(File file) {
this.file = file;
}
public String getLanguage() {
return language;
}
public void setLanguage(String language) {
this.language = language;
}
public Path getUnencryptedFile() {
return unencryptedFile;
}
public FileCreatedAsyncEvent setUnencryptedFile(Path unencryptedFile) {
this.unencryptedFile = unencryptedFile;
return this;
}
@Override
public String toString() {
return MoreObjects.toStringHelper(this)
.add("file", file)
.add("language", language)
.toString();
}
} }

View File

@ -0,0 +1,61 @@
package com.sismics.docs.core.event;
import com.google.common.base.MoreObjects;
import com.sismics.docs.core.model.jpa.File;
import java.nio.file.Path;
/**
* New file event.
*
* @author bgamard
*/
public abstract class FileEvent extends UserEvent {
/**
* Created file.
*/
private File file;
/**
* Language of the file.
*/
private String language;
/**
* Unencrypted original file.
*/
private Path unencryptedFile;
public File getFile() {
return file;
}
public void setFile(File file) {
this.file = file;
}
public String getLanguage() {
return language;
}
public void setLanguage(String language) {
this.language = language;
}
public Path getUnencryptedFile() {
return unencryptedFile;
}
public FileEvent setUnencryptedFile(Path unencryptedFile) {
this.unencryptedFile = unencryptedFile;
return this;
}
@Override
public String toString() {
return MoreObjects.toStringHelper(this)
.add("file", file)
.add("language", language)
.toString();
}
}

View File

@ -0,0 +1,9 @@
package com.sismics.docs.core.event;
/**
 * File updated event.
 *
 * Adds nothing to {@link FileEvent}; the distinct type lets listeners
 * subscribe to file updates separately from file creations.
 *
 * @author bgamard
 */
public class FileUpdatedAsyncEvent extends FileEvent {
}

View File

@ -5,6 +5,8 @@ import com.sismics.docs.core.dao.jpa.FileDao;
import com.sismics.docs.core.dao.jpa.UserDao; import com.sismics.docs.core.dao.jpa.UserDao;
import com.sismics.docs.core.dao.lucene.LuceneDao; import com.sismics.docs.core.dao.lucene.LuceneDao;
import com.sismics.docs.core.event.FileCreatedAsyncEvent; import com.sismics.docs.core.event.FileCreatedAsyncEvent;
import com.sismics.docs.core.event.FileEvent;
import com.sismics.docs.core.event.FileUpdatedAsyncEvent;
import com.sismics.docs.core.model.jpa.File; import com.sismics.docs.core.model.jpa.File;
import com.sismics.docs.core.model.jpa.User; import com.sismics.docs.core.model.jpa.User;
import com.sismics.docs.core.util.DirectoryUtil; import com.sismics.docs.core.util.DirectoryUtil;
@ -28,19 +30,19 @@ import java.text.MessageFormat;
import java.util.concurrent.atomic.AtomicReference; import java.util.concurrent.atomic.AtomicReference;
/** /**
* Listener on file created. * Listener on file processing.
* *
* @author bgamard * @author bgamard
*/ */
public class FileCreatedAsyncListener { public class FileProcessingAsyncListener {
/** /**
* Logger. * Logger.
*/ */
private static final Logger log = LoggerFactory.getLogger(FileCreatedAsyncListener.class); private static final Logger log = LoggerFactory.getLogger(FileProcessingAsyncListener.class);
/** /**
* File created. * File created.
* *
* @param event File created event * @param event File created event
*/ */
@Subscribe @Subscribe
@ -49,6 +51,41 @@ public class FileCreatedAsyncListener {
log.info("File created event: " + event.toString()); log.info("File created event: " + event.toString());
} }
processFile(event);
// Update Lucene index
LuceneDao luceneDao = new LuceneDao();
luceneDao.createFile(event.getFile());
FileUtil.endProcessingFile(event.getFile().getId());
}
/**
 * File updated.
 *
 * Processes the file again, then refreshes its entry in the Lucene index.
 * The file is flagged as no longer being processed whether or not these
 * steps succeed.
 *
 * @param event File updated event
 */
@Subscribe
public void on(final FileUpdatedAsyncEvent event) {
    log.info("File updated event: {}", event);

    try {
        processFile(event);

        // Update Lucene index
        LuceneDao luceneDao = new LuceneDao();
        luceneDao.updateFile(event.getFile());
    } finally {
        // Release the processing flag even on failure, otherwise the file
        // would stay marked as "being processed"
        FileUtil.endProcessingFile(event.getFile().getId());
    }
}
/**
* Process the file (create/update).
*
* @param event File event
*/
private void processFile(FileEvent event) {
// Find a format handler // Find a format handler
final File file = event.getFile(); final File file = event.getFile();
FormatHandler formatHandler = FormatHandlerUtil.find(file.getMimeType()); FormatHandler formatHandler = FormatHandlerUtil.find(file.getMimeType());
@ -102,7 +139,7 @@ public class FileCreatedAsyncListener {
try { try {
content.set(formatHandler.extractContent(event.getLanguage(), event.getUnencryptedFile())); content.set(formatHandler.extractContent(event.getLanguage(), event.getUnencryptedFile()));
} catch (Exception e) { } catch (Exception e) {
log.error("Error extracting content from: " + event.getFile()); log.error("Error extracting content from: " + event.getFile(), e);
} }
log.info(MessageFormat.format("File content extracted in {0}ms", System.currentTimeMillis() - startTime)); log.info(MessageFormat.format("File content extracted in {0}ms", System.currentTimeMillis() - startTime));
@ -117,13 +154,5 @@ public class FileCreatedAsyncListener {
file.setContent(content.get()); file.setContent(content.get());
fileDao.update(file); fileDao.update(file);
}); });
if (file.getDocumentId() != null) {
// Update Lucene index
LuceneDao luceneDao = new LuceneDao();
luceneDao.createFile(event.getFile());
}
FileUtil.endProcessingFile(file.getId());
} }
} }

View File

@ -17,7 +17,7 @@ public class TemporaryFileCleanupAsyncListener {
/** /**
* Logger. * Logger.
*/ */
private static final Logger log = LoggerFactory.getLogger(FileCreatedAsyncListener.class); private static final Logger log = LoggerFactory.getLogger(TemporaryFileCleanupAsyncListener.class);
/** /**
* Cleanup temporary files. * Cleanup temporary files.

View File

@ -117,7 +117,7 @@ public class AppContext {
asyncExecutorList = new ArrayList<>(); asyncExecutorList = new ArrayList<>();
asyncEventBus = newAsyncEventBus(); asyncEventBus = newAsyncEventBus();
asyncEventBus.register(new FileCreatedAsyncListener()); asyncEventBus.register(new FileProcessingAsyncListener());
asyncEventBus.register(new FileDeletedAsyncListener()); asyncEventBus.register(new FileDeletedAsyncListener());
asyncEventBus.register(new DocumentCreatedAsyncListener()); asyncEventBus.register(new DocumentCreatedAsyncListener());
asyncEventBus.register(new DocumentUpdatedAsyncListener()); asyncEventBus.register(new DocumentUpdatedAsyncListener());

View File

@ -232,7 +232,8 @@ public class InboxService extends AbstractScheduledService {
// Add files to the document // Add files to the document
for (EmailUtil.FileContent fileContent : mailContent.getFileContentList()) { for (EmailUtil.FileContent fileContent : mailContent.getFileContentList()) {
FileUtil.createFile(fileContent.getName(), fileContent.getFile(), fileContent.getSize(), "eng", "admin", document.getId()); FileUtil.createFile(fileContent.getName(), fileContent.getFile(), fileContent.getSize(),
document.getLanguage(), "admin", document.getId());
} }
} }

View File

@ -54,7 +54,7 @@ public class EncryptionUtil {
* @param is InputStream to encrypt * @param is InputStream to encrypt
* @param privateKey Private key * @param privateKey Private key
* @return Encrypted stream * @return Encrypted stream
* @throws Exception * @throws Exception e
*/ */
public static InputStream decryptInputStream(InputStream is, String privateKey) throws Exception { public static InputStream decryptInputStream(InputStream is, String privateKey) throws Exception {
return new CipherInputStream(is, getCipher(privateKey, Cipher.DECRYPT_MODE)); return new CipherInputStream(is, getCipher(privateKey, Cipher.DECRYPT_MODE));
@ -66,7 +66,7 @@ public class EncryptionUtil {
* @param file Encrypted file * @param file Encrypted file
* @param privateKey Private key * @param privateKey Private key
* @return Decrypted temporary file * @return Decrypted temporary file
* @throws Exception * @throws Exception e
*/ */
public static Path decryptFile(Path file, String privateKey) throws Exception { public static Path decryptFile(Path file, String privateKey) throws Exception {
if (privateKey == null) { if (privateKey == null) {
@ -86,7 +86,7 @@ public class EncryptionUtil {
* *
* @param privateKey Private key * @param privateKey Private key
* @return Encryption cipher * @return Encryption cipher
* @throws Exception * @throws Exception e
*/ */
public static Cipher getEncryptionCipher(String privateKey) throws Exception { public static Cipher getEncryptionCipher(String privateKey) throws Exception {
if (Strings.isNullOrEmpty(privateKey)) { if (Strings.isNullOrEmpty(privateKey)) {
@ -101,7 +101,7 @@ public class EncryptionUtil {
* @param privateKey Private key * @param privateKey Private key
* @param mode Mode (encrypt or decrypt) * @param mode Mode (encrypt or decrypt)
* @return Cipher * @return Cipher
* @throws Exception * @throws Exception e
*/ */
private static Cipher getCipher(String privateKey, int mode) throws Exception { private static Cipher getCipher(String privateKey, int mode) throws Exception {
PBEKeySpec keySpec = new PBEKeySpec(privateKey.toCharArray(), SALT.getBytes(), 2000, 256); PBEKeySpec keySpec = new PBEKeySpec(privateKey.toCharArray(), SALT.getBytes(), 2000, 256);

View File

@ -45,6 +45,10 @@ public class ImageFormatHandler implements FormatHandler {
@Override @Override
public String extractContent(String language, Path file) throws Exception { public String extractContent(String language, Path file) throws Exception {
if (language == null) {
return null;
}
try (InputStream inputStream = Files.newInputStream(file)) { try (InputStream inputStream = Files.newInputStream(file)) {
return FileUtil.ocrFile(language, ImageIO.read(inputStream)); return FileUtil.ocrFile(language, ImageIO.read(inputStream));
} }

View File

@ -894,7 +894,8 @@ public class DocumentResource extends BaseResource {
// Add files to the document // Add files to the document
try { try {
for (EmailUtil.FileContent fileContent : mailContent.getFileContentList()) { for (EmailUtil.FileContent fileContent : mailContent.getFileContentList()) {
FileUtil.createFile(fileContent.getName(), fileContent.getFile(), fileContent.getSize(), "eng", principal.getId(), document.getId()); FileUtil.createFile(fileContent.getName(), fileContent.getFile(), fileContent.getSize(),
document.getLanguage(), principal.getId(), document.getId());
} }
} catch (IOException e) { } catch (IOException e) {
throw new ClientException(e.getMessage(), e.getMessage(), e); throw new ClientException(e.getMessage(), e.getMessage(), e);

View File

@ -10,8 +10,8 @@ import com.sismics.docs.core.dao.jpa.FileDao;
import com.sismics.docs.core.dao.jpa.UserDao; import com.sismics.docs.core.dao.jpa.UserDao;
import com.sismics.docs.core.dao.jpa.dto.DocumentDto; import com.sismics.docs.core.dao.jpa.dto.DocumentDto;
import com.sismics.docs.core.event.DocumentUpdatedAsyncEvent; import com.sismics.docs.core.event.DocumentUpdatedAsyncEvent;
import com.sismics.docs.core.event.FileCreatedAsyncEvent;
import com.sismics.docs.core.event.FileDeletedAsyncEvent; import com.sismics.docs.core.event.FileDeletedAsyncEvent;
import com.sismics.docs.core.event.FileUpdatedAsyncEvent;
import com.sismics.docs.core.model.jpa.File; import com.sismics.docs.core.model.jpa.File;
import com.sismics.docs.core.model.jpa.User; import com.sismics.docs.core.model.jpa.User;
import com.sismics.docs.core.util.DirectoryUtil; import com.sismics.docs.core.util.DirectoryUtil;
@ -166,7 +166,7 @@ public class FileResource extends BaseResource {
} }
// Validate input data // Validate input data
ValidationUtil.validateRequired(documentId, "id"); ValidationUtil.validateRequired(documentId, "documentId");
// Get the current user // Get the current user
UserDao userDao = new UserDao(); UserDao userDao = new UserDao();
@ -191,17 +191,17 @@ public class FileResource extends BaseResource {
file.setOrder(fileDao.getByDocumentId(principal.getId(), documentId).size()); file.setOrder(fileDao.getByDocumentId(principal.getId(), documentId).size());
fileDao.update(file); fileDao.update(file);
// Raise a new file created event and document updated event (it wasn't sent during file creation) // Raise a new file updated event and document updated event (it wasn't sent during file creation)
try { try {
java.nio.file.Path storedFile = DirectoryUtil.getStorageDirectory().resolve(id); java.nio.file.Path storedFile = DirectoryUtil.getStorageDirectory().resolve(id);
java.nio.file.Path unencryptedFile = EncryptionUtil.decryptFile(storedFile, user.getPrivateKey()); java.nio.file.Path unencryptedFile = EncryptionUtil.decryptFile(storedFile, user.getPrivateKey());
FileUtil.startProcessingFile(id); FileUtil.startProcessingFile(id);
FileCreatedAsyncEvent fileCreatedAsyncEvent = new FileCreatedAsyncEvent(); FileUpdatedAsyncEvent fileUpdatedAsyncEvent = new FileUpdatedAsyncEvent();
fileCreatedAsyncEvent.setUserId(principal.getId()); fileUpdatedAsyncEvent.setUserId(principal.getId());
fileCreatedAsyncEvent.setLanguage(documentDto.getLanguage()); fileUpdatedAsyncEvent.setLanguage(documentDto.getLanguage());
fileCreatedAsyncEvent.setFile(file); fileUpdatedAsyncEvent.setFile(file);
fileCreatedAsyncEvent.setUnencryptedFile(unencryptedFile); fileUpdatedAsyncEvent.setUnencryptedFile(unencryptedFile);
ThreadLocalContext.get().addAsyncEvent(fileCreatedAsyncEvent); ThreadLocalContext.get().addAsyncEvent(fileUpdatedAsyncEvent);
DocumentUpdatedAsyncEvent documentUpdatedAsyncEvent = new DocumentUpdatedAsyncEvent(); DocumentUpdatedAsyncEvent documentUpdatedAsyncEvent = new DocumentUpdatedAsyncEvent();
documentUpdatedAsyncEvent.setUserId(principal.getId()); documentUpdatedAsyncEvent.setUserId(principal.getId());
@ -210,7 +210,7 @@ public class FileResource extends BaseResource {
} catch (Exception e) { } catch (Exception e) {
throw new ServerException("AttachError", "Error attaching file to document", e); throw new ServerException("AttachError", "Error attaching file to document", e);
} }
// Always return OK // Always return OK
JsonObjectBuilder response = Json.createObjectBuilder() JsonObjectBuilder response = Json.createObjectBuilder()
.add("status", "ok"); .add("status", "ok");
@ -258,6 +258,67 @@ public class FileResource extends BaseResource {
.add("status", "ok"); .add("status", "ok");
return Response.ok().entity(response.build()).build(); return Response.ok().entity(response.build()).build();
} }
/**
 * Process a file manually.
 *
 * The caller must have write permission on the document the file is attached to.
 * The stored file is decrypted and a file updated event is queued, so the actual
 * processing happens asynchronously.
 *
 * @api {post} /file/:id/process Process a file manually
 * @apiName PostFileProcess
 * @apiGroup File
 * @apiParam {String} id File ID
 * @apiSuccess {String} status Status OK
 * @apiError (client) ForbiddenError Access denied
 * @apiError (client) NotFound File or document not found
 * @apiError (client) ValidationError Validation error
 * @apiError (server) ProcessingError Processing error
 * @apiPermission user
 * @apiVersion 1.6.0
 *
 * @param id File ID
 * @return Response
 */
@POST
@Path("{id: [a-z0-9\\-]+}/process")
public Response process(@PathParam("id") String id) {
    if (!authenticate()) {
        throw new ForbiddenClientException();
    }

    // Get the document and the file
    DocumentDao documentDao = new DocumentDao();
    FileDao fileDao = new FileDao();
    File file = fileDao.getFile(id);
    if (file == null) {
        throw new NotFoundException();
    }
    DocumentDto documentDto = documentDao.getDocument(file.getDocumentId(), PermType.WRITE, getTargetIdList(null));
    if (documentDto == null) {
        throw new NotFoundException();
    }

    // Start the processing asynchronously
    try {
        // The file lookup above is not restricted to the caller, so the file may
        // belong to another user: decrypt it with its owner's key, not the caller's.
        // NOTE(review): relies on File#getUserId() holding the ID of the user whose
        // key encrypted the file - confirm against the File entity.
        UserDao userDao = new UserDao();
        User fileOwner = userDao.getById(file.getUserId());
        if (fileOwner == null) {
            throw new IllegalStateException("Owner of file " + id + " not found");
        }

        java.nio.file.Path storedFile = DirectoryUtil.getStorageDirectory().resolve(id);
        java.nio.file.Path unencryptedFile = EncryptionUtil.decryptFile(storedFile, fileOwner.getPrivateKey());
        FileUtil.startProcessingFile(id);
        FileUpdatedAsyncEvent fileUpdatedAsyncEvent = new FileUpdatedAsyncEvent();
        fileUpdatedAsyncEvent.setUserId(principal.getId());
        fileUpdatedAsyncEvent.setLanguage(documentDto.getLanguage());
        fileUpdatedAsyncEvent.setFile(file);
        fileUpdatedAsyncEvent.setUnencryptedFile(unencryptedFile);
        ThreadLocalContext.get().addAsyncEvent(fileUpdatedAsyncEvent);
    } catch (Exception e) {
        throw new ServerException("ProcessingError", "Error processing this file", e);
    }

    // Always return OK
    JsonObjectBuilder response = Json.createObjectBuilder()
            .add("status", "ok");
    return Response.ok().entity(response.build()).build();
}
/** /**
* Reorder files. * Reorder files.

View File

@ -157,4 +157,13 @@ angular.module('docs').controller('DocumentViewContent', function ($scope, $root
}) })
}); });
}; };
/**
 * Ask the server to process a file again.
 * The file is flagged as processing once the request has succeeded.
 */
$scope.processFile = function (file) {
  var markAsProcessing = function () {
    file.processing = true;
  };

  var processRequest = Restangular.one('file/' + file.id).post('process');
  processRequest.then(markAsProcessing);
};
}); });

View File

@ -115,7 +115,8 @@
"upload_error_quota": "Quota reached", "upload_error_quota": "Quota reached",
"drop_zone": "Drag & drop files here to upload", "drop_zone": "Drag & drop files here to upload",
"add_files": "Add files", "add_files": "Add files",
"file_processing_indicator": "This file is being processed. Searching will not be available before it is complete." "file_processing_indicator": "This file is being processed. Searching will not be available before it is complete.",
"reprocess_file": "Reprocess this file"
}, },
"workflow": { "workflow": {
"workflow": "Workflow", "workflow": "Workflow",

View File

@ -72,6 +72,10 @@
<span class="fas fa-pencil-alt"></span> <span class="fas fa-pencil-alt"></span>
{{ 'rename' | translate }} {{ 'rename' | translate }}
</a> </a>
<a href ng-click="processFile(file)">
<span class="fas fa-eye"></span>
{{ 'document.view.content.reprocess_file' | translate }}
</a>
<a href ng-click="deleteFile(file)"> <a href ng-click="deleteFile(file)">
<span class="fas fa-trash"></span> <span class="fas fa-trash"></span>
{{ 'delete' | translate }} {{ 'delete' | translate }}

View File

@ -220,6 +220,11 @@ public class TestFileResource extends BaseJerseyTest {
.get(JsonObject.class); .get(JsonObject.class);
files = json.getJsonArray("files"); files = json.getJsonArray("files");
Assert.assertEquals(1, files.size()); Assert.assertEquals(1, files.size());
// Process a file
target().path("/file/" + file2Id + "/process").request()
.cookie(TokenBasedSecurityFilter.COOKIE_NAME, file1Token)
.post(Entity.form(new Form()), JsonObject.class);
} }
/** /**