From 7ed976b27a8492d8160e00c37a1183e03d0c4525 Mon Sep 17 00:00:00 2001 From: jendib Date: Sat, 17 Aug 2013 14:16:55 +0200 Subject: [PATCH] Index files OCR-ized content and documents, search on index fields --- .../docs/core/dao/jpa/DocumentDao.java | 48 ++++- .../docs/core/dao/lucene/LuceneDao.java | 184 +++++++++++++----- .../core/event/DocumentCreatedAsyncEvent.java | 41 ++++ .../core/event/DocumentDeletedAsyncEvent.java | 41 ++++ .../core/event/DocumentUpdatedAsyncEvent.java | 41 ++++ .../core/event/FileDeletedAsyncEvent.java | 41 ++++ .../core/event/RebuildIndexAsyncEvent.java | 16 ++ .../core/event/RebuildIndexAsyncListener.java | 57 ++++++ .../async/DocumentCreatedAsyncListener.java | 37 ++++ .../async/DocumentDeletedAsyncListener.java | 37 ++++ .../async/DocumentUpdatedAsyncListener.java | 37 ++++ .../async/FileCreatedAsyncListener.java | 13 +- .../async/FileDeletedAsyncListener.java | 39 ++++ .../docs/core/model/context/AppContext.java | 10 + .../docs/core/service/IndexingService.java | 28 ++- .../com/sismics/docs/core/util/FileUtil.java | 13 +- docs-parent/TODO | 4 - .../docs/rest/resource/AppResource.java | 34 +++- .../docs/rest/resource/DocumentResource.java | 26 ++- .../docs/rest/resource/FileResource.java | 7 +- .../sismics/docs/rest/TestAppResource.java | 6 + .../docs/rest/TestDocumentResource.java | 24 ++- docs-web/src/test/resources/log4j.properties | 2 +- 23 files changed, 695 insertions(+), 91 deletions(-) create mode 100644 docs-core/src/main/java/com/sismics/docs/core/event/DocumentCreatedAsyncEvent.java create mode 100644 docs-core/src/main/java/com/sismics/docs/core/event/DocumentDeletedAsyncEvent.java create mode 100644 docs-core/src/main/java/com/sismics/docs/core/event/DocumentUpdatedAsyncEvent.java create mode 100644 docs-core/src/main/java/com/sismics/docs/core/event/FileDeletedAsyncEvent.java create mode 100644 docs-core/src/main/java/com/sismics/docs/core/event/RebuildIndexAsyncEvent.java create mode 100644 docs-core/src/main/java/com/sismics/docs/core/event/RebuildIndexAsyncListener.java create mode 100644 docs-core/src/main/java/com/sismics/docs/core/listener/async/DocumentCreatedAsyncListener.java create mode 100644 docs-core/src/main/java/com/sismics/docs/core/listener/async/DocumentDeletedAsyncListener.java create mode 100644 docs-core/src/main/java/com/sismics/docs/core/listener/async/DocumentUpdatedAsyncListener.java create mode 100644 docs-core/src/main/java/com/sismics/docs/core/listener/async/FileDeletedAsyncListener.java diff --git a/docs-core/src/main/java/com/sismics/docs/core/dao/jpa/DocumentDao.java b/docs-core/src/main/java/com/sismics/docs/core/dao/jpa/DocumentDao.java index 443bfaa8..426bd778 100644 --- a/docs-core/src/main/java/com/sismics/docs/core/dao/jpa/DocumentDao.java +++ b/docs-core/src/main/java/com/sismics/docs/core/dao/jpa/DocumentDao.java @@ -1,8 +1,23 @@ package com.sismics.docs.core.dao.jpa; +import java.sql.Timestamp; +import java.util.ArrayList; +import java.util.Date; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.UUID; + +import javax.persistence.EntityManager; +import javax.persistence.NoResultException; +import javax.persistence.Query; + import com.google.common.base.Joiner; +import com.google.common.base.Strings; import com.sismics.docs.core.dao.jpa.criteria.DocumentCriteria; import com.sismics.docs.core.dao.jpa.dto.DocumentDto; +import com.sismics.docs.core.dao.lucene.LuceneDao; import com.sismics.docs.core.model.jpa.Document; import com.sismics.docs.core.util.jpa.PaginatedList; import com.sismics.docs.core.util.jpa.PaginatedLists; @@ -10,12 +25,6 @@ import com.sismics.docs.core.util.jpa.QueryParam; import com.sismics.docs.core.util.jpa.SortCriteria; import com.sismics.util.context.ThreadLocalContext; -import javax.persistence.EntityManager; -import javax.persistence.NoResultException; -import javax.persistence.Query; -import java.sql.Timestamp; -import java.util.*; - /** * Document DAO. * @@ -40,6 +49,18 @@ public class DocumentDao { return document.getId(); } + /** + * Returns the list of all documents. + * + * @return List of documents + */ + @SuppressWarnings("unchecked") + public List findAll() { + EntityManager em = ThreadLocalContext.get().getEntityManager(); + Query q = em.createQuery("select d from Document d where d.deleteDate is null"); + return q.getResultList(); + } + /** * Returns an active document. * @@ -118,8 +139,9 @@ public class DocumentDao { * @param paginatedList List of documents (updated by side effects) * @param criteria Search criteria * @return List of document + * @throws Exception */ - public void findByCriteria(PaginatedList paginatedList, DocumentCriteria criteria, SortCriteria sortCriteria) { + public void findByCriteria(PaginatedList paginatedList, DocumentCriteria criteria, SortCriteria sortCriteria) throws Exception { Map parameterMap = new HashMap(); List criteriaList = new ArrayList(); @@ -133,9 +155,15 @@ public class DocumentDao { criteriaList.add("d.DOC_IDUSER_C = :userId"); parameterMap.put("userId", criteria.getUserId()); } - if (criteria.getSearch() != null) { - criteriaList.add("(d.DOC_TITLE_C LIKE :search OR d.DOC_DESCRIPTION_C LIKE :search OR f.FIL_CONTENT_C LIKE :search)"); - parameterMap.put("search", "%" + criteria.getSearch() + "%"); + if (!Strings.isNullOrEmpty(criteria.getSearch())) { + LuceneDao luceneDao = new LuceneDao(); + Set documentIdList = luceneDao.search(criteria.getUserId(), criteria.getSearch()); + if (documentIdList.size() == 0) { + // If the search doesn't find any document, the request should return nothing + documentIdList.add(UUID.randomUUID().toString()); + } + criteriaList.add("d.DOC_ID_C in :documentIdList"); + parameterMap.put("documentIdList", documentIdList); } if (criteria.getCreateDateMin() != null) { criteriaList.add("d.DOC_CREATEDATE_D >= :createDateMin"); diff --git a/docs-core/src/main/java/com/sismics/docs/core/dao/lucene/LuceneDao.java b/docs-core/src/main/java/com/sismics/docs/core/dao/lucene/LuceneDao.java index 466728bb..4aa5039e 100644 --- a/docs-core/src/main/java/com/sismics/docs/core/dao/lucene/LuceneDao.java +++ b/docs-core/src/main/java/com/sismics/docs/core/dao/lucene/LuceneDao.java @@ -1,8 +1,10 @@ package com.sismics.docs.core.dao.lucene; import java.util.ArrayList; +import java.util.HashMap; import java.util.HashSet; import java.util.List; +import java.util.Map; import java.util.Set; import org.apache.lucene.document.Field; @@ -18,12 +20,12 @@ import org.apache.lucene.queryparser.flexible.standard.StandardQueryParser; import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocs; import org.apache.lucene.util.Version; import com.sismics.docs.core.model.context.AppContext; +import com.sismics.docs.core.model.jpa.Document; import com.sismics.docs.core.model.jpa.File; import com.sismics.docs.core.util.LuceneUtil; import com.sismics.docs.core.util.LuceneUtil.LuceneRunnable; @@ -40,114 +42,190 @@ public class LuceneDao { * * @param fileList */ - public void rebuildIndex(final List fileList) { + public void rebuildIndex(final List documentList, final List fileList) { LuceneUtil.handle(new LuceneRunnable() { @Override public void run(IndexWriter indexWriter) throws Exception { // Empty index indexWriter.deleteAll(); - // Add all files - for (File file : fileList) { - org.apache.lucene.document.Document document = getDocumentFromFile(file); - indexWriter.addDocument(document); + // Add all documents + Map documentMap = new HashMap<>(); + for (Document document : documentList) { + org.apache.lucene.document.Document luceneDocument = getDocumentFromDocument(document); + indexWriter.addDocument(luceneDocument); + documentMap.put(document.getId(), document); } - } - }); - } - - - /** - * Add files to the index. - * - * @param fileList - */ - public void create(final List fileList) { - LuceneUtil.handle(new LuceneRunnable() { - @Override - public void run(IndexWriter indexWriter) throws Exception { + // Add all files for (File file : fileList) { - org.apache.lucene.document.Document document = getDocumentFromFile(file); - indexWriter.addDocument(document); + org.apache.lucene.document.Document luceneDocument = getDocumentFromFile(file, documentMap.get(file.getDocumentId())); + indexWriter.addDocument(luceneDocument); } } }); } /** - * Update index. + * Add document to the index. * - * @param fileList File list + * @param document Document to add */ - public void update(final List fileList) { + public void createDocument(final Document document) { LuceneUtil.handle(new LuceneRunnable() { @Override public void run(IndexWriter indexWriter) throws Exception { - // Update all files - for (File file : fileList) { - org.apache.lucene.document.Document document = getDocumentFromFile(file); - indexWriter.updateDocument(new Term("id", file.getId()), document); - } + org.apache.lucene.document.Document luceneDocument = getDocumentFromDocument(document); + indexWriter.addDocument(luceneDocument); } }); } - + + /** + * Add file to the index. + * + * @param file File to add + * @param document Document linked to the file + */ + public void createFile(final File file, final Document document) { + LuceneUtil.handle(new LuceneRunnable() { + @Override + public void run(IndexWriter indexWriter) throws Exception { + org.apache.lucene.document.Document luceneDocument = getDocumentFromFile(file, document); + indexWriter.addDocument(luceneDocument); + } + }); + } + + /** + * Update document index. + * + * @param document Updated document + */ + public void updateDocument(final Document document) { + LuceneUtil.handle(new LuceneRunnable() { + @Override + public void run(IndexWriter indexWriter) throws Exception { + org.apache.lucene.document.Document luceneDocument = getDocumentFromDocument(document); + indexWriter.updateDocument(new Term("id", document.getId()), luceneDocument); + } + }); + } + + /** + * Update file index. + * + * @param file Updated file + * @param document Document linked to the file + */ + public void updateFile(final File file, final Document document) { + LuceneUtil.handle(new LuceneRunnable() { + @Override + public void run(IndexWriter indexWriter) throws Exception { + org.apache.lucene.document.Document luceneDocument = getDocumentFromFile(file, document); + indexWriter.updateDocument(new Term("id", file.getId()), luceneDocument); + } + }); + } + + /** + * Delete document from the index. + * + * @param id Document ID to delete + */ + public void deleteDocument(final String id) { + LuceneUtil.handle(new LuceneRunnable() { + @Override + public void run(IndexWriter indexWriter) throws Exception { + indexWriter.deleteDocuments(new Term("id", id)); + } + }); + } + /** * Search files. * - * @param paginatedList - * @param feedList - * @param searchQuery - * @return List of file IDs + * @param userId User ID to filter on + * @param searchQuery Search query + * @return List of document IDs * @throws Exception */ - public Set search(String userId, String searchQuery, int limit) throws Exception { + public Set search(String userId, String searchQuery) throws Exception { // Escape query and add quotes so QueryParser generate a PhraseQuery searchQuery = "\"" + QueryParserUtil.escape(searchQuery) + "\""; // Build search query StandardQueryParser qpHelper = new StandardQueryParser(new DocsStandardAnalyzer(Version.LUCENE_42)); qpHelper.setPhraseSlop(100000); // PhraseQuery add terms - Query contentQuery = qpHelper.parse(searchQuery, "content"); - // Search on file content + // Search on documents and files BooleanQuery query = new BooleanQuery(); - query.add(contentQuery, Occur.SHOULD); + query.add(qpHelper.parse(searchQuery, "content"), Occur.SHOULD); + query.add(qpHelper.parse(searchQuery, "title"), Occur.SHOULD); + query.add(qpHelper.parse(searchQuery, "description"), Occur.SHOULD); // Filter on provided user ID List terms = new ArrayList(); - terms.add(new Term("user_id", userId)); - TermsFilter feedsFilter = new TermsFilter(terms); + if (userId != null) { + terms.add(new Term("user_id", userId)); + } + TermsFilter userFilter = new TermsFilter(terms); // Search IndexReader reader = DirectoryReader.open(AppContext.getInstance().getLuceneDirectory()); IndexSearcher searcher = new IndexSearcher(reader); - TopDocs topDocs = searcher.search(query, feedsFilter, limit); + TopDocs topDocs = searcher.search(query, userFilter, Integer.MAX_VALUE); ScoreDoc[] docs = topDocs.scoreDocs; - // Extract file IDs - Set fileIdList = new HashSet(); + // Extract document IDs + Set documentIdList = new HashSet(); for (int i = 0; i < docs.length; i++) { - String id = searcher.doc(docs[i].doc).get("id"); - fileIdList.add(id); + org.apache.lucene.document.Document document = searcher.doc(docs[i].doc); + String type = document.get("type"); + String documentId = null; + if (type.equals("document")) { + documentId = document.get("id"); + } else if (type.equals("file")) { + documentId = document.get("document_id"); + } + documentIdList.add(documentId); } - return fileIdList; + return documentIdList; + } + + /** + * Build Lucene document from database document. + * + * @param document Document + * @return Document + */ + private org.apache.lucene.document.Document getDocumentFromDocument(Document document) { + org.apache.lucene.document.Document luceneDocument = new org.apache.lucene.document.Document(); + luceneDocument.add(new StringField("id", document.getId(), Field.Store.YES)); + luceneDocument.add(new StringField("user_id", document.getUserId(), Field.Store.YES)); + luceneDocument.add(new StringField("type", "document", Field.Store.YES)); + luceneDocument.add(new TextField("title", document.getTitle(), Field.Store.NO)); + luceneDocument.add(new TextField("description", document.getDescription(), Field.Store.NO)); + + return luceneDocument; } /** * Build Lucene document from file. * * @param file File + * @param document Document linked to the file * @return Document */ - private org.apache.lucene.document.Document getDocumentFromFile(File file) { - // Building document - org.apache.lucene.document.Document document = new org.apache.lucene.document.Document(); - document.add(new StringField("id", file.getId(), Field.Store.YES)); - document.add(new TextField("content", file.getContent(), Field.Store.NO)); + private org.apache.lucene.document.Document getDocumentFromFile(File file, Document document) { + org.apache.lucene.document.Document luceneDocument = new org.apache.lucene.document.Document(); + luceneDocument.add(new StringField("id", file.getId(), Field.Store.YES)); + luceneDocument.add(new StringField("user_id", document.getUserId(), Field.Store.YES)); + luceneDocument.add(new StringField("type", "file", Field.Store.YES)); + luceneDocument.add(new StringField("document_id", file.getDocumentId(), Field.Store.YES)); + luceneDocument.add(new TextField("content", file.getContent(), Field.Store.NO)); - return document; + return luceneDocument; } } diff --git a/docs-core/src/main/java/com/sismics/docs/core/event/DocumentCreatedAsyncEvent.java b/docs-core/src/main/java/com/sismics/docs/core/event/DocumentCreatedAsyncEvent.java new file mode 100644 index 00000000..a0a61161 --- /dev/null +++ b/docs-core/src/main/java/com/sismics/docs/core/event/DocumentCreatedAsyncEvent.java @@ -0,0 +1,41 @@ +package com.sismics.docs.core.event; + +import com.google.common.base.Objects; +import com.sismics.docs.core.model.jpa.Document; + +/** + * Document created event. + * + * @author bgamard + */ +public class DocumentCreatedAsyncEvent { + /** + * Created document. + */ + private Document document; + + /** + * Getter of document. + * + * @return the document + */ + public Document getDocument() { + return document; + } + + /** + * Setter of document. + * + * @param document document + */ + public void setDocument(Document document) { + this.document = document; + } + + @Override + public String toString() { + return Objects.toStringHelper(this) + .add("document", document) + .toString(); + } +} \ No newline at end of file diff --git a/docs-core/src/main/java/com/sismics/docs/core/event/DocumentDeletedAsyncEvent.java b/docs-core/src/main/java/com/sismics/docs/core/event/DocumentDeletedAsyncEvent.java new file mode 100644 index 00000000..5ffe10a6 --- /dev/null +++ b/docs-core/src/main/java/com/sismics/docs/core/event/DocumentDeletedAsyncEvent.java @@ -0,0 +1,41 @@ +package com.sismics.docs.core.event; + +import com.google.common.base.Objects; +import com.sismics.docs.core.model.jpa.Document; + +/** + * Document deleted event. + * + * @author bgamard + */ +public class DocumentDeletedAsyncEvent { + /** + * Created document. + */ + private Document document; + + /** + * Getter of document. + * + * @return the document + */ + public Document getDocument() { + return document; + } + + /** + * Setter of document. + * + * @param document document + */ + public void setDocument(Document document) { + this.document = document; + } + + @Override + public String toString() { + return Objects.toStringHelper(this) + .add("document", document) + .toString(); + } +} \ No newline at end of file diff --git a/docs-core/src/main/java/com/sismics/docs/core/event/DocumentUpdatedAsyncEvent.java b/docs-core/src/main/java/com/sismics/docs/core/event/DocumentUpdatedAsyncEvent.java new file mode 100644 index 00000000..ab322071 --- /dev/null +++ b/docs-core/src/main/java/com/sismics/docs/core/event/DocumentUpdatedAsyncEvent.java @@ -0,0 +1,41 @@ +package com.sismics.docs.core.event; + +import com.google.common.base.Objects; +import com.sismics.docs.core.model.jpa.Document; + +/** + * Document updated event. + * + * @author bgamard + */ +public class DocumentUpdatedAsyncEvent { + /** + * Created document. + */ + private Document document; + + /** + * Getter of document. + * + * @return the document + */ + public Document getDocument() { + return document; + } + + /** + * Setter of document. + * + * @param document document + */ + public void setDocument(Document document) { + this.document = document; + } + + @Override + public String toString() { + return Objects.toStringHelper(this) + .add("document", document) + .toString(); + } +} \ No newline at end of file diff --git a/docs-core/src/main/java/com/sismics/docs/core/event/FileDeletedAsyncEvent.java b/docs-core/src/main/java/com/sismics/docs/core/event/FileDeletedAsyncEvent.java new file mode 100644 index 00000000..98172417 --- /dev/null +++ b/docs-core/src/main/java/com/sismics/docs/core/event/FileDeletedAsyncEvent.java @@ -0,0 +1,41 @@ +package com.sismics.docs.core.event; + +import com.google.common.base.Objects; +import com.sismics.docs.core.model.jpa.File; + +/** + * File deleted event. + * + * @author bgamard + */ +public class FileDeletedAsyncEvent { + /** + * Deleted file. + */ + private File file; + + /** + * Getter of file. + * + * @return the file + */ + public File getFile() { + return file; + } + + /** + * Setter of file. + * + * @param file file + */ + public void setFile(File file) { + this.file = file; + } + + @Override + public String toString() { + return Objects.toStringHelper(this) + .add("file", file) + .toString(); + } +} \ No newline at end of file diff --git a/docs-core/src/main/java/com/sismics/docs/core/event/RebuildIndexAsyncEvent.java b/docs-core/src/main/java/com/sismics/docs/core/event/RebuildIndexAsyncEvent.java new file mode 100644 index 00000000..85383b84 --- /dev/null +++ b/docs-core/src/main/java/com/sismics/docs/core/event/RebuildIndexAsyncEvent.java @@ -0,0 +1,16 @@ +package com.sismics.docs.core.event; + +import com.google.common.base.Objects; + +/** + * Rebuild index event. + * + * @author bgamard + */ +public class RebuildIndexAsyncEvent { + @Override + public String toString() { + return Objects.toStringHelper(this) + .toString(); + } +} diff --git a/docs-core/src/main/java/com/sismics/docs/core/event/RebuildIndexAsyncListener.java b/docs-core/src/main/java/com/sismics/docs/core/event/RebuildIndexAsyncListener.java new file mode 100644 index 00000000..4b1f5274 --- /dev/null +++ b/docs-core/src/main/java/com/sismics/docs/core/event/RebuildIndexAsyncListener.java @@ -0,0 +1,57 @@ +package com.sismics.docs.core.event; + +import java.util.List; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.eventbus.Subscribe; +import com.sismics.docs.core.dao.jpa.DocumentDao; +import com.sismics.docs.core.dao.jpa.FileDao; +import com.sismics.docs.core.dao.lucene.LuceneDao; +import com.sismics.docs.core.model.jpa.Document; +import com.sismics.docs.core.model.jpa.File; +import com.sismics.docs.core.util.TransactionUtil; + +/** + * Listener on rebuild index. + * + * @author bgamard + */ +public class RebuildIndexAsyncListener { + /** + * Logger. + */ + private static final Logger log = LoggerFactory.getLogger(RebuildIndexAsyncListener.class); + + /** + * Rebuild Lucene index. + * + * @param rebuildIndexAsyncEvent Index rebuild event + * @throws Exception + */ + @Subscribe + public void onArticleCreated(final RebuildIndexAsyncEvent rebuildIndexAsyncEvent) throws Exception { + if (log.isInfoEnabled()) { + log.info("Rebuild index event: " + rebuildIndexAsyncEvent.toString()); + } + + // Fetch all documents and files + TransactionUtil.handle(new Runnable() { + @Override + public void run() { + // Fetch all documents + DocumentDao documentDao = new DocumentDao(); + List documentList = documentDao.findAll(); + + // Fetch all files + FileDao fileDao = new FileDao(); + List fileList = fileDao.findAll(); + + // Rebuild index + LuceneDao luceneDao = new LuceneDao(); + luceneDao.rebuildIndex(documentList, fileList); + } + }); + } +} diff --git a/docs-core/src/main/java/com/sismics/docs/core/listener/async/DocumentCreatedAsyncListener.java b/docs-core/src/main/java/com/sismics/docs/core/listener/async/DocumentCreatedAsyncListener.java new file mode 100644 index 00000000..4ecf4fd2 --- /dev/null +++ b/docs-core/src/main/java/com/sismics/docs/core/listener/async/DocumentCreatedAsyncListener.java @@ -0,0 +1,37 @@ +package com.sismics.docs.core.listener.async; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.eventbus.Subscribe; +import com.sismics.docs.core.dao.lucene.LuceneDao; +import com.sismics.docs.core.event.DocumentCreatedAsyncEvent; + +/** + * Listener on document created. + * + * @author bgamard + */ +public class DocumentCreatedAsyncListener { + /** + * Logger. + */ + private static final Logger log = LoggerFactory.getLogger(DocumentCreatedAsyncListener.class); + + /** + * Document created. + * + * @param documentCreatedAsyncEvent Document created event + * @throws Exception + */ + @Subscribe + public void on(final DocumentCreatedAsyncEvent documentCreatedAsyncEvent) throws Exception { + if (log.isInfoEnabled()) { + log.info("Document created event: " + documentCreatedAsyncEvent.toString()); + } + + // Update Lucene index + LuceneDao luceneDao = new LuceneDao(); + luceneDao.createDocument(documentCreatedAsyncEvent.getDocument()); + } +} diff --git a/docs-core/src/main/java/com/sismics/docs/core/listener/async/DocumentDeletedAsyncListener.java b/docs-core/src/main/java/com/sismics/docs/core/listener/async/DocumentDeletedAsyncListener.java new file mode 100644 index 00000000..1eb04570 --- /dev/null +++ b/docs-core/src/main/java/com/sismics/docs/core/listener/async/DocumentDeletedAsyncListener.java @@ -0,0 +1,37 @@ +package com.sismics.docs.core.listener.async; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.eventbus.Subscribe; +import com.sismics.docs.core.dao.lucene.LuceneDao; +import com.sismics.docs.core.event.DocumentDeletedAsyncEvent; + +/** + * Listener on document deleted. + * + * @author bgamard + */ +public class DocumentDeletedAsyncListener { + /** + * Logger. + */ + private static final Logger log = LoggerFactory.getLogger(DocumentDeletedAsyncListener.class); + + /** + * Document deleted. + * + * @param documentDeletedAsyncEvent Document deleted event + * @throws Exception + */ + @Subscribe + public void on(final DocumentDeletedAsyncEvent documentDeletedAsyncEvent) throws Exception { + if (log.isInfoEnabled()) { + log.info("Document deleted event: " + documentDeletedAsyncEvent.toString()); + } + + // Update Lucene index + LuceneDao luceneDao = new LuceneDao(); + luceneDao.deleteDocument(documentDeletedAsyncEvent.getDocument().getId()); + } +} diff --git a/docs-core/src/main/java/com/sismics/docs/core/listener/async/DocumentUpdatedAsyncListener.java b/docs-core/src/main/java/com/sismics/docs/core/listener/async/DocumentUpdatedAsyncListener.java new file mode 100644 index 00000000..8824bd5d --- /dev/null +++ b/docs-core/src/main/java/com/sismics/docs/core/listener/async/DocumentUpdatedAsyncListener.java @@ -0,0 +1,37 @@ +package com.sismics.docs.core.listener.async; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.eventbus.Subscribe; +import com.sismics.docs.core.dao.lucene.LuceneDao; +import com.sismics.docs.core.event.DocumentUpdatedAsyncEvent; + +/** + * Listener on document updated. + * + * @author bgamard + */ +public class DocumentUpdatedAsyncListener { + /** + * Logger. + */ + private static final Logger log = LoggerFactory.getLogger(DocumentUpdatedAsyncListener.class); + + /** + * Document updated. + * + * @param documentUpdatedAsyncEvent Document updated event + * @throws Exception + */ + @Subscribe + public void on(final DocumentUpdatedAsyncEvent documentUpdatedAsyncEvent) throws Exception { + if (log.isInfoEnabled()) { + log.info("Document updated event: " + documentUpdatedAsyncEvent.toString()); + } + + // Update Lucene index + LuceneDao luceneDao = new LuceneDao(); + luceneDao.updateDocument(documentUpdatedAsyncEvent.getDocument()); + } +} diff --git a/docs-core/src/main/java/com/sismics/docs/core/listener/async/FileCreatedAsyncListener.java b/docs-core/src/main/java/com/sismics/docs/core/listener/async/FileCreatedAsyncListener.java index 2d6be9f8..876ace67 100644 --- a/docs-core/src/main/java/com/sismics/docs/core/listener/async/FileCreatedAsyncListener.java +++ b/docs-core/src/main/java/com/sismics/docs/core/listener/async/FileCreatedAsyncListener.java @@ -6,12 +6,13 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.google.common.eventbus.Subscribe; +import com.sismics.docs.core.dao.lucene.LuceneDao; import com.sismics.docs.core.event.FileCreatedAsyncEvent; import com.sismics.docs.core.util.FileUtil; import com.sismics.util.ImageUtil; /** - * Listener on new file. + * Listener on file created. * * @author bgamard */ @@ -22,13 +23,13 @@ public class FileCreatedAsyncListener { private static final Logger log = LoggerFactory.getLogger(FileCreatedAsyncListener.class); /** - * Process new file. + * File created. * - * @param fileCreatedAsyncEvent New file created event + * @param fileCreatedAsyncEvent File created event * @throws Exception */ @Subscribe - public void onFileCreated(final FileCreatedAsyncEvent fileCreatedAsyncEvent) throws Exception { + public void on(final FileCreatedAsyncEvent fileCreatedAsyncEvent) throws Exception { if (log.isInfoEnabled()) { log.info("File created event: " + fileCreatedAsyncEvent.toString()); } @@ -39,5 +40,9 @@ public class FileCreatedAsyncListener { FileUtil.ocrFile(fileCreatedAsyncEvent.getDocument(), fileCreatedAsyncEvent.getFile()); log.info(MessageFormat.format("File OCR-ized in {0}ms", System.currentTimeMillis() - startTime)); } + + // Update Lucene index + LuceneDao luceneDao = new LuceneDao(); + luceneDao.createFile(fileCreatedAsyncEvent.getFile(), fileCreatedAsyncEvent.getDocument()); } } diff --git a/docs-core/src/main/java/com/sismics/docs/core/listener/async/FileDeletedAsyncListener.java b/docs-core/src/main/java/com/sismics/docs/core/listener/async/FileDeletedAsyncListener.java new file mode 100644 index 00000000..3a08c984 --- /dev/null +++ b/docs-core/src/main/java/com/sismics/docs/core/listener/async/FileDeletedAsyncListener.java @@ -0,0 +1,39 @@ +package com.sismics.docs.core.listener.async; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.eventbus.Subscribe; +import com.sismics.docs.core.dao.lucene.LuceneDao; +import com.sismics.docs.core.event.FileDeletedAsyncEvent; + +/** + * Listener on file deleted. + * + * @author bgamard + */ +public class FileDeletedAsyncListener { + /** + * Logger. + */ + private static final Logger log = LoggerFactory.getLogger(FileDeletedAsyncListener.class); + + /** + * File deleted. + * + * @param fileDeletedAsyncEvent File deleted event + * @throws Exception + */ + @Subscribe + public void on(final FileDeletedAsyncEvent fileDeletedAsyncEvent) throws Exception { + if (log.isInfoEnabled()) { + log.info("File deleted event: " + fileDeletedAsyncEvent.toString()); + } + + // TODO Delete the file from storage + + // Update Lucene index + LuceneDao luceneDao = new LuceneDao(); + luceneDao.deleteDocument(fileDeletedAsyncEvent.getFile().getId()); + } +} diff --git a/docs-core/src/main/java/com/sismics/docs/core/model/context/AppContext.java b/docs-core/src/main/java/com/sismics/docs/core/model/context/AppContext.java index e0bae3e1..253074d2 100644 --- a/docs-core/src/main/java/com/sismics/docs/core/model/context/AppContext.java +++ b/docs-core/src/main/java/com/sismics/docs/core/model/context/AppContext.java @@ -13,7 +13,12 @@ import com.google.common.eventbus.AsyncEventBus; import com.google.common.eventbus.EventBus; import com.sismics.docs.core.constant.ConfigType; import com.sismics.docs.core.dao.jpa.ConfigDao; +import com.sismics.docs.core.event.RebuildIndexAsyncListener; +import com.sismics.docs.core.listener.async.DocumentCreatedAsyncListener; +import com.sismics.docs.core.listener.async.DocumentDeletedAsyncListener; +import com.sismics.docs.core.listener.async.DocumentUpdatedAsyncListener; import com.sismics.docs.core.listener.async.FileCreatedAsyncListener; +import com.sismics.docs.core.listener.async.FileDeletedAsyncListener; import com.sismics.docs.core.listener.sync.DeadEventListener; import com.sismics.docs.core.model.jpa.Config; import com.sismics.docs.core.service.IndexingService; @@ -80,6 +85,11 @@ public class AppContext { asyncEventBus = newAsyncEventBus(); asyncEventBus.register(new FileCreatedAsyncListener()); + asyncEventBus.register(new FileDeletedAsyncListener()); + asyncEventBus.register(new DocumentCreatedAsyncListener()); + asyncEventBus.register(new DocumentUpdatedAsyncListener()); + asyncEventBus.register(new DocumentDeletedAsyncListener()); + asyncEventBus.register(new RebuildIndexAsyncListener()); } /** diff --git a/docs-core/src/main/java/com/sismics/docs/core/service/IndexingService.java b/docs-core/src/main/java/com/sismics/docs/core/service/IndexingService.java index 1f9513a3..e6def46f 100644 --- a/docs-core/src/main/java/com/sismics/docs/core/service/IndexingService.java +++ b/docs-core/src/main/java/com/sismics/docs/core/service/IndexingService.java @@ -1,10 +1,9 @@ package com.sismics.docs.core.service; -import com.google.common.util.concurrent.AbstractScheduledService; -import com.sismics.docs.core.constant.Constants; -import com.sismics.docs.core.model.context.AppContext; -import com.sismics.docs.core.util.DirectoryUtil; -import com.sismics.docs.core.util.TransactionUtil; +import java.io.File; +import java.io.IOException; +import java.util.concurrent.TimeUnit; + import org.apache.lucene.store.Directory; import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.store.SimpleFSDirectory; @@ -12,9 +11,12 @@ import org.apache.lucene.store.SimpleFSLockFactory; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.File; -import java.io.IOException; -import java.util.concurrent.TimeUnit; +import com.google.common.util.concurrent.AbstractScheduledService; +import com.sismics.docs.core.constant.Constants; +import com.sismics.docs.core.event.RebuildIndexAsyncEvent; +import com.sismics.docs.core.model.context.AppContext; +import com.sismics.docs.core.util.DirectoryUtil; +import com.sismics.docs.core.util.TransactionUtil; /** * Indexing service. @@ -85,6 +87,16 @@ public class IndexingService extends AbstractScheduledService { return Scheduler.newFixedDelaySchedule(0, 1, TimeUnit.HOURS); } + /** + * Destroy and rebuild Lucene index. + * + * @throws Exception + */ + public void rebuildIndex() throws Exception { + RebuildIndexAsyncEvent rebuildIndexAsyncEvent = new RebuildIndexAsyncEvent(); + AppContext.getInstance().getAsyncEventBus().post(rebuildIndexAsyncEvent); + } + /** * Getter of directory. * diff --git a/docs-core/src/main/java/com/sismics/docs/core/util/FileUtil.java b/docs-core/src/main/java/com/sismics/docs/core/util/FileUtil.java index c896950c..45557465 100644 --- a/docs-core/src/main/java/com/sismics/docs/core/util/FileUtil.java +++ b/docs-core/src/main/java/com/sismics/docs/core/util/FileUtil.java @@ -1,6 +1,8 @@ package com.sismics.docs.core.util; +import java.awt.color.ColorSpace; import java.awt.image.BufferedImage; +import java.awt.image.ColorConvertOp; import java.io.IOException; import java.nio.file.Paths; @@ -46,12 +48,11 @@ public class FileUtil { log.error("Error reading the image " + storedfile, e); } - // Upscale the image if it is too small - if (image.getWidth() < 2500 || image.getHeight() < 2500) { - BufferedImage resizedImage = Scalr.resize(image, Method.AUTOMATIC, Mode.AUTOMATIC, 3500); - image.flush(); - image = resizedImage; - } + // Upscale and grayscale the image + BufferedImage resizedImage = Scalr.resize(image, Method.AUTOMATIC, Mode.AUTOMATIC, 3500, + new ColorConvertOp(ColorSpace.getInstance(ColorSpace.CS_GRAY), null)); + image.flush(); + image = resizedImage; // OCR the file try { diff --git a/docs-parent/TODO b/docs-parent/TODO index 413e0ed8..e69de29b 100644 --- a/docs-parent/TODO +++ b/docs-parent/TODO @@ -1,4 +0,0 @@ -- Index title and description (server) -- Use Lucene for title and description searching (server) -- Index OCR-ized content (server) -- Batch to rebuild Lucene index (server) \ No newline at end of file diff --git a/docs-web/src/main/java/com/sismics/docs/rest/resource/AppResource.java b/docs-web/src/main/java/com/sismics/docs/rest/resource/AppResource.java index da1ba190..1db29d34 100644 --- a/docs-web/src/main/java/com/sismics/docs/rest/resource/AppResource.java +++ b/docs-web/src/main/java/com/sismics/docs/rest/resource/AppResource.java @@ -22,6 +22,7 @@ import com.sismics.docs.core.dao.jpa.DocumentDao; import com.sismics.docs.core.dao.jpa.FileDao; import com.sismics.docs.core.dao.jpa.criteria.DocumentCriteria; import com.sismics.docs.core.dao.jpa.dto.DocumentDto; +import com.sismics.docs.core.model.context.AppContext; import com.sismics.docs.core.model.jpa.Document; import com.sismics.docs.core.model.jpa.File; import com.sismics.docs.core.util.ConfigUtil; @@ -68,7 +69,11 @@ public class AppResource extends BaseResource { SortCriteria sortCriteria = new SortCriteria(0, true); DocumentCriteria documentCriteria = new DocumentCriteria(); documentCriteria.setUserId(principal.getId()); - documentDao.findByCriteria(paginatedList, documentCriteria, sortCriteria); + try { + documentDao.findByCriteria(paginatedList, documentCriteria, sortCriteria); + } catch (Exception e) { + throw new ServerException("SearchError", "Error searching in documents", e); + } response.put("document_count", paginatedList.getResultCount()); // General data @@ -146,7 +151,7 @@ public class AppResource extends BaseResource { @POST @Path("batch/ocr") @Produces(MediaType.APPLICATION_JSON) - public Response batchReindex() throws JSONException { + public Response batchOcr() throws JSONException { if (!authenticate()) { throw new ForbiddenClientException(); } @@ -164,4 +169,29 @@ public class AppResource extends BaseResource { response.put("status", "ok"); return Response.ok().entity(response).build(); } + + /** + * Destroy and rebuild Lucene index. + * + * @return Response + * @throws JSONException + */ + @POST + @Path("batch/reindex") + @Produces(MediaType.APPLICATION_JSON) + public Response batchReindex() throws JSONException { + if (!authenticate()) { + throw new ForbiddenClientException(); + } + checkBaseFunction(BaseFunction.ADMIN); + + JSONObject response = new JSONObject(); + try { + AppContext.getInstance().getIndexingService().rebuildIndex(); + } catch (Exception e) { + throw new ServerException("IndexingError", "Error rebuilding index", e); + } + response.put("status", "ok"); + return Response.ok().entity(response).build(); + } } diff --git a/docs-web/src/main/java/com/sismics/docs/rest/resource/DocumentResource.java b/docs-web/src/main/java/com/sismics/docs/rest/resource/DocumentResource.java index 1ca40807..6fa12683 100644 --- a/docs-web/src/main/java/com/sismics/docs/rest/resource/DocumentResource.java +++ b/docs-web/src/main/java/com/sismics/docs/rest/resource/DocumentResource.java @@ -38,6 +38,10 @@ import com.sismics.docs.core.dao.jpa.TagDao; import com.sismics.docs.core.dao.jpa.criteria.DocumentCriteria; import com.sismics.docs.core.dao.jpa.dto.DocumentDto; import com.sismics.docs.core.dao.jpa.dto.TagDto; +import com.sismics.docs.core.event.DocumentCreatedAsyncEvent; +import com.sismics.docs.core.event.DocumentDeletedAsyncEvent; +import com.sismics.docs.core.event.DocumentUpdatedAsyncEvent; +import com.sismics.docs.core.model.context.AppContext; import com.sismics.docs.core.model.jpa.Document; import com.sismics.docs.core.model.jpa.Share; import com.sismics.docs.core.model.jpa.Tag; @@ -46,6 +50,7 @@ import com.sismics.docs.core.util.jpa.PaginatedLists; import com.sismics.docs.core.util.jpa.SortCriteria; import com.sismics.rest.exception.ClientException; import com.sismics.rest.exception.ForbiddenClientException; +import com.sismics.rest.exception.ServerException; import com.sismics.rest.util.ValidationUtil; /** @@ -148,7 +153,11 @@ public class DocumentResource extends BaseResource { SortCriteria sortCriteria = new SortCriteria(sortColumn, asc); DocumentCriteria documentCriteria = parseSearchQuery(search); documentCriteria.setUserId(principal.getId()); - documentDao.findByCriteria(paginatedList, documentCriteria, sortCriteria); + try { + documentDao.findByCriteria(paginatedList, documentCriteria, sortCriteria); + } catch (Exception e) { + throw new ServerException("SearchError", "Error searching in documents", e); + } for (DocumentDto documentDto : paginatedList.getResultList()) { JSONObject document = new JSONObject(); @@ -296,6 +305,11 @@ public class DocumentResource extends BaseResource { // Update tags updateTagList(documentId, tagList); + // Raise a document created event + DocumentCreatedAsyncEvent documentCreatedAsyncEvent = new DocumentCreatedAsyncEvent(); + documentCreatedAsyncEvent.setDocument(document); + AppContext.getInstance().getAsyncEventBus().post(documentCreatedAsyncEvent); + JSONObject response = new JSONObject(); response.put("id", documentId); return Response.ok().entity(response).build(); @@ -358,6 +372,11 @@ public class DocumentResource extends BaseResource { // Update tags updateTagList(id, tagList); + // Raise a document updated event + DocumentUpdatedAsyncEvent documentUpdatedAsyncEvent = new DocumentUpdatedAsyncEvent(); + documentUpdatedAsyncEvent.setDocument(document); + AppContext.getInstance().getAsyncEventBus().post(documentUpdatedAsyncEvent); + // Always return ok JSONObject response = new JSONObject(); response.put("id", id); @@ -415,6 +434,11 @@ public class DocumentResource extends BaseResource { throw new ClientException("DocumentNotFound", MessageFormat.format("Document not found: {0}", id)); } + // Raise a document deleted event + DocumentDeletedAsyncEvent documentDeletedAsyncEvent = new DocumentDeletedAsyncEvent(); + documentDeletedAsyncEvent.setDocument(document); + AppContext.getInstance().getAsyncEventBus().post(documentDeletedAsyncEvent); + // Delete the document documentDao.delete(document.getId()); diff --git a/docs-web/src/main/java/com/sismics/docs/rest/resource/FileResource.java b/docs-web/src/main/java/com/sismics/docs/rest/resource/FileResource.java index 2a8e4d18..db3eafa7 100644 --- a/docs-web/src/main/java/com/sismics/docs/rest/resource/FileResource.java +++ b/docs-web/src/main/java/com/sismics/docs/rest/resource/FileResource.java @@ -30,6 +30,7 @@ import com.sismics.docs.core.dao.jpa.DocumentDao; import com.sismics.docs.core.dao.jpa.FileDao; import com.sismics.docs.core.dao.jpa.ShareDao; import com.sismics.docs.core.event.FileCreatedAsyncEvent; +import com.sismics.docs.core.event.FileDeletedAsyncEvent; import com.sismics.docs.core.model.context.AppContext; import com.sismics.docs.core.model.jpa.Document; import com.sismics.docs.core.model.jpa.File; @@ -247,8 +248,12 @@ public class FileResource extends BaseResource { throw new ClientException("FileNotFound", MessageFormat.format("File not found: {0}", id)); } + // Raise a new file deleted event + FileDeletedAsyncEvent fileDeletedAsyncEvent = new FileDeletedAsyncEvent(); + fileDeletedAsyncEvent.setFile(file); + AppContext.getInstance().getAsyncEventBus().post(fileDeletedAsyncEvent); + // Delete the file - // TODO Delete the file from storage too fileDao.delete(file.getId()); // Always return ok diff --git a/docs-web/src/test/java/com/sismics/docs/rest/TestAppResource.java b/docs-web/src/test/java/com/sismics/docs/rest/TestAppResource.java index c9470a78..c7dad34b 100644 --- a/docs-web/src/test/java/com/sismics/docs/rest/TestAppResource.java +++ b/docs-web/src/test/java/com/sismics/docs/rest/TestAppResource.java @@ -49,6 +49,12 @@ public class TestAppResource extends BaseJerseyTest { response = appResource.post(ClientResponse.class); Assert.assertEquals(Status.OK, Status.fromStatusCode(response.getStatus())); json = response.getEntity(JSONObject.class); + + // Rebuild Lucene index + appResource = resource().path("/app/batch/reindex"); + appResource.addFilter(new CookieAuthenticationFilter(adminAuthenticationToken)); + response = appResource.post(ClientResponse.class); + Assert.assertEquals(Status.OK, Status.fromStatusCode(response.getStatus())); } /** diff --git a/docs-web/src/test/java/com/sismics/docs/rest/TestDocumentResource.java b/docs-web/src/test/java/com/sismics/docs/rest/TestDocumentResource.java index b0f0a3d2..6a449868 100644 --- a/docs-web/src/test/java/com/sismics/docs/rest/TestDocumentResource.java +++ b/docs-web/src/test/java/com/sismics/docs/rest/TestDocumentResource.java @@ -54,7 +54,7 @@ public class TestDocumentResource extends BaseJerseyTest { WebResource documentResource = resource().path("/document"); documentResource.addFilter(new CookieAuthenticationFilter(document1Token)); postParams = new MultivaluedMapImpl(); - postParams.add("title", "My super document 1"); + postParams.add("title", "My super title document 1"); postParams.add("description", "My super description for document 1"); postParams.add("tags", tag1Id); postParams.add("language", "eng"); @@ -121,6 +121,28 @@ public class TestDocumentResource extends BaseJerseyTest { Assert.assertEquals(document1Id, documents.getJSONObject(0).getString("id")); Assert.assertEquals(create1Date, documents.getJSONObject(0).getLong("create_date")); + // Search documents by query + documentResource = resource().path("/document/list"); + documentResource.addFilter(new CookieAuthenticationFilter(document1Token)); + getParams = new MultivaluedMapImpl(); + getParams.putSingle("search", "title"); + response = documentResource.queryParams(getParams).get(ClientResponse.class); + json = response.getEntity(JSONObject.class); + Assert.assertEquals(Status.OK, Status.fromStatusCode(response.getStatus())); + documents = json.getJSONArray("documents"); + Assert.assertTrue(documents.length() == 1); + + // Search documents by query + documentResource = resource().path("/document/list"); + documentResource.addFilter(new CookieAuthenticationFilter(document1Token)); + getParams = new MultivaluedMapImpl(); + getParams.putSingle("search", "description"); + response = documentResource.queryParams(getParams).get(ClientResponse.class); + json = response.getEntity(JSONObject.class); + Assert.assertEquals(Status.OK, Status.fromStatusCode(response.getStatus())); + documents = json.getJSONArray("documents"); + Assert.assertTrue(documents.length() == 1); + // Search documents by date documentResource = resource().path("/document/list"); documentResource.addFilter(new CookieAuthenticationFilter(document1Token)); diff --git a/docs-web/src/test/resources/log4j.properties b/docs-web/src/test/resources/log4j.properties index ed651462..c4c13395 100644 --- a/docs-web/src/test/resources/log4j.properties +++ b/docs-web/src/test/resources/log4j.properties @@ -6,4 +6,4 @@ log4j.appender.MEMORY=com.sismics.util.log4j.MemoryAppender log4j.appender.MEMORY.size=1000 log4j.logger.com.sismics=DEBUG -log4j.logger.org.hibernate.internal.util.EntityPrinter=INFO \ No newline at end of file +log4j.logger.org.hibernate=ERROR \ No newline at end of file