mirror of
https://github.com/sismics/docs.git
synced 2024-11-24 22:57:56 +01:00
Index files OCR-ized content and documents, search on index fields
This commit is contained in:
parent
5507d4ca57
commit
7ed976b27a
@ -1,8 +1,23 @@
|
||||
package com.sismics.docs.core.dao.jpa;
|
||||
|
||||
import java.sql.Timestamp;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Date;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.UUID;
|
||||
|
||||
import javax.persistence.EntityManager;
|
||||
import javax.persistence.NoResultException;
|
||||
import javax.persistence.Query;
|
||||
|
||||
import com.google.common.base.Joiner;
|
||||
import com.google.common.base.Strings;
|
||||
import com.sismics.docs.core.dao.jpa.criteria.DocumentCriteria;
|
||||
import com.sismics.docs.core.dao.jpa.dto.DocumentDto;
|
||||
import com.sismics.docs.core.dao.lucene.LuceneDao;
|
||||
import com.sismics.docs.core.model.jpa.Document;
|
||||
import com.sismics.docs.core.util.jpa.PaginatedList;
|
||||
import com.sismics.docs.core.util.jpa.PaginatedLists;
|
||||
@ -10,12 +25,6 @@ import com.sismics.docs.core.util.jpa.QueryParam;
|
||||
import com.sismics.docs.core.util.jpa.SortCriteria;
|
||||
import com.sismics.util.context.ThreadLocalContext;
|
||||
|
||||
import javax.persistence.EntityManager;
|
||||
import javax.persistence.NoResultException;
|
||||
import javax.persistence.Query;
|
||||
import java.sql.Timestamp;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Document DAO.
|
||||
*
|
||||
@ -40,6 +49,18 @@ public class DocumentDao {
|
||||
return document.getId();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the list of all documents.
|
||||
*
|
||||
* @return List of documents
|
||||
*/
|
||||
@SuppressWarnings("unchecked")
|
||||
public List<Document> findAll() {
|
||||
EntityManager em = ThreadLocalContext.get().getEntityManager();
|
||||
Query q = em.createQuery("select d from Document d where d.deleteDate is null");
|
||||
return q.getResultList();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns an active document.
|
||||
*
|
||||
@ -118,8 +139,9 @@ public class DocumentDao {
|
||||
* @param paginatedList List of documents (updated by side effects)
|
||||
* @param criteria Search criteria
|
||||
* @return List of document
|
||||
* @throws Exception
|
||||
*/
|
||||
public void findByCriteria(PaginatedList<DocumentDto> paginatedList, DocumentCriteria criteria, SortCriteria sortCriteria) {
|
||||
public void findByCriteria(PaginatedList<DocumentDto> paginatedList, DocumentCriteria criteria, SortCriteria sortCriteria) throws Exception {
|
||||
Map<String, Object> parameterMap = new HashMap<String, Object>();
|
||||
List<String> criteriaList = new ArrayList<String>();
|
||||
|
||||
@ -133,9 +155,15 @@ public class DocumentDao {
|
||||
criteriaList.add("d.DOC_IDUSER_C = :userId");
|
||||
parameterMap.put("userId", criteria.getUserId());
|
||||
}
|
||||
if (criteria.getSearch() != null) {
|
||||
criteriaList.add("(d.DOC_TITLE_C LIKE :search OR d.DOC_DESCRIPTION_C LIKE :search OR f.FIL_CONTENT_C LIKE :search)");
|
||||
parameterMap.put("search", "%" + criteria.getSearch() + "%");
|
||||
if (!Strings.isNullOrEmpty(criteria.getSearch())) {
|
||||
LuceneDao luceneDao = new LuceneDao();
|
||||
Set<String> documentIdList = luceneDao.search(criteria.getUserId(), criteria.getSearch());
|
||||
if (documentIdList.size() == 0) {
|
||||
// If the search doesn't find any document, the request should return nothing
|
||||
documentIdList.add(UUID.randomUUID().toString());
|
||||
}
|
||||
criteriaList.add("d.DOC_ID_C in :documentIdList");
|
||||
parameterMap.put("documentIdList", documentIdList);
|
||||
}
|
||||
if (criteria.getCreateDateMin() != null) {
|
||||
criteriaList.add("d.DOC_CREATEDATE_D >= :createDateMin");
|
||||
|
@ -1,8 +1,10 @@
|
||||
package com.sismics.docs.core.dao.lucene;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.document.Field;
|
||||
@ -18,12 +20,12 @@ import org.apache.lucene.queryparser.flexible.standard.StandardQueryParser;
|
||||
import org.apache.lucene.search.BooleanClause.Occur;
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.ScoreDoc;
|
||||
import org.apache.lucene.search.TopDocs;
|
||||
import org.apache.lucene.util.Version;
|
||||
|
||||
import com.sismics.docs.core.model.context.AppContext;
|
||||
import com.sismics.docs.core.model.jpa.Document;
|
||||
import com.sismics.docs.core.model.jpa.File;
|
||||
import com.sismics.docs.core.util.LuceneUtil;
|
||||
import com.sismics.docs.core.util.LuceneUtil.LuceneRunnable;
|
||||
@ -40,55 +42,102 @@ public class LuceneDao {
|
||||
*
|
||||
* @param fileList
|
||||
*/
|
||||
public void rebuildIndex(final List<File> fileList) {
|
||||
public void rebuildIndex(final List<Document> documentList, final List<File> fileList) {
|
||||
LuceneUtil.handle(new LuceneRunnable() {
|
||||
@Override
|
||||
public void run(IndexWriter indexWriter) throws Exception {
|
||||
// Empty index
|
||||
indexWriter.deleteAll();
|
||||
|
||||
// Add all files
|
||||
for (File file : fileList) {
|
||||
org.apache.lucene.document.Document document = getDocumentFromFile(file);
|
||||
indexWriter.addDocument(document);
|
||||
}
|
||||
}
|
||||
});
|
||||
// Add all documents
|
||||
Map<String, Document> documentMap = new HashMap<>();
|
||||
for (Document document : documentList) {
|
||||
org.apache.lucene.document.Document luceneDocument = getDocumentFromDocument(document);
|
||||
indexWriter.addDocument(luceneDocument);
|
||||
documentMap.put(document.getId(), document);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Add files to the index.
|
||||
*
|
||||
* @param fileList
|
||||
*/
|
||||
public void create(final List<File> fileList) {
|
||||
LuceneUtil.handle(new LuceneRunnable() {
|
||||
@Override
|
||||
public void run(IndexWriter indexWriter) throws Exception {
|
||||
// Add all files
|
||||
for (File file : fileList) {
|
||||
org.apache.lucene.document.Document document = getDocumentFromFile(file);
|
||||
indexWriter.addDocument(document);
|
||||
org.apache.lucene.document.Document luceneDocument = getDocumentFromFile(file, documentMap.get(file.getDocumentId()));
|
||||
indexWriter.addDocument(luceneDocument);
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Update index.
|
||||
* Add document to the index.
|
||||
*
|
||||
* @param fileList File list
|
||||
* @param document Document to add
|
||||
*/
|
||||
public void update(final List<File> fileList) {
|
||||
public void createDocument(final Document document) {
|
||||
LuceneUtil.handle(new LuceneRunnable() {
|
||||
@Override
|
||||
public void run(IndexWriter indexWriter) throws Exception {
|
||||
// Update all files
|
||||
for (File file : fileList) {
|
||||
org.apache.lucene.document.Document document = getDocumentFromFile(file);
|
||||
indexWriter.updateDocument(new Term("id", file.getId()), document);
|
||||
org.apache.lucene.document.Document luceneDocument = getDocumentFromDocument(document);
|
||||
indexWriter.addDocument(luceneDocument);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Add file to the index.
|
||||
*
|
||||
* @param file File to add
|
||||
* @param document Document linked to the file
|
||||
*/
|
||||
public void createFile(final File file, final Document document) {
|
||||
LuceneUtil.handle(new LuceneRunnable() {
|
||||
@Override
|
||||
public void run(IndexWriter indexWriter) throws Exception {
|
||||
org.apache.lucene.document.Document luceneDocument = getDocumentFromFile(file, document);
|
||||
indexWriter.addDocument(luceneDocument);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Update document index.
|
||||
*
|
||||
* @param document Updated document
|
||||
*/
|
||||
public void updateDocument(final Document document) {
|
||||
LuceneUtil.handle(new LuceneRunnable() {
|
||||
@Override
|
||||
public void run(IndexWriter indexWriter) throws Exception {
|
||||
org.apache.lucene.document.Document luceneDocument = getDocumentFromDocument(document);
|
||||
indexWriter.updateDocument(new Term("id", document.getId()), luceneDocument);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Update file index.
|
||||
*
|
||||
* @param file Updated file
|
||||
* @param document Document linked to the file
|
||||
*/
|
||||
public void updateFile(final File file, final Document document) {
|
||||
LuceneUtil.handle(new LuceneRunnable() {
|
||||
@Override
|
||||
public void run(IndexWriter indexWriter) throws Exception {
|
||||
org.apache.lucene.document.Document luceneDocument = getDocumentFromFile(file, document);
|
||||
indexWriter.updateDocument(new Term("id", file.getId()), luceneDocument);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Delete document from the index.
|
||||
*
|
||||
* @param id Document ID to delete
|
||||
*/
|
||||
public void deleteDocument(final String id) {
|
||||
LuceneUtil.handle(new LuceneRunnable() {
|
||||
@Override
|
||||
public void run(IndexWriter indexWriter) throws Exception {
|
||||
indexWriter.deleteDocuments(new Term("id", id));
|
||||
}
|
||||
});
|
||||
}
|
||||
@ -96,58 +145,87 @@ public class LuceneDao {
|
||||
/**
|
||||
* Search files.
|
||||
*
|
||||
* @param paginatedList
|
||||
* @param feedList
|
||||
* @param searchQuery
|
||||
* @return List of file IDs
|
||||
* @param userId User ID to filter on
|
||||
* @param searchQuery Search query
|
||||
* @return List of document IDs
|
||||
* @throws Exception
|
||||
*/
|
||||
public Set<String> search(String userId, String searchQuery, int limit) throws Exception {
|
||||
public Set<String> search(String userId, String searchQuery) throws Exception {
|
||||
// Escape query and add quotes so QueryParser generate a PhraseQuery
|
||||
searchQuery = "\"" + QueryParserUtil.escape(searchQuery) + "\"";
|
||||
|
||||
// Build search query
|
||||
StandardQueryParser qpHelper = new StandardQueryParser(new DocsStandardAnalyzer(Version.LUCENE_42));
|
||||
qpHelper.setPhraseSlop(100000); // PhraseQuery add terms
|
||||
Query contentQuery = qpHelper.parse(searchQuery, "content");
|
||||
|
||||
// Search on file content
|
||||
// Search on documents and files
|
||||
BooleanQuery query = new BooleanQuery();
|
||||
query.add(contentQuery, Occur.SHOULD);
|
||||
query.add(qpHelper.parse(searchQuery, "content"), Occur.SHOULD);
|
||||
query.add(qpHelper.parse(searchQuery, "title"), Occur.SHOULD);
|
||||
query.add(qpHelper.parse(searchQuery, "description"), Occur.SHOULD);
|
||||
|
||||
// Filter on provided user ID
|
||||
List<Term> terms = new ArrayList<Term>();
|
||||
if (userId != null) {
|
||||
terms.add(new Term("user_id", userId));
|
||||
TermsFilter feedsFilter = new TermsFilter(terms);
|
||||
}
|
||||
TermsFilter userFilter = new TermsFilter(terms);
|
||||
|
||||
// Search
|
||||
IndexReader reader = DirectoryReader.open(AppContext.getInstance().getLuceneDirectory());
|
||||
IndexSearcher searcher = new IndexSearcher(reader);
|
||||
TopDocs topDocs = searcher.search(query, feedsFilter, limit);
|
||||
TopDocs topDocs = searcher.search(query, userFilter, Integer.MAX_VALUE);
|
||||
ScoreDoc[] docs = topDocs.scoreDocs;
|
||||
|
||||
// Extract file IDs
|
||||
Set<String> fileIdList = new HashSet<String>();
|
||||
// Extract document IDs
|
||||
Set<String> documentIdList = new HashSet<String>();
|
||||
for (int i = 0; i < docs.length; i++) {
|
||||
String id = searcher.doc(docs[i].doc).get("id");
|
||||
fileIdList.add(id);
|
||||
org.apache.lucene.document.Document document = searcher.doc(docs[i].doc);
|
||||
String type = document.get("type");
|
||||
String documentId = null;
|
||||
if (type.equals("document")) {
|
||||
documentId = document.get("id");
|
||||
} else if (type.equals("file")) {
|
||||
documentId = document.get("document_id");
|
||||
}
|
||||
documentIdList.add(documentId);
|
||||
}
|
||||
|
||||
return fileIdList;
|
||||
return documentIdList;
|
||||
}
|
||||
|
||||
/**
|
||||
* Build Lucene document from database document.
|
||||
*
|
||||
* @param document Document
|
||||
* @return Document
|
||||
*/
|
||||
private org.apache.lucene.document.Document getDocumentFromDocument(Document document) {
|
||||
org.apache.lucene.document.Document luceneDocument = new org.apache.lucene.document.Document();
|
||||
luceneDocument.add(new StringField("id", document.getId(), Field.Store.YES));
|
||||
luceneDocument.add(new StringField("user_id", document.getUserId(), Field.Store.YES));
|
||||
luceneDocument.add(new StringField("type", "document", Field.Store.YES));
|
||||
luceneDocument.add(new TextField("title", document.getTitle(), Field.Store.NO));
|
||||
luceneDocument.add(new TextField("description", document.getDescription(), Field.Store.NO));
|
||||
|
||||
return luceneDocument;
|
||||
}
|
||||
|
||||
/**
|
||||
* Build Lucene document from file.
|
||||
*
|
||||
* @param file File
|
||||
* @param document Document linked to the file
|
||||
* @return Document
|
||||
*/
|
||||
private org.apache.lucene.document.Document getDocumentFromFile(File file) {
|
||||
// Building document
|
||||
org.apache.lucene.document.Document document = new org.apache.lucene.document.Document();
|
||||
document.add(new StringField("id", file.getId(), Field.Store.YES));
|
||||
document.add(new TextField("content", file.getContent(), Field.Store.NO));
|
||||
private org.apache.lucene.document.Document getDocumentFromFile(File file, Document document) {
|
||||
org.apache.lucene.document.Document luceneDocument = new org.apache.lucene.document.Document();
|
||||
luceneDocument.add(new StringField("id", file.getId(), Field.Store.YES));
|
||||
luceneDocument.add(new StringField("user_id", document.getUserId(), Field.Store.YES));
|
||||
luceneDocument.add(new StringField("type", "file", Field.Store.YES));
|
||||
luceneDocument.add(new StringField("document_id", file.getDocumentId(), Field.Store.YES));
|
||||
luceneDocument.add(new TextField("content", file.getContent(), Field.Store.NO));
|
||||
|
||||
return document;
|
||||
return luceneDocument;
|
||||
}
|
||||
}
|
||||
|
@ -0,0 +1,41 @@
|
||||
package com.sismics.docs.core.event;
|
||||
|
||||
import com.google.common.base.Objects;
|
||||
import com.sismics.docs.core.model.jpa.Document;
|
||||
|
||||
/**
|
||||
* Document created event.
|
||||
*
|
||||
* @author bgamard
|
||||
*/
|
||||
public class DocumentCreatedAsyncEvent {
|
||||
/**
|
||||
* Created document.
|
||||
*/
|
||||
private Document document;
|
||||
|
||||
/**
|
||||
* Getter of document.
|
||||
*
|
||||
* @return the document
|
||||
*/
|
||||
public Document getDocument() {
|
||||
return document;
|
||||
}
|
||||
|
||||
/**
|
||||
* Setter of document.
|
||||
*
|
||||
* @param document document
|
||||
*/
|
||||
public void setDocument(Document document) {
|
||||
this.document = document;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return Objects.toStringHelper(this)
|
||||
.add("document", document)
|
||||
.toString();
|
||||
}
|
||||
}
|
@ -0,0 +1,41 @@
|
||||
package com.sismics.docs.core.event;
|
||||
|
||||
import com.google.common.base.Objects;
|
||||
import com.sismics.docs.core.model.jpa.Document;
|
||||
|
||||
/**
|
||||
* Document deleted event.
|
||||
*
|
||||
* @author bgamard
|
||||
*/
|
||||
public class DocumentDeletedAsyncEvent {
|
||||
/**
|
||||
* Created document.
|
||||
*/
|
||||
private Document document;
|
||||
|
||||
/**
|
||||
* Getter of document.
|
||||
*
|
||||
* @return the document
|
||||
*/
|
||||
public Document getDocument() {
|
||||
return document;
|
||||
}
|
||||
|
||||
/**
|
||||
* Setter of document.
|
||||
*
|
||||
* @param document document
|
||||
*/
|
||||
public void setDocument(Document document) {
|
||||
this.document = document;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return Objects.toStringHelper(this)
|
||||
.add("document", document)
|
||||
.toString();
|
||||
}
|
||||
}
|
@ -0,0 +1,41 @@
|
||||
package com.sismics.docs.core.event;
|
||||
|
||||
import com.google.common.base.Objects;
|
||||
import com.sismics.docs.core.model.jpa.Document;
|
||||
|
||||
/**
|
||||
* Document updated event.
|
||||
*
|
||||
* @author bgamard
|
||||
*/
|
||||
public class DocumentUpdatedAsyncEvent {
|
||||
/**
|
||||
* Created document.
|
||||
*/
|
||||
private Document document;
|
||||
|
||||
/**
|
||||
* Getter of document.
|
||||
*
|
||||
* @return the document
|
||||
*/
|
||||
public Document getDocument() {
|
||||
return document;
|
||||
}
|
||||
|
||||
/**
|
||||
* Setter of document.
|
||||
*
|
||||
* @param document document
|
||||
*/
|
||||
public void setDocument(Document document) {
|
||||
this.document = document;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return Objects.toStringHelper(this)
|
||||
.add("document", document)
|
||||
.toString();
|
||||
}
|
||||
}
|
@ -0,0 +1,41 @@
|
||||
package com.sismics.docs.core.event;
|
||||
|
||||
import com.google.common.base.Objects;
|
||||
import com.sismics.docs.core.model.jpa.File;
|
||||
|
||||
/**
|
||||
* File deleted event.
|
||||
*
|
||||
* @author bgamard
|
||||
*/
|
||||
public class FileDeletedAsyncEvent {
|
||||
/**
|
||||
* Deleted file.
|
||||
*/
|
||||
private File file;
|
||||
|
||||
/**
|
||||
* Getter of file.
|
||||
*
|
||||
* @return the file
|
||||
*/
|
||||
public File getFile() {
|
||||
return file;
|
||||
}
|
||||
|
||||
/**
|
||||
* Setter of file.
|
||||
*
|
||||
* @param file file
|
||||
*/
|
||||
public void setFile(File file) {
|
||||
this.file = file;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return Objects.toStringHelper(this)
|
||||
.add("file", file)
|
||||
.toString();
|
||||
}
|
||||
}
|
@ -0,0 +1,16 @@
|
||||
package com.sismics.docs.core.event;
|
||||
|
||||
import com.google.common.base.Objects;
|
||||
|
||||
/**
|
||||
* Rebuild index event.
|
||||
*
|
||||
* @author bgamard
|
||||
*/
|
||||
public class RebuildIndexAsyncEvent {
|
||||
@Override
|
||||
public String toString() {
|
||||
return Objects.toStringHelper(this)
|
||||
.toString();
|
||||
}
|
||||
}
|
@ -0,0 +1,57 @@
|
||||
package com.sismics.docs.core.event;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.google.common.eventbus.Subscribe;
|
||||
import com.sismics.docs.core.dao.jpa.DocumentDao;
|
||||
import com.sismics.docs.core.dao.jpa.FileDao;
|
||||
import com.sismics.docs.core.dao.lucene.LuceneDao;
|
||||
import com.sismics.docs.core.model.jpa.Document;
|
||||
import com.sismics.docs.core.model.jpa.File;
|
||||
import com.sismics.docs.core.util.TransactionUtil;
|
||||
|
||||
/**
|
||||
* Listener on rebuild index.
|
||||
*
|
||||
* @author bgamard
|
||||
*/
|
||||
public class RebuildIndexAsyncListener {
|
||||
/**
|
||||
* Logger.
|
||||
*/
|
||||
private static final Logger log = LoggerFactory.getLogger(RebuildIndexAsyncListener.class);
|
||||
|
||||
/**
|
||||
* Rebuild Lucene index.
|
||||
*
|
||||
* @param rebuildIndexAsyncEvent Index rebuild event
|
||||
* @throws Exception
|
||||
*/
|
||||
@Subscribe
|
||||
public void onArticleCreated(final RebuildIndexAsyncEvent rebuildIndexAsyncEvent) throws Exception {
|
||||
if (log.isInfoEnabled()) {
|
||||
log.info("Rebuild index event: " + rebuildIndexAsyncEvent.toString());
|
||||
}
|
||||
|
||||
// Fetch all documents and files
|
||||
TransactionUtil.handle(new Runnable() {
|
||||
@Override
|
||||
public void run() {
|
||||
// Fetch all documents
|
||||
DocumentDao documentDao = new DocumentDao();
|
||||
List<Document> documentList = documentDao.findAll();
|
||||
|
||||
// Fetch all files
|
||||
FileDao fileDao = new FileDao();
|
||||
List<File> fileList = fileDao.findAll();
|
||||
|
||||
// Rebuild index
|
||||
LuceneDao luceneDao = new LuceneDao();
|
||||
luceneDao.rebuildIndex(documentList, fileList);
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
@ -0,0 +1,37 @@
|
||||
package com.sismics.docs.core.listener.async;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.google.common.eventbus.Subscribe;
|
||||
import com.sismics.docs.core.dao.lucene.LuceneDao;
|
||||
import com.sismics.docs.core.event.DocumentCreatedAsyncEvent;
|
||||
|
||||
/**
|
||||
* Listener on document created.
|
||||
*
|
||||
* @author bgamard
|
||||
*/
|
||||
public class DocumentCreatedAsyncListener {
|
||||
/**
|
||||
* Logger.
|
||||
*/
|
||||
private static final Logger log = LoggerFactory.getLogger(DocumentCreatedAsyncListener.class);
|
||||
|
||||
/**
|
||||
* Document created.
|
||||
*
|
||||
* @param documentCreatedAsyncEvent Document created event
|
||||
* @throws Exception
|
||||
*/
|
||||
@Subscribe
|
||||
public void on(final DocumentCreatedAsyncEvent documentCreatedAsyncEvent) throws Exception {
|
||||
if (log.isInfoEnabled()) {
|
||||
log.info("Document created event: " + documentCreatedAsyncEvent.toString());
|
||||
}
|
||||
|
||||
// Update Lucene index
|
||||
LuceneDao luceneDao = new LuceneDao();
|
||||
luceneDao.createDocument(documentCreatedAsyncEvent.getDocument());
|
||||
}
|
||||
}
|
@ -0,0 +1,37 @@
|
||||
package com.sismics.docs.core.listener.async;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.google.common.eventbus.Subscribe;
|
||||
import com.sismics.docs.core.dao.lucene.LuceneDao;
|
||||
import com.sismics.docs.core.event.DocumentDeletedAsyncEvent;
|
||||
|
||||
/**
|
||||
* Listener on document deleted.
|
||||
*
|
||||
* @author bgamard
|
||||
*/
|
||||
public class DocumentDeletedAsyncListener {
|
||||
/**
|
||||
* Logger.
|
||||
*/
|
||||
private static final Logger log = LoggerFactory.getLogger(DocumentDeletedAsyncListener.class);
|
||||
|
||||
/**
|
||||
* Document deleted.
|
||||
*
|
||||
* @param documentDeletedAsyncEvent Document deleted event
|
||||
* @throws Exception
|
||||
*/
|
||||
@Subscribe
|
||||
public void on(final DocumentDeletedAsyncEvent documentDeletedAsyncEvent) throws Exception {
|
||||
if (log.isInfoEnabled()) {
|
||||
log.info("Document deleted event: " + documentDeletedAsyncEvent.toString());
|
||||
}
|
||||
|
||||
// Update Lucene index
|
||||
LuceneDao luceneDao = new LuceneDao();
|
||||
luceneDao.deleteDocument(documentDeletedAsyncEvent.getDocument().getId());
|
||||
}
|
||||
}
|
@ -0,0 +1,37 @@
|
||||
package com.sismics.docs.core.listener.async;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.google.common.eventbus.Subscribe;
|
||||
import com.sismics.docs.core.dao.lucene.LuceneDao;
|
||||
import com.sismics.docs.core.event.DocumentUpdatedAsyncEvent;
|
||||
|
||||
/**
|
||||
* Listener on document updated.
|
||||
*
|
||||
* @author bgamard
|
||||
*/
|
||||
public class DocumentUpdatedAsyncListener {
|
||||
/**
|
||||
* Logger.
|
||||
*/
|
||||
private static final Logger log = LoggerFactory.getLogger(DocumentUpdatedAsyncListener.class);
|
||||
|
||||
/**
|
||||
* Document updated.
|
||||
*
|
||||
* @param documentUpdatedAsyncEvent Document updated event
|
||||
* @throws Exception
|
||||
*/
|
||||
@Subscribe
|
||||
public void on(final DocumentUpdatedAsyncEvent documentUpdatedAsyncEvent) throws Exception {
|
||||
if (log.isInfoEnabled()) {
|
||||
log.info("Document updated event: " + documentUpdatedAsyncEvent.toString());
|
||||
}
|
||||
|
||||
// Update Lucene index
|
||||
LuceneDao luceneDao = new LuceneDao();
|
||||
luceneDao.updateDocument(documentUpdatedAsyncEvent.getDocument());
|
||||
}
|
||||
}
|
@ -6,12 +6,13 @@ import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.google.common.eventbus.Subscribe;
|
||||
import com.sismics.docs.core.dao.lucene.LuceneDao;
|
||||
import com.sismics.docs.core.event.FileCreatedAsyncEvent;
|
||||
import com.sismics.docs.core.util.FileUtil;
|
||||
import com.sismics.util.ImageUtil;
|
||||
|
||||
/**
|
||||
* Listener on new file.
|
||||
* Listener on file created.
|
||||
*
|
||||
* @author bgamard
|
||||
*/
|
||||
@ -22,13 +23,13 @@ public class FileCreatedAsyncListener {
|
||||
private static final Logger log = LoggerFactory.getLogger(FileCreatedAsyncListener.class);
|
||||
|
||||
/**
|
||||
* Process new file.
|
||||
* File created.
|
||||
*
|
||||
* @param fileCreatedAsyncEvent New file created event
|
||||
* @param fileCreatedAsyncEvent File created event
|
||||
* @throws Exception
|
||||
*/
|
||||
@Subscribe
|
||||
public void onFileCreated(final FileCreatedAsyncEvent fileCreatedAsyncEvent) throws Exception {
|
||||
public void on(final FileCreatedAsyncEvent fileCreatedAsyncEvent) throws Exception {
|
||||
if (log.isInfoEnabled()) {
|
||||
log.info("File created event: " + fileCreatedAsyncEvent.toString());
|
||||
}
|
||||
@ -39,5 +40,9 @@ public class FileCreatedAsyncListener {
|
||||
FileUtil.ocrFile(fileCreatedAsyncEvent.getDocument(), fileCreatedAsyncEvent.getFile());
|
||||
log.info(MessageFormat.format("File OCR-ized in {0}ms", System.currentTimeMillis() - startTime));
|
||||
}
|
||||
|
||||
// Update Lucene index
|
||||
LuceneDao luceneDao = new LuceneDao();
|
||||
luceneDao.createFile(fileCreatedAsyncEvent.getFile(), fileCreatedAsyncEvent.getDocument());
|
||||
}
|
||||
}
|
||||
|
@ -0,0 +1,39 @@
|
||||
package com.sismics.docs.core.listener.async;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.google.common.eventbus.Subscribe;
|
||||
import com.sismics.docs.core.dao.lucene.LuceneDao;
|
||||
import com.sismics.docs.core.event.FileDeletedAsyncEvent;
|
||||
|
||||
/**
|
||||
* Listener on file deleted.
|
||||
*
|
||||
* @author bgamard
|
||||
*/
|
||||
public class FileDeletedAsyncListener {
|
||||
/**
|
||||
* Logger.
|
||||
*/
|
||||
private static final Logger log = LoggerFactory.getLogger(FileDeletedAsyncListener.class);
|
||||
|
||||
/**
|
||||
* File deleted.
|
||||
*
|
||||
* @param fileDeletedAsyncEvent File deleted event
|
||||
* @throws Exception
|
||||
*/
|
||||
@Subscribe
|
||||
public void on(final FileDeletedAsyncEvent fileDeletedAsyncEvent) throws Exception {
|
||||
if (log.isInfoEnabled()) {
|
||||
log.info("File deleted event: " + fileDeletedAsyncEvent.toString());
|
||||
}
|
||||
|
||||
// TODO Delete the file from storage
|
||||
|
||||
// Update Lucene index
|
||||
LuceneDao luceneDao = new LuceneDao();
|
||||
luceneDao.deleteDocument(fileDeletedAsyncEvent.getFile().getId());
|
||||
}
|
||||
}
|
@ -13,7 +13,12 @@ import com.google.common.eventbus.AsyncEventBus;
|
||||
import com.google.common.eventbus.EventBus;
|
||||
import com.sismics.docs.core.constant.ConfigType;
|
||||
import com.sismics.docs.core.dao.jpa.ConfigDao;
|
||||
import com.sismics.docs.core.event.RebuildIndexAsyncListener;
|
||||
import com.sismics.docs.core.listener.async.DocumentCreatedAsyncListener;
|
||||
import com.sismics.docs.core.listener.async.DocumentDeletedAsyncListener;
|
||||
import com.sismics.docs.core.listener.async.DocumentUpdatedAsyncListener;
|
||||
import com.sismics.docs.core.listener.async.FileCreatedAsyncListener;
|
||||
import com.sismics.docs.core.listener.async.FileDeletedAsyncListener;
|
||||
import com.sismics.docs.core.listener.sync.DeadEventListener;
|
||||
import com.sismics.docs.core.model.jpa.Config;
|
||||
import com.sismics.docs.core.service.IndexingService;
|
||||
@ -80,6 +85,11 @@ public class AppContext {
|
||||
|
||||
asyncEventBus = newAsyncEventBus();
|
||||
asyncEventBus.register(new FileCreatedAsyncListener());
|
||||
asyncEventBus.register(new FileDeletedAsyncListener());
|
||||
asyncEventBus.register(new DocumentCreatedAsyncListener());
|
||||
asyncEventBus.register(new DocumentUpdatedAsyncListener());
|
||||
asyncEventBus.register(new DocumentDeletedAsyncListener());
|
||||
asyncEventBus.register(new RebuildIndexAsyncListener());
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -1,10 +1,9 @@
|
||||
package com.sismics.docs.core.service;
|
||||
|
||||
import com.google.common.util.concurrent.AbstractScheduledService;
|
||||
import com.sismics.docs.core.constant.Constants;
|
||||
import com.sismics.docs.core.model.context.AppContext;
|
||||
import com.sismics.docs.core.util.DirectoryUtil;
|
||||
import com.sismics.docs.core.util.TransactionUtil;
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.RAMDirectory;
|
||||
import org.apache.lucene.store.SimpleFSDirectory;
|
||||
@ -12,9 +11,12 @@ import org.apache.lucene.store.SimpleFSLockFactory;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import com.google.common.util.concurrent.AbstractScheduledService;
|
||||
import com.sismics.docs.core.constant.Constants;
|
||||
import com.sismics.docs.core.event.RebuildIndexAsyncEvent;
|
||||
import com.sismics.docs.core.model.context.AppContext;
|
||||
import com.sismics.docs.core.util.DirectoryUtil;
|
||||
import com.sismics.docs.core.util.TransactionUtil;
|
||||
|
||||
/**
|
||||
* Indexing service.
|
||||
@ -85,6 +87,16 @@ public class IndexingService extends AbstractScheduledService {
|
||||
return Scheduler.newFixedDelaySchedule(0, 1, TimeUnit.HOURS);
|
||||
}
|
||||
|
||||
/**
|
||||
* Destroy and rebuild Lucene index.
|
||||
*
|
||||
* @throws Exception
|
||||
*/
|
||||
public void rebuildIndex() throws Exception {
|
||||
RebuildIndexAsyncEvent rebuildIndexAsyncEvent = new RebuildIndexAsyncEvent();
|
||||
AppContext.getInstance().getAsyncEventBus().post(rebuildIndexAsyncEvent);
|
||||
}
|
||||
|
||||
/**
|
||||
* Getter of directory.
|
||||
*
|
||||
|
@ -1,6 +1,8 @@
|
||||
package com.sismics.docs.core.util;
|
||||
|
||||
import java.awt.color.ColorSpace;
|
||||
import java.awt.image.BufferedImage;
|
||||
import java.awt.image.ColorConvertOp;
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Paths;
|
||||
|
||||
@ -46,12 +48,11 @@ public class FileUtil {
|
||||
log.error("Error reading the image " + storedfile, e);
|
||||
}
|
||||
|
||||
// Upscale the image if it is too small
|
||||
if (image.getWidth() < 2500 || image.getHeight() < 2500) {
|
||||
BufferedImage resizedImage = Scalr.resize(image, Method.AUTOMATIC, Mode.AUTOMATIC, 3500);
|
||||
// Upscale and grayscale the image
|
||||
BufferedImage resizedImage = Scalr.resize(image, Method.AUTOMATIC, Mode.AUTOMATIC, 3500,
|
||||
new ColorConvertOp(ColorSpace.getInstance(ColorSpace.CS_GRAY), null));
|
||||
image.flush();
|
||||
image = resizedImage;
|
||||
}
|
||||
|
||||
// OCR the file
|
||||
try {
|
||||
|
@ -1,4 +0,0 @@
|
||||
- Index title and description (server)
|
||||
- Use Lucene for title and description searching (server)
|
||||
- Index OCR-ized content (server)
|
||||
- Batch to rebuild Lucene index (server)
|
@ -22,6 +22,7 @@ import com.sismics.docs.core.dao.jpa.DocumentDao;
|
||||
import com.sismics.docs.core.dao.jpa.FileDao;
|
||||
import com.sismics.docs.core.dao.jpa.criteria.DocumentCriteria;
|
||||
import com.sismics.docs.core.dao.jpa.dto.DocumentDto;
|
||||
import com.sismics.docs.core.model.context.AppContext;
|
||||
import com.sismics.docs.core.model.jpa.Document;
|
||||
import com.sismics.docs.core.model.jpa.File;
|
||||
import com.sismics.docs.core.util.ConfigUtil;
|
||||
@ -68,7 +69,11 @@ public class AppResource extends BaseResource {
|
||||
SortCriteria sortCriteria = new SortCriteria(0, true);
|
||||
DocumentCriteria documentCriteria = new DocumentCriteria();
|
||||
documentCriteria.setUserId(principal.getId());
|
||||
try {
|
||||
documentDao.findByCriteria(paginatedList, documentCriteria, sortCriteria);
|
||||
} catch (Exception e) {
|
||||
throw new ServerException("SearchError", "Error searching in documents", e);
|
||||
}
|
||||
response.put("document_count", paginatedList.getResultCount());
|
||||
|
||||
// General data
|
||||
@ -146,7 +151,7 @@ public class AppResource extends BaseResource {
|
||||
@POST
|
||||
@Path("batch/ocr")
|
||||
@Produces(MediaType.APPLICATION_JSON)
|
||||
public Response batchReindex() throws JSONException {
|
||||
public Response batchOcr() throws JSONException {
|
||||
if (!authenticate()) {
|
||||
throw new ForbiddenClientException();
|
||||
}
|
||||
@ -164,4 +169,29 @@ public class AppResource extends BaseResource {
|
||||
response.put("status", "ok");
|
||||
return Response.ok().entity(response).build();
|
||||
}
|
||||
|
||||
/**
|
||||
* Destroy and rebuild Lucene index.
|
||||
*
|
||||
* @return Response
|
||||
* @throws JSONException
|
||||
*/
|
||||
@POST
|
||||
@Path("batch/reindex")
|
||||
@Produces(MediaType.APPLICATION_JSON)
|
||||
public Response batchReindex() throws JSONException {
|
||||
if (!authenticate()) {
|
||||
throw new ForbiddenClientException();
|
||||
}
|
||||
checkBaseFunction(BaseFunction.ADMIN);
|
||||
|
||||
JSONObject response = new JSONObject();
|
||||
try {
|
||||
AppContext.getInstance().getIndexingService().rebuildIndex();
|
||||
} catch (Exception e) {
|
||||
throw new ServerException("IndexingError", "Error rebuilding index", e);
|
||||
}
|
||||
response.put("status", "ok");
|
||||
return Response.ok().entity(response).build();
|
||||
}
|
||||
}
|
||||
|
@ -38,6 +38,10 @@ import com.sismics.docs.core.dao.jpa.TagDao;
|
||||
import com.sismics.docs.core.dao.jpa.criteria.DocumentCriteria;
|
||||
import com.sismics.docs.core.dao.jpa.dto.DocumentDto;
|
||||
import com.sismics.docs.core.dao.jpa.dto.TagDto;
|
||||
import com.sismics.docs.core.event.DocumentCreatedAsyncEvent;
|
||||
import com.sismics.docs.core.event.DocumentDeletedAsyncEvent;
|
||||
import com.sismics.docs.core.event.DocumentUpdatedAsyncEvent;
|
||||
import com.sismics.docs.core.model.context.AppContext;
|
||||
import com.sismics.docs.core.model.jpa.Document;
|
||||
import com.sismics.docs.core.model.jpa.Share;
|
||||
import com.sismics.docs.core.model.jpa.Tag;
|
||||
@ -46,6 +50,7 @@ import com.sismics.docs.core.util.jpa.PaginatedLists;
|
||||
import com.sismics.docs.core.util.jpa.SortCriteria;
|
||||
import com.sismics.rest.exception.ClientException;
|
||||
import com.sismics.rest.exception.ForbiddenClientException;
|
||||
import com.sismics.rest.exception.ServerException;
|
||||
import com.sismics.rest.util.ValidationUtil;
|
||||
|
||||
/**
|
||||
@ -148,7 +153,11 @@ public class DocumentResource extends BaseResource {
|
||||
SortCriteria sortCriteria = new SortCriteria(sortColumn, asc);
|
||||
DocumentCriteria documentCriteria = parseSearchQuery(search);
|
||||
documentCriteria.setUserId(principal.getId());
|
||||
try {
|
||||
documentDao.findByCriteria(paginatedList, documentCriteria, sortCriteria);
|
||||
} catch (Exception e) {
|
||||
throw new ServerException("SearchError", "Error searching in documents", e);
|
||||
}
|
||||
|
||||
for (DocumentDto documentDto : paginatedList.getResultList()) {
|
||||
JSONObject document = new JSONObject();
|
||||
@ -296,6 +305,11 @@ public class DocumentResource extends BaseResource {
|
||||
// Update tags
|
||||
updateTagList(documentId, tagList);
|
||||
|
||||
// Raise a document created event
|
||||
DocumentCreatedAsyncEvent documentCreatedAsyncEvent = new DocumentCreatedAsyncEvent();
|
||||
documentCreatedAsyncEvent.setDocument(document);
|
||||
AppContext.getInstance().getAsyncEventBus().post(documentCreatedAsyncEvent);
|
||||
|
||||
JSONObject response = new JSONObject();
|
||||
response.put("id", documentId);
|
||||
return Response.ok().entity(response).build();
|
||||
@ -358,6 +372,11 @@ public class DocumentResource extends BaseResource {
|
||||
// Update tags
|
||||
updateTagList(id, tagList);
|
||||
|
||||
// Raise a document updated event
|
||||
DocumentUpdatedAsyncEvent documentUpdatedAsyncEvent = new DocumentUpdatedAsyncEvent();
|
||||
documentUpdatedAsyncEvent.setDocument(document);
|
||||
AppContext.getInstance().getAsyncEventBus().post(documentUpdatedAsyncEvent);
|
||||
|
||||
// Always return ok
|
||||
JSONObject response = new JSONObject();
|
||||
response.put("id", id);
|
||||
@ -415,6 +434,11 @@ public class DocumentResource extends BaseResource {
|
||||
throw new ClientException("DocumentNotFound", MessageFormat.format("Document not found: {0}", id));
|
||||
}
|
||||
|
||||
// Raise a document deleted event
|
||||
DocumentDeletedAsyncEvent documentDeletedAsyncEvent = new DocumentDeletedAsyncEvent();
|
||||
documentDeletedAsyncEvent.setDocument(document);
|
||||
AppContext.getInstance().getAsyncEventBus().post(documentDeletedAsyncEvent);
|
||||
|
||||
// Delete the document
|
||||
documentDao.delete(document.getId());
|
||||
|
||||
|
@ -30,6 +30,7 @@ import com.sismics.docs.core.dao.jpa.DocumentDao;
|
||||
import com.sismics.docs.core.dao.jpa.FileDao;
|
||||
import com.sismics.docs.core.dao.jpa.ShareDao;
|
||||
import com.sismics.docs.core.event.FileCreatedAsyncEvent;
|
||||
import com.sismics.docs.core.event.FileDeletedAsyncEvent;
|
||||
import com.sismics.docs.core.model.context.AppContext;
|
||||
import com.sismics.docs.core.model.jpa.Document;
|
||||
import com.sismics.docs.core.model.jpa.File;
|
||||
@ -247,8 +248,12 @@ public class FileResource extends BaseResource {
|
||||
throw new ClientException("FileNotFound", MessageFormat.format("File not found: {0}", id));
|
||||
}
|
||||
|
||||
// Raise a new file deleted event
|
||||
FileDeletedAsyncEvent fileDeletedAsyncEvent = new FileDeletedAsyncEvent();
|
||||
fileDeletedAsyncEvent.setFile(file);
|
||||
AppContext.getInstance().getAsyncEventBus().post(fileDeletedAsyncEvent);
|
||||
|
||||
// Delete the file
|
||||
// TODO Delete the file from storage too
|
||||
fileDao.delete(file.getId());
|
||||
|
||||
// Always return ok
|
||||
|
@ -49,6 +49,12 @@ public class TestAppResource extends BaseJerseyTest {
|
||||
response = appResource.post(ClientResponse.class);
|
||||
Assert.assertEquals(Status.OK, Status.fromStatusCode(response.getStatus()));
|
||||
json = response.getEntity(JSONObject.class);
|
||||
|
||||
// Rebuild Lucene index
|
||||
appResource = resource().path("/app/batch/reindex");
|
||||
appResource.addFilter(new CookieAuthenticationFilter(adminAuthenticationToken));
|
||||
response = appResource.post(ClientResponse.class);
|
||||
Assert.assertEquals(Status.OK, Status.fromStatusCode(response.getStatus()));
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -54,7 +54,7 @@ public class TestDocumentResource extends BaseJerseyTest {
|
||||
WebResource documentResource = resource().path("/document");
|
||||
documentResource.addFilter(new CookieAuthenticationFilter(document1Token));
|
||||
postParams = new MultivaluedMapImpl();
|
||||
postParams.add("title", "My super document 1");
|
||||
postParams.add("title", "My super title document 1");
|
||||
postParams.add("description", "My super description for document 1");
|
||||
postParams.add("tags", tag1Id);
|
||||
postParams.add("language", "eng");
|
||||
@ -121,6 +121,28 @@ public class TestDocumentResource extends BaseJerseyTest {
|
||||
Assert.assertEquals(document1Id, documents.getJSONObject(0).getString("id"));
|
||||
Assert.assertEquals(create1Date, documents.getJSONObject(0).getLong("create_date"));
|
||||
|
||||
// Search documents by query
|
||||
documentResource = resource().path("/document/list");
|
||||
documentResource.addFilter(new CookieAuthenticationFilter(document1Token));
|
||||
getParams = new MultivaluedMapImpl();
|
||||
getParams.putSingle("search", "title");
|
||||
response = documentResource.queryParams(getParams).get(ClientResponse.class);
|
||||
json = response.getEntity(JSONObject.class);
|
||||
Assert.assertEquals(Status.OK, Status.fromStatusCode(response.getStatus()));
|
||||
documents = json.getJSONArray("documents");
|
||||
Assert.assertTrue(documents.length() == 1);
|
||||
|
||||
// Search documents by query
|
||||
documentResource = resource().path("/document/list");
|
||||
documentResource.addFilter(new CookieAuthenticationFilter(document1Token));
|
||||
getParams = new MultivaluedMapImpl();
|
||||
getParams.putSingle("search", "description");
|
||||
response = documentResource.queryParams(getParams).get(ClientResponse.class);
|
||||
json = response.getEntity(JSONObject.class);
|
||||
Assert.assertEquals(Status.OK, Status.fromStatusCode(response.getStatus()));
|
||||
documents = json.getJSONArray("documents");
|
||||
Assert.assertTrue(documents.length() == 1);
|
||||
|
||||
// Search documents by date
|
||||
documentResource = resource().path("/document/list");
|
||||
documentResource.addFilter(new CookieAuthenticationFilter(document1Token));
|
||||
|
@ -6,4 +6,4 @@ log4j.appender.MEMORY=com.sismics.util.log4j.MemoryAppender
|
||||
log4j.appender.MEMORY.size=1000
|
||||
|
||||
log4j.logger.com.sismics=DEBUG
|
||||
log4j.logger.org.hibernate.internal.util.EntityPrinter=INFO
|
||||
log4j.logger.org.hibernate=ERROR
|
Loading…
Reference in New Issue
Block a user