Closes #222: full reindexing scalable + concurrent Lucene merges

This commit is contained in:
Benjamin Gamard 2018-10-29 18:32:39 +01:00
parent 2134f116da
commit 35c3ee023b
7 changed files with 64 additions and 28 deletions

View File

@@ -45,16 +45,20 @@ public class DocumentDao {
/**
* Returns the list of all active documents.
*
*
* @param offset Offset
* @param limit Limit
* @return List of documents
*/
@SuppressWarnings("unchecked")
public List<Document> findAll() {
public List<Document> findAll(int offset, int limit) {
EntityManager em = ThreadLocalContext.get().getEntityManager();
Query q = em.createQuery("select d from Document d where d.deleteDate is null");
q.setFirstResult(offset);
q.setMaxResults(limit);
return q.getResultList();
}
/**
* Returns the list of all active documents from a user.
*

View File

@@ -42,13 +42,17 @@ public class FileDao {
/**
* Returns the list of all files.
*
*
* @param offset Offset
* @param limit Limit
* @return List of files
*/
@SuppressWarnings("unchecked")
public List<File> findAll() {
public List<File> findAll(int offset, int limit) {
EntityManager em = ThreadLocalContext.get().getEntityManager();
Query q = em.createQuery("select f from File f where f.deleteDate is null");
q.setFirstResult(offset);
q.setMaxResults(limit);
return q.getResultList();
}

View File

@@ -36,19 +36,32 @@ public class RebuildIndexAsyncListener {
if (log.isInfoEnabled()) {
log.info("Rebuild index event: " + event.toString());
}
// Fetch all documents and files
// Clear the index
AppContext.getInstance().getIndexingHandler().clearIndex();
// Index all documents
TransactionUtil.handle(() -> {
// Fetch all documents
int offset = 0;
DocumentDao documentDao = new DocumentDao();
List<Document> documentList = documentDao.findAll();
List<Document> documentList;
do {
documentList = documentDao.findAll(offset, 100);
AppContext.getInstance().getIndexingHandler().createDocuments(documentList);
offset += 100;
} while (documentList.size() > 0);
});
// Fetch all files
// Index all files
TransactionUtil.handle(() -> {
int offset = 0;
FileDao fileDao = new FileDao();
List<File> fileList = fileDao.findAll();
// Rebuild index
AppContext.getInstance().getIndexingHandler().rebuildIndex(documentList, fileList);
List<File> fileList;
do {
fileList = fileDao.findAll(offset, 100);
AppContext.getInstance().getIndexingHandler().createFiles(fileList);
offset += 100;
} while (fileList.size() > 0);
});
}
}

View File

@@ -36,12 +36,23 @@ public interface IndexingHandler {
void shutDown();
/**
* Fully rebuild the index.
* Clear the index.
*/
void clearIndex();
/**
* Index a list of documents.
*
* @param documentList All documents
*/
void createDocuments(List<Document> documentList);
/**
* Index a list of files.
*
* @param fileList All files
*/
void rebuildIndex(List<Document> documentList, List<File> fileList);
void createFiles(List<File> fileList);
/**
* Index a new document.

View File

@@ -101,7 +101,7 @@ public class LuceneIndexingHandler implements IndexingHandler {
// Create an index writer
IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer());
config.setCommitOnClose(true);
config.setMergeScheduler(new SerialMergeScheduler());
config.setMergeScheduler(new ConcurrentMergeScheduler());
indexWriter = new IndexWriter(directory, config);
// Check index version and rebuild it if necessary
@@ -142,18 +142,23 @@ public class LuceneIndexingHandler implements IndexingHandler {
}
@Override
public void rebuildIndex(final List<Document> documentList, final List<File> fileList) {
handle(indexWriter -> {
// Empty index
indexWriter.deleteAll();
public void clearIndex() {
handle(IndexWriter::deleteAll);
}
// Add all documents
@Override
public void createDocuments(List<Document> documentList) {
handle(indexWriter -> {
for (Document document : documentList) {
org.apache.lucene.document.Document luceneDocument = getDocumentFromDocument(document);
indexWriter.addDocument(luceneDocument);
}
});
}
// Add all files
@Override
public void createFiles(List<File> fileList) {
handle(indexWriter -> {
for (File file : fileList) {
org.apache.lucene.document.Document luceneDocument = getDocumentFromFile(file);
indexWriter.addDocument(luceneDocument);

View File

@@ -36,7 +36,7 @@ public class PaginatedLists {
if (pageSize > MAX_PAGE_SIZE) {
pageSize = MAX_PAGE_SIZE;
}
return new PaginatedList<E>(pageSize, offset);
return new PaginatedList<>(pageSize, offset);
}
/**
@@ -54,11 +54,11 @@ public class PaginatedLists {
* @param paginatedList Paginated list object containing parameters, and into which results are added by side effects
* @param queryParam Query parameters
*/
public static <E> void executeCountQuery(PaginatedList<E> paginatedList, QueryParam queryParam) {
private static <E> void executeCountQuery(PaginatedList<E> paginatedList, QueryParam queryParam) {
StringBuilder sb = new StringBuilder("select count(*) as result_count from (");
sb.append(queryParam.getQueryString());
sb.append(") as t1");
QueryParam countQueryParam = new QueryParam(sb.toString(), queryParam.getParameterMap());
Query q = QueryUtil.getNativeQuery(countQueryParam);
@@ -70,7 +70,6 @@ public class PaginatedLists {
/**
* Executes a query and returns the data of the current page.
*
* @param em EntityManager
* @param paginatedList Paginated list object containing parameters, and into which results are added by side effects
* @param queryParam Query parameters
* @return List of results

View File

@@ -595,7 +595,7 @@ public class AppResource extends BaseResource {
// Get all files
FileDao fileDao = new FileDao();
List<File> fileList = fileDao.findAll();
List<File> fileList = fileDao.findAll(0, Integer.MAX_VALUE);
Map<String, File> fileMap = new HashMap<>();
for (File file : fileList) {
fileMap.put(file.getId(), file);