Closes #222: make full reindexing scalable + use concurrent Lucene merges

This commit is contained in:
Benjamin Gamard 2018-10-29 18:32:39 +01:00
parent 2134f116da
commit 35c3ee023b
7 changed files with 64 additions and 28 deletions

View File

@ -46,12 +46,16 @@ public class DocumentDao {
/** /**
* Returns the list of all active documents. * Returns the list of all active documents.
* *
* @param offset Offset
* @param limit Limit
* @return List of documents * @return List of documents
*/ */
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
public List<Document> findAll() { public List<Document> findAll(int offset, int limit) {
EntityManager em = ThreadLocalContext.get().getEntityManager(); EntityManager em = ThreadLocalContext.get().getEntityManager();
Query q = em.createQuery("select d from Document d where d.deleteDate is null"); Query q = em.createQuery("select d from Document d where d.deleteDate is null");
q.setFirstResult(offset);
q.setMaxResults(limit);
return q.getResultList(); return q.getResultList();
} }

View File

@ -43,12 +43,16 @@ public class FileDao {
/** /**
* Returns the list of all files. * Returns the list of all files.
* *
* @param offset Offset
* @param limit Limit
* @return List of files * @return List of files
*/ */
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
public List<File> findAll() { public List<File> findAll(int offset, int limit) {
EntityManager em = ThreadLocalContext.get().getEntityManager(); EntityManager em = ThreadLocalContext.get().getEntityManager();
Query q = em.createQuery("select f from File f where f.deleteDate is null"); Query q = em.createQuery("select f from File f where f.deleteDate is null");
q.setFirstResult(offset);
q.setMaxResults(limit);
return q.getResultList(); return q.getResultList();
} }

View File

@ -37,18 +37,31 @@ public class RebuildIndexAsyncListener {
log.info("Rebuild index event: " + event.toString()); log.info("Rebuild index event: " + event.toString());
} }
// Fetch all documents and files // Clear the index
AppContext.getInstance().getIndexingHandler().clearIndex();
// Index all documents
TransactionUtil.handle(() -> { TransactionUtil.handle(() -> {
// Fetch all documents int offset = 0;
DocumentDao documentDao = new DocumentDao(); DocumentDao documentDao = new DocumentDao();
List<Document> documentList = documentDao.findAll(); List<Document> documentList;
do {
documentList = documentDao.findAll(offset, 100);
AppContext.getInstance().getIndexingHandler().createDocuments(documentList);
offset += 100;
} while (documentList.size() > 0);
});
// Fetch all files // Index all files
TransactionUtil.handle(() -> {
int offset = 0;
FileDao fileDao = new FileDao(); FileDao fileDao = new FileDao();
List<File> fileList = fileDao.findAll(); List<File> fileList;
do {
// Rebuild index fileList = fileDao.findAll(offset, 100);
AppContext.getInstance().getIndexingHandler().rebuildIndex(documentList, fileList); AppContext.getInstance().getIndexingHandler().createFiles(fileList);
offset += 100;
} while (fileList.size() > 0);
}); });
} }
} }

View File

@ -36,12 +36,23 @@ public interface IndexingHandler {
void shutDown(); void shutDown();
/** /**
* Fully rebuild the index. * Clear the index.
*/
void clearIndex();
/**
* Index a list of documents.
* *
* @param documentList All documents * @param documentList All documents
*/
void createDocuments(List<Document> documentList);
/**
* Index a list of files.
*
* @param fileList All files * @param fileList All files
*/ */
void rebuildIndex(List<Document> documentList, List<File> fileList); void createFiles(List<File> fileList);
/** /**
* Index a new document. * Index a new document.

View File

@ -101,7 +101,7 @@ public class LuceneIndexingHandler implements IndexingHandler {
// Create an index writer // Create an index writer
IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer()); IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer());
config.setCommitOnClose(true); config.setCommitOnClose(true);
config.setMergeScheduler(new SerialMergeScheduler()); config.setMergeScheduler(new ConcurrentMergeScheduler());
indexWriter = new IndexWriter(directory, config); indexWriter = new IndexWriter(directory, config);
// Check index version and rebuild it if necessary // Check index version and rebuild it if necessary
@ -142,18 +142,23 @@ public class LuceneIndexingHandler implements IndexingHandler {
} }
@Override @Override
public void rebuildIndex(final List<Document> documentList, final List<File> fileList) { public void clearIndex() {
handle(indexWriter -> { handle(IndexWriter::deleteAll);
// Empty index }
indexWriter.deleteAll();
// Add all documents @Override
public void createDocuments(List<Document> documentList) {
handle(indexWriter -> {
for (Document document : documentList) { for (Document document : documentList) {
org.apache.lucene.document.Document luceneDocument = getDocumentFromDocument(document); org.apache.lucene.document.Document luceneDocument = getDocumentFromDocument(document);
indexWriter.addDocument(luceneDocument); indexWriter.addDocument(luceneDocument);
} }
});
}
// Add all files @Override
public void createFiles(List<File> fileList) {
handle(indexWriter -> {
for (File file : fileList) { for (File file : fileList) {
org.apache.lucene.document.Document luceneDocument = getDocumentFromFile(file); org.apache.lucene.document.Document luceneDocument = getDocumentFromFile(file);
indexWriter.addDocument(luceneDocument); indexWriter.addDocument(luceneDocument);

View File

@ -36,7 +36,7 @@ public class PaginatedLists {
if (pageSize > MAX_PAGE_SIZE) { if (pageSize > MAX_PAGE_SIZE) {
pageSize = MAX_PAGE_SIZE; pageSize = MAX_PAGE_SIZE;
} }
return new PaginatedList<E>(pageSize, offset); return new PaginatedList<>(pageSize, offset);
} }
/** /**
@ -54,7 +54,7 @@ public class PaginatedLists {
* @param paginatedList Paginated list object containing parameters, and into which results are added by side effects * @param paginatedList Paginated list object containing parameters, and into which results are added by side effects
* @param queryParam Query parameters * @param queryParam Query parameters
*/ */
public static <E> void executeCountQuery(PaginatedList<E> paginatedList, QueryParam queryParam) { private static <E> void executeCountQuery(PaginatedList<E> paginatedList, QueryParam queryParam) {
StringBuilder sb = new StringBuilder("select count(*) as result_count from ("); StringBuilder sb = new StringBuilder("select count(*) as result_count from (");
sb.append(queryParam.getQueryString()); sb.append(queryParam.getQueryString());
sb.append(") as t1"); sb.append(") as t1");
@ -70,7 +70,6 @@ public class PaginatedLists {
/** /**
* Executes a query and returns the data of the current page. * Executes a query and returns the data of the current page.
* *
* @param em EntityManager
* @param paginatedList Paginated list object containing parameters, and into which results are added by side effects * @param paginatedList Paginated list object containing parameters, and into which results are added by side effects
* @param queryParam Query parameters * @param queryParam Query parameters
* @return List of results * @return List of results

View File

@ -595,7 +595,7 @@ public class AppResource extends BaseResource {
// Get all files // Get all files
FileDao fileDao = new FileDao(); FileDao fileDao = new FileDao();
List<File> fileList = fileDao.findAll(); List<File> fileList = fileDao.findAll(0, Integer.MAX_VALUE);
Map<String, File> fileMap = new HashMap<>(); Map<String, File> fileMap = new HashMap<>();
for (File file : fileList) { for (File file : fileList) {
fileMap.put(file.getId(), file); fileMap.put(file.getId(), file);