Closes #241: search highlighting

This commit is contained in:
Benjamin Gamard 2018-10-20 21:16:06 +02:00
parent 8c37af6207
commit 6ceb1f6c02
4 changed files with 34 additions and 17 deletions

View File

@ -101,6 +101,11 @@ public class DocumentDto {
*/ */
private String currentStepName; private String currentStepName;
/**
* Search highlight.
*/
private String highlight;
public String getId() { public String getId() {
return id; return id;
} }
@ -253,4 +258,13 @@ public class DocumentDto {
this.currentStepName = currentStepName; this.currentStepName = currentStepName;
return this; return this;
} }
public String getHighlight() {
return highlight;
}
public DocumentDto setHighlight(String highlight) {
this.highlight = highlight;
return this;
}
} }

View File

@ -3,6 +3,7 @@ package com.sismics.docs.core.util.indexing;
import com.google.common.base.Joiner; import com.google.common.base.Joiner;
import com.google.common.base.Strings; import com.google.common.base.Strings;
import com.google.common.collect.Lists; import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.sismics.docs.core.constant.ConfigType; import com.sismics.docs.core.constant.ConfigType;
import com.sismics.docs.core.constant.PermType; import com.sismics.docs.core.constant.PermType;
import com.sismics.docs.core.dao.ConfigDao; import com.sismics.docs.core.dao.ConfigDao;
@ -218,6 +219,7 @@ public class LuceneIndexingHandler implements IndexingHandler {
public void findByCriteria(PaginatedList<DocumentDto> paginatedList, List<String> suggestionList, DocumentCriteria criteria, SortCriteria sortCriteria) throws Exception { public void findByCriteria(PaginatedList<DocumentDto> paginatedList, List<String> suggestionList, DocumentCriteria criteria, SortCriteria sortCriteria) throws Exception {
Map<String, Object> parameterMap = new HashMap<>(); Map<String, Object> parameterMap = new HashMap<>();
List<String> criteriaList = new ArrayList<>(); List<String> criteriaList = new ArrayList<>();
Map<String, String> documentSearchMap = Maps.newHashMap();
StringBuilder sb = new StringBuilder("select distinct d.DOC_ID_C c0, d.DOC_TITLE_C c1, d.DOC_DESCRIPTION_C c2, d.DOC_CREATEDATE_D c3, d.DOC_LANGUAGE_C c4, "); StringBuilder sb = new StringBuilder("select distinct d.DOC_ID_C c0, d.DOC_TITLE_C c1, d.DOC_DESCRIPTION_C c2, d.DOC_CREATEDATE_D c3, d.DOC_LANGUAGE_C c4, ");
sb.append(" s.count c5, "); sb.append(" s.count c5, ");
@ -248,13 +250,13 @@ public class LuceneIndexingHandler implements IndexingHandler {
parameterMap.put("targetIdList", criteria.getTargetIdList()); parameterMap.put("targetIdList", criteria.getTargetIdList());
} }
if (!Strings.isNullOrEmpty(criteria.getSearch()) || !Strings.isNullOrEmpty(criteria.getFullSearch())) { if (!Strings.isNullOrEmpty(criteria.getSearch()) || !Strings.isNullOrEmpty(criteria.getFullSearch())) {
Set<String> documentIdList = search(criteria.getSearch(), criteria.getFullSearch()); documentSearchMap = search(criteria.getSearch(), criteria.getFullSearch());
if (documentIdList.isEmpty()) { if (documentSearchMap.isEmpty()) {
// If the search doesn't find any document, the request should return nothing // If the search doesn't find any document, the request should return nothing
documentIdList.add(UUID.randomUUID().toString()); documentSearchMap.put(UUID.randomUUID().toString(), null);
} }
criteriaList.add("d.DOC_ID_C in :documentIdList"); criteriaList.add("d.DOC_ID_C in :documentIdList");
parameterMap.put("documentIdList", documentIdList); parameterMap.put("documentIdList", documentSearchMap.keySet());
suggestSearchTerms(criteria.getSearch(), suggestionList); suggestSearchTerms(criteria.getSearch(), suggestionList);
} }
@ -330,6 +332,7 @@ public class LuceneIndexingHandler implements IndexingHandler {
documentDto.setActiveRoute(o[i++] != null); documentDto.setActiveRoute(o[i++] != null);
documentDto.setCurrentStepName((String) o[i++]); documentDto.setCurrentStepName((String) o[i++]);
documentDto.setUpdateTimestamp(((Timestamp) o[i]).getTime()); documentDto.setUpdateTimestamp(((Timestamp) o[i]).getTime());
documentDto.setHighlight(documentSearchMap.get(documentDto.getId()));
documentDtoList.add(documentDto); documentDtoList.add(documentDto);
} }
@ -365,10 +368,10 @@ public class LuceneIndexingHandler implements IndexingHandler {
* *
* @param searchQuery Search query on metadatas * @param searchQuery Search query on metadatas
* @param fullSearchQuery Search query on all fields * @param fullSearchQuery Search query on all fields
* @return List of document IDs * @return Map of document IDs as key and highlight as value
* @throws Exception e * @throws Exception e
*/ */
private Set<String> search(String searchQuery, String fullSearchQuery) throws Exception { private Map<String, String> search(String searchQuery, String fullSearchQuery) throws Exception {
// Escape query and add quotes so QueryParser generate a PhraseQuery // Escape query and add quotes so QueryParser generate a PhraseQuery
String escapedSearchQuery = "\"" + QueryParserUtil.escape(searchQuery + " " + fullSearchQuery) + "\""; String escapedSearchQuery = "\"" + QueryParserUtil.escape(searchQuery + " " + fullSearchQuery) + "\"";
String escapedFullSearchQuery = "\"" + QueryParserUtil.escape(fullSearchQuery) + "\""; String escapedFullSearchQuery = "\"" + QueryParserUtil.escape(fullSearchQuery) + "\"";
@ -396,10 +399,10 @@ public class LuceneIndexingHandler implements IndexingHandler {
// Search // Search
DirectoryReader directoryReader = getDirectoryReader(); DirectoryReader directoryReader = getDirectoryReader();
Set<String> documentIdList = new HashSet<>(); Map<String, String> documentMap = Maps.newHashMap();
if (directoryReader == null) { if (directoryReader == null) {
// The directory reader is not yet initialized (probably because there is nothing indexed) // The directory reader is not yet initialized (probably because there is nothing indexed)
return documentIdList; return documentMap;
} }
IndexSearcher searcher = new IndexSearcher(directoryReader); IndexSearcher searcher = new IndexSearcher(directoryReader);
TopDocs topDocs = searcher.search(query, Integer.MAX_VALUE); TopDocs topDocs = searcher.search(query, Integer.MAX_VALUE);
@ -409,31 +412,28 @@ public class LuceneIndexingHandler implements IndexingHandler {
SimpleHTMLEncoder simpleHTMLEncoder = new SimpleHTMLEncoder(); SimpleHTMLEncoder simpleHTMLEncoder = new SimpleHTMLEncoder();
Highlighter highlighter = new Highlighter(simpleHTMLFormatter, simpleHTMLEncoder, new QueryScorer(query)); Highlighter highlighter = new Highlighter(simpleHTMLFormatter, simpleHTMLEncoder, new QueryScorer(query));
// Extract document IDs // Extract document IDs and highlights
for (ScoreDoc doc : docs) { for (ScoreDoc doc : docs) {
org.apache.lucene.document.Document document = searcher.doc(doc.doc); org.apache.lucene.document.Document document = searcher.doc(doc.doc);
String type = document.get("doctype"); String type = document.get("doctype");
String documentId = null; String documentId = null;
String highlight = null;
if (type.equals("document")) { if (type.equals("document")) {
documentId = document.get("id"); documentId = document.get("id");
} else if (type.equals("file")) { } else if (type.equals("file")) {
documentId = document.get("document_id"); documentId = document.get("document_id");
/*
needs full reindexing from previous version to make it work, we now need the file content
String content = document.get("content"); String content = document.get("content");
if (content != null) { if (content != null) {
String hl = highlighter.getBestFragment(analyzer, "content", content); highlight = highlighter.getBestFragment(analyzer, "content", content);
System.out.println(hl);
}
*/
}
if (documentId != null) {
documentIdList.add(documentId);
} }
} }
return documentIdList; if (documentId != null) {
documentMap.put(documentId, highlight);
}
}
return documentMap;
} }
/** /**

View File

@ -392,6 +392,7 @@ public class DocumentResource extends BaseResource {
documents.add(Json.createObjectBuilder() documents.add(Json.createObjectBuilder()
.add("id", documentDto.getId()) .add("id", documentDto.getId())
.add("highlight", JsonUtil.nullable(documentDto.getHighlight()))
.add("title", documentDto.getTitle()) .add("title", documentDto.getTitle())
.add("description", JsonUtil.nullable(documentDto.getDescription())) .add("description", JsonUtil.nullable(documentDto.getDescription()))
.add("create_date", documentDto.getCreateTimestamp()) .add("create_date", documentDto.getCreateTimestamp())

View File

@ -246,6 +246,8 @@
{{ tag.name }} {{ tag.name }}
</span> </span>
</div> </div>
<div ng-if="document.highlight" class="small well-sm" ng-bind-html="document.highlight"></div>
</td> </td>
</tr> </tr>
</tbody> </tbody>