#241: search suggestions

This commit is contained in:
Benjamin Gamard 2018-10-19 19:13:39 +02:00
parent 7baf5e44fd
commit 8c37af6207
7 changed files with 104 additions and 21 deletions

View File

@ -112,6 +112,16 @@
<artifactId>lucene-queryparser</artifactId>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-suggest</artifactId>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-highlighter</artifactId>
</dependency>
<dependency>
<groupId>com.sun.mail</groupId>
<artifactId>javax.mail</artifactId>

View File

@ -100,9 +100,10 @@ public interface IndexingHandler {
* Searches documents by criteria.
*
* @param paginatedList List of documents (updated by side effects)
* @param suggestionList Suggestion of search query (updated by side effects)
* @param criteria Search criteria
* @param sortCriteria Sort criteria
* @throws Exception e
*/
void findByCriteria(PaginatedList<DocumentDto> paginatedList, DocumentCriteria criteria, SortCriteria sortCriteria) throws Exception;
void findByCriteria(PaginatedList<DocumentDto> paginatedList, List<String> suggestionList, DocumentCriteria criteria, SortCriteria sortCriteria) throws Exception;
}

View File

@ -18,6 +18,7 @@ import com.sismics.docs.core.util.jpa.PaginatedLists;
import com.sismics.docs.core.util.jpa.QueryParam;
import com.sismics.docs.core.util.jpa.SortCriteria;
import com.sismics.util.ClasspathScanner;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
@ -26,6 +27,13 @@ import org.apache.lucene.index.*;
import org.apache.lucene.queryparser.flexible.standard.QueryParserUtil;
import org.apache.lucene.queryparser.flexible.standard.StandardQueryParser;
import org.apache.lucene.search.*;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLEncoder;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.spell.LuceneDictionary;
import org.apache.lucene.search.suggest.Lookup;
import org.apache.lucene.search.suggest.analyzing.FuzzySuggester;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.NoLockFactory;
import org.apache.lucene.store.RAMDirectory;
@ -207,7 +215,7 @@ public class LuceneIndexingHandler implements IndexingHandler {
}
@Override
public void findByCriteria(PaginatedList<DocumentDto> paginatedList, DocumentCriteria criteria, SortCriteria sortCriteria) throws Exception {
public void findByCriteria(PaginatedList<DocumentDto> paginatedList, List<String> suggestionList, DocumentCriteria criteria, SortCriteria sortCriteria) throws Exception {
Map<String, Object> parameterMap = new HashMap<>();
List<String> criteriaList = new ArrayList<>();
@ -247,6 +255,8 @@ public class LuceneIndexingHandler implements IndexingHandler {
}
criteriaList.add("d.DOC_ID_C in :documentIdList");
parameterMap.put("documentIdList", documentIdList);
suggestSearchTerms(criteria.getSearch(), suggestionList);
}
if (criteria.getCreateDateMin() != null) {
criteriaList.add("d.DOC_CREATEDATE_D >= :createDateMin");
@ -326,6 +336,30 @@ public class LuceneIndexingHandler implements IndexingHandler {
paginatedList.setResultList(documentDtoList);
}
/**
 * Suggest search terms according to the user query.
 *
 * Only the last whitespace-delimited token of the query is completed: it is
 * looked up with a fuzzy suggester built from the terms of the "title" field
 * of the index. At most 10 suggestions are appended to the output list.
 *
 * @param search User search query (nothing is suggested if null)
 * @param suggestionList Suggestion of search query (updated by side effects)
 * @throws Exception e
 */
private void suggestSearchTerms(String search, List<String> suggestionList) throws Exception {
    if (search == null) {
        // Nothing to complete; avoid a NullPointerException on the string handling below
        return;
    }

    // Keep only the token being typed. lastIndexOf returns -1 when there is no
    // space, so "+ 1" yields 0 (whole string) in that case and otherwise skips
    // the space itself, so the separator is not part of the lookup key.
    String suggestQuery = search.substring(search.lastIndexOf(' ') + 1);

    DirectoryReader directoryReader = getDirectoryReader();
    if (directoryReader == null) {
        return;
    }

    // The suggester is rebuilt from the index terms on every call
    FuzzySuggester suggester = new FuzzySuggester(new StandardAnalyzer());
    LuceneDictionary dictionary = new LuceneDictionary(directoryReader, "title");
    suggester.build(dictionary);

    List<Lookup.LookupResult> lookupResultList = suggester.lookup(suggestQuery, false, 10);
    for (Lookup.LookupResult lookupResult : lookupResultList) {
        suggestionList.add(lookupResult.key.toString());
    }
}
/**
* Fulltext search in files and documents.
*
@ -336,27 +370,28 @@ public class LuceneIndexingHandler implements IndexingHandler {
*/
private Set<String> search(String searchQuery, String fullSearchQuery) throws Exception {
// Escape query and add quotes so QueryParser generate a PhraseQuery
searchQuery = "\"" + QueryParserUtil.escape(searchQuery + " " + fullSearchQuery) + "\"";
fullSearchQuery = "\"" + QueryParserUtil.escape(fullSearchQuery) + "\"";
String escapedSearchQuery = "\"" + QueryParserUtil.escape(searchQuery + " " + fullSearchQuery) + "\"";
String escapedFullSearchQuery = "\"" + QueryParserUtil.escape(fullSearchQuery) + "\"";
// Build search query
StandardQueryParser qpHelper = new StandardQueryParser(new StandardAnalyzer());
Analyzer analyzer = new StandardAnalyzer();
StandardQueryParser qpHelper = new StandardQueryParser(analyzer);
qpHelper.setPhraseSlop(100); // PhraseQuery add terms
// Search on documents and files
BooleanQuery query = new BooleanQuery.Builder()
.add(qpHelper.parse(searchQuery, "title"), BooleanClause.Occur.SHOULD)
.add(qpHelper.parse(searchQuery, "description"), BooleanClause.Occur.SHOULD)
.add(qpHelper.parse(searchQuery, "subject"), BooleanClause.Occur.SHOULD)
.add(qpHelper.parse(searchQuery, "identifier"), BooleanClause.Occur.SHOULD)
.add(qpHelper.parse(searchQuery, "publisher"), BooleanClause.Occur.SHOULD)
.add(qpHelper.parse(searchQuery, "format"), BooleanClause.Occur.SHOULD)
.add(qpHelper.parse(searchQuery, "source"), BooleanClause.Occur.SHOULD)
.add(qpHelper.parse(searchQuery, "type"), BooleanClause.Occur.SHOULD)
.add(qpHelper.parse(searchQuery, "coverage"), BooleanClause.Occur.SHOULD)
.add(qpHelper.parse(searchQuery, "rights"), BooleanClause.Occur.SHOULD)
.add(qpHelper.parse(searchQuery, "filename"), BooleanClause.Occur.SHOULD)
.add(qpHelper.parse(fullSearchQuery, "content"), BooleanClause.Occur.SHOULD)
.add(qpHelper.parse(escapedSearchQuery, "title"), BooleanClause.Occur.SHOULD)
.add(qpHelper.parse(escapedSearchQuery, "description"), BooleanClause.Occur.SHOULD)
.add(qpHelper.parse(escapedSearchQuery, "subject"), BooleanClause.Occur.SHOULD)
.add(qpHelper.parse(escapedSearchQuery, "identifier"), BooleanClause.Occur.SHOULD)
.add(qpHelper.parse(escapedSearchQuery, "publisher"), BooleanClause.Occur.SHOULD)
.add(qpHelper.parse(escapedSearchQuery, "format"), BooleanClause.Occur.SHOULD)
.add(qpHelper.parse(escapedSearchQuery, "source"), BooleanClause.Occur.SHOULD)
.add(qpHelper.parse(escapedSearchQuery, "type"), BooleanClause.Occur.SHOULD)
.add(qpHelper.parse(escapedSearchQuery, "coverage"), BooleanClause.Occur.SHOULD)
.add(qpHelper.parse(escapedSearchQuery, "rights"), BooleanClause.Occur.SHOULD)
.add(qpHelper.parse(escapedSearchQuery, "filename"), BooleanClause.Occur.SHOULD)
.add(qpHelper.parse(escapedFullSearchQuery, "content"), BooleanClause.Occur.SHOULD)
.build();
// Search
@ -370,6 +405,10 @@ public class LuceneIndexingHandler implements IndexingHandler {
TopDocs topDocs = searcher.search(query, Integer.MAX_VALUE);
ScoreDoc[] docs = topDocs.scoreDocs;
SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<strong>", "</strong>");
SimpleHTMLEncoder simpleHTMLEncoder = new SimpleHTMLEncoder();
Highlighter highlighter = new Highlighter(simpleHTMLFormatter, simpleHTMLEncoder, new QueryScorer(query));
// Extract document IDs
for (ScoreDoc doc : docs) {
org.apache.lucene.document.Document document = searcher.doc(doc.doc);
@ -379,6 +418,15 @@ public class LuceneIndexingHandler implements IndexingHandler {
documentId = document.get("id");
} else if (type.equals("file")) {
documentId = document.get("document_id");
/*
needs full reindexing from previous version to make it work, we now need the file content
String content = document.get("content");
if (content != null) {
String hl = highlighter.getBestFragment(analyzer, "content", content);
System.out.println(hl);
}
*/
}
if (documentId != null) {
documentIdList.add(documentId);
@ -447,7 +495,7 @@ public class LuceneIndexingHandler implements IndexingHandler {
luceneDocument.add(new StringField("document_id", file.getDocumentId(), Field.Store.YES));
}
if (file.getContent() != null) {
luceneDocument.add(new TextField("content", file.getContent(), Field.Store.NO));
luceneDocument.add(new TextField("content", file.getContent(), Field.Store.YES));
}
return luceneDocument;

View File

@ -367,11 +367,12 @@ public class DocumentResource extends BaseResource {
TagDao tagDao = new TagDao();
PaginatedList<DocumentDto> paginatedList = PaginatedLists.create(limit, offset);
List<String> suggestionList = Lists.newArrayList();
SortCriteria sortCriteria = new SortCriteria(sortColumn, asc);
DocumentCriteria documentCriteria = parseSearchQuery(search);
documentCriteria.setTargetIdList(getTargetIdList(null));
try {
AppContext.getInstance().getIndexingHandler().findByCriteria(paginatedList, documentCriteria, sortCriteria);
AppContext.getInstance().getIndexingHandler().findByCriteria(paginatedList, suggestionList, documentCriteria, sortCriteria);
} catch (Exception e) {
throw new ServerException("SearchError", "Error searching in documents", e);
}
@ -402,8 +403,15 @@ public class DocumentResource extends BaseResource {
.add("file_count", documentDto.getFileCount())
.add("tags", tags));
}
JsonArrayBuilder suggestions = Json.createArrayBuilder();
for (String suggestion : suggestionList) {
suggestions.add(suggestion);
}
response.add("total", paginatedList.getResultCount())
.add("documents", documents);
.add("documents", documents)
.add("suggestions", suggestions);
return Response.ok().entity(response.build()).build();
}

View File

@ -38,6 +38,7 @@ angular.module('docs').controller('Document', function ($scope, $rootScope, $tim
.then(function (data) {
$scope.documents = data.documents;
$scope.totalDocuments = data.total;
$scope.suggestions = data.suggestions;
});
};

View File

@ -19,7 +19,10 @@
<!-- Search (simple and advanced) -->
<div class="row search-dropdown-anchor">
<div class="col-xs-12 input-group">
<input type="search" class="form-control" ng-attr-placeholder="{{ 'document.search' | translate }}" ng-model="search" />
<input type="search" class="form-control"
uib-typeahead="suggestion for suggestion in suggestions"
typeahead-focus-first="false"
ng-attr-placeholder="{{ 'document.search' | translate }}" ng-model="search" />
<span class="input-group-addon btn" ng-click="openSearch()">
<div uib-dropdown
auto-close="disabled" is-open="searchOpened" dropdown-append-to="searchDropdownAnchor">

12
pom.xml
View File

@ -388,6 +388,18 @@
<version>${org.apache.lucene.version}</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-suggest</artifactId>
<version>${org.apache.lucene.version}</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-highlighter</artifactId>
<version>${org.apache.lucene.version}</version>
</dependency>
<dependency>
<groupId>org.imgscalr</groupId>
<artifactId>imgscalr-lib</artifactId>