#241: search suggestions

This commit is contained in:
Benjamin Gamard 2018-10-19 19:13:39 +02:00
parent 7baf5e44fd
commit 8c37af6207
7 changed files with 104 additions and 21 deletions

View File

@ -112,6 +112,16 @@
<artifactId>lucene-queryparser</artifactId> <artifactId>lucene-queryparser</artifactId>
</dependency> </dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-suggest</artifactId>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-highlighter</artifactId>
</dependency>
<dependency> <dependency>
<groupId>com.sun.mail</groupId> <groupId>com.sun.mail</groupId>
<artifactId>javax.mail</artifactId> <artifactId>javax.mail</artifactId>

View File

@ -100,9 +100,10 @@ public interface IndexingHandler {
* Searches documents by criteria. * Searches documents by criteria.
* *
* @param paginatedList List of documents (updated by side effects) * @param paginatedList List of documents (updated by side effects)
* @param suggestionList Suggested search terms for the query (updated by side effects)
* @param criteria Search criteria * @param criteria Search criteria
* @param sortCriteria Sort criteria * @param sortCriteria Sort criteria
* @throws Exception e * @throws Exception e
*/ */
void findByCriteria(PaginatedList<DocumentDto> paginatedList, DocumentCriteria criteria, SortCriteria sortCriteria) throws Exception; void findByCriteria(PaginatedList<DocumentDto> paginatedList, List<String> suggestionList, DocumentCriteria criteria, SortCriteria sortCriteria) throws Exception;
} }

View File

@ -18,6 +18,7 @@ import com.sismics.docs.core.util.jpa.PaginatedLists;
import com.sismics.docs.core.util.jpa.QueryParam; import com.sismics.docs.core.util.jpa.QueryParam;
import com.sismics.docs.core.util.jpa.SortCriteria; import com.sismics.docs.core.util.jpa.SortCriteria;
import com.sismics.util.ClasspathScanner; import com.sismics.util.ClasspathScanner;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Field; import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField; import org.apache.lucene.document.StringField;
@ -26,6 +27,13 @@ import org.apache.lucene.index.*;
import org.apache.lucene.queryparser.flexible.standard.QueryParserUtil; import org.apache.lucene.queryparser.flexible.standard.QueryParserUtil;
import org.apache.lucene.queryparser.flexible.standard.StandardQueryParser; import org.apache.lucene.queryparser.flexible.standard.StandardQueryParser;
import org.apache.lucene.search.*; import org.apache.lucene.search.*;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLEncoder;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.spell.LuceneDictionary;
import org.apache.lucene.search.suggest.Lookup;
import org.apache.lucene.search.suggest.analyzing.FuzzySuggester;
import org.apache.lucene.store.Directory; import org.apache.lucene.store.Directory;
import org.apache.lucene.store.NoLockFactory; import org.apache.lucene.store.NoLockFactory;
import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.store.RAMDirectory;
@ -207,7 +215,7 @@ public class LuceneIndexingHandler implements IndexingHandler {
} }
@Override @Override
public void findByCriteria(PaginatedList<DocumentDto> paginatedList, DocumentCriteria criteria, SortCriteria sortCriteria) throws Exception { public void findByCriteria(PaginatedList<DocumentDto> paginatedList, List<String> suggestionList, DocumentCriteria criteria, SortCriteria sortCriteria) throws Exception {
Map<String, Object> parameterMap = new HashMap<>(); Map<String, Object> parameterMap = new HashMap<>();
List<String> criteriaList = new ArrayList<>(); List<String> criteriaList = new ArrayList<>();
@ -247,6 +255,8 @@ public class LuceneIndexingHandler implements IndexingHandler {
} }
criteriaList.add("d.DOC_ID_C in :documentIdList"); criteriaList.add("d.DOC_ID_C in :documentIdList");
parameterMap.put("documentIdList", documentIdList); parameterMap.put("documentIdList", documentIdList);
suggestSearchTerms(criteria.getSearch(), suggestionList);
} }
if (criteria.getCreateDateMin() != null) { if (criteria.getCreateDateMin() != null) {
criteriaList.add("d.DOC_CREATEDATE_D >= :createDateMin"); criteriaList.add("d.DOC_CREATEDATE_D >= :createDateMin");
@ -326,6 +336,30 @@ public class LuceneIndexingHandler implements IndexingHandler {
paginatedList.setResultList(documentDtoList); paginatedList.setResultList(documentDtoList);
} }
/**
 * Suggests search terms completing the last word of the user query.
 *
 * Only the text following the last space of the query is used as the lookup key.
 * Up to 10 suggestions, looked up in the terms of the "title" field, are appended
 * to the given list. Nothing is added when the query is null, when there is no text
 * after the last space, or when no directory reader is available.
 *
 * @param search User search query
 * @param suggestionList Suggested search terms (updated by side effects)
 * @throws Exception e
 */
private void suggestSearchTerms(String search, List<String> suggestionList) throws Exception {
    if (search == null) {
        return;
    }

    // Keep only the last term of the query. The "+ 1" skips the separating space itself;
    // when there is no space, lastIndexOf returns -1 and the whole query is kept.
    String suggestQuery = search.substring(search.lastIndexOf(' ') + 1);
    if (suggestQuery.isEmpty()) {
        // Nothing typed after the last space: there is no term to complete
        return;
    }

    DirectoryReader directoryReader = getDirectoryReader();
    if (directoryReader == null) {
        return;
    }

    // NOTE(review): the suggester is rebuilt from the "title" dictionary on every call;
    // consider caching it if this shows up in search latency.
    // The analyzer is only needed while building and querying the suggester, so it is
    // closed as soon as the lookup is done.
    try (StandardAnalyzer analyzer = new StandardAnalyzer()) {
        FuzzySuggester suggester = new FuzzySuggester(analyzer);
        LuceneDictionary dictionary = new LuceneDictionary(directoryReader, "title");
        suggester.build(dictionary);

        List<Lookup.LookupResult> lookupResultList = suggester.lookup(suggestQuery, false, 10);
        for (Lookup.LookupResult lookupResult : lookupResultList) {
            suggestionList.add(lookupResult.key.toString());
        }
    }
}
/** /**
* Fulltext search in files and documents. * Fulltext search in files and documents.
* *
@ -336,27 +370,28 @@ public class LuceneIndexingHandler implements IndexingHandler {
*/ */
private Set<String> search(String searchQuery, String fullSearchQuery) throws Exception { private Set<String> search(String searchQuery, String fullSearchQuery) throws Exception {
// Escape query and add quotes so QueryParser generate a PhraseQuery // Escape query and add quotes so QueryParser generate a PhraseQuery
searchQuery = "\"" + QueryParserUtil.escape(searchQuery + " " + fullSearchQuery) + "\""; String escapedSearchQuery = "\"" + QueryParserUtil.escape(searchQuery + " " + fullSearchQuery) + "\"";
fullSearchQuery = "\"" + QueryParserUtil.escape(fullSearchQuery) + "\""; String escapedFullSearchQuery = "\"" + QueryParserUtil.escape(fullSearchQuery) + "\"";
// Build search query // Build search query
StandardQueryParser qpHelper = new StandardQueryParser(new StandardAnalyzer()); Analyzer analyzer = new StandardAnalyzer();
StandardQueryParser qpHelper = new StandardQueryParser(analyzer);
qpHelper.setPhraseSlop(100); // PhraseQuery add terms qpHelper.setPhraseSlop(100); // PhraseQuery add terms
// Search on documents and files // Search on documents and files
BooleanQuery query = new BooleanQuery.Builder() BooleanQuery query = new BooleanQuery.Builder()
.add(qpHelper.parse(searchQuery, "title"), BooleanClause.Occur.SHOULD) .add(qpHelper.parse(escapedSearchQuery, "title"), BooleanClause.Occur.SHOULD)
.add(qpHelper.parse(searchQuery, "description"), BooleanClause.Occur.SHOULD) .add(qpHelper.parse(escapedSearchQuery, "description"), BooleanClause.Occur.SHOULD)
.add(qpHelper.parse(searchQuery, "subject"), BooleanClause.Occur.SHOULD) .add(qpHelper.parse(escapedSearchQuery, "subject"), BooleanClause.Occur.SHOULD)
.add(qpHelper.parse(searchQuery, "identifier"), BooleanClause.Occur.SHOULD) .add(qpHelper.parse(escapedSearchQuery, "identifier"), BooleanClause.Occur.SHOULD)
.add(qpHelper.parse(searchQuery, "publisher"), BooleanClause.Occur.SHOULD) .add(qpHelper.parse(escapedSearchQuery, "publisher"), BooleanClause.Occur.SHOULD)
.add(qpHelper.parse(searchQuery, "format"), BooleanClause.Occur.SHOULD) .add(qpHelper.parse(escapedSearchQuery, "format"), BooleanClause.Occur.SHOULD)
.add(qpHelper.parse(searchQuery, "source"), BooleanClause.Occur.SHOULD) .add(qpHelper.parse(escapedSearchQuery, "source"), BooleanClause.Occur.SHOULD)
.add(qpHelper.parse(searchQuery, "type"), BooleanClause.Occur.SHOULD) .add(qpHelper.parse(escapedSearchQuery, "type"), BooleanClause.Occur.SHOULD)
.add(qpHelper.parse(searchQuery, "coverage"), BooleanClause.Occur.SHOULD) .add(qpHelper.parse(escapedSearchQuery, "coverage"), BooleanClause.Occur.SHOULD)
.add(qpHelper.parse(searchQuery, "rights"), BooleanClause.Occur.SHOULD) .add(qpHelper.parse(escapedSearchQuery, "rights"), BooleanClause.Occur.SHOULD)
.add(qpHelper.parse(searchQuery, "filename"), BooleanClause.Occur.SHOULD) .add(qpHelper.parse(escapedSearchQuery, "filename"), BooleanClause.Occur.SHOULD)
.add(qpHelper.parse(fullSearchQuery, "content"), BooleanClause.Occur.SHOULD) .add(qpHelper.parse(escapedFullSearchQuery, "content"), BooleanClause.Occur.SHOULD)
.build(); .build();
// Search // Search
@ -370,6 +405,10 @@ public class LuceneIndexingHandler implements IndexingHandler {
TopDocs topDocs = searcher.search(query, Integer.MAX_VALUE); TopDocs topDocs = searcher.search(query, Integer.MAX_VALUE);
ScoreDoc[] docs = topDocs.scoreDocs; ScoreDoc[] docs = topDocs.scoreDocs;
SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<strong>", "</strong>");
SimpleHTMLEncoder simpleHTMLEncoder = new SimpleHTMLEncoder();
Highlighter highlighter = new Highlighter(simpleHTMLFormatter, simpleHTMLEncoder, new QueryScorer(query));
// Extract document IDs // Extract document IDs
for (ScoreDoc doc : docs) { for (ScoreDoc doc : docs) {
org.apache.lucene.document.Document document = searcher.doc(doc.doc); org.apache.lucene.document.Document document = searcher.doc(doc.doc);
@ -379,6 +418,15 @@ public class LuceneIndexingHandler implements IndexingHandler {
documentId = document.get("id"); documentId = document.get("id");
} else if (type.equals("file")) { } else if (type.equals("file")) {
documentId = document.get("document_id"); documentId = document.get("document_id");
/*
needs full reindexing from previous version to make it work, we now need the file content
String content = document.get("content");
if (content != null) {
String hl = highlighter.getBestFragment(analyzer, "content", content);
System.out.println(hl);
}
*/
} }
if (documentId != null) { if (documentId != null) {
documentIdList.add(documentId); documentIdList.add(documentId);
@ -447,7 +495,7 @@ public class LuceneIndexingHandler implements IndexingHandler {
luceneDocument.add(new StringField("document_id", file.getDocumentId(), Field.Store.YES)); luceneDocument.add(new StringField("document_id", file.getDocumentId(), Field.Store.YES));
} }
if (file.getContent() != null) { if (file.getContent() != null) {
luceneDocument.add(new TextField("content", file.getContent(), Field.Store.NO)); luceneDocument.add(new TextField("content", file.getContent(), Field.Store.YES));
} }
return luceneDocument; return luceneDocument;

View File

@ -367,11 +367,12 @@ public class DocumentResource extends BaseResource {
TagDao tagDao = new TagDao(); TagDao tagDao = new TagDao();
PaginatedList<DocumentDto> paginatedList = PaginatedLists.create(limit, offset); PaginatedList<DocumentDto> paginatedList = PaginatedLists.create(limit, offset);
List<String> suggestionList = Lists.newArrayList();
SortCriteria sortCriteria = new SortCriteria(sortColumn, asc); SortCriteria sortCriteria = new SortCriteria(sortColumn, asc);
DocumentCriteria documentCriteria = parseSearchQuery(search); DocumentCriteria documentCriteria = parseSearchQuery(search);
documentCriteria.setTargetIdList(getTargetIdList(null)); documentCriteria.setTargetIdList(getTargetIdList(null));
try { try {
AppContext.getInstance().getIndexingHandler().findByCriteria(paginatedList, documentCriteria, sortCriteria); AppContext.getInstance().getIndexingHandler().findByCriteria(paginatedList, suggestionList, documentCriteria, sortCriteria);
} catch (Exception e) { } catch (Exception e) {
throw new ServerException("SearchError", "Error searching in documents", e); throw new ServerException("SearchError", "Error searching in documents", e);
} }
@ -402,8 +403,15 @@ public class DocumentResource extends BaseResource {
.add("file_count", documentDto.getFileCount()) .add("file_count", documentDto.getFileCount())
.add("tags", tags)); .add("tags", tags));
} }
JsonArrayBuilder suggestions = Json.createArrayBuilder();
for (String suggestion : suggestionList) {
suggestions.add(suggestion);
}
response.add("total", paginatedList.getResultCount()) response.add("total", paginatedList.getResultCount())
.add("documents", documents); .add("documents", documents)
.add("suggestions", suggestions);
return Response.ok().entity(response.build()).build(); return Response.ok().entity(response.build()).build();
} }

View File

@ -38,6 +38,7 @@ angular.module('docs').controller('Document', function ($scope, $rootScope, $tim
.then(function (data) { .then(function (data) {
$scope.documents = data.documents; $scope.documents = data.documents;
$scope.totalDocuments = data.total; $scope.totalDocuments = data.total;
$scope.suggestions = data.suggestions;
}); });
}; };

View File

@ -19,7 +19,10 @@
<!-- Search (simple and advanced) --> <!-- Search (simple and advanced) -->
<div class="row search-dropdown-anchor"> <div class="row search-dropdown-anchor">
<div class="col-xs-12 input-group"> <div class="col-xs-12 input-group">
<input type="search" class="form-control" ng-attr-placeholder="{{ 'document.search' | translate }}" ng-model="search" /> <input type="search" class="form-control"
uib-typeahead="suggestion for suggestion in suggestions"
typeahead-focus-first="false"
ng-attr-placeholder="{{ 'document.search' | translate }}" ng-model="search" />
<span class="input-group-addon btn" ng-click="openSearch()"> <span class="input-group-addon btn" ng-click="openSearch()">
<div uib-dropdown <div uib-dropdown
auto-close="disabled" is-open="searchOpened" dropdown-append-to="searchDropdownAnchor"> auto-close="disabled" is-open="searchOpened" dropdown-append-to="searchDropdownAnchor">

12
pom.xml
View File

@ -388,6 +388,18 @@
<version>${org.apache.lucene.version}</version> <version>${org.apache.lucene.version}</version>
</dependency> </dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-suggest</artifactId>
<version>${org.apache.lucene.version}</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-highlighter</artifactId>
<version>${org.apache.lucene.version}</version>
</dependency>
<dependency> <dependency>
<groupId>org.imgscalr</groupId> <groupId>org.imgscalr</groupId>
<artifactId>imgscalr-lib</artifactId> <artifactId>imgscalr-lib</artifactId>