mirror of
https://github.com/sismics/docs.git
synced 2025-01-18 07:45:10 +01:00
#241: search suggestions
This commit is contained in:
parent
7baf5e44fd
commit
8c37af6207
@ -112,6 +112,16 @@
|
||||
<artifactId>lucene-queryparser</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.lucene</groupId>
|
||||
<artifactId>lucene-suggest</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.lucene</groupId>
|
||||
<artifactId>lucene-highlighter</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.sun.mail</groupId>
|
||||
<artifactId>javax.mail</artifactId>
|
||||
|
@ -100,9 +100,10 @@ public interface IndexingHandler {
|
||||
* Searches documents by criteria.
|
||||
*
|
||||
* @param paginatedList List of documents (updated by side effects)
|
||||
* @param suggestionList Suggestion of search query (updated by side effects)
|
||||
* @param criteria Search criteria
|
||||
* @param sortCriteria Sort criteria
|
||||
* @throws Exception e
|
||||
*/
|
||||
void findByCriteria(PaginatedList<DocumentDto> paginatedList, DocumentCriteria criteria, SortCriteria sortCriteria) throws Exception;
|
||||
void findByCriteria(PaginatedList<DocumentDto> paginatedList, List<String> suggestionList, DocumentCriteria criteria, SortCriteria sortCriteria) throws Exception;
|
||||
}
|
||||
|
@ -18,6 +18,7 @@ import com.sismics.docs.core.util.jpa.PaginatedLists;
|
||||
import com.sismics.docs.core.util.jpa.QueryParam;
|
||||
import com.sismics.docs.core.util.jpa.SortCriteria;
|
||||
import com.sismics.util.ClasspathScanner;
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.StringField;
|
||||
@ -26,6 +27,13 @@ import org.apache.lucene.index.*;
|
||||
import org.apache.lucene.queryparser.flexible.standard.QueryParserUtil;
|
||||
import org.apache.lucene.queryparser.flexible.standard.StandardQueryParser;
|
||||
import org.apache.lucene.search.*;
|
||||
import org.apache.lucene.search.highlight.Highlighter;
|
||||
import org.apache.lucene.search.highlight.QueryScorer;
|
||||
import org.apache.lucene.search.highlight.SimpleHTMLEncoder;
|
||||
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
|
||||
import org.apache.lucene.search.spell.LuceneDictionary;
|
||||
import org.apache.lucene.search.suggest.Lookup;
|
||||
import org.apache.lucene.search.suggest.analyzing.FuzzySuggester;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.NoLockFactory;
|
||||
import org.apache.lucene.store.RAMDirectory;
|
||||
@ -207,7 +215,7 @@ public class LuceneIndexingHandler implements IndexingHandler {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void findByCriteria(PaginatedList<DocumentDto> paginatedList, DocumentCriteria criteria, SortCriteria sortCriteria) throws Exception {
|
||||
public void findByCriteria(PaginatedList<DocumentDto> paginatedList, List<String> suggestionList, DocumentCriteria criteria, SortCriteria sortCriteria) throws Exception {
|
||||
Map<String, Object> parameterMap = new HashMap<>();
|
||||
List<String> criteriaList = new ArrayList<>();
|
||||
|
||||
@ -247,6 +255,8 @@ public class LuceneIndexingHandler implements IndexingHandler {
|
||||
}
|
||||
criteriaList.add("d.DOC_ID_C in :documentIdList");
|
||||
parameterMap.put("documentIdList", documentIdList);
|
||||
|
||||
suggestSearchTerms(criteria.getSearch(), suggestionList);
|
||||
}
|
||||
if (criteria.getCreateDateMin() != null) {
|
||||
criteriaList.add("d.DOC_CREATEDATE_D >= :createDateMin");
|
||||
@ -326,6 +336,30 @@ public class LuceneIndexingHandler implements IndexingHandler {
|
||||
paginatedList.setResultList(documentDtoList);
|
||||
}
|
||||
|
||||
/**
|
||||
* Suggest search terms according to the user query.
|
||||
*
|
||||
* @param search User search query
|
||||
* @param suggestionList Suggestion of search query (updated by side effects)
|
||||
* @throws Exception e
|
||||
*/
|
||||
private void suggestSearchTerms(String search, List<String> suggestionList) throws Exception {
|
||||
DirectoryReader directoryReader = getDirectoryReader();
|
||||
if (directoryReader == null) {
|
||||
return;
|
||||
}
|
||||
|
||||
FuzzySuggester suggester = new FuzzySuggester(new StandardAnalyzer());
|
||||
LuceneDictionary dictionary = new LuceneDictionary(directoryReader, "title");
|
||||
suggester.build(dictionary);
|
||||
int lastIndex = search.lastIndexOf(' ');
|
||||
String suggestQuery = search.substring(lastIndex < 0 ? 0 : lastIndex);
|
||||
List<Lookup.LookupResult> lookupResultList = suggester.lookup(suggestQuery, false, 10);
|
||||
for (Lookup.LookupResult lookupResult : lookupResultList) {
|
||||
suggestionList.add(lookupResult.key.toString());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Fulltext search in files and documents.
|
||||
*
|
||||
@ -336,27 +370,28 @@ public class LuceneIndexingHandler implements IndexingHandler {
|
||||
*/
|
||||
private Set<String> search(String searchQuery, String fullSearchQuery) throws Exception {
|
||||
// Escape query and add quotes so QueryParser generate a PhraseQuery
|
||||
searchQuery = "\"" + QueryParserUtil.escape(searchQuery + " " + fullSearchQuery) + "\"";
|
||||
fullSearchQuery = "\"" + QueryParserUtil.escape(fullSearchQuery) + "\"";
|
||||
String escapedSearchQuery = "\"" + QueryParserUtil.escape(searchQuery + " " + fullSearchQuery) + "\"";
|
||||
String escapedFullSearchQuery = "\"" + QueryParserUtil.escape(fullSearchQuery) + "\"";
|
||||
|
||||
// Build search query
|
||||
StandardQueryParser qpHelper = new StandardQueryParser(new StandardAnalyzer());
|
||||
Analyzer analyzer = new StandardAnalyzer();
|
||||
StandardQueryParser qpHelper = new StandardQueryParser(analyzer);
|
||||
qpHelper.setPhraseSlop(100); // PhraseQuery add terms
|
||||
|
||||
// Search on documents and files
|
||||
BooleanQuery query = new BooleanQuery.Builder()
|
||||
.add(qpHelper.parse(searchQuery, "title"), BooleanClause.Occur.SHOULD)
|
||||
.add(qpHelper.parse(searchQuery, "description"), BooleanClause.Occur.SHOULD)
|
||||
.add(qpHelper.parse(searchQuery, "subject"), BooleanClause.Occur.SHOULD)
|
||||
.add(qpHelper.parse(searchQuery, "identifier"), BooleanClause.Occur.SHOULD)
|
||||
.add(qpHelper.parse(searchQuery, "publisher"), BooleanClause.Occur.SHOULD)
|
||||
.add(qpHelper.parse(searchQuery, "format"), BooleanClause.Occur.SHOULD)
|
||||
.add(qpHelper.parse(searchQuery, "source"), BooleanClause.Occur.SHOULD)
|
||||
.add(qpHelper.parse(searchQuery, "type"), BooleanClause.Occur.SHOULD)
|
||||
.add(qpHelper.parse(searchQuery, "coverage"), BooleanClause.Occur.SHOULD)
|
||||
.add(qpHelper.parse(searchQuery, "rights"), BooleanClause.Occur.SHOULD)
|
||||
.add(qpHelper.parse(searchQuery, "filename"), BooleanClause.Occur.SHOULD)
|
||||
.add(qpHelper.parse(fullSearchQuery, "content"), BooleanClause.Occur.SHOULD)
|
||||
.add(qpHelper.parse(escapedSearchQuery, "title"), BooleanClause.Occur.SHOULD)
|
||||
.add(qpHelper.parse(escapedSearchQuery, "description"), BooleanClause.Occur.SHOULD)
|
||||
.add(qpHelper.parse(escapedSearchQuery, "subject"), BooleanClause.Occur.SHOULD)
|
||||
.add(qpHelper.parse(escapedSearchQuery, "identifier"), BooleanClause.Occur.SHOULD)
|
||||
.add(qpHelper.parse(escapedSearchQuery, "publisher"), BooleanClause.Occur.SHOULD)
|
||||
.add(qpHelper.parse(escapedSearchQuery, "format"), BooleanClause.Occur.SHOULD)
|
||||
.add(qpHelper.parse(escapedSearchQuery, "source"), BooleanClause.Occur.SHOULD)
|
||||
.add(qpHelper.parse(escapedSearchQuery, "type"), BooleanClause.Occur.SHOULD)
|
||||
.add(qpHelper.parse(escapedSearchQuery, "coverage"), BooleanClause.Occur.SHOULD)
|
||||
.add(qpHelper.parse(escapedSearchQuery, "rights"), BooleanClause.Occur.SHOULD)
|
||||
.add(qpHelper.parse(escapedSearchQuery, "filename"), BooleanClause.Occur.SHOULD)
|
||||
.add(qpHelper.parse(escapedFullSearchQuery, "content"), BooleanClause.Occur.SHOULD)
|
||||
.build();
|
||||
|
||||
// Search
|
||||
@ -370,6 +405,10 @@ public class LuceneIndexingHandler implements IndexingHandler {
|
||||
TopDocs topDocs = searcher.search(query, Integer.MAX_VALUE);
|
||||
ScoreDoc[] docs = topDocs.scoreDocs;
|
||||
|
||||
SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<strong>", "</strong>");
|
||||
SimpleHTMLEncoder simpleHTMLEncoder = new SimpleHTMLEncoder();
|
||||
Highlighter highlighter = new Highlighter(simpleHTMLFormatter, simpleHTMLEncoder, new QueryScorer(query));
|
||||
|
||||
// Extract document IDs
|
||||
for (ScoreDoc doc : docs) {
|
||||
org.apache.lucene.document.Document document = searcher.doc(doc.doc);
|
||||
@ -379,6 +418,15 @@ public class LuceneIndexingHandler implements IndexingHandler {
|
||||
documentId = document.get("id");
|
||||
} else if (type.equals("file")) {
|
||||
documentId = document.get("document_id");
|
||||
|
||||
/*
|
||||
needs full reindexing from previous version to make it work, we now need the file content
|
||||
String content = document.get("content");
|
||||
if (content != null) {
|
||||
String hl = highlighter.getBestFragment(analyzer, "content", content);
|
||||
System.out.println(hl);
|
||||
}
|
||||
*/
|
||||
}
|
||||
if (documentId != null) {
|
||||
documentIdList.add(documentId);
|
||||
@ -447,7 +495,7 @@ public class LuceneIndexingHandler implements IndexingHandler {
|
||||
luceneDocument.add(new StringField("document_id", file.getDocumentId(), Field.Store.YES));
|
||||
}
|
||||
if (file.getContent() != null) {
|
||||
luceneDocument.add(new TextField("content", file.getContent(), Field.Store.NO));
|
||||
luceneDocument.add(new TextField("content", file.getContent(), Field.Store.YES));
|
||||
}
|
||||
|
||||
return luceneDocument;
|
||||
|
@ -367,11 +367,12 @@ public class DocumentResource extends BaseResource {
|
||||
|
||||
TagDao tagDao = new TagDao();
|
||||
PaginatedList<DocumentDto> paginatedList = PaginatedLists.create(limit, offset);
|
||||
List<String> suggestionList = Lists.newArrayList();
|
||||
SortCriteria sortCriteria = new SortCriteria(sortColumn, asc);
|
||||
DocumentCriteria documentCriteria = parseSearchQuery(search);
|
||||
documentCriteria.setTargetIdList(getTargetIdList(null));
|
||||
try {
|
||||
AppContext.getInstance().getIndexingHandler().findByCriteria(paginatedList, documentCriteria, sortCriteria);
|
||||
AppContext.getInstance().getIndexingHandler().findByCriteria(paginatedList, suggestionList, documentCriteria, sortCriteria);
|
||||
} catch (Exception e) {
|
||||
throw new ServerException("SearchError", "Error searching in documents", e);
|
||||
}
|
||||
@ -402,8 +403,15 @@ public class DocumentResource extends BaseResource {
|
||||
.add("file_count", documentDto.getFileCount())
|
||||
.add("tags", tags));
|
||||
}
|
||||
|
||||
JsonArrayBuilder suggestions = Json.createArrayBuilder();
|
||||
for (String suggestion : suggestionList) {
|
||||
suggestions.add(suggestion);
|
||||
}
|
||||
|
||||
response.add("total", paginatedList.getResultCount())
|
||||
.add("documents", documents);
|
||||
.add("documents", documents)
|
||||
.add("suggestions", suggestions);
|
||||
|
||||
return Response.ok().entity(response.build()).build();
|
||||
}
|
||||
|
@ -38,6 +38,7 @@ angular.module('docs').controller('Document', function ($scope, $rootScope, $tim
|
||||
.then(function (data) {
|
||||
$scope.documents = data.documents;
|
||||
$scope.totalDocuments = data.total;
|
||||
$scope.suggestions = data.suggestions;
|
||||
});
|
||||
};
|
||||
|
||||
|
@ -19,7 +19,10 @@
|
||||
<!-- Search (simple and advanced) -->
|
||||
<div class="row search-dropdown-anchor">
|
||||
<div class="col-xs-12 input-group">
|
||||
<input type="search" class="form-control" ng-attr-placeholder="{{ 'document.search' | translate }}" ng-model="search" />
|
||||
<input type="search" class="form-control"
|
||||
uib-typeahead="suggestion for suggestion in suggestions"
|
||||
typeahead-focus-first="false"
|
||||
ng-attr-placeholder="{{ 'document.search' | translate }}" ng-model="search" />
|
||||
<span class="input-group-addon btn" ng-click="openSearch()">
|
||||
<div uib-dropdown
|
||||
auto-close="disabled" is-open="searchOpened" dropdown-append-to="searchDropdownAnchor">
|
||||
|
12
pom.xml
12
pom.xml
@ -388,6 +388,18 @@
|
||||
<version>${org.apache.lucene.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.lucene</groupId>
|
||||
<artifactId>lucene-suggest</artifactId>
|
||||
<version>${org.apache.lucene.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.lucene</groupId>
|
||||
<artifactId>lucene-highlighter</artifactId>
|
||||
<version>${org.apache.lucene.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.imgscalr</groupId>
|
||||
<artifactId>imgscalr-lib</artifactId>
|
||||
|
Loading…
x
Reference in New Issue
Block a user