mirror of
https://github.com/sismics/docs.git
synced 2025-01-18 15:55:09 +01:00
#241: search suggestions
This commit is contained in:
parent
7baf5e44fd
commit
8c37af6207
@ -112,6 +112,16 @@
|
|||||||
<artifactId>lucene-queryparser</artifactId>
|
<artifactId>lucene-queryparser</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.lucene</groupId>
|
||||||
|
<artifactId>lucene-suggest</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.lucene</groupId>
|
||||||
|
<artifactId>lucene-highlighter</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>com.sun.mail</groupId>
|
<groupId>com.sun.mail</groupId>
|
||||||
<artifactId>javax.mail</artifactId>
|
<artifactId>javax.mail</artifactId>
|
||||||
|
@ -100,9 +100,10 @@ public interface IndexingHandler {
|
|||||||
* Searches documents by criteria.
|
* Searches documents by criteria.
|
||||||
*
|
*
|
||||||
* @param paginatedList List of documents (updated by side effects)
|
* @param paginatedList List of documents (updated by side effects)
|
||||||
|
* @param suggestionList Suggestion of search query (updated by side effects)
|
||||||
* @param criteria Search criteria
|
* @param criteria Search criteria
|
||||||
* @param sortCriteria Sort criteria
|
* @param sortCriteria Sort criteria
|
||||||
* @throws Exception e
|
* @throws Exception e
|
||||||
*/
|
*/
|
||||||
void findByCriteria(PaginatedList<DocumentDto> paginatedList, DocumentCriteria criteria, SortCriteria sortCriteria) throws Exception;
|
void findByCriteria(PaginatedList<DocumentDto> paginatedList, List<String> suggestionList, DocumentCriteria criteria, SortCriteria sortCriteria) throws Exception;
|
||||||
}
|
}
|
||||||
|
@ -18,6 +18,7 @@ import com.sismics.docs.core.util.jpa.PaginatedLists;
|
|||||||
import com.sismics.docs.core.util.jpa.QueryParam;
|
import com.sismics.docs.core.util.jpa.QueryParam;
|
||||||
import com.sismics.docs.core.util.jpa.SortCriteria;
|
import com.sismics.docs.core.util.jpa.SortCriteria;
|
||||||
import com.sismics.util.ClasspathScanner;
|
import com.sismics.util.ClasspathScanner;
|
||||||
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
||||||
import org.apache.lucene.document.Field;
|
import org.apache.lucene.document.Field;
|
||||||
import org.apache.lucene.document.StringField;
|
import org.apache.lucene.document.StringField;
|
||||||
@ -26,6 +27,13 @@ import org.apache.lucene.index.*;
|
|||||||
import org.apache.lucene.queryparser.flexible.standard.QueryParserUtil;
|
import org.apache.lucene.queryparser.flexible.standard.QueryParserUtil;
|
||||||
import org.apache.lucene.queryparser.flexible.standard.StandardQueryParser;
|
import org.apache.lucene.queryparser.flexible.standard.StandardQueryParser;
|
||||||
import org.apache.lucene.search.*;
|
import org.apache.lucene.search.*;
|
||||||
|
import org.apache.lucene.search.highlight.Highlighter;
|
||||||
|
import org.apache.lucene.search.highlight.QueryScorer;
|
||||||
|
import org.apache.lucene.search.highlight.SimpleHTMLEncoder;
|
||||||
|
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
|
||||||
|
import org.apache.lucene.search.spell.LuceneDictionary;
|
||||||
|
import org.apache.lucene.search.suggest.Lookup;
|
||||||
|
import org.apache.lucene.search.suggest.analyzing.FuzzySuggester;
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
import org.apache.lucene.store.NoLockFactory;
|
import org.apache.lucene.store.NoLockFactory;
|
||||||
import org.apache.lucene.store.RAMDirectory;
|
import org.apache.lucene.store.RAMDirectory;
|
||||||
@ -207,7 +215,7 @@ public class LuceneIndexingHandler implements IndexingHandler {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void findByCriteria(PaginatedList<DocumentDto> paginatedList, DocumentCriteria criteria, SortCriteria sortCriteria) throws Exception {
|
public void findByCriteria(PaginatedList<DocumentDto> paginatedList, List<String> suggestionList, DocumentCriteria criteria, SortCriteria sortCriteria) throws Exception {
|
||||||
Map<String, Object> parameterMap = new HashMap<>();
|
Map<String, Object> parameterMap = new HashMap<>();
|
||||||
List<String> criteriaList = new ArrayList<>();
|
List<String> criteriaList = new ArrayList<>();
|
||||||
|
|
||||||
@ -247,6 +255,8 @@ public class LuceneIndexingHandler implements IndexingHandler {
|
|||||||
}
|
}
|
||||||
criteriaList.add("d.DOC_ID_C in :documentIdList");
|
criteriaList.add("d.DOC_ID_C in :documentIdList");
|
||||||
parameterMap.put("documentIdList", documentIdList);
|
parameterMap.put("documentIdList", documentIdList);
|
||||||
|
|
||||||
|
suggestSearchTerms(criteria.getSearch(), suggestionList);
|
||||||
}
|
}
|
||||||
if (criteria.getCreateDateMin() != null) {
|
if (criteria.getCreateDateMin() != null) {
|
||||||
criteriaList.add("d.DOC_CREATEDATE_D >= :createDateMin");
|
criteriaList.add("d.DOC_CREATEDATE_D >= :createDateMin");
|
||||||
@ -326,6 +336,30 @@ public class LuceneIndexingHandler implements IndexingHandler {
|
|||||||
paginatedList.setResultList(documentDtoList);
|
paginatedList.setResultList(documentDtoList);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Suggest search terms according to the user query.
|
||||||
|
*
|
||||||
|
* @param search User search query
|
||||||
|
* @param suggestionList Suggestion of search query (updated by side effects)
|
||||||
|
* @throws Exception e
|
||||||
|
*/
|
||||||
|
private void suggestSearchTerms(String search, List<String> suggestionList) throws Exception {
|
||||||
|
DirectoryReader directoryReader = getDirectoryReader();
|
||||||
|
if (directoryReader == null) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
FuzzySuggester suggester = new FuzzySuggester(new StandardAnalyzer());
|
||||||
|
LuceneDictionary dictionary = new LuceneDictionary(directoryReader, "title");
|
||||||
|
suggester.build(dictionary);
|
||||||
|
int lastIndex = search.lastIndexOf(' ');
|
||||||
|
String suggestQuery = search.substring(lastIndex < 0 ? 0 : lastIndex);
|
||||||
|
List<Lookup.LookupResult> lookupResultList = suggester.lookup(suggestQuery, false, 10);
|
||||||
|
for (Lookup.LookupResult lookupResult : lookupResultList) {
|
||||||
|
suggestionList.add(lookupResult.key.toString());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Fulltext search in files and documents.
|
* Fulltext search in files and documents.
|
||||||
*
|
*
|
||||||
@ -336,27 +370,28 @@ public class LuceneIndexingHandler implements IndexingHandler {
|
|||||||
*/
|
*/
|
||||||
private Set<String> search(String searchQuery, String fullSearchQuery) throws Exception {
|
private Set<String> search(String searchQuery, String fullSearchQuery) throws Exception {
|
||||||
// Escape query and add quotes so QueryParser generate a PhraseQuery
|
// Escape query and add quotes so QueryParser generate a PhraseQuery
|
||||||
searchQuery = "\"" + QueryParserUtil.escape(searchQuery + " " + fullSearchQuery) + "\"";
|
String escapedSearchQuery = "\"" + QueryParserUtil.escape(searchQuery + " " + fullSearchQuery) + "\"";
|
||||||
fullSearchQuery = "\"" + QueryParserUtil.escape(fullSearchQuery) + "\"";
|
String escapedFullSearchQuery = "\"" + QueryParserUtil.escape(fullSearchQuery) + "\"";
|
||||||
|
|
||||||
// Build search query
|
// Build search query
|
||||||
StandardQueryParser qpHelper = new StandardQueryParser(new StandardAnalyzer());
|
Analyzer analyzer = new StandardAnalyzer();
|
||||||
|
StandardQueryParser qpHelper = new StandardQueryParser(analyzer);
|
||||||
qpHelper.setPhraseSlop(100); // PhraseQuery add terms
|
qpHelper.setPhraseSlop(100); // PhraseQuery add terms
|
||||||
|
|
||||||
// Search on documents and files
|
// Search on documents and files
|
||||||
BooleanQuery query = new BooleanQuery.Builder()
|
BooleanQuery query = new BooleanQuery.Builder()
|
||||||
.add(qpHelper.parse(searchQuery, "title"), BooleanClause.Occur.SHOULD)
|
.add(qpHelper.parse(escapedSearchQuery, "title"), BooleanClause.Occur.SHOULD)
|
||||||
.add(qpHelper.parse(searchQuery, "description"), BooleanClause.Occur.SHOULD)
|
.add(qpHelper.parse(escapedSearchQuery, "description"), BooleanClause.Occur.SHOULD)
|
||||||
.add(qpHelper.parse(searchQuery, "subject"), BooleanClause.Occur.SHOULD)
|
.add(qpHelper.parse(escapedSearchQuery, "subject"), BooleanClause.Occur.SHOULD)
|
||||||
.add(qpHelper.parse(searchQuery, "identifier"), BooleanClause.Occur.SHOULD)
|
.add(qpHelper.parse(escapedSearchQuery, "identifier"), BooleanClause.Occur.SHOULD)
|
||||||
.add(qpHelper.parse(searchQuery, "publisher"), BooleanClause.Occur.SHOULD)
|
.add(qpHelper.parse(escapedSearchQuery, "publisher"), BooleanClause.Occur.SHOULD)
|
||||||
.add(qpHelper.parse(searchQuery, "format"), BooleanClause.Occur.SHOULD)
|
.add(qpHelper.parse(escapedSearchQuery, "format"), BooleanClause.Occur.SHOULD)
|
||||||
.add(qpHelper.parse(searchQuery, "source"), BooleanClause.Occur.SHOULD)
|
.add(qpHelper.parse(escapedSearchQuery, "source"), BooleanClause.Occur.SHOULD)
|
||||||
.add(qpHelper.parse(searchQuery, "type"), BooleanClause.Occur.SHOULD)
|
.add(qpHelper.parse(escapedSearchQuery, "type"), BooleanClause.Occur.SHOULD)
|
||||||
.add(qpHelper.parse(searchQuery, "coverage"), BooleanClause.Occur.SHOULD)
|
.add(qpHelper.parse(escapedSearchQuery, "coverage"), BooleanClause.Occur.SHOULD)
|
||||||
.add(qpHelper.parse(searchQuery, "rights"), BooleanClause.Occur.SHOULD)
|
.add(qpHelper.parse(escapedSearchQuery, "rights"), BooleanClause.Occur.SHOULD)
|
||||||
.add(qpHelper.parse(searchQuery, "filename"), BooleanClause.Occur.SHOULD)
|
.add(qpHelper.parse(escapedSearchQuery, "filename"), BooleanClause.Occur.SHOULD)
|
||||||
.add(qpHelper.parse(fullSearchQuery, "content"), BooleanClause.Occur.SHOULD)
|
.add(qpHelper.parse(escapedFullSearchQuery, "content"), BooleanClause.Occur.SHOULD)
|
||||||
.build();
|
.build();
|
||||||
|
|
||||||
// Search
|
// Search
|
||||||
@ -370,6 +405,10 @@ public class LuceneIndexingHandler implements IndexingHandler {
|
|||||||
TopDocs topDocs = searcher.search(query, Integer.MAX_VALUE);
|
TopDocs topDocs = searcher.search(query, Integer.MAX_VALUE);
|
||||||
ScoreDoc[] docs = topDocs.scoreDocs;
|
ScoreDoc[] docs = topDocs.scoreDocs;
|
||||||
|
|
||||||
|
SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<strong>", "</strong>");
|
||||||
|
SimpleHTMLEncoder simpleHTMLEncoder = new SimpleHTMLEncoder();
|
||||||
|
Highlighter highlighter = new Highlighter(simpleHTMLFormatter, simpleHTMLEncoder, new QueryScorer(query));
|
||||||
|
|
||||||
// Extract document IDs
|
// Extract document IDs
|
||||||
for (ScoreDoc doc : docs) {
|
for (ScoreDoc doc : docs) {
|
||||||
org.apache.lucene.document.Document document = searcher.doc(doc.doc);
|
org.apache.lucene.document.Document document = searcher.doc(doc.doc);
|
||||||
@ -379,6 +418,15 @@ public class LuceneIndexingHandler implements IndexingHandler {
|
|||||||
documentId = document.get("id");
|
documentId = document.get("id");
|
||||||
} else if (type.equals("file")) {
|
} else if (type.equals("file")) {
|
||||||
documentId = document.get("document_id");
|
documentId = document.get("document_id");
|
||||||
|
|
||||||
|
/*
|
||||||
|
needs full reindexing from previous version to make it work, we now need the file content
|
||||||
|
String content = document.get("content");
|
||||||
|
if (content != null) {
|
||||||
|
String hl = highlighter.getBestFragment(analyzer, "content", content);
|
||||||
|
System.out.println(hl);
|
||||||
|
}
|
||||||
|
*/
|
||||||
}
|
}
|
||||||
if (documentId != null) {
|
if (documentId != null) {
|
||||||
documentIdList.add(documentId);
|
documentIdList.add(documentId);
|
||||||
@ -447,7 +495,7 @@ public class LuceneIndexingHandler implements IndexingHandler {
|
|||||||
luceneDocument.add(new StringField("document_id", file.getDocumentId(), Field.Store.YES));
|
luceneDocument.add(new StringField("document_id", file.getDocumentId(), Field.Store.YES));
|
||||||
}
|
}
|
||||||
if (file.getContent() != null) {
|
if (file.getContent() != null) {
|
||||||
luceneDocument.add(new TextField("content", file.getContent(), Field.Store.NO));
|
luceneDocument.add(new TextField("content", file.getContent(), Field.Store.YES));
|
||||||
}
|
}
|
||||||
|
|
||||||
return luceneDocument;
|
return luceneDocument;
|
||||||
|
@ -367,11 +367,12 @@ public class DocumentResource extends BaseResource {
|
|||||||
|
|
||||||
TagDao tagDao = new TagDao();
|
TagDao tagDao = new TagDao();
|
||||||
PaginatedList<DocumentDto> paginatedList = PaginatedLists.create(limit, offset);
|
PaginatedList<DocumentDto> paginatedList = PaginatedLists.create(limit, offset);
|
||||||
|
List<String> suggestionList = Lists.newArrayList();
|
||||||
SortCriteria sortCriteria = new SortCriteria(sortColumn, asc);
|
SortCriteria sortCriteria = new SortCriteria(sortColumn, asc);
|
||||||
DocumentCriteria documentCriteria = parseSearchQuery(search);
|
DocumentCriteria documentCriteria = parseSearchQuery(search);
|
||||||
documentCriteria.setTargetIdList(getTargetIdList(null));
|
documentCriteria.setTargetIdList(getTargetIdList(null));
|
||||||
try {
|
try {
|
||||||
AppContext.getInstance().getIndexingHandler().findByCriteria(paginatedList, documentCriteria, sortCriteria);
|
AppContext.getInstance().getIndexingHandler().findByCriteria(paginatedList, suggestionList, documentCriteria, sortCriteria);
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
throw new ServerException("SearchError", "Error searching in documents", e);
|
throw new ServerException("SearchError", "Error searching in documents", e);
|
||||||
}
|
}
|
||||||
@ -402,8 +403,15 @@ public class DocumentResource extends BaseResource {
|
|||||||
.add("file_count", documentDto.getFileCount())
|
.add("file_count", documentDto.getFileCount())
|
||||||
.add("tags", tags));
|
.add("tags", tags));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
JsonArrayBuilder suggestions = Json.createArrayBuilder();
|
||||||
|
for (String suggestion : suggestionList) {
|
||||||
|
suggestions.add(suggestion);
|
||||||
|
}
|
||||||
|
|
||||||
response.add("total", paginatedList.getResultCount())
|
response.add("total", paginatedList.getResultCount())
|
||||||
.add("documents", documents);
|
.add("documents", documents)
|
||||||
|
.add("suggestions", suggestions);
|
||||||
|
|
||||||
return Response.ok().entity(response.build()).build();
|
return Response.ok().entity(response.build()).build();
|
||||||
}
|
}
|
||||||
|
@ -38,6 +38,7 @@ angular.module('docs').controller('Document', function ($scope, $rootScope, $tim
|
|||||||
.then(function (data) {
|
.then(function (data) {
|
||||||
$scope.documents = data.documents;
|
$scope.documents = data.documents;
|
||||||
$scope.totalDocuments = data.total;
|
$scope.totalDocuments = data.total;
|
||||||
|
$scope.suggestions = data.suggestions;
|
||||||
});
|
});
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -19,7 +19,10 @@
|
|||||||
<!-- Search (simple and advanced) -->
|
<!-- Search (simple and advanced) -->
|
||||||
<div class="row search-dropdown-anchor">
|
<div class="row search-dropdown-anchor">
|
||||||
<div class="col-xs-12 input-group">
|
<div class="col-xs-12 input-group">
|
||||||
<input type="search" class="form-control" ng-attr-placeholder="{{ 'document.search' | translate }}" ng-model="search" />
|
<input type="search" class="form-control"
|
||||||
|
uib-typeahead="suggestion for suggestion in suggestions"
|
||||||
|
typeahead-focus-first="false"
|
||||||
|
ng-attr-placeholder="{{ 'document.search' | translate }}" ng-model="search" />
|
||||||
<span class="input-group-addon btn" ng-click="openSearch()">
|
<span class="input-group-addon btn" ng-click="openSearch()">
|
||||||
<div uib-dropdown
|
<div uib-dropdown
|
||||||
auto-close="disabled" is-open="searchOpened" dropdown-append-to="searchDropdownAnchor">
|
auto-close="disabled" is-open="searchOpened" dropdown-append-to="searchDropdownAnchor">
|
||||||
|
12
pom.xml
12
pom.xml
@ -388,6 +388,18 @@
|
|||||||
<version>${org.apache.lucene.version}</version>
|
<version>${org.apache.lucene.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.lucene</groupId>
|
||||||
|
<artifactId>lucene-suggest</artifactId>
|
||||||
|
<version>${org.apache.lucene.version}</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.lucene</groupId>
|
||||||
|
<artifactId>lucene-highlighter</artifactId>
|
||||||
|
<version>${org.apache.lucene.version}</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.imgscalr</groupId>
|
<groupId>org.imgscalr</groupId>
|
||||||
<artifactId>imgscalr-lib</artifactId>
|
<artifactId>imgscalr-lib</artifactId>
|
||||||
|
Loading…
x
Reference in New Issue
Block a user