mirror of
https://github.com/sismics/docs.git
synced 2024-12-22 11:23:48 +01:00
Make search for documents faster for large dataset (#698)
This commit is contained in:
parent
ce30b1a6ff
commit
a89543b555
@ -4,13 +4,16 @@ import com.sismics.docs.core.constant.AuditLogType;
|
||||
import com.sismics.docs.core.model.jpa.File;
|
||||
import com.sismics.docs.core.util.AuditLogUtil;
|
||||
import com.sismics.util.context.ThreadLocalContext;
|
||||
|
||||
import jakarta.persistence.EntityManager;
|
||||
import jakarta.persistence.NoResultException;
|
||||
import jakarta.persistence.Query;
|
||||
import jakarta.persistence.TypedQuery;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.Date;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.UUID;
|
||||
|
||||
/**
|
||||
@ -213,6 +216,24 @@ public class FileDao {
|
||||
return q.getResultList();
|
||||
}
|
||||
|
||||
/**
|
||||
* Get files count by documents IDs.
|
||||
*
|
||||
* @param documentIds Documents IDs
|
||||
* @return the number of files per document id
|
||||
*/
|
||||
public Map<String, Long> countByDocumentsIds(Iterable<String> documentIds) {
|
||||
EntityManager em = ThreadLocalContext.get().getEntityManager();
|
||||
Query q = em.createQuery("select f.documentId, count(*) from File f where f.documentId in :documentIds and f.latestVersion = true and f.deleteDate is null group by (f.documentId)");
|
||||
q.setParameter("documentIds", documentIds);
|
||||
Map<String, Long> result = new HashMap<>();
|
||||
q.getResultList().forEach(o -> {
|
||||
Object[] resultLine = (Object[]) o;
|
||||
result.put((String) resultLine[0], (Long) resultLine[1]);
|
||||
});
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get all files from a version.
|
||||
*
|
||||
|
@ -26,9 +26,18 @@ import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.StringField;
|
||||
import org.apache.lucene.document.TextField;
|
||||
import org.apache.lucene.index.*;
|
||||
import org.apache.lucene.index.CheckIndex;
|
||||
import org.apache.lucene.index.ConcurrentMergeScheduler;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.queryparser.simple.SimpleQueryParser;
|
||||
import org.apache.lucene.search.*;
|
||||
import org.apache.lucene.search.BooleanClause;
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.ScoreDoc;
|
||||
import org.apache.lucene.search.TopDocs;
|
||||
import org.apache.lucene.search.highlight.Highlighter;
|
||||
import org.apache.lucene.search.highlight.QueryScorer;
|
||||
import org.apache.lucene.search.highlight.SimpleHTMLEncoder;
|
||||
@ -47,7 +56,12 @@ import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.sql.Timestamp;
|
||||
import java.util.*;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.UUID;
|
||||
|
||||
/**
|
||||
* Lucene indexing handler.
|
||||
@ -242,32 +256,27 @@ public class LuceneIndexingHandler implements IndexingHandler {
|
||||
|
||||
StringBuilder sb = new StringBuilder("select distinct d.DOC_ID_C c0, d.DOC_TITLE_C c1, d.DOC_DESCRIPTION_C c2, d.DOC_CREATEDATE_D c3, d.DOC_LANGUAGE_C c4, d.DOC_IDFILE_C, ");
|
||||
sb.append(" s.count c5, ");
|
||||
sb.append(" f.count c6, ");
|
||||
sb.append(" rs2.RTP_ID_C c7, rs2.RTP_NAME_C, d.DOC_UPDATEDATE_D c8 ");
|
||||
sb.append(" from T_DOCUMENT d ");
|
||||
sb.append(" left join (SELECT count(s.SHA_ID_C) count, ac.ACL_SOURCEID_C " +
|
||||
" FROM T_SHARE s, T_ACL ac " +
|
||||
" WHERE ac.ACL_TARGETID_C = s.SHA_ID_C AND ac.ACL_DELETEDATE_D IS NULL AND " +
|
||||
" s.SHA_DELETEDATE_D IS NULL group by ac.ACL_SOURCEID_C) s on s.ACL_SOURCEID_C = d.DOC_ID_C " +
|
||||
" left join (SELECT count(f.FIL_ID_C) count, f.FIL_IDDOC_C " +
|
||||
" FROM T_FILE f " +
|
||||
" WHERE f.FIL_DELETEDATE_D is null group by f.FIL_IDDOC_C) f on f.FIL_IDDOC_C = d.DOC_ID_C ");
|
||||
" s.SHA_DELETEDATE_D IS NULL group by ac.ACL_SOURCEID_C) s on s.ACL_SOURCEID_C = d.DOC_ID_C ");
|
||||
sb.append(" left join (select rs.*, rs3.idDocument " +
|
||||
"from T_ROUTE_STEP rs " +
|
||||
"join (select r.RTE_IDDOCUMENT_C idDocument, rs.RTP_IDROUTE_C idRoute, min(rs.RTP_ORDER_N) minOrder from T_ROUTE_STEP rs join T_ROUTE r on r.RTE_ID_C = rs.RTP_IDROUTE_C and r.RTE_DELETEDATE_D is null where rs.RTP_DELETEDATE_D is null and rs.RTP_ENDDATE_D is null group by rs.RTP_IDROUTE_C, r.RTE_IDDOCUMENT_C) rs3 on rs.RTP_IDROUTE_C = rs3.idRoute and rs.RTP_ORDER_N = rs3.minOrder " +
|
||||
"where rs.RTP_IDTARGET_C in (:targetIdList)) rs2 on rs2.idDocument = d.DOC_ID_C ");
|
||||
|
||||
// Add search criterias
|
||||
if (criteria.getTargetIdList() != null) {
|
||||
if (!SecurityUtil.skipAclCheck(criteria.getTargetIdList())) {
|
||||
// Read permission is enough for searching
|
||||
sb.append(" left join T_ACL a on a.ACL_TARGETID_C in (:targetIdList) and a.ACL_SOURCEID_C = d.DOC_ID_C and a.ACL_PERM_C = 'READ' and a.ACL_DELETEDATE_D is null ");
|
||||
sb.append(" left join T_DOCUMENT_TAG dta on dta.DOT_IDDOCUMENT_C = d.DOC_ID_C and dta.DOT_DELETEDATE_D is null ");
|
||||
sb.append(" left join T_ACL a2 on a2.ACL_TARGETID_C in (:targetIdList) and a2.ACL_SOURCEID_C = dta.DOT_IDTAG_C and a2.ACL_PERM_C = 'READ' and a2.ACL_DELETEDATE_D is null ");
|
||||
criteriaList.add("(a.ACL_ID_C is not null or a2.ACL_ID_C is not null)");
|
||||
}
|
||||
parameterMap.put("targetIdList", criteria.getTargetIdList());
|
||||
if (!SecurityUtil.skipAclCheck(criteria.getTargetIdList())) {
|
||||
// Read permission is enough for searching
|
||||
sb.append(" left join T_ACL a on a.ACL_TARGETID_C in (:targetIdList) and a.ACL_SOURCEID_C = d.DOC_ID_C and a.ACL_PERM_C = 'READ' and a.ACL_DELETEDATE_D is null ");
|
||||
sb.append(" left join T_DOCUMENT_TAG dta on dta.DOT_IDDOCUMENT_C = d.DOC_ID_C and dta.DOT_DELETEDATE_D is null ");
|
||||
sb.append(" left join T_ACL a2 on a2.ACL_TARGETID_C in (:targetIdList) and a2.ACL_SOURCEID_C = dta.DOT_IDTAG_C and a2.ACL_PERM_C = 'READ' and a2.ACL_DELETEDATE_D is null ");
|
||||
criteriaList.add("(a.ACL_ID_C is not null or a2.ACL_ID_C is not null)");
|
||||
}
|
||||
parameterMap.put("targetIdList", criteria.getTargetIdList());
|
||||
|
||||
if (!Strings.isNullOrEmpty(criteria.getSearch()) || !Strings.isNullOrEmpty(criteria.getFullSearch())) {
|
||||
documentSearchMap = search(criteria.getSearch(), criteria.getFullSearch());
|
||||
if (documentSearchMap.isEmpty()) {
|
||||
@ -312,7 +321,7 @@ public class LuceneIndexingHandler implements IndexingHandler {
|
||||
criteriaList.add("(" + Joiner.on(" OR ").join(tagCriteriaList) + ")");
|
||||
}
|
||||
}
|
||||
if (criteria.getExcludedTagIdList() != null && !criteria.getExcludedTagIdList().isEmpty()) {
|
||||
if (!criteria.getExcludedTagIdList().isEmpty()) {
|
||||
int index = 0;
|
||||
for (List<String> tagIdList : criteria.getExcludedTagIdList()) {
|
||||
List<String> tagCriteriaList = Lists.newArrayList();
|
||||
@ -367,8 +376,6 @@ public class LuceneIndexingHandler implements IndexingHandler {
|
||||
documentDto.setFileId((String) o[i++]);
|
||||
Number shareCount = (Number) o[i++];
|
||||
documentDto.setShared(shareCount != null && shareCount.intValue() > 0);
|
||||
Number fileCount = (Number) o[i++];
|
||||
documentDto.setFileCount(fileCount == null ? 0 : fileCount.intValue());
|
||||
documentDto.setActiveRoute(o[i++] != null);
|
||||
documentDto.setCurrentStepName((String) o[i++]);
|
||||
documentDto.setUpdateTimestamp(((Timestamp) o[i]).getTime());
|
||||
|
@ -1 +1 @@
|
||||
db.version=29
|
||||
db.version=30
|
||||
|
@ -0,0 +1,2 @@
|
||||
-- Add an ascending index on the FIL_IDDOC_C column of T_FILE.
create index IDX_FIL_IDDOC_C ON T_FILE (FIL_IDDOC_C ASC);
|
||||
-- Set the DB_VERSION configuration entry to 30.
update T_CONFIG set CFG_VALUE_C = '30' where CFG_ID_C = 'DB_VERSION';
|
@ -1,3 +1,3 @@
|
||||
api.current_version=${project.version}
|
||||
api.min_version=1.0
|
||||
db.version=29
|
||||
db.version=30
|
||||
|
@ -7,10 +7,22 @@ import com.sismics.docs.core.constant.AclType;
|
||||
import com.sismics.docs.core.constant.ConfigType;
|
||||
import com.sismics.docs.core.constant.Constants;
|
||||
import com.sismics.docs.core.constant.PermType;
|
||||
import com.sismics.docs.core.dao.*;
|
||||
import com.sismics.docs.core.dao.AclDao;
|
||||
import com.sismics.docs.core.dao.ContributorDao;
|
||||
import com.sismics.docs.core.dao.DocumentDao;
|
||||
import com.sismics.docs.core.dao.FileDao;
|
||||
import com.sismics.docs.core.dao.RelationDao;
|
||||
import com.sismics.docs.core.dao.RouteStepDao;
|
||||
import com.sismics.docs.core.dao.TagDao;
|
||||
import com.sismics.docs.core.dao.UserDao;
|
||||
import com.sismics.docs.core.dao.criteria.DocumentCriteria;
|
||||
import com.sismics.docs.core.dao.criteria.TagCriteria;
|
||||
import com.sismics.docs.core.dao.dto.*;
|
||||
import com.sismics.docs.core.dao.dto.AclDto;
|
||||
import com.sismics.docs.core.dao.dto.ContributorDto;
|
||||
import com.sismics.docs.core.dao.dto.DocumentDto;
|
||||
import com.sismics.docs.core.dao.dto.RelationDto;
|
||||
import com.sismics.docs.core.dao.dto.RouteStepDto;
|
||||
import com.sismics.docs.core.dao.dto.TagDto;
|
||||
import com.sismics.docs.core.event.DocumentCreatedAsyncEvent;
|
||||
import com.sismics.docs.core.event.DocumentDeletedAsyncEvent;
|
||||
import com.sismics.docs.core.event.DocumentUpdatedAsyncEvent;
|
||||
@ -38,6 +50,21 @@ import com.sismics.util.EmailUtil;
|
||||
import com.sismics.util.JsonUtil;
|
||||
import com.sismics.util.context.ThreadLocalContext;
|
||||
import com.sismics.util.mime.MimeType;
|
||||
import jakarta.json.Json;
|
||||
import jakarta.json.JsonArrayBuilder;
|
||||
import jakarta.json.JsonObjectBuilder;
|
||||
import jakarta.ws.rs.Consumes;
|
||||
import jakarta.ws.rs.DELETE;
|
||||
import jakarta.ws.rs.FormParam;
|
||||
import jakarta.ws.rs.GET;
|
||||
import jakarta.ws.rs.NotFoundException;
|
||||
import jakarta.ws.rs.POST;
|
||||
import jakarta.ws.rs.PUT;
|
||||
import jakarta.ws.rs.Path;
|
||||
import jakarta.ws.rs.PathParam;
|
||||
import jakarta.ws.rs.QueryParam;
|
||||
import jakarta.ws.rs.core.Response;
|
||||
import jakarta.ws.rs.core.StreamingOutput;
|
||||
import org.apache.commons.collections4.CollectionUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.glassfish.jersey.media.multipart.FormDataBodyPart;
|
||||
@ -48,22 +75,25 @@ import org.joda.time.format.DateTimeFormatter;
|
||||
import org.joda.time.format.DateTimeFormatterBuilder;
|
||||
import org.joda.time.format.DateTimeParser;
|
||||
|
||||
import jakarta.json.Json;
|
||||
import jakarta.json.JsonArrayBuilder;
|
||||
import jakarta.json.JsonObjectBuilder;
|
||||
import javax.mail.Message;
|
||||
import javax.mail.MessagingException;
|
||||
import javax.mail.Session;
|
||||
import javax.mail.internet.MimeMessage;
|
||||
import jakarta.ws.rs.*;
|
||||
import jakarta.ws.rs.core.Response;
|
||||
import jakarta.ws.rs.core.StreamingOutput;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.StandardCopyOption;
|
||||
import java.text.MessageFormat;
|
||||
import java.util.*;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.Date;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Properties;
|
||||
import java.util.Set;
|
||||
import java.util.UUID;
|
||||
|
||||
/**
|
||||
* Document REST resources.
|
||||
@ -443,11 +473,14 @@ public class DocumentResource extends BaseResource {
|
||||
}
|
||||
|
||||
// Find the files of the documents
|
||||
Iterable<String> documentsIds = CollectionUtils.collect(paginatedList.getResultList(), DocumentDto::getId);
|
||||
FileDao fileDao = new FileDao();
|
||||
List<File> filesList = null;
|
||||
Map<String, Long> filesCountByDocument = null;
|
||||
if (Boolean.TRUE == files) {
|
||||
Iterable<String> documentsIds = CollectionUtils.collect(paginatedList.getResultList(), DocumentDto::getId);
|
||||
FileDao fileDao = new FileDao();
|
||||
filesList = fileDao.getByDocumentsIds(documentsIds);
|
||||
} else {
|
||||
filesCountByDocument = fileDao.countByDocumentsIds(documentsIds);
|
||||
}
|
||||
|
||||
for (DocumentDto documentDto : paginatedList.getResultList()) {
|
||||
@ -463,6 +496,16 @@ public class DocumentResource extends BaseResource {
|
||||
.add("color", tagDto.getColor()));
|
||||
}
|
||||
|
||||
Long filesCount;
|
||||
Collection<File> filesOfDocument = null;
|
||||
if (Boolean.TRUE == files) {
|
||||
// Find files matching the document
|
||||
filesOfDocument = CollectionUtils.select(filesList, file -> file.getDocumentId().equals(documentDto.getId()));
|
||||
filesCount = (long) filesOfDocument.size();
|
||||
} else {
|
||||
filesCount = filesCountByDocument.getOrDefault(documentDto.getId(), 0L);
|
||||
}
|
||||
|
||||
JsonObjectBuilder documentObjectBuilder = Json.createObjectBuilder()
|
||||
.add("id", documentDto.getId())
|
||||
.add("highlight", JsonUtil.nullable(documentDto.getHighlight()))
|
||||
@ -475,12 +518,10 @@ public class DocumentResource extends BaseResource {
|
||||
.add("shared", documentDto.getShared())
|
||||
.add("active_route", documentDto.isActiveRoute())
|
||||
.add("current_step_name", JsonUtil.nullable(documentDto.getCurrentStepName()))
|
||||
.add("file_count", documentDto.getFileCount())
|
||||
.add("file_count", filesCount)
|
||||
.add("tags", tags);
|
||||
if (Boolean.TRUE == files) {
|
||||
JsonArrayBuilder filesArrayBuilder = Json.createArrayBuilder();
|
||||
// Find files matching the document
|
||||
Collection<File> filesOfDocument = CollectionUtils.select(filesList, file -> file.getDocumentId().equals(documentDto.getId()));
|
||||
for (File fileDb : filesOfDocument) {
|
||||
filesArrayBuilder.add(RestUtil.fileToJsonObjectBuilder(fileDb));
|
||||
}
|
||||
|
@ -1,3 +1,3 @@
|
||||
api.current_version=${project.version}
|
||||
api.min_version=1.0
|
||||
db.version=29
|
||||
db.version=30
|
||||
|
Loading…
Reference in New Issue
Block a user