From a89543b55591a6a71dcc06d03ea2f44294936748 Mon Sep 17 00:00:00 2001 From: Julien Kirch Date: Sun, 8 Oct 2023 22:07:01 +0200 Subject: [PATCH] Make search for documents faster for large dataset (#698) --- .../com/sismics/docs/core/dao/FileDao.java | 23 ++++++- .../util/indexing/LuceneIndexingHandler.java | 47 +++++++------ .../src/main/resources/config.properties | 2 +- .../resources/db/update/dbupdate-030-0.sql | 2 + docs-web/src/dev/resources/config.properties | 2 +- .../docs/rest/resource/DocumentResource.java | 69 +++++++++++++++---- docs-web/src/prod/resources/config.properties | 2 +- 7 files changed, 109 insertions(+), 38 deletions(-) create mode 100644 docs-core/src/main/resources/db/update/dbupdate-030-0.sql diff --git a/docs-core/src/main/java/com/sismics/docs/core/dao/FileDao.java b/docs-core/src/main/java/com/sismics/docs/core/dao/FileDao.java index 97d47e2c..b66fbaf8 100644 --- a/docs-core/src/main/java/com/sismics/docs/core/dao/FileDao.java +++ b/docs-core/src/main/java/com/sismics/docs/core/dao/FileDao.java @@ -4,13 +4,16 @@ import com.sismics.docs.core.constant.AuditLogType; import com.sismics.docs.core.model.jpa.File; import com.sismics.docs.core.util.AuditLogUtil; import com.sismics.util.context.ThreadLocalContext; - import jakarta.persistence.EntityManager; import jakarta.persistence.NoResultException; +import jakarta.persistence.Query; import jakarta.persistence.TypedQuery; + import java.util.Collections; import java.util.Date; +import java.util.HashMap; import java.util.List; +import java.util.Map; import java.util.UUID; /** @@ -213,6 +216,24 @@ public class FileDao { return q.getResultList(); } + /** + * Get files count by documents IDs. 
+ * + * @param documentIds Documents IDs + * @return the number of files per document id + */ + public Map<String, Long> countByDocumentsIds(Iterable<String> documentIds) { + EntityManager em = ThreadLocalContext.get().getEntityManager(); + Query q = em.createQuery("select f.documentId, count(*) from File f where f.documentId in :documentIds and f.latestVersion = true and f.deleteDate is null group by (f.documentId)"); + q.setParameter("documentIds", documentIds); + Map<String, Long> result = new HashMap<>(); + q.getResultList().forEach(o -> { + Object[] resultLine = (Object[]) o; + result.put((String) resultLine[0], (Long) resultLine[1]); + }); + return result; + } + /** * Get all files from a version. * diff --git a/docs-core/src/main/java/com/sismics/docs/core/util/indexing/LuceneIndexingHandler.java b/docs-core/src/main/java/com/sismics/docs/core/util/indexing/LuceneIndexingHandler.java index bdb0f030..27a33547 100644 --- a/docs-core/src/main/java/com/sismics/docs/core/util/indexing/LuceneIndexingHandler.java +++ b/docs-core/src/main/java/com/sismics/docs/core/util/indexing/LuceneIndexingHandler.java @@ -26,9 +26,18 @@ import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Field; import org.apache.lucene.document.StringField; import org.apache.lucene.document.TextField; -import org.apache.lucene.index.*; +import org.apache.lucene.index.CheckIndex; +import org.apache.lucene.index.ConcurrentMergeScheduler; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.Term; import org.apache.lucene.queryparser.simple.SimpleQueryParser; -import org.apache.lucene.search.*; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.TopDocs; import
org.apache.lucene.search.highlight.Highlighter; import org.apache.lucene.search.highlight.QueryScorer; import org.apache.lucene.search.highlight.SimpleHTMLEncoder; @@ -47,7 +56,12 @@ import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; import java.sql.Timestamp; -import java.util.*; +import java.util.ArrayList; +import java.util.Comparator; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.UUID; /** * Lucene indexing handler. @@ -242,32 +256,27 @@ public class LuceneIndexingHandler implements IndexingHandler { StringBuilder sb = new StringBuilder("select distinct d.DOC_ID_C c0, d.DOC_TITLE_C c1, d.DOC_DESCRIPTION_C c2, d.DOC_CREATEDATE_D c3, d.DOC_LANGUAGE_C c4, d.DOC_IDFILE_C, "); sb.append(" s.count c5, "); - sb.append(" f.count c6, "); sb.append(" rs2.RTP_ID_C c7, rs2.RTP_NAME_C, d.DOC_UPDATEDATE_D c8 "); sb.append(" from T_DOCUMENT d "); sb.append(" left join (SELECT count(s.SHA_ID_C) count, ac.ACL_SOURCEID_C " + " FROM T_SHARE s, T_ACL ac " + " WHERE ac.ACL_TARGETID_C = s.SHA_ID_C AND ac.ACL_DELETEDATE_D IS NULL AND " + - " s.SHA_DELETEDATE_D IS NULL group by ac.ACL_SOURCEID_C) s on s.ACL_SOURCEID_C = d.DOC_ID_C " + - " left join (SELECT count(f.FIL_ID_C) count, f.FIL_IDDOC_C " + - " FROM T_FILE f " + - " WHERE f.FIL_DELETEDATE_D is null group by f.FIL_IDDOC_C) f on f.FIL_IDDOC_C = d.DOC_ID_C "); + " s.SHA_DELETEDATE_D IS NULL group by ac.ACL_SOURCEID_C) s on s.ACL_SOURCEID_C = d.DOC_ID_C "); sb.append(" left join (select rs.*, rs3.idDocument " + "from T_ROUTE_STEP rs " + "join (select r.RTE_IDDOCUMENT_C idDocument, rs.RTP_IDROUTE_C idRoute, min(rs.RTP_ORDER_N) minOrder from T_ROUTE_STEP rs join T_ROUTE r on r.RTE_ID_C = rs.RTP_IDROUTE_C and r.RTE_DELETEDATE_D is null where rs.RTP_DELETEDATE_D is null and rs.RTP_ENDDATE_D is null group by rs.RTP_IDROUTE_C, r.RTE_IDDOCUMENT_C) rs3 on rs.RTP_IDROUTE_C = rs3.idRoute and rs.RTP_ORDER_N = rs3.minOrder " + "where rs.RTP_IDTARGET_C in 
(:targetIdList)) rs2 on rs2.idDocument = d.DOC_ID_C "); // Add search criterias - if (criteria.getTargetIdList() != null) { - if (!SecurityUtil.skipAclCheck(criteria.getTargetIdList())) { - // Read permission is enough for searching - sb.append(" left join T_ACL a on a.ACL_TARGETID_C in (:targetIdList) and a.ACL_SOURCEID_C = d.DOC_ID_C and a.ACL_PERM_C = 'READ' and a.ACL_DELETEDATE_D is null "); - sb.append(" left join T_DOCUMENT_TAG dta on dta.DOT_IDDOCUMENT_C = d.DOC_ID_C and dta.DOT_DELETEDATE_D is null "); - sb.append(" left join T_ACL a2 on a2.ACL_TARGETID_C in (:targetIdList) and a2.ACL_SOURCEID_C = dta.DOT_IDTAG_C and a2.ACL_PERM_C = 'READ' and a2.ACL_DELETEDATE_D is null "); - criteriaList.add("(a.ACL_ID_C is not null or a2.ACL_ID_C is not null)"); - } - parameterMap.put("targetIdList", criteria.getTargetIdList()); + if (!SecurityUtil.skipAclCheck(criteria.getTargetIdList())) { + // Read permission is enough for searching + sb.append(" left join T_ACL a on a.ACL_TARGETID_C in (:targetIdList) and a.ACL_SOURCEID_C = d.DOC_ID_C and a.ACL_PERM_C = 'READ' and a.ACL_DELETEDATE_D is null "); + sb.append(" left join T_DOCUMENT_TAG dta on dta.DOT_IDDOCUMENT_C = d.DOC_ID_C and dta.DOT_DELETEDATE_D is null "); + sb.append(" left join T_ACL a2 on a2.ACL_TARGETID_C in (:targetIdList) and a2.ACL_SOURCEID_C = dta.DOT_IDTAG_C and a2.ACL_PERM_C = 'READ' and a2.ACL_DELETEDATE_D is null "); + criteriaList.add("(a.ACL_ID_C is not null or a2.ACL_ID_C is not null)"); } + parameterMap.put("targetIdList", criteria.getTargetIdList()); + if (!Strings.isNullOrEmpty(criteria.getSearch()) || !Strings.isNullOrEmpty(criteria.getFullSearch())) { documentSearchMap = search(criteria.getSearch(), criteria.getFullSearch()); if (documentSearchMap.isEmpty()) { @@ -312,7 +321,7 @@ public class LuceneIndexingHandler implements IndexingHandler { criteriaList.add("(" + Joiner.on(" OR ").join(tagCriteriaList) + ")"); } } - if (criteria.getExcludedTagIdList() != null && 
!criteria.getExcludedTagIdList().isEmpty()) { + if (!criteria.getExcludedTagIdList().isEmpty()) { int index = 0; for (List tagIdList : criteria.getExcludedTagIdList()) { List tagCriteriaList = Lists.newArrayList(); @@ -367,8 +376,6 @@ public class LuceneIndexingHandler implements IndexingHandler { documentDto.setFileId((String) o[i++]); Number shareCount = (Number) o[i++]; documentDto.setShared(shareCount != null && shareCount.intValue() > 0); - Number fileCount = (Number) o[i++]; - documentDto.setFileCount(fileCount == null ? 0 : fileCount.intValue()); documentDto.setActiveRoute(o[i++] != null); documentDto.setCurrentStepName((String) o[i++]); documentDto.setUpdateTimestamp(((Timestamp) o[i]).getTime()); diff --git a/docs-core/src/main/resources/config.properties b/docs-core/src/main/resources/config.properties index 1af340d6..435fb302 100644 --- a/docs-core/src/main/resources/config.properties +++ b/docs-core/src/main/resources/config.properties @@ -1 +1 @@ -db.version=29 +db.version=30 diff --git a/docs-core/src/main/resources/db/update/dbupdate-030-0.sql b/docs-core/src/main/resources/db/update/dbupdate-030-0.sql new file mode 100644 index 00000000..be80c0ef --- /dev/null +++ b/docs-core/src/main/resources/db/update/dbupdate-030-0.sql @@ -0,0 +1,2 @@ +create index IDX_FIL_IDDOC_C ON T_FILE (FIL_IDDOC_C ASC); +update T_CONFIG set CFG_VALUE_C = '30' where CFG_ID_C = 'DB_VERSION'; diff --git a/docs-web/src/dev/resources/config.properties b/docs-web/src/dev/resources/config.properties index 6e92028f..37e03ad0 100644 --- a/docs-web/src/dev/resources/config.properties +++ b/docs-web/src/dev/resources/config.properties @@ -1,3 +1,3 @@ api.current_version=${project.version} api.min_version=1.0 -db.version=29 +db.version=30 diff --git a/docs-web/src/main/java/com/sismics/docs/rest/resource/DocumentResource.java b/docs-web/src/main/java/com/sismics/docs/rest/resource/DocumentResource.java index d0fb99e3..2d897ad8 100644 --- 
a/docs-web/src/main/java/com/sismics/docs/rest/resource/DocumentResource.java +++ b/docs-web/src/main/java/com/sismics/docs/rest/resource/DocumentResource.java @@ -7,10 +7,22 @@ import com.sismics.docs.core.constant.AclType; import com.sismics.docs.core.constant.ConfigType; import com.sismics.docs.core.constant.Constants; import com.sismics.docs.core.constant.PermType; -import com.sismics.docs.core.dao.*; +import com.sismics.docs.core.dao.AclDao; +import com.sismics.docs.core.dao.ContributorDao; +import com.sismics.docs.core.dao.DocumentDao; +import com.sismics.docs.core.dao.FileDao; +import com.sismics.docs.core.dao.RelationDao; +import com.sismics.docs.core.dao.RouteStepDao; +import com.sismics.docs.core.dao.TagDao; +import com.sismics.docs.core.dao.UserDao; import com.sismics.docs.core.dao.criteria.DocumentCriteria; import com.sismics.docs.core.dao.criteria.TagCriteria; -import com.sismics.docs.core.dao.dto.*; +import com.sismics.docs.core.dao.dto.AclDto; +import com.sismics.docs.core.dao.dto.ContributorDto; +import com.sismics.docs.core.dao.dto.DocumentDto; +import com.sismics.docs.core.dao.dto.RelationDto; +import com.sismics.docs.core.dao.dto.RouteStepDto; +import com.sismics.docs.core.dao.dto.TagDto; import com.sismics.docs.core.event.DocumentCreatedAsyncEvent; import com.sismics.docs.core.event.DocumentDeletedAsyncEvent; import com.sismics.docs.core.event.DocumentUpdatedAsyncEvent; @@ -38,6 +50,21 @@ import com.sismics.util.EmailUtil; import com.sismics.util.JsonUtil; import com.sismics.util.context.ThreadLocalContext; import com.sismics.util.mime.MimeType; +import jakarta.json.Json; +import jakarta.json.JsonArrayBuilder; +import jakarta.json.JsonObjectBuilder; +import jakarta.ws.rs.Consumes; +import jakarta.ws.rs.DELETE; +import jakarta.ws.rs.FormParam; +import jakarta.ws.rs.GET; +import jakarta.ws.rs.NotFoundException; +import jakarta.ws.rs.POST; +import jakarta.ws.rs.PUT; +import jakarta.ws.rs.Path; +import jakarta.ws.rs.PathParam; +import 
jakarta.ws.rs.QueryParam; +import jakarta.ws.rs.core.Response; +import jakarta.ws.rs.core.StreamingOutput; import org.apache.commons.collections4.CollectionUtils; import org.apache.commons.lang3.StringUtils; import org.glassfish.jersey.media.multipart.FormDataBodyPart; @@ -48,22 +75,25 @@ import org.joda.time.format.DateTimeFormatter; import org.joda.time.format.DateTimeFormatterBuilder; import org.joda.time.format.DateTimeParser; -import jakarta.json.Json; -import jakarta.json.JsonArrayBuilder; -import jakarta.json.JsonObjectBuilder; import javax.mail.Message; import javax.mail.MessagingException; import javax.mail.Session; import javax.mail.internet.MimeMessage; -import jakarta.ws.rs.*; -import jakarta.ws.rs.core.Response; -import jakarta.ws.rs.core.StreamingOutput; import java.io.IOException; import java.io.InputStream; import java.nio.file.Files; import java.nio.file.StandardCopyOption; import java.text.MessageFormat; -import java.util.*; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.Date; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Properties; +import java.util.Set; +import java.util.UUID; /** * Document REST resources. 
@@ -443,11 +473,14 @@ public class DocumentResource extends BaseResource { } // Find the files of the documents + Iterable<String> documentsIds = CollectionUtils.collect(paginatedList.getResultList(), DocumentDto::getId); + FileDao fileDao = new FileDao(); List<File> filesList = null; + Map<String, Long> filesCountByDocument = null; if (Boolean.TRUE == files) { - Iterable<String> documentsIds = CollectionUtils.collect(paginatedList.getResultList(), DocumentDto::getId); - FileDao fileDao = new FileDao(); filesList = fileDao.getByDocumentsIds(documentsIds); + } else { + filesCountByDocument = fileDao.countByDocumentsIds(documentsIds); } for (DocumentDto documentDto : paginatedList.getResultList()) { @@ -463,6 +496,16 @@ public class DocumentResource extends BaseResource { .add("color", tagDto.getColor())); } + Long filesCount; + Collection<File> filesOfDocument = null; + if (Boolean.TRUE == files) { + // Find files matching the document + filesOfDocument = CollectionUtils.select(filesList, file -> file.getDocumentId().equals(documentDto.getId())); + filesCount = (long) filesOfDocument.size(); + } else { + filesCount = filesCountByDocument.getOrDefault(documentDto.getId(), 0L); + } + JsonObjectBuilder documentObjectBuilder = Json.createObjectBuilder() .add("id", documentDto.getId()) .add("highlight", JsonUtil.nullable(documentDto.getHighlight())) @@ -475,12 +518,10 @@ public class DocumentResource extends BaseResource { .add("shared", documentDto.getShared()) .add("active_route", documentDto.isActiveRoute()) .add("current_step_name", JsonUtil.nullable(documentDto.getCurrentStepName())) - .add("file_count", documentDto.getFileCount()) + .add("file_count", filesCount) .add("tags", tags); if (Boolean.TRUE == files) { JsonArrayBuilder filesArrayBuilder = Json.createArrayBuilder(); - // Find files matching the document - Collection<File> filesOfDocument = CollectionUtils.select(filesList, file -> file.getDocumentId().equals(documentDto.getId())); for (File fileDb : filesOfDocument) {
filesArrayBuilder.add(RestUtil.fileToJsonObjectBuilder(fileDb)); } diff --git a/docs-web/src/prod/resources/config.properties b/docs-web/src/prod/resources/config.properties index 6e92028f..37e03ad0 100644 --- a/docs-web/src/prod/resources/config.properties +++ b/docs-web/src/prod/resources/config.properties @@ -1,3 +1,3 @@ api.current_version=${project.version} api.min_version=1.0 -db.version=29 +db.version=30