diff --git a/docs-core/src/main/java/com/sismics/docs/core/dao/lucene/LuceneDao.java b/docs-core/src/main/java/com/sismics/docs/core/dao/lucene/LuceneDao.java index 8e9f4675..e5a12b5f 100644 --- a/docs-core/src/main/java/com/sismics/docs/core/dao/lucene/LuceneDao.java +++ b/docs-core/src/main/java/com/sismics/docs/core/dao/lucene/LuceneDao.java @@ -10,8 +10,6 @@ import java.util.Set; import org.apache.lucene.document.Field; import org.apache.lucene.document.StringField; import org.apache.lucene.document.TextField; -import org.apache.lucene.index.DirectoryReader; -import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.Term; import org.apache.lucene.queries.TermsFilter; @@ -23,8 +21,6 @@ import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocs; import org.apache.lucene.util.Version; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import com.sismics.docs.core.model.context.AppContext; import com.sismics.docs.core.model.jpa.Document; @@ -38,11 +34,6 @@ import com.sismics.docs.core.util.LuceneUtil.LuceneRunnable; * @author bgamard */ public class LuceneDao { - /** - * Logger. - */ - private static final Logger log = LoggerFactory.getLogger(LuceneDao.class); - /** * Destroy and rebuild index. 
* @@ -178,17 +169,12 @@ public class LuceneDao { TermsFilter userFilter = new TermsFilter(terms); // Search - Set documentIdList = new HashSet(); - if (!DirectoryReader.indexExists(AppContext.getInstance().getLuceneDirectory())) { - log.warn("Lucene directory not yet initialized"); - return documentIdList; - } - IndexReader reader = DirectoryReader.open(AppContext.getInstance().getLuceneDirectory()); - IndexSearcher searcher = new IndexSearcher(reader); + IndexSearcher searcher = new IndexSearcher(AppContext.getInstance().getIndexingService().getDirectoryReader()); TopDocs topDocs = searcher.search(query, userFilter, Integer.MAX_VALUE); ScoreDoc[] docs = topDocs.scoreDocs; // Extract document IDs + Set documentIdList = new HashSet(); for (int i = 0; i < docs.length; i++) { org.apache.lucene.document.Document document = searcher.doc(docs[i].doc); String type = document.get("type"); @@ -201,8 +187,6 @@ public class LuceneDao { documentIdList.add(documentId); } - reader.close(); - return documentIdList; } diff --git a/docs-core/src/main/java/com/sismics/docs/core/listener/async/FileDeletedAsyncListener.java b/docs-core/src/main/java/com/sismics/docs/core/listener/async/FileDeletedAsyncListener.java index 3a08c984..5df9caf1 100644 --- a/docs-core/src/main/java/com/sismics/docs/core/listener/async/FileDeletedAsyncListener.java +++ b/docs-core/src/main/java/com/sismics/docs/core/listener/async/FileDeletedAsyncListener.java @@ -1,11 +1,15 @@ package com.sismics.docs.core.listener.async; +import java.nio.file.Paths; + import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.google.common.eventbus.Subscribe; import com.sismics.docs.core.dao.lucene.LuceneDao; import com.sismics.docs.core.event.FileDeletedAsyncEvent; +import com.sismics.docs.core.model.jpa.File; +import com.sismics.docs.core.util.DirectoryUtil; /** * Listener on file deleted. 
@@ -30,10 +34,20 @@ public class FileDeletedAsyncListener { log.info("File deleted event: " + fileDeletedAsyncEvent.toString()); } - // TODO Delete the file from storage + // Delete the file from storage + File file = fileDeletedAsyncEvent.getFile(); + java.io.File thumbnailFile = Paths.get(DirectoryUtil.getStorageDirectory().getPath(), file.getId() + "_thumb").toFile(); + java.io.File storedFile = Paths.get(DirectoryUtil.getStorageDirectory().getPath(), file.getId()).toFile(); + + if (thumbnailFile.exists()) { + thumbnailFile.delete(); + } + if (storedFile.exists()) { + storedFile.delete(); + } // Update Lucene index LuceneDao luceneDao = new LuceneDao(); - luceneDao.deleteDocument(fileDeletedAsyncEvent.getFile().getId()); + luceneDao.deleteDocument(file.getId()); } } diff --git a/docs-core/src/main/java/com/sismics/docs/core/model/context/AppContext.java b/docs-core/src/main/java/com/sismics/docs/core/model/context/AppContext.java index 24b842fd..f95ec2e5 100644 --- a/docs-core/src/main/java/com/sismics/docs/core/model/context/AppContext.java +++ b/docs-core/src/main/java/com/sismics/docs/core/model/context/AppContext.java @@ -7,8 +7,6 @@ import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.TimeUnit; -import org.apache.lucene.store.Directory; - import com.google.common.eventbus.AsyncEventBus; import com.google.common.eventbus.EventBus; import com.sismics.docs.core.constant.ConfigType; @@ -51,11 +49,6 @@ public class AppContext { */ private IndexingService indexingService; - /** - * Lucene directory. - */ - private Directory luceneDirectory; - /** * Asynchronous executors. */ @@ -71,8 +64,6 @@ public class AppContext { Config luceneStorageConfig = configDao.getById(ConfigType.LUCENE_DIRECTORY_STORAGE); indexingService = new IndexingService(luceneStorageConfig != null ? 
luceneStorageConfig.getValue() : null); indexingService.startAndWait(); - - luceneDirectory = indexingService.getDirectory(); } /** @@ -165,20 +156,11 @@ public class AppContext { } /** - * Getter of feedService. + * Getter of indexingService. * - * @return feedService + * @return indexingService */ public IndexingService getIndexingService() { return indexingService; } - - /** - * Getter of- luceneDirectory. - * - * @return the luceneDirectory - */ - public Directory getLuceneDirectory() { - return luceneDirectory; - } } diff --git a/docs-core/src/main/java/com/sismics/docs/core/service/IndexingService.java b/docs-core/src/main/java/com/sismics/docs/core/service/IndexingService.java index e6def46f..af744562 100644 --- a/docs-core/src/main/java/com/sismics/docs/core/service/IndexingService.java +++ b/docs-core/src/main/java/com/sismics/docs/core/service/IndexingService.java @@ -4,6 +4,7 @@ import java.io.File; import java.io.IOException; import java.util.concurrent.TimeUnit; +import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.store.Directory; import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.store.SimpleFSDirectory; @@ -34,6 +35,11 @@ public class IndexingService extends AbstractScheduledService { */ private Directory directory; + /** + * Directory reader. + */ + private DirectoryReader directoryReader; + /** * Lucene storage config. 
*/ @@ -62,10 +68,16 @@ public class IndexingService extends AbstractScheduledService { @Override protected void shutDown() { - Directory luceneIndex = AppContext.getInstance().getLuceneDirectory(); - if (luceneIndex != null) { + if (directoryReader != null) { try { - luceneIndex.close(); + directoryReader.close(); + } catch (IOException e) { + log.error("Error closing the index reader", e); + } + } + if (directory != null) { + try { + directory.close(); } catch (IOException e) { log.error("Error closing Lucene index", e); } @@ -105,4 +117,36 @@ public class IndexingService extends AbstractScheduledService { public Directory getDirectory() { return directory; } + + /** + * Returns a directory reader on the current state of the index. + * Takes care of reopening the reader if the index has changed + * and closing the previous one. + * + * @return the directoryReader, or null if the index does not exist yet or could not be opened + */ + public DirectoryReader getDirectoryReader() { + if (directoryReader == null) { + if (!DirectoryReader.indexExists(directory)) { + log.info("Lucene directory not yet created"); + return null; + } + try { + directoryReader = DirectoryReader.open(directory); + } catch (IOException e) { + log.error("Error creating the directory reader", e); + } + } else { + try { + DirectoryReader newReader = DirectoryReader.openIfChanged(directoryReader); + if (newReader != null) { + directoryReader.close(); + directoryReader = newReader; + } + } catch (IOException e) { + log.error("Error while reopening the directory reader", e); + } + } + return directoryReader; + } } diff --git a/docs-core/src/main/java/com/sismics/docs/core/util/LuceneUtil.java b/docs-core/src/main/java/com/sismics/docs/core/util/LuceneUtil.java index 9f33bcd2..0aba6e34 100644 --- a/docs-core/src/main/java/com/sismics/docs/core/util/LuceneUtil.java +++ b/docs-core/src/main/java/com/sismics/docs/core/util/LuceneUtil.java @@ -37,7 +37,7 @@ public class LuceneUtil { config.setMergeScheduler(new SerialMergeScheduler()); // Creating index writer - Directory directory =
AppContext.getInstance().getLuceneDirectory(); + Directory directory = AppContext.getInstance().getIndexingService().getDirectory(); IndexWriter indexWriter = null; try { indexWriter = new IndexWriter(directory, config); diff --git a/docs-parent/TODO b/docs-parent/TODO index d0460833..b4b67624 100644 --- a/docs-parent/TODO +++ b/docs-parent/TODO @@ -1,7 +1,6 @@ - New image rescale between thumbnail and original (client/server) - Batch to regenerate all thumbnails (server) -- Delete files on FS when a file in database is deleted (server) -- Batch to delete unused files on FS (server) - Special criteria to search inside OCR-ed content (eg. full:uranium) (server) - Special criteria to search on a specific time span (eg. at:2013-06) (server) -- Show help on special criterias (client) \ No newline at end of file +- Show help on special criterias (client) +- Disable Add/Edit button while uploading (client) \ No newline at end of file diff --git a/docs-web/src/main/java/com/sismics/docs/rest/resource/AppResource.java b/docs-web/src/main/java/com/sismics/docs/rest/resource/AppResource.java index 655fbdbe..98cbd081 100644 --- a/docs-web/src/main/java/com/sismics/docs/rest/resource/AppResource.java +++ b/docs-web/src/main/java/com/sismics/docs/rest/resource/AppResource.java @@ -1,7 +1,9 @@ package com.sismics.docs.rest.resource; import java.util.ArrayList; +import java.util.HashMap; import java.util.List; +import java.util.Map; import java.util.ResourceBundle; import javax.ws.rs.GET; @@ -19,11 +21,14 @@ import org.codehaus.jettison.json.JSONException; import org.codehaus.jettison.json.JSONObject; import com.sismics.docs.core.dao.jpa.DocumentDao; +import com.sismics.docs.core.dao.jpa.FileDao; import com.sismics.docs.core.dao.jpa.criteria.DocumentCriteria; import com.sismics.docs.core.dao.jpa.dto.DocumentDto; import com.sismics.docs.core.event.OcrFileAsyncEvent; import com.sismics.docs.core.model.context.AppContext; +import com.sismics.docs.core.model.jpa.File; import 
com.sismics.docs.core.util.ConfigUtil; +import com.sismics.docs.core.util.DirectoryUtil; import com.sismics.docs.core.util.jpa.PaginatedList; import com.sismics.docs.core.util.jpa.PaginatedLists; import com.sismics.docs.core.util.jpa.SortCriteria; @@ -177,12 +182,53 @@ public class AppResource extends BaseResource { } checkBaseFunction(BaseFunction.ADMIN); - JSONObject response = new JSONObject(); try { AppContext.getInstance().getIndexingService().rebuildIndex(); } catch (Exception e) { throw new ServerException("IndexingError", "Error rebuilding index", e); } + + JSONObject response = new JSONObject(); + response.put("status", "ok"); + return Response.ok().entity(response).build(); + } + + /** + * Clean the storage directory: delete every stored file or thumbnail that matches no file in the database. + * + * @return Response + * @throws JSONException + */ + @POST + @Path("batch/clean_storage") + @Produces(MediaType.APPLICATION_JSON) + public Response batchCleanStorage() throws JSONException { + if (!authenticate()) { + throw new ForbiddenClientException(); + } + checkBaseFunction(BaseFunction.ADMIN); + + // Get all files + FileDao fileDao = new FileDao(); + List fileList = fileDao.findAll(); + Map fileMap = new HashMap<>(); + for (File file : fileList) { + fileMap.put(file.getId(), file); + } + + // Check if each stored file is valid + java.io.File[] storedFileList = DirectoryUtil.getStorageDirectory().listFiles(); + for (java.io.File storedFile : storedFileList) { + String fileName = storedFile.getName(); + if (fileName.endsWith("_thumb")) { + fileName = fileName.replace("_thumb", ""); + } + if (!fileMap.containsKey(fileName)) { + storedFile.delete(); + } + } + + JSONObject response = new JSONObject(); response.put("status", "ok"); return Response.ok().entity(response).build(); } diff --git a/docs-web/src/test/java/com/sismics/docs/rest/TestAppResource.java b/docs-web/src/test/java/com/sismics/docs/rest/TestAppResource.java index c7dad34b..92600e29 100644 --- a/docs-web/src/test/java/com/sismics/docs/rest/TestAppResource.java +++
b/docs-web/src/test/java/com/sismics/docs/rest/TestAppResource.java @@ -55,6 +55,12 @@ public class TestAppResource extends BaseJerseyTest { appResource.addFilter(new CookieAuthenticationFilter(adminAuthenticationToken)); response = appResource.post(ClientResponse.class); Assert.assertEquals(Status.OK, Status.fromStatusCode(response.getStatus())); + + // Clean storage + appResource = resource().path("/app/batch/clean_storage"); + appResource.addFilter(new CookieAuthenticationFilter(adminAuthenticationToken)); + response = appResource.post(ClientResponse.class); + Assert.assertEquals(Status.OK, Status.fromStatusCode(response.getStatus())); } /** diff --git a/docs-web/src/test/java/com/sismics/docs/rest/TestDocumentResource.java b/docs-web/src/test/java/com/sismics/docs/rest/TestDocumentResource.java index 6a449868..7c75eff3 100644 --- a/docs-web/src/test/java/com/sismics/docs/rest/TestDocumentResource.java +++ b/docs-web/src/test/java/com/sismics/docs/rest/TestDocumentResource.java @@ -284,6 +284,15 @@ public class TestDocumentResource extends BaseJerseyTest { json = response.getEntity(JSONObject.class); Assert.assertEquals(document1Id, json.getString("id")); + // Search documents by query + documentResource = resource().path("/document/list"); + documentResource.addFilter(new CookieAuthenticationFilter(document1Token)); + getParams = new MultivaluedMapImpl(); + getParams.putSingle("search", "super"); + response = documentResource.queryParams(getParams).get(ClientResponse.class); + json = response.getEntity(JSONObject.class); + Assert.assertEquals(Status.OK, Status.fromStatusCode(response.getStatus())); + // Get a document documentResource = resource().path("/document/" + document1Id); documentResource.addFilter(new CookieAuthenticationFilter(document1Token)); diff --git a/docs-web/src/test/java/com/sismics/docs/rest/TestFileResource.java b/docs-web/src/test/java/com/sismics/docs/rest/TestFileResource.java index fe6dc25a..556e884a 100644 --- 
a/docs-web/src/test/java/com/sismics/docs/rest/TestFileResource.java +++ b/docs-web/src/test/java/com/sismics/docs/rest/TestFileResource.java @@ -2,6 +2,7 @@ package com.sismics.docs.rest; import java.io.BufferedInputStream; import java.io.InputStream; +import java.nio.file.Paths; import javax.ws.rs.core.MediaType; @@ -12,6 +13,7 @@ import org.codehaus.jettison.json.JSONObject; import org.junit.Test; import com.google.common.io.ByteStreams; +import com.sismics.docs.core.util.DirectoryUtil; import com.sismics.docs.rest.filter.CookieAuthenticationFilter; import com.sun.jersey.api.client.ClientResponse; import com.sun.jersey.api.client.ClientResponse.Status; @@ -145,6 +147,12 @@ public class TestFileResource extends BaseJerseyTest { json = response.getEntity(JSONObject.class); Assert.assertEquals("ok", json.getString("status")); + // Check that files are deleted from FS + java.io.File thumbnailFile = Paths.get(DirectoryUtil.getStorageDirectory().getPath(), file1Id + "_thumb").toFile(); + java.io.File storedFile = Paths.get(DirectoryUtil.getStorageDirectory().getPath(), file1Id).toFile(); + Assert.assertFalse(thumbnailFile.exists()); + Assert.assertFalse(storedFile.exists()); + // Get all files from a document fileResource = resource().path("/file/list"); fileResource.addFilter(new CookieAuthenticationFilter(file1AuthenticationToken));