mirror of
https://github.com/sismics/docs.git
synced 2024-11-25 15:17:57 +01:00
Document language (server), OCR files and store result in database
This commit is contained in:
parent
70a86dc86f
commit
1f1f02ed41
@ -117,6 +117,22 @@
|
|||||||
<artifactId>imgscalr-lib</artifactId>
|
<artifactId>imgscalr-lib</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
<!-- OCR dependencies -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>jna</groupId>
|
||||||
|
<artifactId>jna</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>jai</groupId>
|
||||||
|
<artifactId>imageio</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>tess4j</groupId>
|
||||||
|
<artifactId>tess4j</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
<!-- Test dependencies -->
|
<!-- Test dependencies -->
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>junit</groupId>
|
<groupId>junit</groupId>
|
||||||
|
@ -1,5 +1,9 @@
|
|||||||
package com.sismics.docs.core.constant;
|
package com.sismics.docs.core.constant;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import com.google.common.collect.Lists;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Application constants.
|
* Application constants.
|
||||||
*
|
*
|
||||||
@ -40,4 +44,9 @@ public class Constants {
|
|||||||
* Default generic user role.
|
* Default generic user role.
|
||||||
*/
|
*/
|
||||||
public static final String DEFAULT_USER_ROLE = "user";
|
public static final String DEFAULT_USER_ROLE = "user";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Supported document languages.
|
||||||
|
*/
|
||||||
|
public static final List<String> SUPPORTED_LANGUAGES = Lists.newArrayList("eng", "fra");
|
||||||
}
|
}
|
||||||
|
@ -123,7 +123,7 @@ public class DocumentDao {
|
|||||||
Map<String, Object> parameterMap = new HashMap<String, Object>();
|
Map<String, Object> parameterMap = new HashMap<String, Object>();
|
||||||
List<String> criteriaList = new ArrayList<String>();
|
List<String> criteriaList = new ArrayList<String>();
|
||||||
|
|
||||||
StringBuilder sb = new StringBuilder("select d.DOC_ID_C c0, d.DOC_TITLE_C c1, d.DOC_DESCRIPTION_C c2, d.DOC_CREATEDATE_D c3, s.SHA_ID_C is not null c4 ");
|
StringBuilder sb = new StringBuilder("select d.DOC_ID_C c0, d.DOC_TITLE_C c1, d.DOC_DESCRIPTION_C c2, d.DOC_CREATEDATE_D c3, d.DOC_LANGUAGE_C c4, s.SHA_ID_C is not null c5 ");
|
||||||
sb.append(" from T_DOCUMENT d ");
|
sb.append(" from T_DOCUMENT d ");
|
||||||
sb.append(" left join T_SHARE s on s.SHA_IDDOCUMENT_C = d.DOC_ID_C and s.SHA_DELETEDATE_D is null ");
|
sb.append(" left join T_SHARE s on s.SHA_IDDOCUMENT_C = d.DOC_ID_C and s.SHA_DELETEDATE_D is null ");
|
||||||
|
|
||||||
@ -156,6 +156,10 @@ public class DocumentDao {
|
|||||||
if (criteria.getShared() != null && criteria.getShared()) {
|
if (criteria.getShared() != null && criteria.getShared()) {
|
||||||
criteriaList.add("s.SHA_ID_C is not null");
|
criteriaList.add("s.SHA_ID_C is not null");
|
||||||
}
|
}
|
||||||
|
if (criteria.getLanguage() != null) {
|
||||||
|
criteriaList.add("d.DOC_LANGUAGE_C = :language");
|
||||||
|
parameterMap.put("language", criteria.getLanguage());
|
||||||
|
}
|
||||||
|
|
||||||
criteriaList.add("d.DOC_DELETEDATE_D is null");
|
criteriaList.add("d.DOC_DELETEDATE_D is null");
|
||||||
|
|
||||||
@ -177,6 +181,7 @@ public class DocumentDao {
|
|||||||
documentDto.setTitle((String) o[i++]);
|
documentDto.setTitle((String) o[i++]);
|
||||||
documentDto.setDescription((String) o[i++]);
|
documentDto.setDescription((String) o[i++]);
|
||||||
documentDto.setCreateTimestamp(((Timestamp) o[i++]).getTime());
|
documentDto.setCreateTimestamp(((Timestamp) o[i++]).getTime());
|
||||||
|
documentDto.setLanguage((String) o[i++]);
|
||||||
documentDto.setShared((Boolean) o[i++]);
|
documentDto.setShared((Boolean) o[i++]);
|
||||||
documentDtoList.add(documentDto);
|
documentDtoList.add(documentDto);
|
||||||
}
|
}
|
||||||
|
@ -1,14 +1,15 @@
|
|||||||
package com.sismics.docs.core.dao.jpa;
|
package com.sismics.docs.core.dao.jpa;
|
||||||
|
|
||||||
import com.sismics.docs.core.model.jpa.File;
|
import java.util.Date;
|
||||||
import com.sismics.util.context.ThreadLocalContext;
|
import java.util.List;
|
||||||
|
import java.util.UUID;
|
||||||
|
|
||||||
import javax.persistence.EntityManager;
|
import javax.persistence.EntityManager;
|
||||||
import javax.persistence.NoResultException;
|
import javax.persistence.NoResultException;
|
||||||
import javax.persistence.Query;
|
import javax.persistence.Query;
|
||||||
import java.util.Date;
|
|
||||||
import java.util.List;
|
import com.sismics.docs.core.model.jpa.File;
|
||||||
import java.util.UUID;
|
import com.sismics.util.context.ThreadLocalContext;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* File DAO.
|
* File DAO.
|
||||||
@ -66,6 +67,26 @@ public class FileDao {
|
|||||||
fileDb.setDeleteDate(dateNow);
|
fileDb.setDeleteDate(dateNow);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Updates the content of a file.
|
||||||
|
*
|
||||||
|
* @param file File to update
|
||||||
|
* @return Updated file
|
||||||
|
*/
|
||||||
|
public File updateContent(File file) {
|
||||||
|
EntityManager em = ThreadLocalContext.get().getEntityManager();
|
||||||
|
|
||||||
|
// Get the file
|
||||||
|
Query q = em.createQuery("select f from File f where f.id = :id and f.deleteDate is null");
|
||||||
|
q.setParameter("id", file.getId());
|
||||||
|
File fileFromDb = (File) q.getSingleResult();
|
||||||
|
|
||||||
|
// Update the user
|
||||||
|
fileFromDb.setContent(file.getContent());
|
||||||
|
|
||||||
|
return file;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gets a file by its ID.
|
* Gets a file by its ID.
|
||||||
*
|
*
|
||||||
|
@ -40,6 +40,11 @@ public class DocumentCriteria {
|
|||||||
*/
|
*/
|
||||||
private Boolean shared;
|
private Boolean shared;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Language.
|
||||||
|
*/
|
||||||
|
private String language;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Getter of userId.
|
* Getter of userId.
|
||||||
*
|
*
|
||||||
@ -147,4 +152,22 @@ public class DocumentCriteria {
|
|||||||
public void setShared(Boolean shared) {
|
public void setShared(Boolean shared) {
|
||||||
this.shared = shared;
|
this.shared = shared;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Getter of language.
|
||||||
|
*
|
||||||
|
* @return the language
|
||||||
|
*/
|
||||||
|
public String getLanguage() {
|
||||||
|
return language;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Setter of language.
|
||||||
|
*
|
||||||
|
* @param language language
|
||||||
|
*/
|
||||||
|
public void setLanguage(String language) {
|
||||||
|
this.language = language;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -24,6 +24,11 @@ public class DocumentDto {
|
|||||||
*/
|
*/
|
||||||
private String description;
|
private String description;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Language.
|
||||||
|
*/
|
||||||
|
private String language;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creation date.
|
* Creation date.
|
||||||
*/
|
*/
|
||||||
@ -123,4 +128,22 @@ public class DocumentDto {
|
|||||||
public void setShared(Boolean shared) {
|
public void setShared(Boolean shared) {
|
||||||
this.shared = shared;
|
this.shared = shared;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Getter of language.
|
||||||
|
*
|
||||||
|
* @return the language
|
||||||
|
*/
|
||||||
|
public String getLanguage() {
|
||||||
|
return language;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Setter of language.
|
||||||
|
*
|
||||||
|
* @param language language
|
||||||
|
*/
|
||||||
|
public void setLanguage(String language) {
|
||||||
|
this.language = language;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -0,0 +1,151 @@
|
|||||||
|
package com.sismics.docs.core.dao.lucene;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import org.apache.lucene.document.Field;
|
||||||
|
import org.apache.lucene.document.StringField;
|
||||||
|
import org.apache.lucene.document.TextField;
|
||||||
|
import org.apache.lucene.index.DirectoryReader;
|
||||||
|
import org.apache.lucene.index.IndexReader;
|
||||||
|
import org.apache.lucene.index.IndexWriter;
|
||||||
|
import org.apache.lucene.index.Term;
|
||||||
|
import org.apache.lucene.queries.TermsFilter;
|
||||||
|
import org.apache.lucene.queryparser.flexible.standard.QueryParserUtil;
|
||||||
|
import org.apache.lucene.queryparser.flexible.standard.StandardQueryParser;
|
||||||
|
import org.apache.lucene.search.BooleanClause.Occur;
|
||||||
|
import org.apache.lucene.search.BooleanQuery;
|
||||||
|
import org.apache.lucene.search.IndexSearcher;
|
||||||
|
import org.apache.lucene.search.Query;
|
||||||
|
import org.apache.lucene.search.ScoreDoc;
|
||||||
|
import org.apache.lucene.search.TopDocs;
|
||||||
|
import org.apache.lucene.util.Version;
|
||||||
|
|
||||||
|
import com.sismics.docs.core.model.context.AppContext;
|
||||||
|
import com.sismics.docs.core.model.jpa.File;
|
||||||
|
import com.sismics.docs.core.util.LuceneUtil;
|
||||||
|
import com.sismics.docs.core.util.LuceneUtil.LuceneRunnable;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Lucene DAO.
|
||||||
|
*
|
||||||
|
* @author bgamard
|
||||||
|
*/
|
||||||
|
public class LuceneDao {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Destroy and rebuild index.
|
||||||
|
*
|
||||||
|
* @param fileList
|
||||||
|
*/
|
||||||
|
public void rebuildIndex(final List<File> fileList) {
|
||||||
|
LuceneUtil.handle(new LuceneRunnable() {
|
||||||
|
@Override
|
||||||
|
public void run(IndexWriter indexWriter) throws Exception {
|
||||||
|
// Empty index
|
||||||
|
indexWriter.deleteAll();
|
||||||
|
|
||||||
|
// Add all files
|
||||||
|
for (File file : fileList) {
|
||||||
|
org.apache.lucene.document.Document document = getDocumentFromFile(file);
|
||||||
|
indexWriter.addDocument(document);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Add files to the index.
|
||||||
|
*
|
||||||
|
* @param fileList
|
||||||
|
*/
|
||||||
|
public void create(final List<File> fileList) {
|
||||||
|
LuceneUtil.handle(new LuceneRunnable() {
|
||||||
|
@Override
|
||||||
|
public void run(IndexWriter indexWriter) throws Exception {
|
||||||
|
// Add all files
|
||||||
|
for (File file : fileList) {
|
||||||
|
org.apache.lucene.document.Document document = getDocumentFromFile(file);
|
||||||
|
indexWriter.addDocument(document);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Update index.
|
||||||
|
*
|
||||||
|
* @param fileList File list
|
||||||
|
*/
|
||||||
|
public void update(final List<File> fileList) {
|
||||||
|
LuceneUtil.handle(new LuceneRunnable() {
|
||||||
|
@Override
|
||||||
|
public void run(IndexWriter indexWriter) throws Exception {
|
||||||
|
// Update all files
|
||||||
|
for (File file : fileList) {
|
||||||
|
org.apache.lucene.document.Document document = getDocumentFromFile(file);
|
||||||
|
indexWriter.updateDocument(new Term("id", file.getId()), document);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Search files.
|
||||||
|
*
|
||||||
|
* @param paginatedList
|
||||||
|
* @param feedList
|
||||||
|
* @param searchQuery
|
||||||
|
* @return List of file IDs
|
||||||
|
* @throws Exception
|
||||||
|
*/
|
||||||
|
public List<String> search(String userId, String searchQuery, int limit) throws Exception {
|
||||||
|
// Escape query and add quotes so QueryParser generate a PhraseQuery
|
||||||
|
searchQuery = "\"" + QueryParserUtil.escape(searchQuery) + "\"";
|
||||||
|
|
||||||
|
// Build search query
|
||||||
|
StandardQueryParser qpHelper = new StandardQueryParser(new DocsStandardAnalyzer(Version.LUCENE_42));
|
||||||
|
qpHelper.setPhraseSlop(100000); // PhraseQuery add terms
|
||||||
|
Query contentQuery = qpHelper.parse(searchQuery, "content");
|
||||||
|
|
||||||
|
// Search on file content
|
||||||
|
BooleanQuery query = new BooleanQuery();
|
||||||
|
query.add(contentQuery, Occur.SHOULD);
|
||||||
|
|
||||||
|
// Filter on provided user ID
|
||||||
|
List<Term> terms = new ArrayList<Term>();
|
||||||
|
terms.add(new Term("user_id", userId));
|
||||||
|
TermsFilter feedsFilter = new TermsFilter(terms);
|
||||||
|
|
||||||
|
// Search
|
||||||
|
IndexReader reader = DirectoryReader.open(AppContext.getInstance().getLuceneDirectory());
|
||||||
|
IndexSearcher searcher = new IndexSearcher(reader);
|
||||||
|
TopDocs topDocs = searcher.search(query, feedsFilter, limit);
|
||||||
|
ScoreDoc[] docs = topDocs.scoreDocs;
|
||||||
|
|
||||||
|
// Extract file IDs
|
||||||
|
List<String> fileIdList = new ArrayList<String>();
|
||||||
|
for (int i = 0; i < docs.length; i++) {
|
||||||
|
String id = searcher.doc(docs[i].doc).get("id");
|
||||||
|
fileIdList.add(id);
|
||||||
|
}
|
||||||
|
|
||||||
|
return fileIdList;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Build Lucene document from file.
|
||||||
|
*
|
||||||
|
* @param file File
|
||||||
|
* @return Document
|
||||||
|
*/
|
||||||
|
private org.apache.lucene.document.Document getDocumentFromFile(File file) {
|
||||||
|
// Building document
|
||||||
|
org.apache.lucene.document.Document document = new org.apache.lucene.document.Document();
|
||||||
|
document.add(new StringField("id", file.getId(), Field.Store.YES));
|
||||||
|
document.add(new TextField("content", file.getContent(), Field.Store.NO));
|
||||||
|
|
||||||
|
return document;
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,66 @@
|
|||||||
|
package com.sismics.docs.core.event;
|
||||||
|
|
||||||
|
import com.google.common.base.Objects;
|
||||||
|
import com.sismics.docs.core.model.jpa.Document;
|
||||||
|
import com.sismics.docs.core.model.jpa.File;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* New file created event.
|
||||||
|
*
|
||||||
|
* @author bgamard
|
||||||
|
*/
|
||||||
|
public class FileCreatedAsyncEvent {
|
||||||
|
/**
|
||||||
|
* Created file.
|
||||||
|
*/
|
||||||
|
private File file;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Document linked to the file.
|
||||||
|
*/
|
||||||
|
private Document document;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Getter of file.
|
||||||
|
*
|
||||||
|
* @return the file
|
||||||
|
*/
|
||||||
|
public File getFile() {
|
||||||
|
return file;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Setter of file.
|
||||||
|
*
|
||||||
|
* @param file file
|
||||||
|
*/
|
||||||
|
public void setFile(File file) {
|
||||||
|
this.file = file;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Getter of document.
|
||||||
|
*
|
||||||
|
* @return the document
|
||||||
|
*/
|
||||||
|
public Document getDocument() {
|
||||||
|
return document;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Setter of document.
|
||||||
|
*
|
||||||
|
* @param document document
|
||||||
|
*/
|
||||||
|
public void setDocument(Document document) {
|
||||||
|
this.document = document;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return Objects.toStringHelper(this)
|
||||||
|
.add("file", file)
|
||||||
|
.add("document", document)
|
||||||
|
.toString();
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,43 @@
|
|||||||
|
package com.sismics.docs.core.listener.async;
|
||||||
|
|
||||||
|
import java.text.MessageFormat;
|
||||||
|
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import com.google.common.eventbus.Subscribe;
|
||||||
|
import com.sismics.docs.core.event.FileCreatedAsyncEvent;
|
||||||
|
import com.sismics.docs.core.util.FileUtil;
|
||||||
|
import com.sismics.util.ImageUtil;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Listener on new file.
|
||||||
|
*
|
||||||
|
* @author bgamard
|
||||||
|
*/
|
||||||
|
public class FileCreatedAsyncListener {
|
||||||
|
/**
|
||||||
|
* Logger.
|
||||||
|
*/
|
||||||
|
private static final Logger log = LoggerFactory.getLogger(FileCreatedAsyncListener.class);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Process new file.
|
||||||
|
*
|
||||||
|
* @param fileCreatedAsyncEvent New file created event
|
||||||
|
* @throws Exception
|
||||||
|
*/
|
||||||
|
@Subscribe
|
||||||
|
public void onArticleCreated(final FileCreatedAsyncEvent fileCreatedAsyncEvent) throws Exception {
|
||||||
|
if (log.isInfoEnabled()) {
|
||||||
|
log.info("File created event: " + fileCreatedAsyncEvent.toString());
|
||||||
|
}
|
||||||
|
|
||||||
|
// OCR the file if it is an image
|
||||||
|
if (ImageUtil.isImage(fileCreatedAsyncEvent.getFile().getMimeType())) {
|
||||||
|
long startTime = System.currentTimeMillis();
|
||||||
|
FileUtil.ocrFile(fileCreatedAsyncEvent.getDocument(), fileCreatedAsyncEvent.getFile());
|
||||||
|
log.info(MessageFormat.format("File OCR-ized in {0}ms", System.currentTimeMillis() - startTime));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -1,15 +1,5 @@
|
|||||||
package com.sismics.docs.core.model.context;
|
package com.sismics.docs.core.model.context;
|
||||||
|
|
||||||
import com.google.common.eventbus.AsyncEventBus;
|
|
||||||
import com.google.common.eventbus.EventBus;
|
|
||||||
import com.sismics.docs.core.constant.ConfigType;
|
|
||||||
import com.sismics.docs.core.dao.jpa.ConfigDao;
|
|
||||||
import com.sismics.docs.core.listener.sync.DeadEventListener;
|
|
||||||
import com.sismics.docs.core.model.jpa.Config;
|
|
||||||
import com.sismics.docs.core.service.IndexingService;
|
|
||||||
import com.sismics.util.EnvironmentUtil;
|
|
||||||
import org.apache.lucene.store.Directory;
|
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.concurrent.ExecutorService;
|
import java.util.concurrent.ExecutorService;
|
||||||
@ -17,6 +7,18 @@ import java.util.concurrent.LinkedBlockingQueue;
|
|||||||
import java.util.concurrent.ThreadPoolExecutor;
|
import java.util.concurrent.ThreadPoolExecutor;
|
||||||
import java.util.concurrent.TimeUnit;
|
import java.util.concurrent.TimeUnit;
|
||||||
|
|
||||||
|
import org.apache.lucene.store.Directory;
|
||||||
|
|
||||||
|
import com.google.common.eventbus.AsyncEventBus;
|
||||||
|
import com.google.common.eventbus.EventBus;
|
||||||
|
import com.sismics.docs.core.constant.ConfigType;
|
||||||
|
import com.sismics.docs.core.dao.jpa.ConfigDao;
|
||||||
|
import com.sismics.docs.core.listener.async.FileCreatedAsyncListener;
|
||||||
|
import com.sismics.docs.core.listener.sync.DeadEventListener;
|
||||||
|
import com.sismics.docs.core.model.jpa.Config;
|
||||||
|
import com.sismics.docs.core.service.IndexingService;
|
||||||
|
import com.sismics.util.EnvironmentUtil;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Global application context.
|
* Global application context.
|
||||||
*
|
*
|
||||||
@ -77,6 +79,7 @@ public class AppContext {
|
|||||||
asyncExecutorList = new ArrayList<ExecutorService>();
|
asyncExecutorList = new ArrayList<ExecutorService>();
|
||||||
|
|
||||||
asyncEventBus = newAsyncEventBus();
|
asyncEventBus = newAsyncEventBus();
|
||||||
|
asyncEventBus.register(new FileCreatedAsyncListener());
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -29,6 +29,12 @@ public class Document {
|
|||||||
@Column(name = "DOC_IDUSER_C", nullable = false, length = 36)
|
@Column(name = "DOC_IDUSER_C", nullable = false, length = 36)
|
||||||
private String userId;
|
private String userId;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Language (ISO 639-3 three-letter code, e.g. "eng" or "fra").
|
||||||
|
*/
|
||||||
|
@Column(name = "DOC_LANGUAGE_C", nullable = false, length = 3)
|
||||||
|
private String language;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Title.
|
* Title.
|
||||||
*/
|
*/
|
||||||
@ -71,6 +77,24 @@ public class Document {
|
|||||||
this.id = id;
|
this.id = id;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Getter of language.
|
||||||
|
*
|
||||||
|
* @return the language
|
||||||
|
*/
|
||||||
|
public String getLanguage() {
|
||||||
|
return language;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Setter of language.
|
||||||
|
*
|
||||||
|
* @param language language
|
||||||
|
*/
|
||||||
|
public void setLanguage(String language) {
|
||||||
|
this.language = language;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Getter of userId.
|
* Getter of userId.
|
||||||
*
|
*
|
||||||
|
@ -5,6 +5,7 @@ import com.google.common.base.Objects;
|
|||||||
import javax.persistence.Column;
|
import javax.persistence.Column;
|
||||||
import javax.persistence.Entity;
|
import javax.persistence.Entity;
|
||||||
import javax.persistence.Id;
|
import javax.persistence.Id;
|
||||||
|
import javax.persistence.Lob;
|
||||||
import javax.persistence.Table;
|
import javax.persistence.Table;
|
||||||
import java.util.Date;
|
import java.util.Date;
|
||||||
|
|
||||||
@ -30,11 +31,18 @@ public class File {
|
|||||||
private String documentId;
|
private String documentId;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Document ID.
|
* MIME type.
|
||||||
*/
|
*/
|
||||||
@Column(name = "FIL_MIMETYPE_C", length = 100)
|
@Column(name = "FIL_MIMETYPE_C", length = 100)
|
||||||
private String mimeType;
|
private String mimeType;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* OCR-ized content.
|
||||||
|
*/
|
||||||
|
@Lob
|
||||||
|
@Column(name = "FIL_CONTENT_C")
|
||||||
|
private String content;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creation date.
|
* Creation date.
|
||||||
*/
|
*/
|
||||||
@ -143,6 +151,24 @@ public class File {
|
|||||||
this.deleteDate = deleteDate;
|
this.deleteDate = deleteDate;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Getter of content.
|
||||||
|
*
|
||||||
|
* @return the content
|
||||||
|
*/
|
||||||
|
public String getContent() {
|
||||||
|
return content;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Setter of content.
|
||||||
|
*
|
||||||
|
* @param content content
|
||||||
|
*/
|
||||||
|
public void setContent(String content) {
|
||||||
|
this.content = content;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Getter of order.
|
* Getter of order.
|
||||||
*
|
*
|
||||||
|
@ -0,0 +1,75 @@
|
|||||||
|
package com.sismics.docs.core.util;
|
||||||
|
|
||||||
|
import java.awt.image.BufferedImage;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.nio.file.Paths;
|
||||||
|
|
||||||
|
import javax.imageio.ImageIO;
|
||||||
|
|
||||||
|
import net.sourceforge.tess4j.Tesseract;
|
||||||
|
|
||||||
|
import org.imgscalr.Scalr;
|
||||||
|
import org.imgscalr.Scalr.Method;
|
||||||
|
import org.imgscalr.Scalr.Mode;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import com.sismics.docs.core.dao.jpa.FileDao;
|
||||||
|
import com.sismics.docs.core.model.jpa.Document;
|
||||||
|
import com.sismics.docs.core.model.jpa.File;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* File entity utilities.
|
||||||
|
*
|
||||||
|
* @author bgamard
|
||||||
|
*/
|
||||||
|
public class FileUtil {
|
||||||
|
/**
|
||||||
|
* Logger.
|
||||||
|
*/
|
||||||
|
private static final Logger log = LoggerFactory.getLogger(FileUtil.class);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* OCR a file.
|
||||||
|
*
|
||||||
|
* @param document Document linked to the file
|
||||||
|
* @param file File to OCR
|
||||||
|
*/
|
||||||
|
public static void ocrFile(Document document, final File file) {
|
||||||
|
Tesseract instance = Tesseract.getInstance();
|
||||||
|
java.io.File storedfile = Paths.get(DirectoryUtil.getStorageDirectory().getPath(), file.getId()).toFile();
|
||||||
|
String content = null;
|
||||||
|
BufferedImage image = null;
|
||||||
|
try {
|
||||||
|
image = ImageIO.read(storedfile);
|
||||||
|
} catch (IOException e) {
|
||||||
|
log.error("Error reading the image " + storedfile, e);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Upscale the image if it is too small
|
||||||
|
if (image.getWidth() < 2500 || image.getHeight() < 2500) {
|
||||||
|
BufferedImage resizedImage = Scalr.resize(image, Method.AUTOMATIC, Mode.AUTOMATIC, 3500);
|
||||||
|
image.flush();
|
||||||
|
image = resizedImage;
|
||||||
|
}
|
||||||
|
|
||||||
|
// OCR the file
|
||||||
|
try {
|
||||||
|
instance.setLanguage(document.getLanguage());
|
||||||
|
content = instance.doOCR(image);
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.error("Error while OCR-izing the file " + storedfile, e);
|
||||||
|
}
|
||||||
|
|
||||||
|
file.setContent(content);
|
||||||
|
|
||||||
|
// Store the OCR-ization result in the database
|
||||||
|
TransactionUtil.handle(new Runnable() {
|
||||||
|
@Override
|
||||||
|
public void run() {
|
||||||
|
FileDao fileDao = new FileDao();
|
||||||
|
fileDao.updateContent(file);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
@ -1 +1 @@
|
|||||||
db.version=4
|
db.version=5
|
@ -0,0 +1,3 @@
|
|||||||
|
alter table T_FILE add column FIL_CONTENT_C LONGVARCHAR;
|
||||||
|
alter table T_DOCUMENT add column DOC_LANGUAGE_C varchar(3) default 'fra' not null;
|
||||||
|
update T_CONFIG set CFG_VALUE_C='5' where CFG_ID_C='DB_VERSION';
|
@ -0,0 +1,7 @@
|
|||||||
|
- Add language on document (client)
|
||||||
|
- Index title and description (server)
|
||||||
|
- Use Lucene for title and description searching (server)
|
||||||
|
- Index OCR-ized content (server)
|
||||||
|
- Search in OCR-ized files (server)
|
||||||
|
- Batch to OCR all documents (server)
|
||||||
|
- Batch to rebuild Lucene index (server)
|
BIN
docs-parent/lib/jai_imageio.jar
Normal file
BIN
docs-parent/lib/jai_imageio.jar
Normal file
Binary file not shown.
BIN
docs-parent/lib/jna.jar
Normal file
BIN
docs-parent/lib/jna.jar
Normal file
Binary file not shown.
BIN
docs-parent/lib/tess4j.jar
Normal file
BIN
docs-parent/lib/tess4j.jar
Normal file
Binary file not shown.
@ -159,6 +159,7 @@
|
|||||||
<artifactId>osxappbundle-maven-plugin</artifactId>
|
<artifactId>osxappbundle-maven-plugin</artifactId>
|
||||||
<version>${org.codehaus.mojo.osxappbundle-maven-plugin.version}</version>
|
<version>${org.codehaus.mojo.osxappbundle-maven-plugin.version}</version>
|
||||||
</plugin>
|
</plugin>
|
||||||
|
|
||||||
</plugins>
|
</plugins>
|
||||||
</build>
|
</build>
|
||||||
|
|
||||||
@ -434,6 +435,25 @@
|
|||||||
<artifactId>imgscalr-lib</artifactId>
|
<artifactId>imgscalr-lib</artifactId>
|
||||||
<version>${org.imgscalr.imgscalr-lib.version}</version>
|
<version>${org.imgscalr.imgscalr-lib.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
<!-- OCR dependencies -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>jna</groupId>
|
||||||
|
<artifactId>jna</artifactId>
|
||||||
|
<version>1.0</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>jai</groupId>
|
||||||
|
<artifactId>imageio</artifactId>
|
||||||
|
<version>1.0</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>tess4j</groupId>
|
||||||
|
<artifactId>tess4j</artifactId>
|
||||||
|
<version>1.0</version>
|
||||||
|
</dependency>
|
||||||
</dependencies>
|
</dependencies>
|
||||||
</dependencyManagement>
|
</dependencyManagement>
|
||||||
|
|
||||||
@ -450,4 +470,74 @@
|
|||||||
</releases>
|
</releases>
|
||||||
</pluginRepository>
|
</pluginRepository>
|
||||||
</pluginRepositories>
|
</pluginRepositories>
|
||||||
|
|
||||||
|
<profiles>
|
||||||
|
<profile>
|
||||||
|
<id>init</id>
|
||||||
|
|
||||||
|
<build>
|
||||||
|
<plugins>
|
||||||
|
<plugin>
|
||||||
|
<groupId>org.apache.maven.plugins</groupId>
|
||||||
|
<artifactId>maven-install-plugin</artifactId>
|
||||||
|
<version>2.3.1</version>
|
||||||
|
<executions>
|
||||||
|
|
||||||
|
<execution>
|
||||||
|
<id>install-jna</id>
|
||||||
|
<phase>validate</phase>
|
||||||
|
<configuration>
|
||||||
|
<file>${project.basedir}/lib/jna.jar</file>
|
||||||
|
<repositoryLayout>default</repositoryLayout>
|
||||||
|
<groupId>jna</groupId>
|
||||||
|
<artifactId>jna</artifactId>
|
||||||
|
<version>1.0</version>
|
||||||
|
<packaging>jar</packaging>
|
||||||
|
<generatePom>true</generatePom>
|
||||||
|
</configuration>
|
||||||
|
<goals>
|
||||||
|
<goal>install-file</goal>
|
||||||
|
</goals>
|
||||||
|
</execution>
|
||||||
|
|
||||||
|
<execution>
|
||||||
|
<id>install-jai-imageio</id>
|
||||||
|
<phase>validate</phase>
|
||||||
|
<configuration>
|
||||||
|
<file>${project.basedir}/lib/jai_imageio.jar</file>
|
||||||
|
<repositoryLayout>default</repositoryLayout>
|
||||||
|
<groupId>jai</groupId>
|
||||||
|
<artifactId>imageio</artifactId>
|
||||||
|
<version>1.0</version>
|
||||||
|
<packaging>jar</packaging>
|
||||||
|
<generatePom>true</generatePom>
|
||||||
|
</configuration>
|
||||||
|
<goals>
|
||||||
|
<goal>install-file</goal>
|
||||||
|
</goals>
|
||||||
|
</execution>
|
||||||
|
|
||||||
|
<execution>
|
||||||
|
<id>install-tess4j</id>
|
||||||
|
<phase>validate</phase>
|
||||||
|
<configuration>
|
||||||
|
<file>${project.basedir}/lib/tess4j.jar</file>
|
||||||
|
<repositoryLayout>default</repositoryLayout>
|
||||||
|
<groupId>tess4j</groupId>
|
||||||
|
<artifactId>tess4j</artifactId>
|
||||||
|
<version>1.0</version>
|
||||||
|
<packaging>jar</packaging>
|
||||||
|
<generatePom>true</generatePom>
|
||||||
|
</configuration>
|
||||||
|
<goals>
|
||||||
|
<goal>install-file</goal>
|
||||||
|
</goals>
|
||||||
|
</execution>
|
||||||
|
|
||||||
|
</executions>
|
||||||
|
</plugin>
|
||||||
|
</plugins>
|
||||||
|
</build>
|
||||||
|
</profile>
|
||||||
|
</profiles>
|
||||||
</project>
|
</project>
|
||||||
|
@ -1,3 +1,3 @@
|
|||||||
api.current_version=${project.version}
|
api.current_version=${project.version}
|
||||||
api.min_version=1.0
|
api.min_version=1.0
|
||||||
db.version=4
|
db.version=5
|
@ -31,6 +31,7 @@ import org.joda.time.format.DateTimeFormatterBuilder;
|
|||||||
import org.joda.time.format.DateTimeParser;
|
import org.joda.time.format.DateTimeParser;
|
||||||
|
|
||||||
import com.google.common.base.Strings;
|
import com.google.common.base.Strings;
|
||||||
|
import com.sismics.docs.core.constant.Constants;
|
||||||
import com.sismics.docs.core.dao.jpa.DocumentDao;
|
import com.sismics.docs.core.dao.jpa.DocumentDao;
|
||||||
import com.sismics.docs.core.dao.jpa.ShareDao;
|
import com.sismics.docs.core.dao.jpa.ShareDao;
|
||||||
import com.sismics.docs.core.dao.jpa.TagDao;
|
import com.sismics.docs.core.dao.jpa.TagDao;
|
||||||
@ -155,6 +156,7 @@ public class DocumentResource extends BaseResource {
|
|||||||
document.put("description", documentDto.getDescription());
|
document.put("description", documentDto.getDescription());
|
||||||
document.put("create_date", documentDto.getCreateTimestamp());
|
document.put("create_date", documentDto.getCreateTimestamp());
|
||||||
document.put("shared", documentDto.getShared());
|
document.put("shared", documentDto.getShared());
|
||||||
|
document.put("language", documentDto.getLanguage());
|
||||||
|
|
||||||
// Get tags
|
// Get tags
|
||||||
List<TagDto> tagDtoList = tagDao.getByDocumentId(documentDto.getId());
|
List<TagDto> tagDtoList = tagDao.getByDocumentId(documentDto.getId());
|
||||||
@ -178,7 +180,7 @@ public class DocumentResource extends BaseResource {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Parse a query according to the specified syntax, eg.:
|
* Parse a query according to the specified syntax, eg.:
|
||||||
* tag:assurance tag:other before:2012 after:2011-09 shared:yes thing
|
* tag:assurance tag:other before:2012 after:2011-09 shared:yes lang:fra thing
|
||||||
*
|
*
|
||||||
* @param search Search query
|
* @param search Search query
|
||||||
* @return DocumentCriteria
|
* @return DocumentCriteria
|
||||||
@ -233,6 +235,11 @@ public class DocumentResource extends BaseResource {
|
|||||||
if (params[1].equals("yes")) {
|
if (params[1].equals("yes")) {
|
||||||
documentCriteria.setShared(true);
|
documentCriteria.setShared(true);
|
||||||
}
|
}
|
||||||
|
} else if (params[0].equals("lang")) {
|
||||||
|
// New language criteria
|
||||||
|
if (Constants.SUPPORTED_LANGUAGES.contains(params[1])) {
|
||||||
|
documentCriteria.setLanguage(params[1]);
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
query.append(criteria);
|
query.append(criteria);
|
||||||
}
|
}
|
||||||
@ -256,6 +263,7 @@ public class DocumentResource extends BaseResource {
|
|||||||
@FormParam("title") String title,
|
@FormParam("title") String title,
|
||||||
@FormParam("description") String description,
|
@FormParam("description") String description,
|
||||||
@FormParam("tags") List<String> tagList,
|
@FormParam("tags") List<String> tagList,
|
||||||
|
@FormParam("language") String language,
|
||||||
@FormParam("create_date") String createDateStr) throws JSONException {
|
@FormParam("create_date") String createDateStr) throws JSONException {
|
||||||
if (!authenticate()) {
|
if (!authenticate()) {
|
||||||
throw new ForbiddenClientException();
|
throw new ForbiddenClientException();
|
||||||
@ -263,8 +271,12 @@ public class DocumentResource extends BaseResource {
|
|||||||
|
|
||||||
// Validate input data
|
// Validate input data
|
||||||
title = ValidationUtil.validateLength(title, "title", 1, 100, false);
|
title = ValidationUtil.validateLength(title, "title", 1, 100, false);
|
||||||
|
language = ValidationUtil.validateLength(language, "language", 3, 3, false);
|
||||||
description = ValidationUtil.validateLength(description, "description", 0, 4000, true);
|
description = ValidationUtil.validateLength(description, "description", 0, 4000, true);
|
||||||
Date createDate = ValidationUtil.validateDate(createDateStr, "create_date", true);
|
Date createDate = ValidationUtil.validateDate(createDateStr, "create_date", true);
|
||||||
|
if (!Constants.SUPPORTED_LANGUAGES.contains(language)) {
|
||||||
|
throw new ClientException("ValidationError", MessageFormat.format("{0} is not a supported language", language));
|
||||||
|
}
|
||||||
|
|
||||||
// Create the document
|
// Create the document
|
||||||
DocumentDao documentDao = new DocumentDao();
|
DocumentDao documentDao = new DocumentDao();
|
||||||
@ -272,6 +284,7 @@ public class DocumentResource extends BaseResource {
|
|||||||
document.setUserId(principal.getId());
|
document.setUserId(principal.getId());
|
||||||
document.setTitle(title);
|
document.setTitle(title);
|
||||||
document.setDescription(description);
|
document.setDescription(description);
|
||||||
|
document.setLanguage(language);
|
||||||
if (createDate == null) {
|
if (createDate == null) {
|
||||||
document.setCreateDate(new Date());
|
document.setCreateDate(new Date());
|
||||||
} else {
|
} else {
|
||||||
@ -303,6 +316,7 @@ public class DocumentResource extends BaseResource {
|
|||||||
@FormParam("title") String title,
|
@FormParam("title") String title,
|
||||||
@FormParam("description") String description,
|
@FormParam("description") String description,
|
||||||
@FormParam("tags") List<String> tagList,
|
@FormParam("tags") List<String> tagList,
|
||||||
|
@FormParam("language") String language,
|
||||||
@FormParam("create_date") String createDateStr) throws JSONException {
|
@FormParam("create_date") String createDateStr) throws JSONException {
|
||||||
if (!authenticate()) {
|
if (!authenticate()) {
|
||||||
throw new ForbiddenClientException();
|
throw new ForbiddenClientException();
|
||||||
@ -310,8 +324,12 @@ public class DocumentResource extends BaseResource {
|
|||||||
|
|
||||||
// Validate input data
|
// Validate input data
|
||||||
title = ValidationUtil.validateLength(title, "title", 1, 100, true);
|
title = ValidationUtil.validateLength(title, "title", 1, 100, true);
|
||||||
|
language = ValidationUtil.validateLength(language, "language", 3, 3, true);
|
||||||
description = ValidationUtil.validateLength(description, "description", 0, 4000, true);
|
description = ValidationUtil.validateLength(description, "description", 0, 4000, true);
|
||||||
Date createDate = ValidationUtil.validateDate(createDateStr, "create_date", true);
|
Date createDate = ValidationUtil.validateDate(createDateStr, "create_date", true);
|
||||||
|
if (language != null && !Constants.SUPPORTED_LANGUAGES.contains(language)) {
|
||||||
|
throw new ClientException("ValidationError", MessageFormat.format("{0} is not a supported language", language));
|
||||||
|
}
|
||||||
|
|
||||||
// Get the document
|
// Get the document
|
||||||
DocumentDao documentDao = new DocumentDao();
|
DocumentDao documentDao = new DocumentDao();
|
||||||
@ -332,6 +350,9 @@ public class DocumentResource extends BaseResource {
|
|||||||
if (createDate != null) {
|
if (createDate != null) {
|
||||||
document.setCreateDate(createDate);
|
document.setCreateDate(createDate);
|
||||||
}
|
}
|
||||||
|
if (language != null) {
|
||||||
|
document.setLanguage(language);
|
||||||
|
}
|
||||||
|
|
||||||
// Update tags
|
// Update tags
|
||||||
updateTagList(id, tagList);
|
updateTagList(id, tagList);
|
||||||
|
@ -29,6 +29,8 @@ import org.codehaus.jettison.json.JSONObject;
|
|||||||
import com.sismics.docs.core.dao.jpa.DocumentDao;
|
import com.sismics.docs.core.dao.jpa.DocumentDao;
|
||||||
import com.sismics.docs.core.dao.jpa.FileDao;
|
import com.sismics.docs.core.dao.jpa.FileDao;
|
||||||
import com.sismics.docs.core.dao.jpa.ShareDao;
|
import com.sismics.docs.core.dao.jpa.ShareDao;
|
||||||
|
import com.sismics.docs.core.event.FileCreatedAsyncEvent;
|
||||||
|
import com.sismics.docs.core.model.context.AppContext;
|
||||||
import com.sismics.docs.core.model.jpa.Document;
|
import com.sismics.docs.core.model.jpa.Document;
|
||||||
import com.sismics.docs.core.model.jpa.File;
|
import com.sismics.docs.core.model.jpa.File;
|
||||||
import com.sismics.docs.core.util.DirectoryUtil;
|
import com.sismics.docs.core.util.DirectoryUtil;
|
||||||
@ -111,6 +113,12 @@ public class FileResource extends BaseResource {
|
|||||||
// Save the file
|
// Save the file
|
||||||
FileUtil.save(is, file);
|
FileUtil.save(is, file);
|
||||||
|
|
||||||
|
// Raise a new file created event
|
||||||
|
FileCreatedAsyncEvent fileCreatedAsyncEvent = new FileCreatedAsyncEvent();
|
||||||
|
fileCreatedAsyncEvent.setDocument(document);
|
||||||
|
fileCreatedAsyncEvent.setFile(file);
|
||||||
|
AppContext.getInstance().getAsyncEventBus().post(fileCreatedAsyncEvent);
|
||||||
|
|
||||||
// Always return ok
|
// Always return ok
|
||||||
JSONObject response = new JSONObject();
|
JSONObject response = new JSONObject();
|
||||||
response.put("status", "ok");
|
response.put("status", "ok");
|
||||||
|
@ -1,3 +1,3 @@
|
|||||||
api.current_version=${project.version}
|
api.current_version=${project.version}
|
||||||
api.min_version=1.0
|
api.min_version=1.0
|
||||||
db.version=4
|
db.version=5
|
@ -50,6 +50,7 @@ public class TestDocumentResource extends BaseJerseyTest {
|
|||||||
postParams.add("title", "My super document 1");
|
postParams.add("title", "My super document 1");
|
||||||
postParams.add("description", "My super description for document 1");
|
postParams.add("description", "My super description for document 1");
|
||||||
postParams.add("tags", tag1Id);
|
postParams.add("tags", tag1Id);
|
||||||
|
postParams.add("language", "eng");
|
||||||
long create1Date = new Date().getTime();
|
long create1Date = new Date().getTime();
|
||||||
postParams.add("create_date", create1Date);
|
postParams.add("create_date", create1Date);
|
||||||
response = documentResource.put(ClientResponse.class, postParams);
|
response = documentResource.put(ClientResponse.class, postParams);
|
||||||
@ -80,6 +81,7 @@ public class TestDocumentResource extends BaseJerseyTest {
|
|||||||
JSONArray tags = documents.getJSONObject(0).getJSONArray("tags");
|
JSONArray tags = documents.getJSONObject(0).getJSONArray("tags");
|
||||||
Assert.assertTrue(documents.length() == 1);
|
Assert.assertTrue(documents.length() == 1);
|
||||||
Assert.assertEquals(document1Id, documents.getJSONObject(0).getString("id"));
|
Assert.assertEquals(document1Id, documents.getJSONObject(0).getString("id"));
|
||||||
|
Assert.assertEquals("eng", documents.getJSONObject(0).getString("language"));
|
||||||
Assert.assertEquals(1, tags.length());
|
Assert.assertEquals(1, tags.length());
|
||||||
Assert.assertEquals(tag1Id, tags.getJSONObject(0).getString("id"));
|
Assert.assertEquals(tag1Id, tags.getJSONObject(0).getString("id"));
|
||||||
Assert.assertEquals("SuperTag", tags.getJSONObject(0).getString("name"));
|
Assert.assertEquals("SuperTag", tags.getJSONObject(0).getString("name"));
|
||||||
@ -135,18 +137,30 @@ public class TestDocumentResource extends BaseJerseyTest {
|
|||||||
Assert.assertEquals(document1Id, documents.getJSONObject(0).getString("id"));
|
Assert.assertEquals(document1Id, documents.getJSONObject(0).getString("id"));
|
||||||
Assert.assertEquals(true, documents.getJSONObject(0).getBoolean("shared"));
|
Assert.assertEquals(true, documents.getJSONObject(0).getBoolean("shared"));
|
||||||
|
|
||||||
// Search documents with multiple criteria
|
// Search documents by language
|
||||||
documentResource = resource().path("/document/list");
|
documentResource = resource().path("/document/list");
|
||||||
documentResource.addFilter(new CookieAuthenticationFilter(document1Token));
|
documentResource.addFilter(new CookieAuthenticationFilter(document1Token));
|
||||||
getParams = new MultivaluedMapImpl();
|
getParams = new MultivaluedMapImpl();
|
||||||
getParams.putSingle("search", "after:2010 before:2040-08 tag:super shared:yes for");
|
getParams.putSingle("search", "lang:eng");
|
||||||
|
response = documentResource.queryParams(getParams).get(ClientResponse.class);
|
||||||
|
json = response.getEntity(JSONObject.class);
|
||||||
|
Assert.assertEquals(Status.OK, Status.fromStatusCode(response.getStatus()));
|
||||||
|
documents = json.getJSONArray("documents");
|
||||||
|
Assert.assertTrue(documents.length() == 1);
|
||||||
|
Assert.assertEquals(document1Id, documents.getJSONObject(0).getString("id"));
|
||||||
|
Assert.assertEquals("eng", documents.getJSONObject(0).getString("language"));
|
||||||
|
|
||||||
|
// Search documents with multiple criteria
|
||||||
|
documentResource = resource().path("/document/list");
|
||||||
|
documentResource.addFilter(new CookieAuthenticationFilter(document1Token));
|
||||||
|
getParams = new MultivaluedMapImpl();
|
||||||
|
getParams.putSingle("search", "after:2010 before:2040-08 tag:super shared:yes lang:eng for");
|
||||||
response = documentResource.queryParams(getParams).get(ClientResponse.class);
|
response = documentResource.queryParams(getParams).get(ClientResponse.class);
|
||||||
json = response.getEntity(JSONObject.class);
|
json = response.getEntity(JSONObject.class);
|
||||||
Assert.assertEquals(Status.OK, Status.fromStatusCode(response.getStatus()));
|
Assert.assertEquals(Status.OK, Status.fromStatusCode(response.getStatus()));
|
||||||
documents = json.getJSONArray("documents");
|
documents = json.getJSONArray("documents");
|
||||||
Assert.assertTrue(documents.length() == 1);
|
Assert.assertTrue(documents.length() == 1);
|
||||||
Assert.assertEquals(document1Id, documents.getJSONObject(0).getString("id"));
|
Assert.assertEquals(document1Id, documents.getJSONObject(0).getString("id"));
|
||||||
Assert.assertEquals(true, documents.getJSONObject(0).getBoolean("shared"));
|
|
||||||
|
|
||||||
// Search documents (nothing)
|
// Search documents (nothing)
|
||||||
documentResource = resource().path("/document/list");
|
documentResource = resource().path("/document/list");
|
||||||
@ -181,6 +195,17 @@ public class TestDocumentResource extends BaseJerseyTest {
|
|||||||
documents = json.getJSONArray("documents");
|
documents = json.getJSONArray("documents");
|
||||||
Assert.assertTrue(documents.length() == 0);
|
Assert.assertTrue(documents.length() == 0);
|
||||||
|
|
||||||
|
// Search documents by language (nothing)
|
||||||
|
documentResource = resource().path("/document/list");
|
||||||
|
documentResource.addFilter(new CookieAuthenticationFilter(document1Token));
|
||||||
|
getParams = new MultivaluedMapImpl();
|
||||||
|
getParams.putSingle("search", "lang:fra");
|
||||||
|
response = documentResource.queryParams(getParams).get(ClientResponse.class);
|
||||||
|
json = response.getEntity(JSONObject.class);
|
||||||
|
Assert.assertEquals(Status.OK, Status.fromStatusCode(response.getStatus()));
|
||||||
|
documents = json.getJSONArray("documents");
|
||||||
|
Assert.assertTrue(documents.length() == 0);
|
||||||
|
|
||||||
// Get a document
|
// Get a document
|
||||||
documentResource = resource().path("/document/" + document1Id);
|
documentResource = resource().path("/document/" + document1Id);
|
||||||
documentResource.addFilter(new CookieAuthenticationFilter(document1Token));
|
documentResource.addFilter(new CookieAuthenticationFilter(document1Token));
|
||||||
|
@ -42,6 +42,7 @@ public class TestFileResource extends BaseJerseyTest {
|
|||||||
documentResource.addFilter(new CookieAuthenticationFilter(file1AuthenticationToken));
|
documentResource.addFilter(new CookieAuthenticationFilter(file1AuthenticationToken));
|
||||||
MultivaluedMapImpl postParams = new MultivaluedMapImpl();
|
MultivaluedMapImpl postParams = new MultivaluedMapImpl();
|
||||||
postParams.add("title", "File test document 1");
|
postParams.add("title", "File test document 1");
|
||||||
|
postParams.add("language", "eng");
|
||||||
ClientResponse response = documentResource.put(ClientResponse.class, postParams);
|
ClientResponse response = documentResource.put(ClientResponse.class, postParams);
|
||||||
Assert.assertEquals(Status.OK, Status.fromStatusCode(response.getStatus()));
|
Assert.assertEquals(Status.OK, Status.fromStatusCode(response.getStatus()));
|
||||||
JSONObject json = response.getEntity(JSONObject.class);
|
JSONObject json = response.getEntity(JSONObject.class);
|
||||||
@ -52,7 +53,7 @@ public class TestFileResource extends BaseJerseyTest {
|
|||||||
WebResource fileResource = resource().path("/file");
|
WebResource fileResource = resource().path("/file");
|
||||||
fileResource.addFilter(new CookieAuthenticationFilter(file1AuthenticationToken));
|
fileResource.addFilter(new CookieAuthenticationFilter(file1AuthenticationToken));
|
||||||
FormDataMultiPart form = new FormDataMultiPart();
|
FormDataMultiPart form = new FormDataMultiPart();
|
||||||
InputStream file = this.getClass().getResourceAsStream("/file/PIA00452.jpg");
|
InputStream file = this.getClass().getResourceAsStream("/file/Einstein-Roosevelt-letter.png");
|
||||||
FormDataBodyPart fdp = new FormDataBodyPart("file",
|
FormDataBodyPart fdp = new FormDataBodyPart("file",
|
||||||
new BufferedInputStream(file),
|
new BufferedInputStream(file),
|
||||||
MediaType.APPLICATION_OCTET_STREAM_TYPE);
|
MediaType.APPLICATION_OCTET_STREAM_TYPE);
|
||||||
@ -87,7 +88,7 @@ public class TestFileResource extends BaseJerseyTest {
|
|||||||
Assert.assertEquals(Status.OK, Status.fromStatusCode(response.getStatus()));
|
Assert.assertEquals(Status.OK, Status.fromStatusCode(response.getStatus()));
|
||||||
InputStream is = response.getEntityInputStream();
|
InputStream is = response.getEntityInputStream();
|
||||||
byte[] fileBytes = ByteStreams.toByteArray(is);
|
byte[] fileBytes = ByteStreams.toByteArray(is);
|
||||||
Assert.assertEquals(163510, fileBytes.length);
|
Assert.assertEquals(292641, fileBytes.length);
|
||||||
|
|
||||||
// Get the thumbnail data
|
// Get the thumbnail data
|
||||||
fileResource = resource().path("/file/" + file1Id + "/data");
|
fileResource = resource().path("/file/" + file1Id + "/data");
|
||||||
@ -98,7 +99,7 @@ public class TestFileResource extends BaseJerseyTest {
|
|||||||
Assert.assertEquals(Status.OK, Status.fromStatusCode(response.getStatus()));
|
Assert.assertEquals(Status.OK, Status.fromStatusCode(response.getStatus()));
|
||||||
is = response.getEntityInputStream();
|
is = response.getEntityInputStream();
|
||||||
fileBytes = ByteStreams.toByteArray(is);
|
fileBytes = ByteStreams.toByteArray(is);
|
||||||
Assert.assertEquals(41935, fileBytes.length);
|
Assert.assertEquals(34050, fileBytes.length);
|
||||||
|
|
||||||
// Get all files from a document
|
// Get all files from a document
|
||||||
fileResource = resource().path("/file/list");
|
fileResource = resource().path("/file/list");
|
||||||
|
@ -42,6 +42,7 @@ public class TestShareResource extends BaseJerseyTest {
|
|||||||
documentResource.addFilter(new CookieAuthenticationFilter(share1AuthenticationToken));
|
documentResource.addFilter(new CookieAuthenticationFilter(share1AuthenticationToken));
|
||||||
MultivaluedMapImpl postParams = new MultivaluedMapImpl();
|
MultivaluedMapImpl postParams = new MultivaluedMapImpl();
|
||||||
postParams.add("title", "File test document 1");
|
postParams.add("title", "File test document 1");
|
||||||
|
postParams.add("language", "eng");
|
||||||
ClientResponse response = documentResource.put(ClientResponse.class, postParams);
|
ClientResponse response = documentResource.put(ClientResponse.class, postParams);
|
||||||
Assert.assertEquals(Status.OK, Status.fromStatusCode(response.getStatus()));
|
Assert.assertEquals(Status.OK, Status.fromStatusCode(response.getStatus()));
|
||||||
JSONObject json = response.getEntity(JSONObject.class);
|
JSONObject json = response.getEntity(JSONObject.class);
|
||||||
|
@ -66,6 +66,7 @@ public class TestTagResource extends BaseJerseyTest {
|
|||||||
postParams = new MultivaluedMapImpl();
|
postParams = new MultivaluedMapImpl();
|
||||||
postParams.add("title", "My super document 1");
|
postParams.add("title", "My super document 1");
|
||||||
postParams.add("tags", tag3Id);
|
postParams.add("tags", tag3Id);
|
||||||
|
postParams.add("language", "eng");
|
||||||
response = documentResource.put(ClientResponse.class, postParams);
|
response = documentResource.put(ClientResponse.class, postParams);
|
||||||
Assert.assertEquals(Status.OK, Status.fromStatusCode(response.getStatus()));
|
Assert.assertEquals(Status.OK, Status.fromStatusCode(response.getStatus()));
|
||||||
json = response.getEntity(JSONObject.class);
|
json = response.getEntity(JSONObject.class);
|
||||||
@ -76,6 +77,7 @@ public class TestTagResource extends BaseJerseyTest {
|
|||||||
postParams = new MultivaluedMapImpl();
|
postParams = new MultivaluedMapImpl();
|
||||||
postParams.add("title", "My super document 1");
|
postParams.add("title", "My super document 1");
|
||||||
postParams.add("tags", tag4Id);
|
postParams.add("tags", tag4Id);
|
||||||
|
postParams.add("language", "eng");
|
||||||
response = documentResource.put(ClientResponse.class, postParams);
|
response = documentResource.put(ClientResponse.class, postParams);
|
||||||
Assert.assertEquals(Status.OK, Status.fromStatusCode(response.getStatus()));
|
Assert.assertEquals(Status.OK, Status.fromStatusCode(response.getStatus()));
|
||||||
json = response.getEntity(JSONObject.class);
|
json = response.getEntity(JSONObject.class);
|
||||||
|
BIN
docs-web/src/test/resources/file/Einstein-Roosevelt-letter.png
Normal file
BIN
docs-web/src/test/resources/file/Einstein-Roosevelt-letter.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 286 KiB |
Loading…
Reference in New Issue
Block a user