Document language (server), OCR files and store result in database

This commit is contained in:
jendib 2013-08-16 23:48:35 +02:00
parent 70a86dc86f
commit 1f1f02ed41
29 changed files with 670 additions and 27 deletions

View File

@ -117,6 +117,22 @@
<artifactId>imgscalr-lib</artifactId> <artifactId>imgscalr-lib</artifactId>
</dependency> </dependency>
<!-- OCR dependencies -->
<dependency>
<groupId>jna</groupId>
<artifactId>jna</artifactId>
</dependency>
<dependency>
<groupId>jai</groupId>
<artifactId>imageio</artifactId>
</dependency>
<dependency>
<groupId>tess4j</groupId>
<artifactId>tess4j</artifactId>
</dependency>
<!-- Test dependencies --> <!-- Test dependencies -->
<dependency> <dependency>
<groupId>junit</groupId> <groupId>junit</groupId>

View File

@ -1,5 +1,9 @@
package com.sismics.docs.core.constant; package com.sismics.docs.core.constant;
import java.util.List;
import com.google.common.collect.Lists;
/** /**
* Application constants. * Application constants.
* *
@ -40,4 +44,9 @@ public class Constants {
* Default generic user role. * Default generic user role.
*/ */
public static final String DEFAULT_USER_ROLE = "user"; public static final String DEFAULT_USER_ROLE = "user";
/**
 * Supported document languages, as three-letter codes.
 * The list is exposed as a read-only view: it is a shared constant and
 * must not be altered at runtime.
 */
public static final List<String> SUPPORTED_LANGUAGES = java.util.Collections.unmodifiableList(Lists.newArrayList("eng", "fra"));
} }

View File

@ -123,7 +123,7 @@ public class DocumentDao {
Map<String, Object> parameterMap = new HashMap<String, Object>(); Map<String, Object> parameterMap = new HashMap<String, Object>();
List<String> criteriaList = new ArrayList<String>(); List<String> criteriaList = new ArrayList<String>();
StringBuilder sb = new StringBuilder("select d.DOC_ID_C c0, d.DOC_TITLE_C c1, d.DOC_DESCRIPTION_C c2, d.DOC_CREATEDATE_D c3, s.SHA_ID_C is not null c4 "); StringBuilder sb = new StringBuilder("select d.DOC_ID_C c0, d.DOC_TITLE_C c1, d.DOC_DESCRIPTION_C c2, d.DOC_CREATEDATE_D c3, d.DOC_LANGUAGE_C c4, s.SHA_ID_C is not null c5 ");
sb.append(" from T_DOCUMENT d "); sb.append(" from T_DOCUMENT d ");
sb.append(" left join T_SHARE s on s.SHA_IDDOCUMENT_C = d.DOC_ID_C and s.SHA_DELETEDATE_D is null "); sb.append(" left join T_SHARE s on s.SHA_IDDOCUMENT_C = d.DOC_ID_C and s.SHA_DELETEDATE_D is null ");
@ -156,6 +156,10 @@ public class DocumentDao {
if (criteria.getShared() != null && criteria.getShared()) { if (criteria.getShared() != null && criteria.getShared()) {
criteriaList.add("s.SHA_ID_C is not null"); criteriaList.add("s.SHA_ID_C is not null");
} }
if (criteria.getLanguage() != null) {
criteriaList.add("d.DOC_LANGUAGE_C = :language");
parameterMap.put("language", criteria.getLanguage());
}
criteriaList.add("d.DOC_DELETEDATE_D is null"); criteriaList.add("d.DOC_DELETEDATE_D is null");
@ -177,6 +181,7 @@ public class DocumentDao {
documentDto.setTitle((String) o[i++]); documentDto.setTitle((String) o[i++]);
documentDto.setDescription((String) o[i++]); documentDto.setDescription((String) o[i++]);
documentDto.setCreateTimestamp(((Timestamp) o[i++]).getTime()); documentDto.setCreateTimestamp(((Timestamp) o[i++]).getTime());
documentDto.setLanguage((String) o[i++]);
documentDto.setShared((Boolean) o[i++]); documentDto.setShared((Boolean) o[i++]);
documentDtoList.add(documentDto); documentDtoList.add(documentDto);
} }

View File

@ -1,14 +1,15 @@
package com.sismics.docs.core.dao.jpa; package com.sismics.docs.core.dao.jpa;
import com.sismics.docs.core.model.jpa.File; import java.util.Date;
import com.sismics.util.context.ThreadLocalContext; import java.util.List;
import java.util.UUID;
import javax.persistence.EntityManager; import javax.persistence.EntityManager;
import javax.persistence.NoResultException; import javax.persistence.NoResultException;
import javax.persistence.Query; import javax.persistence.Query;
import java.util.Date;
import java.util.List; import com.sismics.docs.core.model.jpa.File;
import java.util.UUID; import com.sismics.util.context.ThreadLocalContext;
/** /**
* File DAO. * File DAO.
@ -66,6 +67,26 @@ public class FileDao {
fileDb.setDeleteDate(dateNow); fileDb.setDeleteDate(dateNow);
} }
/**
 * Updates the content of a file.
 * Only the content is copied onto the stored file; its other properties are left untouched.
 *
 * @param file File carrying the ID of the file to update and the new content
 * @return The file passed as parameter
 * @throws NoResultException If no non-deleted file matches the given ID
 */
public File updateContent(File file) {
    EntityManager em = ThreadLocalContext.get().getEntityManager();

    // Get the file
    Query q = em.createQuery("select f from File f where f.id = :id and f.deleteDate is null");
    q.setParameter("id", file.getId());
    File fileFromDb = (File) q.getSingleResult();

    // Update the content
    fileFromDb.setContent(file.getContent());

    return file;
}
/** /**
* Gets a file by its ID. * Gets a file by its ID.
* *

View File

@ -40,6 +40,11 @@ public class DocumentCriteria {
*/ */
private Boolean shared; private Boolean shared;
/**
* Language.
*/
private String language;
/** /**
* Getter of userId. * Getter of userId.
* *
@ -147,4 +152,22 @@ public class DocumentCriteria {
public void setShared(Boolean shared) { public void setShared(Boolean shared) {
this.shared = shared; this.shared = shared;
} }
/**
* Getter of language.
*
* @return the language to filter documents on, or null when no language criterion is set
*/
public String getLanguage() {
return language;
}
/**
* Setter of language.
*
* @param language language to filter documents on, or null for no language criterion
*/
public void setLanguage(String language) {
this.language = language;
}
} }

View File

@ -24,6 +24,11 @@ public class DocumentDto {
*/ */
private String description; private String description;
/**
* Language.
*/
private String language;
/** /**
* Creation date. * Creation date.
*/ */
@ -123,4 +128,22 @@ public class DocumentDto {
public void setShared(Boolean shared) { public void setShared(Boolean shared) {
this.shared = shared; this.shared = shared;
} }
/**
* Getter of language.
*
* @return the language code of the document
*/
public String getLanguage() {
return language;
}
/**
* Setter of language.
*
* @param language language code of the document
*/
public void setLanguage(String language) {
this.language = language;
}
} }

View File

@ -0,0 +1,151 @@
package com.sismics.docs.core.dao.lucene;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.TermsFilter;
import org.apache.lucene.queryparser.flexible.standard.QueryParserUtil;
import org.apache.lucene.queryparser.flexible.standard.StandardQueryParser;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.util.Version;
import com.sismics.docs.core.model.context.AppContext;
import com.sismics.docs.core.model.jpa.File;
import com.sismics.docs.core.util.LuceneUtil;
import com.sismics.docs.core.util.LuceneUtil.LuceneRunnable;
/**
 * Lucene DAO.
 * Maintains the Lucene index of file contents and searches it.
 *
 * @author bgamard
 */
public class LuceneDao {
    /**
     * Destroy and rebuild index.
     *
     * @param fileList Files to index once the index has been emptied
     */
    public void rebuildIndex(final List<File> fileList) {
        LuceneUtil.handle(new LuceneRunnable() {
            @Override
            public void run(IndexWriter indexWriter) throws Exception {
                // Empty index
                indexWriter.deleteAll();

                // Add all files
                for (File file : fileList) {
                    org.apache.lucene.document.Document document = getDocumentFromFile(file);
                    indexWriter.addDocument(document);
                }
            }
        });
    }

    /**
     * Add files to the index.
     *
     * @param fileList Files to add
     */
    public void create(final List<File> fileList) {
        LuceneUtil.handle(new LuceneRunnable() {
            @Override
            public void run(IndexWriter indexWriter) throws Exception {
                // Add all files
                for (File file : fileList) {
                    org.apache.lucene.document.Document document = getDocumentFromFile(file);
                    indexWriter.addDocument(document);
                }
            }
        });
    }

    /**
     * Update index.
     * Each file replaces the indexed document carrying the same "id" term.
     *
     * @param fileList File list
     */
    public void update(final List<File> fileList) {
        LuceneUtil.handle(new LuceneRunnable() {
            @Override
            public void run(IndexWriter indexWriter) throws Exception {
                // Update all files
                for (File file : fileList) {
                    org.apache.lucene.document.Document document = getDocumentFromFile(file);
                    indexWriter.updateDocument(new Term("id", file.getId()), document);
                }
            }
        });
    }

    /**
     * Search files.
     *
     * @param userId ID of the user the results are restricted to
     * @param searchQuery Text to look for in the file contents
     * @param limit Maximum number of results
     * @return List of file IDs
     * @throws Exception If the query cannot be parsed or the index cannot be read
     */
    public List<String> search(String userId, String searchQuery, int limit) throws Exception {
        // Escape query and add quotes so QueryParser generate a PhraseQuery
        searchQuery = "\"" + QueryParserUtil.escape(searchQuery) + "\"";

        // Build search query
        StandardQueryParser qpHelper = new StandardQueryParser(new DocsStandardAnalyzer(Version.LUCENE_42));
        qpHelper.setPhraseSlop(100000); // PhraseQuery add terms
        Query contentQuery = qpHelper.parse(searchQuery, "content");

        // Search on file content
        BooleanQuery query = new BooleanQuery();
        query.add(contentQuery, Occur.SHOULD);

        // Filter on provided user ID
        // NOTE(review): getDocumentFromFile() does not index a "user_id" field, so this
        // filter presumably matches no document until that field is indexed - confirm.
        List<Term> terms = new ArrayList<Term>();
        terms.add(new Term("user_id", userId));
        TermsFilter userFilter = new TermsFilter(terms);

        // Search; the reader is always closed so index resources are not leaked
        IndexReader reader = DirectoryReader.open(AppContext.getInstance().getLuceneDirectory());
        try {
            IndexSearcher searcher = new IndexSearcher(reader);
            TopDocs topDocs = searcher.search(query, userFilter, limit);
            ScoreDoc[] docs = topDocs.scoreDocs;

            // Extract file IDs
            List<String> fileIdList = new ArrayList<String>();
            for (int i = 0; i < docs.length; i++) {
                String id = searcher.doc(docs[i].doc).get("id");
                fileIdList.add(id);
            }
            return fileIdList;
        } finally {
            reader.close();
        }
    }

    /**
     * Build Lucene document from file.
     * The "id" field is stored; the "content" field is indexed but not stored,
     * and is skipped when the file has no content.
     *
     * @param file File
     * @return Document
     */
    private org.apache.lucene.document.Document getDocumentFromFile(File file) {
        // Building document
        org.apache.lucene.document.Document document = new org.apache.lucene.document.Document();
        document.add(new StringField("id", file.getId(), Field.Store.YES));

        // Lucene fields reject null values, and a file may have no content
        String content = file.getContent();
        if (content != null) {
            document.add(new TextField("content", content, Field.Store.NO));
        }

        return document;
    }
}

View File

@ -0,0 +1,66 @@
package com.sismics.docs.core.event;
import com.google.common.base.Objects;
import com.sismics.docs.core.model.jpa.Document;
import com.sismics.docs.core.model.jpa.File;
/**
* New file created event.
*
* @author bgamard
*/
public class FileCreatedAsyncEvent {
/**
* Created file.
*/
private File file;
/**
* Document linked to the file.
*/
private Document document;
/**
* Getter of file.
*
* @return the file
*/
public File getFile() {
return file;
}
/**
* Setter of file.
*
* @param file file
*/
public void setFile(File file) {
this.file = file;
}
/**
* Getter of document.
*
* @return the document
*/
public Document getDocument() {
return document;
}
/**
* Setter of document.
*
* @param document document
*/
public void setDocument(Document document) {
this.document = document;
}
@Override
public String toString() {
return Objects.toStringHelper(this)
.add("file", file)
.add("document", document)
.toString();
}
}

View File

@ -0,0 +1,43 @@
package com.sismics.docs.core.listener.async;
import java.text.MessageFormat;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.eventbus.Subscribe;
import com.sismics.docs.core.event.FileCreatedAsyncEvent;
import com.sismics.docs.core.util.FileUtil;
import com.sismics.util.ImageUtil;
/**
 * Listener reacting to newly created files.
 *
 * @author bgamard
 */
public class FileCreatedAsyncListener {
    /**
     * Logger.
     */
    private static final Logger log = LoggerFactory.getLogger(FileCreatedAsyncListener.class);

    /**
     * Handles a new file: image files are OCR-ized, other files are left alone.
     *
     * @param fileCreatedAsyncEvent New file created event
     * @throws Exception
     */
    @Subscribe
    public void onArticleCreated(final FileCreatedAsyncEvent fileCreatedAsyncEvent) throws Exception {
        if (log.isInfoEnabled()) {
            log.info("File created event: " + fileCreatedAsyncEvent.toString());
        }

        // Only images go through OCR
        if (!ImageUtil.isImage(fileCreatedAsyncEvent.getFile().getMimeType())) {
            return;
        }

        // OCR the file and report how long it took
        final long ocrStart = System.currentTimeMillis();
        FileUtil.ocrFile(fileCreatedAsyncEvent.getDocument(), fileCreatedAsyncEvent.getFile());
        final long elapsed = System.currentTimeMillis() - ocrStart;
        log.info(MessageFormat.format("File OCR-ized in {0}ms", elapsed));
    }
}

View File

@ -1,15 +1,5 @@
package com.sismics.docs.core.model.context; package com.sismics.docs.core.model.context;
import com.google.common.eventbus.AsyncEventBus;
import com.google.common.eventbus.EventBus;
import com.sismics.docs.core.constant.ConfigType;
import com.sismics.docs.core.dao.jpa.ConfigDao;
import com.sismics.docs.core.listener.sync.DeadEventListener;
import com.sismics.docs.core.model.jpa.Config;
import com.sismics.docs.core.service.IndexingService;
import com.sismics.util.EnvironmentUtil;
import org.apache.lucene.store.Directory;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.concurrent.ExecutorService; import java.util.concurrent.ExecutorService;
@ -17,6 +7,18 @@ import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit;
import org.apache.lucene.store.Directory;
import com.google.common.eventbus.AsyncEventBus;
import com.google.common.eventbus.EventBus;
import com.sismics.docs.core.constant.ConfigType;
import com.sismics.docs.core.dao.jpa.ConfigDao;
import com.sismics.docs.core.listener.async.FileCreatedAsyncListener;
import com.sismics.docs.core.listener.sync.DeadEventListener;
import com.sismics.docs.core.model.jpa.Config;
import com.sismics.docs.core.service.IndexingService;
import com.sismics.util.EnvironmentUtil;
/** /**
* Global application context. * Global application context.
* *
@ -77,6 +79,7 @@ public class AppContext {
asyncExecutorList = new ArrayList<ExecutorService>(); asyncExecutorList = new ArrayList<ExecutorService>();
asyncEventBus = newAsyncEventBus(); asyncEventBus = newAsyncEventBus();
asyncEventBus.register(new FileCreatedAsyncListener());
} }
/** /**

View File

@ -29,6 +29,12 @@ public class Document {
@Column(name = "DOC_IDUSER_C", nullable = false, length = 36) @Column(name = "DOC_IDUSER_C", nullable = false, length = 36)
private String userId; private String userId;
/**
* Language (three-letter ISO 639-2 code, e.g. "eng" or "fra").
*/
@Column(name = "DOC_LANGUAGE_C", nullable = false, length = 3)
private String language;
/** /**
* Title. * Title.
*/ */
@ -71,6 +77,24 @@ public class Document {
this.id = id; this.id = id;
} }
/**
* Getter of language.
*
* @return the language code of the document (DOC_LANGUAGE_C column)
*/
public String getLanguage() {
return language;
}
/**
* Setter of language.
*
* @param language language code of the document (DOC_LANGUAGE_C column)
*/
public void setLanguage(String language) {
this.language = language;
}
/** /**
* Getter of userId. * Getter of userId.
* *

View File

@ -5,6 +5,7 @@ import com.google.common.base.Objects;
import javax.persistence.Column; import javax.persistence.Column;
import javax.persistence.Entity; import javax.persistence.Entity;
import javax.persistence.Id; import javax.persistence.Id;
import javax.persistence.Lob;
import javax.persistence.Table; import javax.persistence.Table;
import java.util.Date; import java.util.Date;
@ -30,11 +31,18 @@ public class File {
private String documentId; private String documentId;
/** /**
* Document ID. * MIME type.
*/ */
@Column(name = "FIL_MIMETYPE_C", length = 100) @Column(name = "FIL_MIMETYPE_C", length = 100)
private String mimeType; private String mimeType;
/**
* OCR-ized content.
*/
@Lob
@Column(name = "FIL_CONTENT_C")
private String content;
/** /**
* Creation date. * Creation date.
*/ */
@ -143,6 +151,24 @@ public class File {
this.deleteDate = deleteDate; this.deleteDate = deleteDate;
} }
/**
* Getter of content.
*
* @return the OCR-ized content of the file (FIL_CONTENT_C column)
*/
public String getContent() {
return content;
}
/**
* Setter of content.
*
* @param content OCR-ized content of the file (FIL_CONTENT_C column)
*/
public void setContent(String content) {
this.content = content;
}
/** /**
* Getter of order. * Getter of order.
* *

View File

@ -0,0 +1,75 @@
package com.sismics.docs.core.util;
import java.awt.image.BufferedImage;
import java.io.IOException;
import java.nio.file.Paths;
import javax.imageio.ImageIO;
import net.sourceforge.tess4j.Tesseract;
import org.imgscalr.Scalr;
import org.imgscalr.Scalr.Method;
import org.imgscalr.Scalr.Mode;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.sismics.docs.core.dao.jpa.FileDao;
import com.sismics.docs.core.model.jpa.Document;
import com.sismics.docs.core.model.jpa.File;
/**
 * File entity utilities.
 *
 * @author bgamard
 */
public class FileUtil {
    /**
     * Logger.
     */
    private static final Logger log = LoggerFactory.getLogger(FileUtil.class);

    /**
     * Images with a width or height below this size (in pixels) are upscaled before OCR.
     */
    private static final int OCR_MIN_DIMENSION = 2500;

    /**
     * Target size (in pixels) passed to the resize when upscaling an image.
     */
    private static final int OCR_UPSCALED_SIZE = 3500;

    /**
     * OCR a file.
     * The stored file is read as an image, upscaled if it is too small, OCR-ized
     * in the language of the document, and the result is stored in the database.
     * If the stored file cannot be read as an image, nothing is OCR-ized nor stored.
     * If the OCR itself fails, the error is logged and a null content is stored.
     *
     * @param document Document linked to the file
     * @param file File to OCR
     */
    public static void ocrFile(Document document, final File file) {
        // NOTE(review): Tesseract.getInstance() looks like a shared instance and the language
        // is set on it right before OCR; confirm concurrent events cannot interleave.
        Tesseract instance = Tesseract.getInstance();
        java.io.File storedfile = Paths.get(DirectoryUtil.getStorageDirectory().getPath(), file.getId()).toFile();
        String content = null;
        BufferedImage image = null;
        try {
            image = ImageIO.read(storedfile);
        } catch (IOException e) {
            log.error("Error reading the image " + storedfile, e);
            return;
        }

        // ImageIO.read() returns null when no registered reader can decode the file
        if (image == null) {
            log.error("Error reading the image " + storedfile + ": unsupported image format");
            return;
        }

        // Upscale the image if it is too small
        if (image.getWidth() < OCR_MIN_DIMENSION || image.getHeight() < OCR_MIN_DIMENSION) {
            BufferedImage resizedImage = Scalr.resize(image, Method.AUTOMATIC, Mode.AUTOMATIC, OCR_UPSCALED_SIZE);
            image.flush();
            image = resizedImage;
        }

        // OCR the file
        try {
            instance.setLanguage(document.getLanguage());
            content = instance.doOCR(image);
        } catch (Exception e) {
            log.error("Error while OCR-izing the file " + storedfile, e);
        }

        file.setContent(content);

        // Store the OCR-ization result in the database
        TransactionUtil.handle(new Runnable() {
            @Override
            public void run() {
                FileDao fileDao = new FileDao();
                fileDao.updateContent(file);
            }
        });
    }
}

View File

@ -1 +1 @@
db.version=4 db.version=5

View File

@ -0,0 +1,3 @@
alter table T_FILE add column FIL_CONTENT_C LONGVARCHAR;
alter table T_DOCUMENT add column DOC_LANGUAGE_C varchar(3) default 'fra' not null;
update T_CONFIG set CFG_VALUE_C='5' where CFG_ID_C='DB_VERSION';

View File

@ -0,0 +1,7 @@
- Add language on document (client)
- Index title and description (server)
- Use Lucene for title and description searching (server)
- Index OCR-ized content (server)
- Search in OCR-ized files (server)
- Batch to OCR all documents (server)
- Batch to rebuild Lucene index (server)

Binary file not shown.

BIN
docs-parent/lib/jna.jar Normal file

Binary file not shown.

BIN
docs-parent/lib/tess4j.jar Normal file

Binary file not shown.

View File

@ -159,6 +159,7 @@
<artifactId>osxappbundle-maven-plugin</artifactId> <artifactId>osxappbundle-maven-plugin</artifactId>
<version>${org.codehaus.mojo.osxappbundle-maven-plugin.version}</version> <version>${org.codehaus.mojo.osxappbundle-maven-plugin.version}</version>
</plugin> </plugin>
</plugins> </plugins>
</build> </build>
@ -434,6 +435,25 @@
<artifactId>imgscalr-lib</artifactId> <artifactId>imgscalr-lib</artifactId>
<version>${org.imgscalr.imgscalr-lib.version}</version> <version>${org.imgscalr.imgscalr-lib.version}</version>
</dependency> </dependency>
<!-- OCR dependencies -->
<dependency>
<groupId>jna</groupId>
<artifactId>jna</artifactId>
<version>1.0</version>
</dependency>
<dependency>
<groupId>jai</groupId>
<artifactId>imageio</artifactId>
<version>1.0</version>
</dependency>
<dependency>
<groupId>tess4j</groupId>
<artifactId>tess4j</artifactId>
<version>1.0</version>
</dependency>
</dependencies> </dependencies>
</dependencyManagement> </dependencyManagement>
@ -450,4 +470,74 @@
</releases> </releases>
</pluginRepository> </pluginRepository>
</pluginRepositories> </pluginRepositories>
<profiles>
<profile>
<id>init</id>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-install-plugin</artifactId>
<version>2.3.1</version>
<executions>
<execution>
<id>install-jna</id>
<phase>validate</phase>
<configuration>
<file>${project.basedir}/lib/jna.jar</file>
<repositoryLayout>default</repositoryLayout>
<groupId>jna</groupId>
<artifactId>jna</artifactId>
<version>1.0</version>
<packaging>jar</packaging>
<generatePom>true</generatePom>
</configuration>
<goals>
<goal>install-file</goal>
</goals>
</execution>
<execution>
<id>install-jai-imageio</id>
<phase>validate</phase>
<configuration>
<file>${project.basedir}/lib/jai_imageio.jar</file>
<repositoryLayout>default</repositoryLayout>
<groupId>jai</groupId>
<artifactId>imageio</artifactId>
<version>1.0</version>
<packaging>jar</packaging>
<generatePom>true</generatePom>
</configuration>
<goals>
<goal>install-file</goal>
</goals>
</execution>
<execution>
<id>install-tess4j</id>
<phase>validate</phase>
<configuration>
<file>${project.basedir}/lib/tess4j.jar</file>
<repositoryLayout>default</repositoryLayout>
<groupId>tess4j</groupId>
<artifactId>tess4j</artifactId>
<version>1.0</version>
<packaging>jar</packaging>
<generatePom>true</generatePom>
</configuration>
<goals>
<goal>install-file</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
</profile>
</profiles>
</project> </project>

View File

@ -1,3 +1,3 @@
api.current_version=${project.version} api.current_version=${project.version}
api.min_version=1.0 api.min_version=1.0
db.version=4 db.version=5

View File

@ -31,6 +31,7 @@ import org.joda.time.format.DateTimeFormatterBuilder;
import org.joda.time.format.DateTimeParser; import org.joda.time.format.DateTimeParser;
import com.google.common.base.Strings; import com.google.common.base.Strings;
import com.sismics.docs.core.constant.Constants;
import com.sismics.docs.core.dao.jpa.DocumentDao; import com.sismics.docs.core.dao.jpa.DocumentDao;
import com.sismics.docs.core.dao.jpa.ShareDao; import com.sismics.docs.core.dao.jpa.ShareDao;
import com.sismics.docs.core.dao.jpa.TagDao; import com.sismics.docs.core.dao.jpa.TagDao;
@ -155,6 +156,7 @@ public class DocumentResource extends BaseResource {
document.put("description", documentDto.getDescription()); document.put("description", documentDto.getDescription());
document.put("create_date", documentDto.getCreateTimestamp()); document.put("create_date", documentDto.getCreateTimestamp());
document.put("shared", documentDto.getShared()); document.put("shared", documentDto.getShared());
document.put("language", documentDto.getLanguage());
// Get tags // Get tags
List<TagDto> tagDtoList = tagDao.getByDocumentId(documentDto.getId()); List<TagDto> tagDtoList = tagDao.getByDocumentId(documentDto.getId());
@ -178,7 +180,7 @@ public class DocumentResource extends BaseResource {
/** /**
* Parse a query according to the specified syntax, eg.: * Parse a query according to the specified syntax, eg.:
* tag:assurance tag:other before:2012 after:2011-09 shared:yes thing * tag:assurance tag:other before:2012 after:2011-09 shared:yes lang:fra thing
* *
* @param search Search query * @param search Search query
* @return DocumentCriteria * @return DocumentCriteria
@ -233,6 +235,11 @@ public class DocumentResource extends BaseResource {
if (params[1].equals("yes")) { if (params[1].equals("yes")) {
documentCriteria.setShared(true); documentCriteria.setShared(true);
} }
} else if (params[0].equals("lang")) {
// New language criteria
if (Constants.SUPPORTED_LANGUAGES.contains(params[1])) {
documentCriteria.setLanguage(params[1]);
}
} else { } else {
query.append(criteria); query.append(criteria);
} }
@ -256,6 +263,7 @@ public class DocumentResource extends BaseResource {
@FormParam("title") String title, @FormParam("title") String title,
@FormParam("description") String description, @FormParam("description") String description,
@FormParam("tags") List<String> tagList, @FormParam("tags") List<String> tagList,
@FormParam("language") String language,
@FormParam("create_date") String createDateStr) throws JSONException { @FormParam("create_date") String createDateStr) throws JSONException {
if (!authenticate()) { if (!authenticate()) {
throw new ForbiddenClientException(); throw new ForbiddenClientException();
@ -263,8 +271,12 @@ public class DocumentResource extends BaseResource {
// Validate input data // Validate input data
title = ValidationUtil.validateLength(title, "title", 1, 100, false); title = ValidationUtil.validateLength(title, "title", 1, 100, false);
language = ValidationUtil.validateLength(language, "language", 3, 3, false);
description = ValidationUtil.validateLength(description, "description", 0, 4000, true); description = ValidationUtil.validateLength(description, "description", 0, 4000, true);
Date createDate = ValidationUtil.validateDate(createDateStr, "create_date", true); Date createDate = ValidationUtil.validateDate(createDateStr, "create_date", true);
if (!Constants.SUPPORTED_LANGUAGES.contains(language)) {
throw new ClientException("ValidationError", MessageFormat.format("{0} is not a supported language", language));
}
// Create the document // Create the document
DocumentDao documentDao = new DocumentDao(); DocumentDao documentDao = new DocumentDao();
@ -272,6 +284,7 @@ public class DocumentResource extends BaseResource {
document.setUserId(principal.getId()); document.setUserId(principal.getId());
document.setTitle(title); document.setTitle(title);
document.setDescription(description); document.setDescription(description);
document.setLanguage(language);
if (createDate == null) { if (createDate == null) {
document.setCreateDate(new Date()); document.setCreateDate(new Date());
} else { } else {
@ -303,6 +316,7 @@ public class DocumentResource extends BaseResource {
@FormParam("title") String title, @FormParam("title") String title,
@FormParam("description") String description, @FormParam("description") String description,
@FormParam("tags") List<String> tagList, @FormParam("tags") List<String> tagList,
@FormParam("language") String language,
@FormParam("create_date") String createDateStr) throws JSONException { @FormParam("create_date") String createDateStr) throws JSONException {
if (!authenticate()) { if (!authenticate()) {
throw new ForbiddenClientException(); throw new ForbiddenClientException();
@ -310,8 +324,12 @@ public class DocumentResource extends BaseResource {
// Validate input data // Validate input data
title = ValidationUtil.validateLength(title, "title", 1, 100, true); title = ValidationUtil.validateLength(title, "title", 1, 100, true);
language = ValidationUtil.validateLength(language, "language", 3, 3, true);
description = ValidationUtil.validateLength(description, "description", 0, 4000, true); description = ValidationUtil.validateLength(description, "description", 0, 4000, true);
Date createDate = ValidationUtil.validateDate(createDateStr, "create_date", true); Date createDate = ValidationUtil.validateDate(createDateStr, "create_date", true);
if (language != null && !Constants.SUPPORTED_LANGUAGES.contains(language)) {
throw new ClientException("ValidationError", MessageFormat.format("{0} is not a supported language", language));
}
// Get the document // Get the document
DocumentDao documentDao = new DocumentDao(); DocumentDao documentDao = new DocumentDao();
@ -332,6 +350,9 @@ public class DocumentResource extends BaseResource {
if (createDate != null) { if (createDate != null) {
document.setCreateDate(createDate); document.setCreateDate(createDate);
} }
if (language != null) {
document.setLanguage(language);
}
// Update tags // Update tags
updateTagList(id, tagList); updateTagList(id, tagList);

View File

@ -29,6 +29,8 @@ import org.codehaus.jettison.json.JSONObject;
import com.sismics.docs.core.dao.jpa.DocumentDao; import com.sismics.docs.core.dao.jpa.DocumentDao;
import com.sismics.docs.core.dao.jpa.FileDao; import com.sismics.docs.core.dao.jpa.FileDao;
import com.sismics.docs.core.dao.jpa.ShareDao; import com.sismics.docs.core.dao.jpa.ShareDao;
import com.sismics.docs.core.event.FileCreatedAsyncEvent;
import com.sismics.docs.core.model.context.AppContext;
import com.sismics.docs.core.model.jpa.Document; import com.sismics.docs.core.model.jpa.Document;
import com.sismics.docs.core.model.jpa.File; import com.sismics.docs.core.model.jpa.File;
import com.sismics.docs.core.util.DirectoryUtil; import com.sismics.docs.core.util.DirectoryUtil;
@ -111,6 +113,12 @@ public class FileResource extends BaseResource {
// Save the file // Save the file
FileUtil.save(is, file); FileUtil.save(is, file);
// Raise a new file created event
FileCreatedAsyncEvent fileCreatedAsyncEvent = new FileCreatedAsyncEvent();
fileCreatedAsyncEvent.setDocument(document);
fileCreatedAsyncEvent.setFile(file);
AppContext.getInstance().getAsyncEventBus().post(fileCreatedAsyncEvent);
// Always return ok // Always return ok
JSONObject response = new JSONObject(); JSONObject response = new JSONObject();
response.put("status", "ok"); response.put("status", "ok");

View File

@ -1,3 +1,3 @@
api.current_version=${project.version} api.current_version=${project.version}
api.min_version=1.0 api.min_version=1.0
db.version=4 db.version=5

View File

@ -50,6 +50,7 @@ public class TestDocumentResource extends BaseJerseyTest {
postParams.add("title", "My super document 1"); postParams.add("title", "My super document 1");
postParams.add("description", "My super description for document 1"); postParams.add("description", "My super description for document 1");
postParams.add("tags", tag1Id); postParams.add("tags", tag1Id);
postParams.add("language", "eng");
long create1Date = new Date().getTime(); long create1Date = new Date().getTime();
postParams.add("create_date", create1Date); postParams.add("create_date", create1Date);
response = documentResource.put(ClientResponse.class, postParams); response = documentResource.put(ClientResponse.class, postParams);
@ -80,6 +81,7 @@ public class TestDocumentResource extends BaseJerseyTest {
JSONArray tags = documents.getJSONObject(0).getJSONArray("tags"); JSONArray tags = documents.getJSONObject(0).getJSONArray("tags");
Assert.assertTrue(documents.length() == 1); Assert.assertTrue(documents.length() == 1);
Assert.assertEquals(document1Id, documents.getJSONObject(0).getString("id")); Assert.assertEquals(document1Id, documents.getJSONObject(0).getString("id"));
Assert.assertEquals("eng", documents.getJSONObject(0).getString("language"));
Assert.assertEquals(1, tags.length()); Assert.assertEquals(1, tags.length());
Assert.assertEquals(tag1Id, tags.getJSONObject(0).getString("id")); Assert.assertEquals(tag1Id, tags.getJSONObject(0).getString("id"));
Assert.assertEquals("SuperTag", tags.getJSONObject(0).getString("name")); Assert.assertEquals("SuperTag", tags.getJSONObject(0).getString("name"));
@ -135,18 +137,30 @@ public class TestDocumentResource extends BaseJerseyTest {
Assert.assertEquals(document1Id, documents.getJSONObject(0).getString("id")); Assert.assertEquals(document1Id, documents.getJSONObject(0).getString("id"));
Assert.assertEquals(true, documents.getJSONObject(0).getBoolean("shared")); Assert.assertEquals(true, documents.getJSONObject(0).getBoolean("shared"));
// Search documents with multiple criteria // Search documents by language
documentResource = resource().path("/document/list"); documentResource = resource().path("/document/list");
documentResource.addFilter(new CookieAuthenticationFilter(document1Token)); documentResource.addFilter(new CookieAuthenticationFilter(document1Token));
getParams = new MultivaluedMapImpl(); getParams = new MultivaluedMapImpl();
getParams.putSingle("search", "after:2010 before:2040-08 tag:super shared:yes for"); getParams.putSingle("search", "lang:eng");
response = documentResource.queryParams(getParams).get(ClientResponse.class);
json = response.getEntity(JSONObject.class);
Assert.assertEquals(Status.OK, Status.fromStatusCode(response.getStatus()));
documents = json.getJSONArray("documents");
Assert.assertTrue(documents.length() == 1);
Assert.assertEquals(document1Id, documents.getJSONObject(0).getString("id"));
Assert.assertEquals("eng", documents.getJSONObject(0).getString("language"));
// Search documents with multiple criteria
documentResource = resource().path("/document/list");
documentResource.addFilter(new CookieAuthenticationFilter(document1Token));
getParams = new MultivaluedMapImpl();
getParams.putSingle("search", "after:2010 before:2040-08 tag:super shared:yes lang:eng for");
response = documentResource.queryParams(getParams).get(ClientResponse.class); response = documentResource.queryParams(getParams).get(ClientResponse.class);
json = response.getEntity(JSONObject.class); json = response.getEntity(JSONObject.class);
Assert.assertEquals(Status.OK, Status.fromStatusCode(response.getStatus())); Assert.assertEquals(Status.OK, Status.fromStatusCode(response.getStatus()));
documents = json.getJSONArray("documents"); documents = json.getJSONArray("documents");
Assert.assertTrue(documents.length() == 1); Assert.assertTrue(documents.length() == 1);
Assert.assertEquals(document1Id, documents.getJSONObject(0).getString("id")); Assert.assertEquals(document1Id, documents.getJSONObject(0).getString("id"));
Assert.assertEquals(true, documents.getJSONObject(0).getBoolean("shared"));
// Search documents (nothing) // Search documents (nothing)
documentResource = resource().path("/document/list"); documentResource = resource().path("/document/list");
@ -181,6 +195,17 @@ public class TestDocumentResource extends BaseJerseyTest {
documents = json.getJSONArray("documents"); documents = json.getJSONArray("documents");
Assert.assertTrue(documents.length() == 0); Assert.assertTrue(documents.length() == 0);
// Search documents by language (nothing)
documentResource = resource().path("/document/list");
documentResource.addFilter(new CookieAuthenticationFilter(document1Token));
getParams = new MultivaluedMapImpl();
getParams.putSingle("search", "lang:fra");
response = documentResource.queryParams(getParams).get(ClientResponse.class);
json = response.getEntity(JSONObject.class);
Assert.assertEquals(Status.OK, Status.fromStatusCode(response.getStatus()));
documents = json.getJSONArray("documents");
Assert.assertTrue(documents.length() == 0);
// Get a document // Get a document
documentResource = resource().path("/document/" + document1Id); documentResource = resource().path("/document/" + document1Id);
documentResource.addFilter(new CookieAuthenticationFilter(document1Token)); documentResource.addFilter(new CookieAuthenticationFilter(document1Token));

View File

@ -42,6 +42,7 @@ public class TestFileResource extends BaseJerseyTest {
documentResource.addFilter(new CookieAuthenticationFilter(file1AuthenticationToken)); documentResource.addFilter(new CookieAuthenticationFilter(file1AuthenticationToken));
MultivaluedMapImpl postParams = new MultivaluedMapImpl(); MultivaluedMapImpl postParams = new MultivaluedMapImpl();
postParams.add("title", "File test document 1"); postParams.add("title", "File test document 1");
postParams.add("language", "eng");
ClientResponse response = documentResource.put(ClientResponse.class, postParams); ClientResponse response = documentResource.put(ClientResponse.class, postParams);
Assert.assertEquals(Status.OK, Status.fromStatusCode(response.getStatus())); Assert.assertEquals(Status.OK, Status.fromStatusCode(response.getStatus()));
JSONObject json = response.getEntity(JSONObject.class); JSONObject json = response.getEntity(JSONObject.class);
@ -52,7 +53,7 @@ public class TestFileResource extends BaseJerseyTest {
WebResource fileResource = resource().path("/file"); WebResource fileResource = resource().path("/file");
fileResource.addFilter(new CookieAuthenticationFilter(file1AuthenticationToken)); fileResource.addFilter(new CookieAuthenticationFilter(file1AuthenticationToken));
FormDataMultiPart form = new FormDataMultiPart(); FormDataMultiPart form = new FormDataMultiPart();
InputStream file = this.getClass().getResourceAsStream("/file/PIA00452.jpg"); InputStream file = this.getClass().getResourceAsStream("/file/Einstein-Roosevelt-letter.png");
FormDataBodyPart fdp = new FormDataBodyPart("file", FormDataBodyPart fdp = new FormDataBodyPart("file",
new BufferedInputStream(file), new BufferedInputStream(file),
MediaType.APPLICATION_OCTET_STREAM_TYPE); MediaType.APPLICATION_OCTET_STREAM_TYPE);
@ -87,7 +88,7 @@ public class TestFileResource extends BaseJerseyTest {
Assert.assertEquals(Status.OK, Status.fromStatusCode(response.getStatus())); Assert.assertEquals(Status.OK, Status.fromStatusCode(response.getStatus()));
InputStream is = response.getEntityInputStream(); InputStream is = response.getEntityInputStream();
byte[] fileBytes = ByteStreams.toByteArray(is); byte[] fileBytes = ByteStreams.toByteArray(is);
Assert.assertEquals(163510, fileBytes.length); Assert.assertEquals(292641, fileBytes.length);
// Get the thumbnail data // Get the thumbnail data
fileResource = resource().path("/file/" + file1Id + "/data"); fileResource = resource().path("/file/" + file1Id + "/data");
@ -98,7 +99,7 @@ public class TestFileResource extends BaseJerseyTest {
Assert.assertEquals(Status.OK, Status.fromStatusCode(response.getStatus())); Assert.assertEquals(Status.OK, Status.fromStatusCode(response.getStatus()));
is = response.getEntityInputStream(); is = response.getEntityInputStream();
fileBytes = ByteStreams.toByteArray(is); fileBytes = ByteStreams.toByteArray(is);
Assert.assertEquals(41935, fileBytes.length); Assert.assertEquals(34050, fileBytes.length);
// Get all files from a document // Get all files from a document
fileResource = resource().path("/file/list"); fileResource = resource().path("/file/list");

View File

@ -42,6 +42,7 @@ public class TestShareResource extends BaseJerseyTest {
documentResource.addFilter(new CookieAuthenticationFilter(share1AuthenticationToken)); documentResource.addFilter(new CookieAuthenticationFilter(share1AuthenticationToken));
MultivaluedMapImpl postParams = new MultivaluedMapImpl(); MultivaluedMapImpl postParams = new MultivaluedMapImpl();
postParams.add("title", "File test document 1"); postParams.add("title", "File test document 1");
postParams.add("language", "eng");
ClientResponse response = documentResource.put(ClientResponse.class, postParams); ClientResponse response = documentResource.put(ClientResponse.class, postParams);
Assert.assertEquals(Status.OK, Status.fromStatusCode(response.getStatus())); Assert.assertEquals(Status.OK, Status.fromStatusCode(response.getStatus()));
JSONObject json = response.getEntity(JSONObject.class); JSONObject json = response.getEntity(JSONObject.class);

View File

@ -66,6 +66,7 @@ public class TestTagResource extends BaseJerseyTest {
postParams = new MultivaluedMapImpl(); postParams = new MultivaluedMapImpl();
postParams.add("title", "My super document 1"); postParams.add("title", "My super document 1");
postParams.add("tags", tag3Id); postParams.add("tags", tag3Id);
postParams.add("language", "eng");
response = documentResource.put(ClientResponse.class, postParams); response = documentResource.put(ClientResponse.class, postParams);
Assert.assertEquals(Status.OK, Status.fromStatusCode(response.getStatus())); Assert.assertEquals(Status.OK, Status.fromStatusCode(response.getStatus()));
json = response.getEntity(JSONObject.class); json = response.getEntity(JSONObject.class);
@ -76,6 +77,7 @@ public class TestTagResource extends BaseJerseyTest {
postParams = new MultivaluedMapImpl(); postParams = new MultivaluedMapImpl();
postParams.add("title", "My super document 1"); postParams.add("title", "My super document 1");
postParams.add("tags", tag4Id); postParams.add("tags", tag4Id);
postParams.add("language", "eng");
response = documentResource.put(ClientResponse.class, postParams); response = documentResource.put(ClientResponse.class, postParams);
Assert.assertEquals(Status.OK, Status.fromStatusCode(response.getStatus())); Assert.assertEquals(Status.OK, Status.fromStatusCode(response.getStatus()));
json = response.getEntity(JSONObject.class); json = response.getEntity(JSONObject.class);

Binary file not shown.

After

Width:  |  Height:  |  Size: 286 KiB