Dockerization + Fix for Tesseract 3.03

This commit is contained in:
Walter 2015-03-11 00:35:42 +01:00
parent 18cedaef2c
commit 192c2030d3
8 changed files with 41 additions and 1 deletions

10
Dockerfile Normal file
View File

@ -0,0 +1,10 @@
# Image for the Docs web application: installs Tesseract OCR (with French
# language data) on top of the Jetty base image and deploys the application
# WAR together with its Jetty context descriptor.
# NOTE(review): the base image is untagged, so every build tracks whatever is
# currently published under that name; pin a tag or digest once one is agreed.
FROM sismics/debian-java7-jetty9
LABEL maintainer="benjamin.gam@gmail.com"

# Refresh the package index in the same layer as the install so this step does
# not depend on stale (or missing) lists inherited from the base image, then
# remove the lists so they do not end up in the layer.
RUN apt-get update \
    && apt-get -y -q install \
        tesseract-ocr \
        tesseract-ocr-fra \
    && rm -rf /var/lib/apt/lists/*

# NOTE(review): presumably both variables are part of the "Tesseract 3.03" fix
# named in the commit title (data directory lookup and numeric locale) —
# confirm against the OCR code before changing them.
ENV TESSDATA_PREFIX=/usr/share/tesseract-ocr \
    LC_NUMERIC=C

# Plain local files: COPY is sufficient, none of ADD's extra behaviours
# (tar auto-extraction, URL fetching) are needed here.
COPY docs-web/target/docs-web-*.war /opt/jetty/webapps/docs.war
COPY docs.xml /opt/jetty/webapps/docs.xml

2
build.sh Normal file
View File

@ -0,0 +1,2 @@
#!/bin/sh
# Builds the application image, using the current directory as build context.
image_tag="sismics/docs"
docker build --tag "${image_tag}" .

View File

@ -53,7 +53,7 @@ public class Tesseract {
private static Tesseract instance; private static Tesseract instance;
private final static Rectangle EMPTY_RECTANGLE = new Rectangle(); private final static Rectangle EMPTY_RECTANGLE = new Rectangle();
private String language = "eng"; private String language = "eng";
private String datapath = "tessdata"; private String datapath = null;
private int psm = TessAPI.TessPageSegMode.PSM_AUTO; private int psm = TessAPI.TessPageSegMode.PSM_AUTO;
private boolean hocr; private boolean hocr;
private int pageNum; private int pageNum;

View File

@ -0,0 +1,5 @@
# Data-only image: ships the initial contents of /var/docs and exposes that
# directory as a volume.
# NOTE(review): busybox is untagged here; pin a tag for reproducible builds.
FROM busybox

# Populate the directory and fix its ownership BEFORE declaring the volume:
# build steps that modify a path after its VOLUME declaration are discarded,
# which would silently drop the chown below.
COPY data /var/docs/
# NOTE(review): 1001:1001 presumably matches the user the application
# container runs as — confirm against the Jetty base image.
RUN chown -R 1001:1001 /var/docs

VOLUME ["/var/docs"]
CMD ["/bin/sh"]

View File

@ -0,0 +1,3 @@
#!/bin/sh
# Builds the data-volume image, using the current directory as build context.
image_tag="sismics/docs_data"
docker build --tag "${image_tag}" .

3
docs-docker-data/run.sh Normal file
View File

@ -0,0 +1,3 @@
#!/bin/sh
# Recreates the data container from the sismics/docs_data image.
container="sismics_docs_data"

# Force-remove any existing container with this name so the name can be reused.
docker rm --force "${container}"
docker run --name "${container}" sismics/docs_data

10
docs.xml Normal file
View File

@ -0,0 +1,10 @@
<!-- Jetty context descriptor for the Docs web application. -->
<Configure class="org.eclipse.jetty.webapp.WebAppContext">
  <!-- Serve the application from the root context path. -->
  <Set name="contextPath">/</Set>

  <!-- WAR location: the "jetty.data" system property (default ".") followed by /webapps/docs.war. -->
  <Set name="war"><SystemProperty name="jetty.data" default="."/>/webapps/docs.war</Set>

  <!-- Equivalent to System.setProperty("docs.home", "/var/docs"). -->
  <Call class="java.lang.System" name="setProperty">
    <Arg>docs.home</Arg>
    <Arg>/var/docs</Arg>
  </Call>
</Configure>

7
run-service.sh Normal file
View File

@ -0,0 +1,7 @@
#!/bin/bash
# Recreates and starts the Docs web application container in the background,
# mounting the volumes of the sismics_docs_data container.
container="sismics_docs"

# Force-remove any existing container with this name so the name can be reused.
docker rm --force "${container}"

# NOTE(review): the VIRTUAL_* variables are presumably consumed by a reverse
# proxy in front of this container — confirm against the hosting setup.
docker run \
    --detach \
    --name="${container}" \
    --restart=always \
    --volumes-from=sismics_docs_data \
    --env 'VIRTUAL_HOST_SECURE=docs.sismics.com' \
    --env 'VIRTUAL_PORT=80' \
    sismics/docs:latest