added OCR extension

This commit is contained in:
Mario Voigt 2021-07-09 20:01:50 +02:00
parent 21ba44021a
commit 150c077ad7
3 changed files with 191 additions and 0 deletions

129
extensions/fablabchemnitz/ocr/.gitignore vendored Normal file
View File

@ -0,0 +1,129 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
.python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/

View File

@ -0,0 +1,16 @@
<?xml version="1.0" encoding="UTF-8"?>
<inkscape-extension xmlns="http://www.inkscape.org/namespace/inkscape/extension">
<name>OCR (Image To Text)</name>
<id>fablabchemnitz.de.ocr</id>
<effect>
<object-type>path</object-type>
<effects-menu>
<submenu name="FabLab Chemnitz">
<submenu name="Various" />
</submenu>
</effects-menu>
</effect>
<script>
<command location="inx" interpreter="python">ocr.py</command>
</script>
</inkscape-extension>

View File

@ -0,0 +1,46 @@
#!/usr/bin/env python3
# Copyright (C) 2021 Amal Santhosh , amalsanp@gmail.com
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#
import inkex
import pytesseract
from PIL import Image
import cairosvg
import os
class OcrOutputExtension(inkex.OutputExtension):
def effect(self):
try:
cairosvg.svg2png(url=self.file_io.name, write_to='read.png')
im = Image.open('read.png')
text = pytesseract.image_to_string(im,lang ='eng')
self.msg(text.rstrip())
os.remove('read.png')
except Exception as e:
self.msg(e)
self.msg("Image reading failed!")
return
def save(self, stream):
pass
if __name__ == '__main__':
OcrOutputExtension().run()