How to use the pyocr.pyocr function in pyocr

To help you get started, we’ve selected a few pyocr examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github openpaperwork / pyocr / run_tests.py View on Github external
from tests import tests_libtesseract

if __name__ == '__main__':
    # Step 1: describe every OCR backend listed in pyocr.TOOLS, including
    # the ones that do not report themselves as available.
    for backend in pyocr.TOOLS:
        print("- OCR: %s" % backend.get_name())
        usable = backend.is_available()
        print("  is_available(): %s" % (str(usable)))
        if usable:
            # Version and languages are only queried on available backends.
            print("  get_version(): %s" % (str(backend.get_version())))
            print("  get_available_languages(): ")
            print("    " + ", ".join(backend.get_available_languages()))
        print("")
    print("")

    # Step 2: list what pyocr.get_available_tools() returns.
    print("OCR tool found:")
    for backend in pyocr.get_available_tools():
        print("- %s" % backend.get_name())

    # Step 3: run each backend's test suite, in the same fixed order, but
    # only when that backend reports itself as available.
    suites = (
        (libtesseract, "Tesseract C-API:", tests_libtesseract),
        (tesseract, "Tesseract SH:", tests_tesseract),
        (cuneiform, "Cuneiform SH:", tests_cuneiform),
    )
    for ocr_module, title, tests_module in suites:
        if not ocr_module.is_available():
            continue
        print("---")
        print(title)
        unittest.TextTestRunner().run(tests_module.get_all_tests())
github openpaperwork / pyocr / run_tests.py View on Github external
import sys
sys.path = ["src"] + sys.path
import unittest

from pyocr import cuneiform
from pyocr import pyocr
from pyocr import libtesseract
from pyocr import tesseract

from tests import tests_cuneiform
from tests import tests_tesseract
from tests import tests_libtesseract

if __name__ == '__main__':
    # Describe every OCR backend listed in pyocr.TOOLS, whether or not it
    # reports itself as available.
    for tool in pyocr.TOOLS:
        print("- OCR: %s" % tool.get_name())
        available = tool.is_available()
        print("  is_available(): %s" % (str(available)))
        if available:
            # Version and language list are only queried when the backend
            # says it is available.
            print("  get_version(): %s" % (str(tool.get_version())))
            print("  get_available_languages(): ")
            print("    " + ", ".join(tool.get_available_languages()))
        print("")
    print("")

    # List the backends returned by pyocr.get_available_tools().
    print("OCR tool found:")
    for tool in pyocr.get_available_tools():
        print("- %s" % tool.get_name())
    # Section header for the libtesseract backend, printed only when it is
    # available.
    # NOTE(review): this excerpt ends right after the header; presumably the
    # libtesseract test run follows -- confirm against the full file.
    if libtesseract.is_available():
        print("---")
        print("Tesseract C-API:")
github openpaperwork / paperwork / src / paperwork / model / scanner.py View on Github external
def __set_selected_device(self, selected):
        """
        Remember the device id picked by the user and refresh `self.state`.

        `self.state` is set to a `(flag, message)` tuple. The flag is True
        only when Sane is present, at least one OCR tool is available and a
        device has been selected; otherwise the message names the first
        failing check.
        """
        if HAS_SANE:
            # Only look for OCR tools once Sane itself is known to be there.
            ocr_tools = pyocr.pyocr.get_available_tools()
            if len(ocr_tools) == 0:
                status = (False,
                          _('No OCR tool found not available. Can\'t do OCR'))
            elif selected:
                status = (True, _('Scan'))
            else:
                status = (False, _('No scanner has been selected'))
        else:
            status = (False, _('Sane module not found'))
        self.state = status
        self.__selected_device = selected
github openpaperwork / paperwork / paperwork-gtk / src / paperwork / deps.py View on Github external
def find_missing_ocr(lang):
    """
    OCR tools are a little bit more tricky
    """
    missing = []
    try:
        from pyocr import pyocr
        ocr_tools = pyocr.get_available_tools()
    except ImportError:
        print(
            "[WARNING] Couldn't import Pyocr. Will assume OCR tool is not"
            " installed yet"
        )
        ocr_tools = []

    if len(ocr_tools) <= 0:
        langs = []
        missing.append(
            (
                'Tesseract', '(none)',
                {
                    'debian': 'tesseract-ocr',
                    'fedora': 'tesseract',
                    'gentoo': 'app-text/tesseract',
github esauvisky / PGoTrader / trade.py View on Github external
def __init__(self, args):
        """
        Load the YAML configuration and select the OCR tool to use.

        `args` must expose a `config` attribute holding the path of the YAML
        file to read; the object itself is kept as `self.args`.

        Raises IndexError if pyocr reports no available tool, and KeyError
        if the configuration lacks the 'names' entries read below.
        """
        with open(args.config, "r") as f:
            # NOTE(review): yaml.load() is called without an explicit Loader.
            # Recent PyYAML releases require that argument, and the default
            # loader is not safe on untrusted files -- consider
            # yaml.safe_load(f) after confirming the config uses plain YAML.
            self.config = yaml.load(f)
        self.args = args
        tools = pyocr.get_available_tools()
        # The first tool returned is used; an empty list raises IndexError.
        self.tool = tools[0]
        self.p = PokemonGo()
        self.i = 2

        # Both values come from the 'names' section of the configuration.
        self.CHECK_STRING = self.config['names']['name_check']
        self.SEARCH_STRING = self.config['names']['search_string']
github esauvisky / PGoEggHatcher / egghatcher.py View on Github external
def __init__(self, args):
        """
        Load the YAML configuration, select the OCR tool and reset counters.

        `args` must expose a `config` attribute holding the path of the YAML
        file to read; the object itself is kept as `self.args`.

        Raises IndexError if pyocr reports no available tool.
        """
        with open(args.config, "r") as f:
            # NOTE(review): yaml.load() is called without an explicit Loader.
            # Recent PyYAML releases require that argument, and the default
            # loader is not safe on untrusted files -- consider
            # yaml.safe_load(f) after confirming the config uses plain YAML.
            self.config = yaml.load(f)
        self.args = args
        tools = pyocr.get_available_tools()
        # The first tool returned is used; an empty list raises IndexError.
        self.tool = tools[0]
        # Initial values: empty state string and zeroed egg counters.
        self.state = ''
        self.egg_walked = 0
        self.egg_total = 0
github the-paperless-project / paperless / src / paperless_tesseract / parsers.py View on Github external
"best with what we have."
                )
                raw_text = self._assemble_ocr_sections(imgs, middle, raw_text)
                return raw_text
            error_msg = ("Language detection failed. Set "
                         "PAPERLESS_FORGIVING_OCR in config file to continue "
                         "anyway.")
            raise OCRError(error_msg)

        if ISO639[guessed_language] == self.DEFAULT_OCR_LANGUAGE:
            raw_text = self._assemble_ocr_sections(imgs, middle, raw_text)
            return raw_text

        try:
            return self._ocr(imgs, ISO639[guessed_language])
        except pyocr.pyocr.tesseract.TesseractError:
            if settings.FORGIVING_OCR:
                self.log(
                    "warning",
                    "OCR for {} failed, but we're going to stick with what "
                    "we've got since FORGIVING_OCR is enabled.".format(
                        guessed_language
                    )
                )
                raw_text = self._assemble_ocr_sections(imgs, middle, raw_text)
                return raw_text
            raise OCRError(
                "The guessed language ({}) is not available in this instance "
                "of Tesseract.".format(guessed_language)
            )
github the-paperless-project / paperless / src / documents / consumers / base.py View on Github external
self._render("Language detection failed!", 0)
            if settings.FORGIVING_OCR:
                self._render(
                    "As FORGIVING_OCR is enabled, we're going to make the best "
                    "with what we have.",
                    1
                )
                return raw_text
            raise OCRError

        if ISO639[guessed_language] == self.DEFAULT_OCR_LANGUAGE:
            return raw_text

        try:
            return self._ocr(pngs, ISO639[guessed_language])
        except pyocr.pyocr.tesseract.TesseractError:
            if settings.FORGIVING_OCR:
                self._render(
                    "OCR for {} failed, but we're going to stick with what "
                    "we've got since FORGIVING_OCR is enabled.".format(
                        guessed_language
                    ),
                    0
                )
                return raw_text
            raise OCRError