How to use the pyocr.get_available_tools function in pyocr

To help you get started, we’ve selected a few pyocr examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github aryaminus / saram / saram / saram.py View on Github external
def __init__(self, path):
        """Select an OCR tool and the language used for recognition.

        The first tool returned by ``pyocr.get_available_tools()`` is stored
        on ``self.tool``. ``self.lang`` becomes ``'eng'`` when the tool lists
        it, otherwise the first language the tool reports.

        If no OCR tool is available, a message is printed and the process
        exits with status 1. Any exception raised while choosing the language
        is printed and otherwise ignored, so ``self.lang`` may be left unset.

        :param path: not used by the tool/language selection below.
        """
        preferred_language = 'eng'

        # NOTE(review): `path` is not referenced in this part of the
        # constructor; if later code needs it on the instance, store it on
        # `self` explicitly -- confirm against the callers.

        tools = pyocr.get_available_tools()
        if not tools:
            print("No OCR tool found")
            sys.exit(1)
        self.tool = tools[0]
        print("OCR tool: %s" % self.tool)

        try:
            langs = self.tool.get_available_languages()
            # Prefer English; otherwise fall back to the first reported
            # language (an empty list raises IndexError, handled below).
            self.lang = preferred_language if preferred_language in langs else langs[0]
            print("OCR selected language: %s (available: %s)" % (self.lang.upper(), ", ".join(langs)))
        except Exception as e:
            # Best effort: report the problem and carry on without a language.
            print("{}".format(e))
github aryaminus / memento / mementor / image_ocr.py View on Github external
def __init__(self):
        """Pick the OCR tool and recognition language for this instance.

        Stores the first tool from ``pyocr.get_available_tools()`` on
        ``self.tool`` and exits the process with status 1 when there is none.
        ``self.lang`` is ``'eng'`` if the tool offers it, else the first
        language the tool reports. Errors during language selection are
        printed and not re-raised.
        """
        wanted_lang = 'eng'

        found_tools = pyocr.get_available_tools()
        if not found_tools:
            print("No OCR tool found")
            sys.exit(1)

        self.tool = found_tools[0]
        print("OCR tool: %s" % self.tool)

        try:
            supported = self.tool.get_available_languages()
            # Indexing an empty list raises IndexError, reported below.
            self.lang = wanted_lang if wanted_lang in supported else supported[0]
            summary = (self.lang.upper(), ", ".join(supported))
            print("OCR selected language: %s (available: %s)" % summary)
        except Exception as err:
            print("{}".format(err))
github aryaminus / memento / mementor / ocr_save.py View on Github external
def __init__(self):
        """Set up ``self.tool`` and ``self.lang`` for OCR.

        The first entry of ``pyocr.get_available_tools()`` becomes
        ``self.tool``; with no tools available a message is printed and the
        process exits with status 1. The language defaults to the first one
        the tool reports and is switched to ``'eng'`` when that is offered.
        Any exception raised while selecting the language is printed only.
        """
        default_language = 'eng'

        candidates = pyocr.get_available_tools()
        if not candidates:
            print("No OCR tool found")
            sys.exit(1)
        self.tool = candidates[0]
        print("OCR tool: %s" % self.tool)

        try:
            languages = self.tool.get_available_languages()
            # Raises IndexError (caught below) when the tool reports nothing.
            fallback = languages[0]
            if default_language in languages:
                self.lang = default_language
            else:
                self.lang = fallback
            joined = ", ".join(languages)
            print("OCR selected language: %s (available: %s)" % (self.lang.upper(), joined))
        except Exception as error:
            print("{}".format(error))
github ecthros / labelReader / utils / tesseract_ocr.py View on Github external
def initialize(self):
		"""Store the first available OCR tool and its languages on the instance.

		Sets ``self.tool`` to the first entry of ``pyocr.get_available_tools()``
		and ``self.langs`` to that tool's available languages. When no tool is
		found, a hint is printed and the process exits with status 1.
		"""
		available = pyocr.get_available_tools()
		if not available:
			print("No tools found, do you have Tesseract installed?")
			sys.exit(1) # TODO fix
		first_tool = available[0]
		self.tool = first_tool
		self.langs = first_tool.get_available_languages()
github nfscan / ocr-process-service / service / ocr.py View on Github external
def check_required_software():
        """Check which OCR tools pyocr can find and log the outcome.

        Logs an informational message when exactly one tool is found (naming
        it) or when more than one is found.

        :raises PyOCRIntegrationNoOCRFound: when pyocr reports no OCR tool.
        """
        logger = logging.getLogger(__name__)
        tools = pyocr.get_available_tools()
        if not tools:
            # NOTE(review): this message ends mid-sentence ("Make sure it's
            # on"); it is kept unchanged here -- confirm the intended wording.
            raise PyOCRIntegrationNoOCRFound('No OCR tool has been found on '
                                             'this system. Make sure it\'s on')
        elif len(tools) == 1:
            # Pass the tool name as a logging argument so the message is only
            # formatted when the record is actually emitted.
            logger.info("I've found only one ocr tool [%s]. This is not exactly "
                        "an error but you should get better results if you have "
                        "both Tesseract and Cuneiform installed",
                        tools[0].get_name())
        else:
            logger.info("I've found all required software. We're good to go =)")
github elliotkendall / exraidbot / pokeocr.py View on Github external
def __init__(self, location_regex):
    """Choose an OCR tool and language, and compile the text-matching regexes.

    ``self.tool`` is the first tool from ``pyocr.get_available_tools()``.
    ``self.lang`` is the first language the tool reports other than
    ``'osd'``; it is left unset if there is no such language.

    :param location_regex: pattern compiled into ``self.cityRE``.
    """
    self.tool = pyocr.get_available_tools()[0]

    # 'osd' is skipped (presumably Tesseract's orientation/script detection
    # data rather than a real language -- confirm).
    usable_langs = [name for name in self.tool.get_available_languages() if name != 'osd']
    if usable_langs:
      self.lang = usable_langs[0]

    # Capitalised word, 1-2 digit number, then two "H:MM AM/PM" style times.
    self.dateTimeRE = re.compile('^([A-Z][a-z]+) ?([0-9]{1,2}) ([0-9]{1,2}:[0-9]{2} ?[AP]M) .+ ([0-9]{1,2}:[0-9]{2} ?[AP]M)')
    # "Ongoing" followed by two "H:MM AM/PM" style times.
    self.ongoingTimeRE = re.compile('^Ongoing ([0-9]{1,2}:[0-9]{2} ?[AP]M) .+ ([0-9]{1,2}:[0-9]{2} ?[AP]M)')
    self.cityRE = re.compile(location_regex)
    # Loose pattern: "Get", any characters, then "ns".
    self.getDirectionsRE = re.compile('Get.*ns')
github VTurturika / receipt-recognition / recognition / ocr.py View on Github external
def __init__(self):
        """Load the bigram index, pick an OCR tool and define the categories.

        Reads the UTF-8 text file ``bi_grams`` (relative path) line by line
        and maps each line, with newline characters stripped from both ends,
        to its zero-based line number in ``self.bigrams``; a line that
        repeats keeps the index of its last occurrence. ``self.ocr`` is the
        first tool from ``pyocr.get_available_tools()``.

        :raises OSError: if ``bi_grams`` cannot be opened.
        :raises IndexError: if pyocr returns an empty tool list.
        """
        # Context manager guarantees the file handle is closed after reading;
        # local names avoid shadowing the builtins `file` and `list`.
        with open('bi_grams', 'r', encoding="utf-8") as bigram_file:
            bigram_lines = [line.strip('\n') for line in bigram_file]
        self.bigrams = {bigram: index for index, bigram in enumerate(bigram_lines)}
        self.ocr = pyocr.get_available_tools()[0]
        self.categories = {1: 'foods', 2: 'electronics', 3: 'clothes', 4: 'household', 5: 'others'}
github openpaperwork / paperwork / paperwork-gtk / src / paperwork / frontend / settingswindow / __init__.py View on Github external
self,
                self.device_settings['resolution']['stores']['loaded']
            ),
            "progress_updater": JobFactoryProgressUpdater(self.progressbar),
        }

        try:
            translations = gettext.translation(
                'iso639-3', pycountry.LOCALES_DIR
            )
            logger.info("Language name translations loaded")
        except Exception:
            logger.exception("Unable to load languages translations")
            translations = None

        ocr_tools = pyocr.get_available_tools()

        if len(ocr_tools) == 0:
            short_ocr_langs = []
        else:
            short_ocr_langs = ocr_tools[0].get_available_languages()
        ocr_langs = []
        for short in short_ocr_langs:
            if short in ['equ', 'osd']:
                # ignore some (equ = equation ; osd = orientation detection)
                continue
            llang = self.__get_short_to_long_langs(short)
            if llang:
                if not translations:
                    tlang = llang
                else:
                    tlang = translations.gettext(llang)
github kerrickstaley / extracting-chinese-subs / main.py View on Github external
def run_ocr(self, img):
    """
    Extract subtitle text from a cleaned image using the first pyocr tool.

    :param numpy.array img: cleaned image
    :return str: extracted subtitle text ('' if there is no subtitle)
    """
    # An average character is 581 pixels, so an image with fewer than 1000
    # non-zero pixels is reported as having no subtitle.
    lit_pixels = np.count_nonzero(img)
    if lit_pixels < 1000:
      return ''

    ocr_tool = pyocr.get_available_tools()[0]
    return ocr_tool.image_to_string(Image.fromarray(img), lang=LANG)
github CarlFK / veyepar / dj / scripts / addimg.py View on Github external
def ocr_img(self, imgname):
        """
        Run OCR on an image file, print the recognised text and return it.

        The first tool from ``pyocr.get_available_tools()`` is used with
        language ``'eng'`` and a ``pyocr.builders.TextBuilder``.

        To use a non-standard language pack named foo.traineddata, set the
        TESSDATA_PREFIX environment variable so the file can be found at
        TESSDATA_PREFIX/tessdata/foo.traineddata and give Tesseract the
        argument -l foo.
        """
        ocr_tool = pyocr.get_available_tools()[0]
        recognised = ocr_tool.image_to_string(
            Image.open(imgname),
            lang='eng',
            builder=pyocr.builders.TextBuilder(),
        )
        print(recognised)
        return recognised