Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_detect_os(self):
    """Test DetectOS and DetectOrientationScript (tesseract v4+).

    Runs orientation/script detection on ``self._image_file`` and checks
    that every expected key is present in the returned dicts and that the
    detected orientation is 0.
    """
    self._api.SetPageSegMode(tesserocr.PSM.OSD_ONLY)
    self._api.SetImageFile(self._image_file)
    orientation = self._api.DetectOS()
    # assertIn() returns None, so wrapping the checks in all(...) would
    # stop after the first key; loop explicitly so every key is verified.
    for key in ('sconfidence', 'oconfidence', 'script', 'orientation'):
        self.assertIn(key, orientation)
    self.assertEqual(orientation['orientation'], 0)
    # The language list is sorted alphabetically, so the script index
    # cannot be mapped back to a script name reliably (e.g. 'Latin');
    # only the index range is checked here.
    languages = tesserocr.get_languages()[1]
    self.assertLess(orientation['script'], len(languages))
    if _TESSERACT_VERSION >= 0x3999800:
        orientation = self._api.DetectOrientationScript()
        for key in ('orient_deg', 'orient_conf', 'script_name', 'script_conf'):
            self.assertIn(key, orientation)
        self.assertEqual(orientation['orient_deg'], 0)
        self.assertEqual(orientation['script_name'], 'Latin')
def get_languages(self, languages):
    """Return the '+'-joined language codes to use for OCR.

    On first use the installed language list is fetched from tesserocr
    and cached on the instance as ``supported_languages``.

    NOTE: filtering by the requested ``languages`` is currently disabled,
    so the result only ever contains the default ``'eng'`` code.
    """
    if not hasattr(self, 'supported_languages'):
        import tesserocr
        _path, installed = tesserocr.get_languages()
        self.supported_languages = installed
    selected = {'eng'}
    ordered = sorted(selected)
    return '+'.join(ordered)
def is_available(cls):
    """Report whether tesserocr can be imported and lists any language.

    Returns ``False`` when importing tesserocr raises ``ImportError``;
    otherwise returns whether the reported language list is non-empty.
    """
    try:
        import tesserocr
        _path, installed = tesserocr.get_languages()
    except ImportError:
        return False
    else:
        return len(installed) > 0
import os
import tesserocr
# Use the TESSDATA_PREFIX environment variable when it is set; otherwise
# fall back to the path reported by tesserocr (queried only in that case).
try:
    TESSDATA_PREFIX = os.environ['TESSDATA_PREFIX']
except KeyError:
    TESSDATA_PREFIX = tesserocr.get_languages()[0]
def language_list(self, languages):
    """Build the '+'-joined list of OCR models for ``languages``.

    The installed language list is read from tesserocr once and cached on
    ``settings.ocr_supported``. Requested languages are converted with
    ``alpha3`` and kept only if installed; the list is capped at
    ``self.MAX_MODELS`` entries, after which ``'eng'`` is always added.
    The result is de-duplicated and sorted.
    """
    if not hasattr(settings, 'ocr_supported'):
        # The lookup runs under temp_locale(TESSERACT_LOCALE), as in the
        # original code.
        with temp_locale(TESSERACT_LOCALE):
            from tesserocr import get_languages
            _path, installed = get_languages()
            settings.ocr_supported = installed
    models = []
    for code in alpha3(languages):
        if code in settings.ocr_supported:
            models.append(code)
    limit = self.MAX_MODELS
    if len(models) > limit:
        log.warning("Too many models, limit: %s", limit)
        models = models[:limit]
    # 'eng' is appended after truncation, so it is always present.
    unique = set(models)
    unique.add('eng')
    return '+'.join(sorted(unique))
def __init__(self):
    """Record the languages reported by ``get_languages`` and create
    a thread-local storage object."""
    # get_languages() yields a pair; only the second item is kept.
    _path, installed = get_languages()
    self.supported = installed
    self.tl = threading.local()
def get_languages(self, languages):
    """Return the '+'-joined language codes to use for OCR.

    On first use the installed language list is fetched from tesserocr
    and cached on the instance as ``supported_languages``.

    :param languages: requested languages, passed to ``list_to_alpha3``.
    :return: sorted, '+'-joined codes; always includes ``'eng'`` plus any
        requested language that is installed.
    """
    if not hasattr(self, 'supported_languages'):
        from tesserocr import get_languages
        _, self.supported_languages = get_languages()
    codes = {'eng'}
    # Iterate the *requested* languages. The previous code iterated
    # ``codes`` itself, which ignored the ``languages`` argument and
    # could mutate the set while it was being iterated.
    for lang in list_to_alpha3(languages):
        if lang in self.supported_languages:
            codes.add(lang)
    return '+'.join(sorted(codes))
def get_languages(*args, **kwargs):
    """
    Call ``get_languages_`` with ``path`` pinned to ``TESSDATA_PREFIX``.

    All other positional and keyword arguments are forwarded unchanged,
    and the wrapped function's result is returned as-is.
    """
    result = get_languages_(*args, path=TESSDATA_PREFIX, **kwargs)
    return result