Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def __init__(self, path):
    """
    Pick the OCR tool and the recognition language for this instance.

    The first tool reported by pyocr is used. 'eng' is selected when the tool
    offers it; otherwise the first language the tool reports is used. If the
    languages cannot be listed (or none are reported) the error is printed and
    'eng' is kept as the fallback.

    Exits the process with status 1 when pyocr reports no OCR tool.

    :param path: value supplied by the caller; kept on the instance as
        ``self.path``
    """
    ocr_language = 'eng'
    # The original `path = path` was a no-op self-assignment that threw the
    # argument away; keep it on the instance instead.
    self.path = path
    tools = pyocr.get_available_tools()
    if not tools:
        print("No OCR tool found")
        sys.exit(1)
    self.tool = tools[0]
    print("OCR tool: %s" % self.tool)
    # Assign the fallback first so self.lang always exists, even when the
    # lookup below fails part-way through.
    self.lang = ocr_language
    try:
        langs = self.tool.get_available_languages()
        if ocr_language not in langs:
            self.lang = langs[0]
        print("OCR selected language: %s (available: %s)" % (self.lang.upper(), ", ".join(langs)))
    except Exception as e:
        # Best effort: report the problem and continue with the fallback.
        print("{}".format(e))
def __init__(self):
    """
    Pick the OCR tool and the recognition language for this instance.

    The first tool reported by pyocr is used. 'eng' is selected when the tool
    offers it; otherwise the first language the tool reports is used. If the
    languages cannot be listed (or none are reported) the error is printed and
    'eng' is kept as the fallback.

    Exits the process with status 1 when pyocr reports no OCR tool.
    """
    ocr_language = 'eng'
    tools = pyocr.get_available_tools()
    if not tools:
        print("No OCR tool found")
        sys.exit(1)
    self.tool = tools[0]
    print("OCR tool: %s" % self.tool)
    # Assign the fallback first so self.lang always exists; previously a
    # failure inside the try block left the attribute undefined.
    self.lang = ocr_language
    try:
        langs = self.tool.get_available_languages()
        if ocr_language not in langs:
            self.lang = langs[0]
        print("OCR selected language: %s (available: %s)" % (self.lang.upper(), ", ".join(langs)))
    except Exception as e:
        # Best effort: report the problem and continue with the fallback.
        print("{}".format(e))
def __init__(self):
    """
    Choose which OCR backend and which language this object will use.

    Uses the first backend pyocr reports. The language defaults to 'eng';
    when the backend does not list 'eng', the first language it does list is
    taken instead. Any error raised while querying or reporting the languages
    is printed and the 'eng' default stays in place.

    Terminates the process with exit status 1 if no OCR backend is found.
    """
    ocr_language = 'eng'
    tools = pyocr.get_available_tools()
    if not tools:
        print("No OCR tool found")
        sys.exit(1)
    self.tool = tools[0]
    print("OCR tool: %s" % self.tool)
    # Set the default up front: without this, an exception below (including
    # the IndexError from an empty language list) left self.lang unset.
    self.lang = ocr_language
    try:
        langs = self.tool.get_available_languages()
        if ocr_language not in langs:
            self.lang = langs[0]
        print("OCR selected language: %s (available: %s)" % (self.lang.upper(), ", ".join(langs)))
    except Exception as e:
        # Deliberately non-fatal: show the error and keep the default.
        print("{}".format(e))
def initialize(self):
    """
    Bind the first available pyocr tool and cache its language list.

    Sets ``self.tool`` to the first tool pyocr reports and ``self.langs`` to
    the languages that tool makes available. Prints a hint and exits the
    process with status 1 when pyocr reports no tools.
    """
    available = pyocr.get_available_tools()
    if not available:
        print("No tools found, do you have Tesseract installed?")
        sys.exit(1)  # TODO fix
    engine = available[0]
    self.tool = engine
    self.langs = engine.get_available_languages()
def check_required_software():
    """
    Verify that at least one OCR tool is available through pyocr.

    Logs an informational message describing what was found: a hint when only
    a single tool is present, or a confirmation when more than one is.

    :raises PyOCRIntegrationNoOCRFound: when pyocr reports no tools at all
    """
    log = logging.getLogger(__name__)
    found = pyocr.get_available_tools()
    tool_count = len(found)

    if tool_count == 0:
        raise PyOCRIntegrationNoOCRFound(
            'No OCR tool has been found on '
            'this system. Make sure it\'s on'
        )

    if tool_count > 1:
        log.info("I've found all required software. We're good to go =)")
        return

    # Exactly one tool: usable, but worth telling the operator about.
    only_tool_name = found[0].get_name()
    log.info(
        "I've found only one ocr tool [%s]. This is not exactly "
        "an error but you should get better results if you have "
        "both Tesseract and Cuneiform installed" % only_tool_name
    )
def __init__(self, location_regex):
    """
    Select an OCR tool and language, and compile the text-matching patterns.

    The first tool reported by pyocr is used, together with the first language
    it lists that is not 'osd'. If the tool lists no such language,
    ``self.lang`` is left unset.

    :param location_regex: regular expression source compiled into
        ``self.cityRE``
    """
    engine = pyocr.get_available_tools()[0]
    self.tool = engine
    for candidate in engine.get_available_languages():
        if candidate != 'osd':
            self.lang = candidate
            break

    # Patterns whose source text is fixed.
    date_time_pattern = '^([A-Z][a-z]+) ?([0-9]{1,2}) ([0-9]{1,2}:[0-9]{2} ?[AP]M) .+ ([0-9]{1,2}:[0-9]{2} ?[AP]M)'
    ongoing_time_pattern = '^Ongoing ([0-9]{1,2}:[0-9]{2} ?[AP]M) .+ ([0-9]{1,2}:[0-9]{2} ?[AP]M)'
    self.dateTimeRE = re.compile(date_time_pattern)
    self.ongoingTimeRE = re.compile(ongoing_time_pattern)
    # Caller-supplied pattern.
    self.cityRE = re.compile(location_regex)
    self.getDirectionsRE = re.compile('Get.*ns')
def __init__(self):
    """
    Load the bigram vocabulary, pick an OCR tool and define category labels.

    Reads the UTF-8 text file 'bi_grams' (relative path), one bigram per line,
    and maps each bigram to its zero-based line index in ``self.bigrams``; a
    bigram that appears more than once keeps the index of its last occurrence.
    ``self.ocr`` is the first tool reported by pyocr.
    """
    # Use a context manager so the file is closed once read; the original
    # left the handle open and shadowed the builtin `list`.
    with open('bi_grams', 'r', encoding="utf-8") as bigram_file:
        bigram_lines = [line.strip('\n') for line in bigram_file]
    self.bigrams = {bigram: index for index, bigram in enumerate(bigram_lines)}
    self.ocr = pyocr.get_available_tools()[0]
    self.categories = {1: 'foods', 2: 'electronics', 3: 'clothes', 4: 'household', 5: 'others'}
self,
self.device_settings['resolution']['stores']['loaded']
),
"progress_updater": JobFactoryProgressUpdater(self.progressbar),
}
try:
translations = gettext.translation(
'iso639-3', pycountry.LOCALES_DIR
)
logger.info("Language name translations loaded")
except Exception:
logger.exception("Unable to load languages translations")
translations = None
ocr_tools = pyocr.get_available_tools()
if len(ocr_tools) == 0:
short_ocr_langs = []
else:
short_ocr_langs = ocr_tools[0].get_available_languages()
ocr_langs = []
for short in short_ocr_langs:
if short in ['equ', 'osd']:
# ignore some (equ = equation ; osd = orientation detection)
continue
llang = self.__get_short_to_long_langs(short)
if llang:
if not translations:
tlang = llang
else:
tlang = translations.gettext(llang)
def run_ocr(self, img):
    """
    Run OCR over a cleaned frame and return the recognised text.

    :param numpy.array img: cleaned image
    :return str: extracted subtitle text ('' if there is no subtitle)
    """
    # average character is 581 pixels; below 1000 non-zero pixels the frame
    # is treated as having no subtitle and OCR is skipped
    nonzero_pixels = np.count_nonzero(img)
    if nonzero_pixels < 1000:
        return ''

    engine = pyocr.get_available_tools()[0]
    frame = Image.fromarray(img)
    return engine.image_to_string(frame, lang=LANG)
def ocr_img(self, imgname):
    """
    Run OCR on an image file, print the recognised text and return it.

    The first tool reported by pyocr is used, with the 'eng' language and a
    plain-text builder.

    To use a non-standard language pack named foo.traineddata, set the TESSDATA_PREFIX environment variable so the file can be found at TESSDATA_PREFIX/tessdata/foo.traineddata and give Tesseract the argument -l foo.

    :param imgname: image file to open with ``Image.open``
    :return: the text produced by the OCR tool
    """
    tools = pyocr.get_available_tools()
    tool = tools[0]
    # Open the image in a context manager so its file handle is released as
    # soon as OCR finishes; the original never closed it.
    with Image.open(imgname) as image:
        text = tool.image_to_string(
            image,
            lang='eng',
            builder=pyocr.builders.TextBuilder(),
        )
    print(text)
    return text