Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_layout_getcomponents(self):
    """Check that block-level component extraction yields a large block.

    Initialises the API, points it at the test image and requests the
    component images at block level, then verifies that the first
    component's bounding box covers a sizeable area.
    """
    api = self._api
    api.Init()
    api.SetImageFile(self._image_file)
    components = api.GetComponentImages(tesserocr.RIL.BLOCK, True)
    # At least one component must have been found.
    self.assertTrue(components)
    # Each entry is a 4-tuple; only the second item (the box) is used here.
    # The original note describes the first entry as the largest block.
    _, bbox, _, _ = components[0]
    for key in ('w', 'h'):
        self.assertIn(key, bbox)
    # The block is expected to be quite large.
    self.assertGreater(bbox['w'] * bbox['h'], 400000)
def test_layout_boundingbox(self):
    """Check that layout analysis reports a large block-level bounding box.

    Runs ``AnalyseLayout`` on the test image, verifies that the returned
    iterator is usable and non-empty at block level, and asserts that the
    bounding box it reports spans a sizeable area.
    """
    api = self._api
    api.Init()
    api.SetImageFile(self._image_file)
    page_layout = api.AnalyseLayout()
    # The layout iterator itself must be truthy ...
    self.assertTrue(page_layout)
    block_level = tesserocr.RIL.BLOCK
    # ... and must not report an empty block level.
    self.assertFalse(page_layout.Empty(block_level))
    # The original note describes this as the box of the largest block.
    bbox = page_layout.BoundingBox(block_level)
    self.assertIsNot(bbox, None)
    x_start, y_start, x_end, y_end = bbox
    # The block is expected to be quite large.
    self.assertGreater((x_end - x_start) * (y_end - y_start), 400000)
def test_init(self):
    """Exercise ``Init`` with different languages and an explicit OEM."""
    api = self._api
    # Each language string must be reported back unchanged after Init.
    for lang in ('eng+osd', 'eng'):
        api.Init(lang=lang)
        self.assertEqual(api.GetInitLanguagesAsString(), lang)
    # The engine mode passed to Init must be the one the API reports.
    engine_mode = tesserocr.OEM.TESSERACT_ONLY
    api.Init(oem=engine_mode)
    self.assertEqual(api.oem(), engine_mode)
def test_init(self):
    """Verify that ``Init`` honours the requested language and OEM.

    The API is re-initialised three times: with a combined language
    string, with a single language, and with an explicit engine mode.
    After each call the corresponding getter must echo the value back.
    """
    combined_lang = 'eng+osd'
    self._api.Init(lang=combined_lang)
    self.assertEqual(self._api.GetInitLanguagesAsString(), combined_lang)

    single_lang = 'eng'
    self._api.Init(lang=single_lang)
    self.assertEqual(self._api.GetInitLanguagesAsString(), single_lang)

    requested_oem = tesserocr.OEM.TESSERACT_ONLY
    self._api.Init(oem=requested_oem)
    self.assertEqual(self._api.oem(), requested_oem)
def check_test():
    """Print every recognised symbol together with its alternative choices.

    Uses the module-level ``api``, ``image``, ``RIL`` and ``iterate_level``
    names. Recognition is restricted to a fixed rectangle of the image and
    ``save_blob_choices`` is enabled before the choice iterator is queried
    for each symbol.

    Output per symbol: one line with the symbol and its confidence (only
    when the symbol text is non-empty), one line per choice, and a
    separator line.
    """
    api.SetImageFile(image)
    api.SetVariable("save_blob_choices", "T")
    api.SetRectangle(37, 228, 548, 31)
    api.Recognize()
    ri = api.GetIterator()
    level = RIL.SYMBOL
    for r in iterate_level(ri, level):
        symbol = r.GetUTF8Text(level)  # r == ri
        conf = r.Confidence(level)
        if symbol:
            print("symbol ", symbol, " confidence", conf)
        indent = False
        ci = r.GetChoiceIterator()
        for c in ci:
            # Build the whole line first and print it once. The previous
            # ``print(...),`` trailing-comma form only suppresses the newline
            # for the Python 2 print statement; with print as a function it
            # split every choice across several lines.
            prefix = '\t\t \t- ' if indent else '\t- '
            choice = c.GetUTF8Text()  # c == ci
            print(u'{}{} conf: {}'.format(prefix, choice, c.Confidence()))
            # Every choice after the first gets the extra leading indent.
            indent = True
        print('---------------------------------------------')
def check_test():
    """Dump recognised symbols and their alternative choices to stdout.

    Depends on the module-level ``api``, ``image``, ``RIL`` and
    ``iterate_level`` names. Only a fixed rectangle of the image is
    recognised, with ``save_blob_choices`` switched on.

    For each symbol this prints the symbol with its confidence (skipped
    when the symbol text is empty), then one line per choice, then a
    separator line.
    """
    api.SetImageFile(image)
    api.SetVariable("save_blob_choices", "T")
    api.SetRectangle(37, 228, 548, 31)
    api.Recognize()
    ri = api.GetIterator()
    level = RIL.SYMBOL
    for r in iterate_level(ri, level):
        symbol = r.GetUTF8Text(level)  # r == ri
        conf = r.Confidence(level)
        if symbol:
            print("symbol ", symbol, " confidence", conf)
        indent = False
        ci = r.GetChoiceIterator()
        for c in ci:
            # ``print(...),`` keeps the cursor on the same line only as a
            # Python 2 print statement; as a function call the trailing
            # comma is a no-op and each fragment ended up on its own line.
            # Emitting the complete line in one call works on both.
            prefix = '\t\t \t- ' if indent else '\t- '
            choice = c.GetUTF8Text()  # c == ci
            print(u'{}{} conf: {}'.format(prefix, choice, c.Confidence()))
            # Second and later choices carry the extra leading indent.
            indent = True
        print('---------------------------------------------')
def orientation_stuff(image_path='/home/johannes/Repos/tesseract/testing/eurotext.tif'):
    """Detect and print the orientation and script of an image.

    Args:
        image_path: Image file to analyse. Defaults to the path that was
            previously hard-coded, so existing zero-argument calls behave
            as before.

    The API is created in OSD-only page segmentation mode using the
    module-level ``MY_TESSDATA_PATH`` and is released when the ``with``
    block exits.
    """
    with PyTessBaseAPI(psm=PSM.OSD_ONLY, path=MY_TESSDATA_PATH) as api2:
        api2.SetImageFile(image_path)
        # NOTE(original author): both DetectOS() and DetectOrientationScript()
        # were observed to crash with 'Segmentation fault (core dumped)'.
        # osd = api2.DetectOS()
        osd = api2.DetectOrientationScript()
    # The placeholders must match the keys returned by
    # DetectOrientationScript(); the DetectOS() key names used before
    # (orientation/oconfidence/script/sconfidence) raised KeyError here.
    print("Orientation: {orient_deg}\nOrientation confidence: {orient_conf}\n Script: {script_name}\nScript confidence: {script_conf}".format(**osd))
def setUp(self):
    """Prepare the fixtures shared by the tests.

    When PIL is available the test image is opened and stored on
    ``self._image``. A ``PyTessBaseAPI`` instance created with
    ``init=True`` is always stored on ``self._api``.
    """
    if pil_installed:
        with open(self._image_file, 'rb') as image_fp:
            pil_image = Image.open(image_fp)
            self._image = pil_image
            # load() is called before the file handle is closed.
            pil_image.load()
    self._api = tesserocr.PyTessBaseAPI(init=True)
def test_page_seg_mode(self):
    """Round-trip page segmentation modes through the setter and getter."""
    api = self._api
    # Whatever mode is set must be the mode that is read back.
    for mode in (tesserocr.PSM.SINGLE_WORD, tesserocr.PSM.AUTO):
        api.SetPageSegMode(mode)
        self.assertEqual(api.GetPageSegMode(), mode)
def test_detect_os(self):
    """Test DetectOS and DetectOrientationScript (tesseract v4+).

    Verifies that ``DetectOS`` returns every expected key and reports an
    upright orientation, and -- when the tesseract version is recent
    enough -- that ``DetectOrientationScript`` does the same and names
    the script as Latin.
    """
    self._api.SetPageSegMode(tesserocr.PSM.OSD_ONLY)
    self._api.SetImageFile(self._image_file)
    orientation = self._api.DetectOS()
    # Check every key with an explicit loop: assertIn() returns None, so
    # wrapping the checks in all() stopped after the first key and the
    # remaining keys were never verified.
    for key in ('sconfidence', 'oconfidence', 'script', 'orientation'):
        self.assertIn(key, orientation)
    self.assertEqual(orientation['orientation'], 0)
    languages = tesserocr.get_languages()[1]  # this is sorted alphabetically!
    self.assertLess(orientation['script'], len(languages))
    # NOTE: the script name is deliberately not asserted here. Because the
    # language list is sorted alphabetically, indexing it with the numeric
    # script id does not reliably yield 'Latin'.
    if _TESSERACT_VERSION >= 0x3999800:
        orientation = self._api.DetectOrientationScript()
        for key in ('orient_deg', 'orient_conf', 'script_name', 'script_conf'):
            self.assertIn(key, orientation)
        self.assertEqual(orientation['orient_deg'], 0)
        self.assertEqual(orientation['script_name'], 'Latin')