Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def set_builder(self):
    """Install a freshly created ``LineBoxBuilder`` as ``self._builder``."""
    line_builder = builders.LineBoxBuilder()
    self._builder = line_builder
# NOTE(review): this is a fragment of a larger routine whose header is not
# visible here; `obj`, `pages`, `docs`, `empty_only`, `ocr_lang`, `verbose`
# and `reply` all come from the enclosing scope.
# Collect the pages to process: everything in obj.pages when that attribute
# exists, otherwise obj itself is added as a single page.
if hasattr(obj, 'pages'):
pages.update(obj.pages)
else:
pages.add(obj)
index_updater = dsearch.get_index_updater(optimize=False)
# Iterate over a copy (set(pages)) so entries can be removed from `pages`
# while looping.
for page in set(pages):
# When empty_only is set, pages that already have boxes are dropped from
# `pages` and not OCR'd again.
if empty_only and len(page.boxes) > 0:
pages.remove(page)
continue
verbose("Running OCR on {} ...".format(page.pageid))
page.boxes = ocr.image_to_string(
page.img,
lang=ocr_lang,
builder=pyocr.builders.LineBoxBuilder()
)
# Record the page's document in `docs`; every entry is handed to
# index_updater.upd_doc() below.
docs.add(page.doc)
verbose("Updating index ...")
for doc in docs:
index_updater.upd_doc(doc)
index_updater.commit()
verbose("Done")
# Reply with the ids of the pages still present in `pages`.
reply({
"ocr": [page.pageid for page in pages]
})
def ocr(im, lang='eng'):
    """Run OCR on an image array and return the detected line boxes.

    Args:
        im: Image data in a form accepted by ``Image.fromarray``.
        lang: Language code passed to the OCR tool. Defaults to ``'eng'``,
            the value that used to be hard-coded, so existing callers are
            unaffected.

    Returns:
        The result of the tool's ``image_to_string`` call when used with a
        ``pyocr.builders.LineBoxBuilder``.

    Raises:
        SystemExit: With exit status 1 when pyocr reports no available
            OCR tool.
    """
    tools = pyocr.get_available_tools()
    if not tools:
        # Passing a string to sys.exit() prints it on stderr and exits with
        # status 1, keeping the diagnostic out of stdout.
        sys.exit("No OCR tool found")
    # Use the first tool pyocr reports.
    tool = tools[0]
    return tool.image_to_string(
        Image.fromarray(im),
        lang=lang,
        builder=pyocr.builders.LineBoxBuilder()
    )
# NOTE(review): this is a fragment of a method whose header is not visible
# here; `orientation`, `img` and `self` come from the enclosing scope.
if orientation['angle'] != 0:
# The angle provided by pyocr is clockwise, so correcting the image means
# rotating by -1 * angle (clockwise).
# PIL expects a counter-clockwise angle --> another -1 * angle.
# The two negations cancel out, so the angle is passed through unchanged.
img = img.rotate(orientation['angle'], expand=True)
for angle in self.angles:
# Tell the observer we decided not to OCR the other orientations: each
# angle other than the detected one is reported with a score of 0.
if angle == orientation['angle']:
continue
self.emit('ocr-score', angle, 0)
boxes = self.ocr_tool.image_to_string(
img, lang=self.langs['ocr'],
builder=pyocr.builders.LineBoxBuilder())
# The detected orientation is reported with a score of 1.
self.emit('ocr-score', orientation['angle'], 1)
return (orientation['angle'], img, boxes)
def __set_boxes(self, boxes):
    """Write ``boxes`` to this object's box file, then drop cached data.

    The file at ``self.__get_box_path()`` is opened for writing as UTF-8 and
    filled through ``LineBoxBuilder.write_file``. Afterwards both this
    object's cache and its document's cache are dropped.
    """
    target_path = self.__get_box_path()
    with codecs.open(target_path, 'w', encoding='utf-8') as out_stream:
        box_writer = pyocr.builders.LineBoxBuilder()
        box_writer.write_file(out_stream, boxes)
    self.drop_cache()
    self.doc.drop_cache()
def __set_boxes(self, boxes):
    """Write ``boxes`` to the file at ``self.__box_path`` and drop caches.

    The file is opened for writing as UTF-8 and filled through
    ``LineBoxBuilder.write_file``. Afterwards both this object's cache and
    its document's cache are dropped.
    """
    target_path = self.__box_path
    with codecs.open(target_path, 'w', encoding='utf-8') as out_stream:
        box_writer = pyocr.builders.LineBoxBuilder()
        box_writer.write_file(out_stream, boxes)
    self.drop_cache()
    self.doc.drop_cache()
# Write `boxes` to the file at self.__box_path: the file is opened for
# writing through self.fs.open and filled via LineBoxBuilder.write_file.
# NOTE(review): the definition may continue past the last visible line.
def __set_boxes(self, boxes):
boxfile = self.__box_path
with self.fs.open(boxfile, 'w') as file_desc:
pyocr.builders.LineBoxBuilder().write_file(file_desc, boxes)