Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
and script (with both OSD and AnalyseLayout). Rotate the image
accordingly, and annotate the angle, readingDirection and textlineOrder.
Create a corresponding image file, and reference it as AlternativeImage
in the element. Add the new image file to the workspace with the fileGrp USE
given in the second position of the output fileGrp (or ``OCR-D-IMG-DESKEW``
if none is given), and with an ID based on the input file and input element.
Produce a new output file by serialising the resulting hierarchy.
"""
oplevel = self.parameter['operation_level']
with PyTessBaseAPI(
path=TESSDATA_PREFIX,
lang="osd", # osd required for legacy init!
oem=OEM.TESSERACT_LSTM_COMBINED, # legacy required for OSD!
psm=PSM.AUTO_OSD
) as tessapi:
for n, input_file in enumerate(self.input_files):
file_id = input_file.ID.replace(self.input_file_grp, self.image_grp)
page_id = input_file.pageId or input_file.ID
LOG.info("INPUT FILE %i / %s", n, page_id)
pcgts = page_from_file(self.workspace.download_file(input_file))
page = pcgts.get_Page()
# add metadata about this operation and its runtime parameters:
metadata = pcgts.get_Metadata() # ensured by from_file()
metadata.add_MetadataItem(
MetadataItemType(type_="processingStep",
name=self.ocrd_tool['steps'][0],
value=TOOL,
Labels=[LabelsType(