Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def get_all_merge_edits(self):
    """Build one edit per uninterrupted run of non-match alignment steps.

    ``self.align_seq`` is walked in order and split into consecutive runs
    of match steps (operation ``"M"``) and non-match steps (anything else).
    Match runs are dropped.  Each non-match run is handed to
    ``self.merge_edits`` and the first item of its result, minus the
    leading operation label, is used to construct an ``Edit``.

    Returns:
        A list of ``Edit`` objects in alignment order; empty when the
        alignment contains only match steps (or nothing at all).
    """
    # The key is a plain bool: True for "M", False otherwise.  Grouping on
    # it glues *different* adjacent non-match operations into a single run.
    runs = groupby(self.align_seq, lambda step: step[0] == "M")
    # Each run is consumed via list() before groupby advances, which is
    # required because groupby's sub-iterators share the underlying iterator.
    return [
        Edit(self.orig, self.cor, self.merge_edits(list(run))[0][1:])
        for is_match, run in runs
        if not is_match
    ]
def get_all_split_edits(self):
    """Build a separate edit for every non-match alignment step.

    Each entry of ``self.align_seq`` whose operation label (its first
    item) is not ``"M"`` becomes its own ``Edit``; nothing is merged.
    The label is stripped and the remaining items are passed to ``Edit``
    together with ``self.orig`` and ``self.cor``.

    Returns:
        A list of ``Edit`` objects in alignment order; empty when every
        step is a match (or the alignment is empty).
    """
    return [
        Edit(self.orig, self.cor, step[1:])
        for step in self.align_seq
        if step[0] != "M"
    ]
# Split alignment into groups of M, T and rest. (T has a number after it)
for op, group in groupby(alignment.align_seq,
lambda x: x[0][0] if x[0][0] in {"M", "T"} else False):
group = list(group)
# Ignore M
if op == "M": continue
# T is always split
elif op == "T":
for seq in group:
edits.append(Edit(alignment.orig, alignment.cor, seq[1:]))
# Process D, I and S subsequence
else:
processed = process_seq(group, alignment)
# Turn the processed sequence into edits
for seq in processed:
edits.append(Edit(alignment.orig, alignment.cor, seq[1:]))
return edits
def get_all_equal_edits(self):
    """Build one edit per run of adjacent steps sharing the same operation.

    ``self.align_seq`` is grouped by the exact operation label of each
    step, so only neighbouring steps with an *identical* label end up in
    the same run (unlike grouping all non-matches together).  Runs labelled
    ``"M"`` are dropped.  Every other run is handed to ``self.merge_edits``
    and the first item of its result, minus the leading operation label,
    is used to construct an ``Edit``.

    Returns:
        A list of ``Edit`` objects in alignment order; empty when the
        alignment contains only match steps (or nothing at all).
    """
    runs = groupby(self.align_seq, lambda step: step[0])
    # Each run is materialised with list() before groupby moves on, since
    # advancing groupby invalidates the previous group's iterator.
    return [
        Edit(self.orig, self.cor, self.merge_edits(list(run))[0][1:])
        for op, run in runs
        if op != "M"
    ]
def main():
# pr = cProfile.Profile()
# pr.enable()
# Parse command line args
args = parse_args()
print("Loading resources...")
# Load Errant
annotator = errant.load("en")
# Open output m2 file
out_m2 = open(args.out, "w")
print("Processing parallel files...")
# Process an arbitrary number of files line by line simultaneously. Python 3.3+
# See https://tinyurl.com/y4cj4gth
with ExitStack() as stack:
in_files = [stack.enter_context(open(i)) for i in [args.orig]+args.cor]
# Process each line of all input files
for line in zip(*in_files):
# Get the original and all the corrected texts
orig = line[0].strip()
cors = line[1:]
# Skip the line if orig is empty
if not orig: continue
# Parse orig with spacy
if lang not in supported:
raise Exception("%s is an unsupported or unknown language" % lang)
# Load spacy
nlp = nlp or spacy.load(lang, disable=["ner"])
# Load language edit merger
merger = import_module("errant.%s.merger" % lang)
# Load language edit classifier
classifier = import_module("errant.%s.classifier" % lang)
# The English classifier needs spacy
if lang == "en": classifier.nlp = nlp
# Return a configured ERRANT annotator
return Annotator(lang, nlp, merger, classifier)