How to use errant - 6 common examples

To help you get started, we’ve selected a few errant examples, based on popular ways the package is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github chrisjbryant / errant / errant / alignment.py View on Github external
def get_all_merge_edits(self):
        """Build one edit per run of consecutive non-"M" alignment steps.

        Walks ``self.align_seq``, whose entries carry an operation code in
        position 0, and groups adjacent entries by whether that code is
        ``"M"`` (presumably "match" -- confirm against the aligner). Each
        run of non-"M" entries is passed to ``self.merge_edits``; everything
        after the first field of the first merged entry is handed to
        ``Edit`` together with ``self.orig`` and ``self.cor``.

        Returns:
            A list of ``Edit`` objects, one per non-"M" run, in sequence
            order.
        """
        merged_edits = []
        runs = groupby(self.align_seq, lambda step: step[0] == "M")
        for is_match, run in runs:
            # Runs made up purely of "M" steps produce no edit.
            if is_match:
                continue
            combined = self.merge_edits(list(run))
            # Drop the leading operation code; Edit receives the remainder.
            span = combined[0][1:]
            merged_edits.append(Edit(self.orig, self.cor, span))
        return merged_edits
github chrisjbryant / errant / errant / alignment.py View on Github external
def get_all_split_edits(self):
        """Build one edit for every individual non-"M" alignment step.

        No merging is done: each entry of ``self.align_seq`` whose
        operation code (position 0) is not ``"M"`` becomes its own ``Edit``,
        constructed from ``self.orig``, ``self.cor`` and the entry's
        remaining fields.

        Returns:
            A list of ``Edit`` objects in alignment order.
        """
        return [
            Edit(self.orig, self.cor, step[1:])
            for step in self.align_seq
            if step[0] != "M"
        ]
github chrisjbryant / errant / errant / en / merger.py View on Github external
# Split alignment into groups of M, T and rest. (T has a number after it)
    for op, group in groupby(alignment.align_seq, 
            lambda x: x[0][0] if x[0][0] in {"M", "T"} else False):
        group = list(group)
        # Ignore M
        if op == "M": continue
        # T is always split
        elif op == "T":
            for seq in group:
                edits.append(Edit(alignment.orig, alignment.cor, seq[1:]))
        # Process D, I and S subsequence
        else:
            processed = process_seq(group, alignment)
            # Turn the processed sequence into edits
            for seq in processed: 
                edits.append(Edit(alignment.orig, alignment.cor, seq[1:]))
    return edits
github chrisjbryant / errant / errant / alignment.py View on Github external
def get_all_equal_edits(self):
        """Build one edit per run of adjacent steps sharing an operation code.

        Consecutive entries of ``self.align_seq`` are grouped by their exact
        operation code (position 0). Runs whose code is ``"M"`` are skipped;
        every other run is passed to ``self.merge_edits``, and the first
        merged entry, minus its leading code, is handed to ``Edit`` along
        with ``self.orig`` and ``self.cor``.

        Returns:
            A list of ``Edit`` objects, one per non-"M" run, in sequence
            order.
        """
        merged_edits = []
        for op_code, run in groupby(self.align_seq, lambda step: step[0]):
            # "M" runs produce no edit.
            if op_code == "M":
                continue
            combined = self.merge_edits(list(run))
            span = combined[0][1:]
            merged_edits.append(Edit(self.orig, self.cor, span))
        return merged_edits
github chrisjbryant / errant / errant / commands / parallel_to_m2.py View on Github external
def main():
#    pr = cProfile.Profile()
#    pr.enable()

    # Parse command line args
    args = parse_args()
    print("Loading resources...")
    # Load Errant
    annotator = errant.load("en")
    # Open output m2 file
    out_m2 = open(args.out, "w")

    print("Processing parallel files...")
    # Process an arbitrary number of files line by line simultaneously. Python 3.3+
    # See https://tinyurl.com/y4cj4gth
    with ExitStack() as stack:
        in_files = [stack.enter_context(open(i)) for i in [args.orig]+args.cor]
        # Process each line of all input files
        for line in zip(*in_files):
            # Get the original and all the corrected texts
            orig = line[0].strip()
            cors = line[1:]
            # Skip the line if orig is empty
            if not orig: continue
            # Parse orig with spacy
github chrisjbryant / errant / errant / __init__.py View on Github external
if lang not in supported:
        raise Exception("%s is an unsupported or unknown language" % lang)

    # Load spacy
    nlp = nlp or spacy.load(lang, disable=["ner"])

    # Load language edit merger
    merger = import_module("errant.%s.merger" % lang)

    # Load language edit classifier
    classifier = import_module("errant.%s.classifier" % lang)
    # The English classifier needs spacy
    if lang == "en": classifier.nlp = nlp

    # Return a configured ERRANT annotator
    return Annotator(lang, nlp, merger, classifier)

errant

The ERRor ANnotation Toolkit (ERRANT). Automatically extract and classify edits in parallel sentences.

MIT
Latest version published 1 year ago

Package Health Score

52 / 100
Full package analysis

Similar packages