How to use the reynir.TOK.PERSON token kind constant in reynir

To help you get started, we’ve selected a few reynir examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github mideind / Greynir / postagger.py View on Github external
if t.val is not None and t.kind not in {
                    TOK.WORD,
                    TOK.ENTITY,
                    TOK.PUNCTUATION,
                }:
                    # For tokens except words, entities and punctuation, include the val field
                    if t.kind == TOK.PERSON:
                        d["v"], d["g"] = TreeUtility.choose_full_name(
                            t.val, case=None, gender=None
                        )
                    else:
                        d["v"] = t.val
                if t.kind in {
                    TOK.WORD,
                    TOK.ENTITY,
                    TOK.PERSON,
                    TOK.NUMBER,
                    TOK.YEAR,
                    TOK.ORDINAL,
                    TOK.PERCENT,
                }:
                    d["i"] = tags[ix]
                    ix += 1
                if t.kind == TOK.WORD and " " in d["x"]:
                    # Some kind of phrase: split it
                    xlist = d["x"].split()
                    for x in xlist:
                        d["x"] = x
                        if x == "og":
                            # Probably intermediate word: fjármála- og efnahagsráðherra
                            yield dict(x="og", i="c")
                        else:
github mideind / Greynir / article.py View on Github external
def person_names(self):
        """ Generate the full name of each person token in the article.

            The raw token structure is decoded from the JSON text in
            self._tokens the first time it is needed and then kept in
            self._raw_tokens. Nothing is yielded if there are no tokens. """
        raw = self._raw_tokens
        if raw is None and self._tokens:
            # Decode the JSON representation lazily and cache the result
            raw = self._raw_tokens = json.loads(self._tokens)
        if not raw:
            return
        # The structure is nested as paragraphs -> sentences -> token dicts
        for paragraph in raw:
            for sentence in paragraph:
                for tok in sentence:
                    if tok.get("k") == TOK.PERSON:
                        # Person tokens carry the full name in the v field
                        yield tok["v"]
github mideind / Greynir / nertokenizer.py View on Github external
def token_or_entity(token):
            """ Resolve a capitalized token against previously seen full names.
                If the token text is not the last name of a full name already
                mentioned in the token stream, the token is returned unchanged;
                otherwise a new token referring to that full name is returned """
            assert token.txt[0].isupper()
            full = lookup_lastname(token.txt)
            if full is None:
                # No earlier full name ends with this word: keep the token
                return token
            if full.kind == TOK.PERSON:
                # Carry over the meanings of the full person name
                return token_ctor.Person(token.txt, full.val)
            # The earlier mention was not a person: return an entity token
            # without definitions (to be looked up by full name later, when
            # the article is displayed or processed)
            return token_ctor.Entity(token.txt)
github mideind / Greynir / queries / builtin.py View on Github external
def create_name_register(tokens, session, all_names=False) -> RegisterType:
    """ Build and return a register (dictionary) of the person and
        entity names that occur in the given tokens """
    names = {}  # type: RegisterType
    for tok in tokens:
        if tok.kind == TOK.PERSON:
            # The val field of a person token is iterated as a collection
            # of candidate names; each one is registered by its name attribute
            for person in tok.val:
                add_name_to_register(
                    person.name, names, session, all_names=all_names
                )
        elif tok.kind == TOK.ENTITY:
            # Entities are registered by their token text
            add_entity_to_register(tok.txt, names, session, all_names=all_names)
    return names
github mideind / Greynir / treeutil.py View on Github external
# There is a token-terminal match
            if t.kind != TOK.PUNCTUATION:
                # Annotate with terminal name and BÍN meaning
                # (no need to do this for punctuation)
                if meaning is not None:
                    if terminal.first == "fs":
                        # Special case for prepositions since they're really
                        # resolved from the preposition list in Main.conf, not from BÍN
                        wt = WordTuple(stem=meaning.ordmynd, cat="fs")
                    else:
                        wt = WordTuple(
                            stem=meaning.stofn.replace("-", ""), cat=meaning.ordfl
                        )
                elif t.kind == TOK.ENTITY:
                    wt = WordTuple(stem=t.txt, cat="entity")
        if t.val is not None and t.kind == TOK.PERSON:
            case = None
            gender = None
            if terminal is not None and terminal.num_variants >= 1:
                gender = terminal.variant(-1)
                if gender in {"nf", "þf", "þgf", "ef"}:
                    # Oops, mistaken identity
                    case = gender
                    gender = None
                if terminal.num_variants >= 2:
                    case = terminal.variant(-2)
            name, gender = TreeUtility.choose_full_name(t.val, case, gender)
            # In any case, add a separate gender indicator field for convenience
            wt = WordTuple(stem=name, cat="person_" + gender)
        return wt