How to use the reynir.bindb.BIN_Db class in reynir

To help you get started, we’ve selected a few reynir examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github mideind / ReynirPackage / test / test_cases.py View on Github external
def test_casting():
    """ Verify that BIN_Db casts nominative word forms to the other cases """
    from reynir.bindb import BIN_Db
    db = BIN_Db()

    # Each row holds a nominative form followed by the expected
    # accusative, dative and genitive forms. The empty string and the
    # nonsense word "xxx" are expected to come back unchanged.
    expectations = (
        ("", "", "", ""),
        ("xxx", "xxx", "xxx", "xxx"),
        ("maðurinn", "manninn", "manninum", "mannsins"),
        ("mennirnir", "mennina", "mönnunum", "mannanna"),
    )

    for nominative, accusative, dative, genitive in expectations:
        assert db.cast_to_accusative(nominative) == accusative
        assert db.cast_to_dative(nominative) == dative
        assert db.cast_to_genitive(nominative) == genitive
github mideind / Greynir / utils / orn.py View on Github external
sys.exit(1)

    db_conn = sqlite3.connect(db_path, check_same_thread=False)
    db_conn.row_factory = lambda c, r: dict(zip([col[0] for col in c.description], r))

    q = "SELECT DISTINCT nafn FROM ornefni;"

    res = db_conn.cursor().execute(q)

    matches = [row["nafn"] for row in res]

    num_bin = 0
    num_comb = 0
    num_fail = 0

    with BIN_Db.get_db() as db:
        for m in matches:
            w = m.strip()
            if " " in w or "-" in w or "-" in w:
                continue

            # Direct BÍN lookup
            meanings = db.meanings(w)
            if meanings:
                num_bin += 1
                continue

            # Lookup using BÍN and combinator
            _, meanings = db.lookup_word(w, auto_uppercase=True)
            if meanings:
                num_comb += 1
                continue
github mideind / Greynir / tree.py View on Github external
def process(self, session, processor, **kwargs):
        """ Process a tree for an entire article """
        # For each sentence in turn, do a depth-first traversal,
        # visiting each parent node after visiting its children
        # Initialize the running state that we keep between sentences

        article_begin = getattr(processor, "article_begin", None) if processor else None
        article_end = getattr(processor, "article_end", None) if processor else None
        sentence = getattr(processor, "sentence", None) if processor else None
        # If visit(state, node) returns False for a node, do not visit child nodes
        visit = getattr(processor, "visit", None) if processor else None
        # If no handler exists for a nonterminal, call default() instead
        default = getattr(processor, "default", None) if processor else None

        with BIN_Db.get_db() as bin_db:

            state = {
                "session": session,
                "processor": processor,
                "bin_db": bin_db,
                "url": self.url,
                "authority": self.authority,
                "_sentence": sentence,
                "_visit": visit,
                "_default": default,
                "index": 0,
            }
            # Add state parameters passed via keyword arguments, if any
            state.update(kwargs)

            # Call the article_begin(state) function, if it exists
github mideind / Greynir / routes / stats.py View on Github external
def top_authors(days=_TOP_AUTHORS_PERIOD, session=None):
    """ Return up to 10 author entries, each a dict with the author's
        name, gender and parse percentage, for the last `days` days. """
    period_end = datetime.utcnow()
    period_start = period_end - timedelta(days=days)

    # Consider at most the first 20 rows returned by the query
    candidates = BestAuthorsQuery.period(
        period_start, period_end, enclosing_session=session, min_articles=10
    )[:20]

    ranked = []
    with BIN_Db.get_db() as bindb:
        for row in candidates:
            author = row[0]
            author_gender = bindb.lookup_name_gender(author)
            if author_gender != "hk":
                # Neuter ("hk") marks unnamed authors (e.g. "Ritstjórn Vísis"),
                # which are left out of the result
                percentage = round(float(row[4]), 2)
                ranked.append(
                    {"name": author, "gender": author_gender, "perc": percentage}
                )

    return ranked[:10]
github mideind / Greynir / routes / people.py View on Github external
]  # Go through up to 2 * N records
        )

        def is_better_title(new_title, old_title):
            """ Return True if new_title should replace old_title """
            new_size, old_size = len(new_title), len(old_title)
            if old_size >= _MAX_TITLE_LENGTH:
                # The current title is too long: any shorter one is an improvement
                return new_size < old_size
            # The current title is within the limit: accept the candidate only
            # if it is longer while still staying below the limit itself
            return old_size < new_size < _MAX_TITLE_LENGTH

        with BIN_Db.get_db() as bindb:
            for p in q:
                # Insert the name into the list if it's not already there,
                # or if the new title is longer than the previous one
                if p.name not in toplist or is_better_title(
                    p.title, toplist[p.name][0]
                ):
                    toplist[p.name] = (
                        correct_spaces(p.title),
                        p.article_url,
                        p.id,
                        bindb.lookup_name_gender(p.name),
                    )
                    if len(toplist) >= limit:
                        # We now have as many names as we initially wanted: terminate the loop
                        break
github mideind / Greynir / tnttagger.py View on Github external
def tagset(self, word, at_sentence_start=False):
        """ Return a list of (probability, tag) tuples for the given word """
        # Tokenize the space-joined parts; only the first token is tagged
        token = list(parse_tokens(" ".join(word)))[0]
        form = word[0]
        needs_lookup = token.kind == TOK.WORD and token.val is None
        if needs_lookup:
            # The word token carries no value yet: look the word up
            try:
                with BIN_Db.get_db() as db:
                    form, meanings = db.lookup_word(token.txt, at_sentence_start)
            except Exception:
                # Best effort: a failed lookup yields a word token
                # with the original text and no meanings
                form, meanings = token.txt, []
            token = TOK.Word(form, meanings)
        return self._ngram_tagger.tag_single_token(token)
github mideind / Greynir / queries / geography.py View on Github external
# Get country code
    cc = isocode_for_country_name(country)
    if not cc:
        logging.warning("No CC for country {0}".format(country))
        return False

    # Find capital city, given the country code
    capital = capital_for_cc(cc)
    if not capital:
        return False

    # Use the Icelandic name for the city
    ice_cname = icelandic_city_name(capital["name_ascii"])

    # Look up genitive country name for voice description
    bres = BIN_Db().lookup_genitive(country, cat="no")
    country_gen = bres[0].ordmynd if bres else country

    answer = ice_cname
    response = dict(answer=answer)
    voice = "Höfuðborg {0} er {1}".format(country_gen, answer)

    q.set_answer(response, answer, voice)
    q.set_key("Höfuðborg {0}".format(country_gen))
    q.set_context(dict(subject=ice_cname))

    return True
github mideind / Greynir / queries / words.py View on Github external
def lookup_best_word(word):
    """ Look up word in BÍN, pick right one acc. to a criterion. """
    with BIN_Db().get_db() as db:

        def nouns_only(bin_meaning):
            return bin_meaning.ordfl in ("kk", "kvk", "hk")

        res = list(filter(nouns_only, db.lookup_nominative(word)))
        if not res:
            # Try with uppercase first char
            capw = word.capitalize()
            res = list(filter(nouns_only, db.lookup_nominative(capw)))
            if not res:
                return None

        # OK, we have one or more matching nouns
        if len(res) == 1:
            m = res[0]
        else:
github mideind / Greynir / queries / __init__.py View on Github external
def nom2dat(w):
    """ Look up the dative form of a noun in BÍN.

        Returns an empty string for falsy input (None, ""); otherwise
        returns the result of BIN_Db.cast_to_dative() for the word, with
        candidate meanings ordered by sort_by_preference(). """
    if not w:
        return ""

    def sort_by_preference(m_list):
        """ Discourage rarer declension forms, i.e. ÞGF2 and ÞGF3 """
        # The key is False for meanings without "2"/"3" in their inflection
        # tag; False sorts before True and sorted() is stable, so the
        # remaining order is preserved
        return sorted(m_list, key=lambda m: "2" in m.beyging or "3" in m.beyging)

    # get_db() is invoked on the class itself, so no throwaway
    # BIN_Db instance is constructed just to reach it
    with BIN_Db.get_db() as db:
        return db.cast_to_dative(w, meaning_filter_func=sort_by_preference)
github mideind / Greynir / queries / geography.py View on Github external
def QGeoSubject(node, params, result):
    """ Set result.subject to the stem of the first nominative match for
        the capitalized place name, or to the place name itself if the
        lookup returns nothing. """
    placename = capitalize_placename(result._text)
    matches = BIN_Db().lookup_nominative(placename)
    if matches:
        result.subject = matches[0].stofn
    else:
        result.subject = placename