Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def _addr2nom(address):
    """ Return the given location name with each of its
        whitespace-separated words put into nominative form. """
    # TODO: Implement more intelligently.
    # The word-by-word approach is a tad simplistic and mucks up
    # some names, e.g. "Ráðhús Reykjavíkur" becomes "Ráðhús Reykjavík".
    with BIN_Db.get_db() as db:

        def nominative(word):
            """ Nominative form of a single word, or the word itself
                if the database has no nominative entry for it """
            meanings = db.lookup_nominative(word)
            if not meanings and not word.islower():
                # Nothing found for the word as written:
                # retry with its lowercase form
                meanings = db.lookup_nominative(word.lower())
            # The first entry returned by the lookup is the one used
            return meanings[0].ordmynd if meanings else word

        return " ".join(nominative(word) for word in address.split())
reynir.bintokenizer.tokenize() function.
"""
# Token queue
tq = [] # type: List[Tok]
# Phrases we're considering. Note that an entry of None
# indicates that the accumulated phrase so far is a complete
# and valid known entity name.
state = defaultdict(list) # type: Dict[Union[str, None], List[Tuple[List[str], Entity]]]
# Entity definition cache
ecache = dict() # type: Dict[str, List[Entity]]
# Last name to full name mapping ('Clinton' -> 'Hillary Clinton')
lastnames = dict() # type: Dict[str, str]
with BIN_Db.get_db() as db, SessionContext(
session=enclosing_session, commit=True, read_only=True
) as session:
def fetch_entities(w: str, fuzzy=True) -> List[Entity]:
    """ Query the entity table for rows whose name equals the given
        word(s); if fuzzy is True, also for rows whose name starts
        with the given word(s) followed by a space.
        Returns (name, verb, definition) rows, or an empty list
        if the query fails with an OperationalError. """
    try:
        if fuzzy:
            # Match the name itself as well as any longer name
            # that continues after a space
            criterion = Entity.name.like(w + " %") | (Entity.name == w)
        else:
            criterion = Entity.name == w
        rows = (
            session.query(Entity.name, Entity.verb, Entity.definition)
            .filter(criterion)
            .all()
        )
    except OperationalError as e:
        # Best effort: log the database error and report no matches
        logging.warning("SQL error in fetch_entities(): {0}".format(e))
        rows = []
    return rows
def _root_lookup(text, at_start, terminal):
    """ Find the root (lemma) of a word that was not found in the cache.
        The result has all hyphens removed, except for abbreviations,
        where the original text is returned unchanged. """
    with BIN_Db.get_db() as bin_db:
        lemma, meanings = bin_db.lookup_word(text, at_start)
    match = None
    if meanings:
        # Pick the first meaning accepted by the terminal's filter
        td = TerminalNode._TD[terminal]
        match = next(filter(td._bin_filter, meanings), None)
    if not match:
        # No meaning fits the terminal: fall back to the word
        # as returned by the lookup
        return lemma.replace("-", "")
    if match.fl == "skst":
        # For abbreviations, the root is the original text,
        # not the meaning of the abbreviation
        return text
    return match.stofn.replace("-", "")