Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
if t.val is not None and t.kind not in {
TOK.WORD,
TOK.ENTITY,
TOK.PUNCTUATION,
}:
# For tokens except words, entities and punctuation, include the val field
if t.kind == TOK.PERSON:
d["v"], d["g"] = TreeUtility.choose_full_name(
t.val, case=None, gender=None
)
else:
d["v"] = t.val
if t.kind in {
TOK.WORD,
TOK.ENTITY,
TOK.PERSON,
TOK.NUMBER,
TOK.YEAR,
TOK.ORDINAL,
TOK.PERCENT,
}:
d["i"] = tags[ix]
ix += 1
if t.kind == TOK.WORD and " " in d["x"]:
# Some kind of phrase: split it
xlist = d["x"].split()
for x in xlist:
d["x"] = x
if x == "og":
# Probably intermediate word: fjármála- og efnahagsráðherra
yield dict(x="og", i="c")
else:
def person_names(self):
    """Generate the full name of every person token in the article.

    The raw token structure is decoded from the JSON text held in
    ``self._tokens`` the first time it is needed and is then kept in
    ``self._raw_tokens``. The structure is walked as paragraphs containing
    sentences containing token dicts; for each token whose ``"k"`` entry
    equals ``TOK.PERSON``, the token's ``"v"`` entry is yielded.
    """
    if self._raw_tokens is None and self._tokens:
        # Decode the JSON representation lazily, on first use
        self._raw_tokens = json.loads(self._tokens)
    paragraphs = self._raw_tokens
    if not paragraphs:
        # No tokens available: nothing to yield
        return
    for paragraph in paragraphs:
        for sentence in paragraph:
            # For person tokens, the v field holds the person's full name
            yield from (
                tok["v"] for tok in sentence if tok.get("k") == TOK.PERSON
            )
def token_or_entity(token):
    """Resolve a capitalized token against previously seen full names.

    The token text is looked up via ``lookup_lastname``. If nothing is
    found, the token is returned unchanged. If the match is a person
    token, a new person token is returned that carries the original text
    together with the matched token's ``val`` (its full name meanings).
    For any other kind of match, an entity token with no definitions is
    returned.
    """
    assert token.txt[0].isupper()
    known = lookup_lastname(token.txt)
    if known is None:
        # Not the last name of any full name seen so far
        return token
    if known.kind == TOK.PERSON:
        # Hand back the full name meanings of the matched person
        return token_ctor.Person(token.txt, known.val)
    # Not a person: return an entity token with no definitions
    # (this will eventually need to be looked up by full name when
    # displaying or processing the article)
    return token_ctor.Entity(token.txt)
def create_name_register(tokens, session, all_names=False) -> RegisterType:
    """Build a register of the person and entity names found in tokens.

    For each person token, the ``name`` of every item in the token's
    ``val`` is passed to ``add_name_to_register``. For each entity token,
    the token's ``txt`` is passed to ``add_entity_to_register``. Tokens of
    any other kind are ignored. ``session`` and ``all_names`` are passed
    through unchanged to both helpers.

    Returns the register dictionary that the helpers were given to fill.
    """
    register = {}  # type: RegisterType
    for tok in tokens:
        if tok.kind == TOK.PERSON:
            # A person token can carry several candidate names
            for person in tok.val:
                add_name_to_register(
                    person.name, register, session, all_names=all_names
                )
            continue
        if tok.kind == TOK.ENTITY:
            add_entity_to_register(
                tok.txt, register, session, all_names=all_names
            )
    return register
# There is a token-terminal match
if t.kind != TOK.PUNCTUATION:
# Annotate with terminal name and BÍN meaning
# (no need to do this for punctuation)
if meaning is not None:
if terminal.first == "fs":
# Special case for prepositions since they're really
# resolved from the preposition list in Main.conf, not from BÍN
wt = WordTuple(stem=meaning.ordmynd, cat="fs")
else:
wt = WordTuple(
stem=meaning.stofn.replace("-", ""), cat=meaning.ordfl
)
elif t.kind == TOK.ENTITY:
wt = WordTuple(stem=t.txt, cat="entity")
if t.val is not None and t.kind == TOK.PERSON:
case = None
gender = None
if terminal is not None and terminal.num_variants >= 1:
gender = terminal.variant(-1)
if gender in {"nf", "þf", "þgf", "ef"}:
# Oops, mistaken identity
case = gender
gender = None
if terminal.num_variants >= 2:
case = terminal.variant(-2)
name, gender = TreeUtility.choose_full_name(t.val, case, gender)
# In any case, add a separate gender indicator field for convenience
wt = WordTuple(stem=name, cat="person_" + gender)
return wt