Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def _synset_from_pos_and_line(self, pos, data_file_line):
# Construct a new (empty) synset.
synset = Synset(self)
# parse the entry for this synset
try:
# parse out the definitions and examples from the gloss
columns_str, gloss = data_file_line.split('|')
gloss = gloss.strip()
definitions = []
for gloss_part in gloss.split(';'):
gloss_part = gloss_part.strip()
if gloss_part.startswith('"'):
synset.examples.append(gloss_part.strip('"'))
else:
definitions.append(gloss_part)
synset.definition = '; '.join(definitions)
"""
Get the path(s) from this synset to the root, counting the distance
of each node from the initial node on the way. A set of
(synset, distance) tuples is returned.
:type distance: int
:param distance: the distance (number of edges) from this hypernym to
the original hypernym ``Synset`` on which this method was called.
:return: A set of ``(Synset, int)`` tuples where each ``Synset`` is
a hypernym of the first ``Synset``.
"""
distances = set([(self, distance)])
for hypernym in self.hypernyms() + self.instance_hypernyms():
distances |= hypernym.hypernym_distances(distance+1, simulate_root=False)
if simulate_root:
fake_synset = Synset(None)
fake_synset.name = '*ROOT*'
fake_synset_distance = max(distances, key=itemgetter(1))[1]
distances.add((fake_synset, fake_synset_distance+1))
return distances
def path_similarity(self, synset1, synset2, verbose=False, simulate_root=True):
    # Method-form convenience wrapper: delegates to ``synset1.path_similarity``
    # with ``synset2`` as the other synset and both flags passed through
    # positionally. ``self`` is not used. The docstring is copied from
    # ``Synset.path_similarity`` by the assignment that follows this def.
    return synset1.path_similarity(synset2, verbose, simulate_root)
path_similarity.__doc__ = Synset.path_similarity.__doc__
def lch_similarity(self, synset1, synset2, verbose=False, simulate_root=True):
    # Method-form convenience wrapper: delegates to ``synset1.lch_similarity``
    # with ``synset2`` as the other synset and both flags passed through
    # positionally. ``self`` is not used. The docstring is copied from
    # ``Synset.lch_similarity`` by the assignment that follows this def.
    return synset1.lch_similarity(synset2, verbose, simulate_root)
lch_similarity.__doc__ = Synset.lch_similarity.__doc__
def wup_similarity(self, synset1, synset2, verbose=False, simulate_root=True):
    # Method-form convenience wrapper: delegates to ``synset1.wup_similarity``
    # with ``synset2`` as the other synset and both flags passed through
    # positionally. ``self`` is not used. The docstring is copied from
    # ``Synset.wup_similarity`` by the assignment that follows this def.
    return synset1.wup_similarity(synset2, verbose, simulate_root)
wup_similarity.__doc__ = Synset.wup_similarity.__doc__
def res_similarity(self, synset1, synset2, ic, verbose=False):
    # Method-form convenience wrapper: delegates to ``synset1.res_similarity``,
    # forwarding ``synset2``, the information-content argument ``ic`` and
    # ``verbose`` positionally. ``self`` is not used. The docstring is copied
    # from ``Synset.res_similarity`` by the assignment that follows this def.
    return synset1.res_similarity(synset2, ic, verbose)
res_similarity.__doc__ = Synset.res_similarity.__doc__
def jcn_similarity(self, synset1, synset2, ic, verbose=False):
    # Method-form convenience wrapper: delegates to ``synset1.jcn_similarity``,
    # forwarding ``synset2``, the information-content argument ``ic`` and
    # ``verbose`` positionally. ``self`` is not used. The docstring is copied
    # from ``Synset.jcn_similarity`` by the assignment that follows this def.
    return synset1.jcn_similarity(synset2, ic, verbose)
jcn_similarity.__doc__ = Synset.jcn_similarity.__doc__
def lin_similarity(self, synset1, synset2, ic, verbose=False):
    # Method-form convenience wrapper: delegates to ``synset1.lin_similarity``,
    # forwarding ``synset2``, the information-content argument ``ic`` and
    # ``verbose`` positionally. ``self`` is not used. The docstring is copied
    # from ``Synset.lin_similarity`` by the assignment that follows this def.
    return synset1.lin_similarity(synset2, ic, verbose)
lin_similarity.__doc__ = Synset.lin_similarity.__doc__
#////////////////////////////////////////////////////////////
# Morphy
#////////////////////////////////////////////////////////////
# Morphy, adapted from Oliver Steele's pywordnet
def morphy(self, form, pos=None):
"""
Find a possible base form for the given form, with the given
share a single root which disallows this metric from working for
synsets that are not connected. This flag (False by default)
creates a fake root that connects all the taxonomies. Set it
to True to enable this behavior. For the noun taxonomy,
there is usually a default root except for WordNet version 1.6.
If you are using wordnet 1.6, a fake root will need to be added
for nouns as well.
:type use_min_depth: bool
:param use_min_depth: This setting mimics older (v2) behavior of NLTK wordnet
If True, will use the min_depth function to calculate the lowest common
hypernyms. This is known to give strange results for some synset pairs
(eg: 'chef.n.01', 'fireman.n.01') but is retained for backwards compatibility
:return: The synsets that are the lowest common hypernyms of both synsets
"""
fake_synset = Synset(None)
fake_synset.name = '*ROOT*'
fake_synset.hypernyms = lambda: []
fake_synset.instance_hypernyms = lambda: []
if simulate_root:
self_hypernyms = chain(self._iter_hypernym_lists(), [[fake_synset]])
other_hypernyms = chain(other._iter_hypernym_lists(), [[fake_synset]])
else:
self_hypernyms = self._iter_hypernym_lists()
other_hypernyms = other._iter_hypernym_lists()
synsets = set(s for synsets in self_hypernyms for s in synsets)
others = set(s for synsets in other_hypernyms for s in synsets)
synsets.intersection_update(others)
try:
def wup_similarity(self, synset1, synset2, verbose=False, simulate_root=True):
    # Thin delegate: the computation lives on the synset object. Calls
    # ``synset1.wup_similarity`` with ``synset2`` and the two flags in the
    # same positional order; ``self`` is unused. ``__doc__`` is assigned
    # from ``Synset.wup_similarity`` immediately after this definition.
    return synset1.wup_similarity(synset2, verbose, simulate_root)
wup_similarity.__doc__ = Synset.wup_similarity.__doc__
def res_similarity(self, synset1, synset2, ic, verbose=False):
    # Thin delegate: calls ``synset1.res_similarity`` with ``synset2``,
    # then ``ic``, then ``verbose`` (positional); ``self`` is unused.
    # ``__doc__`` is assigned from ``Synset.res_similarity`` immediately
    # after this definition.
    return synset1.res_similarity(synset2, ic, verbose)
res_similarity.__doc__ = Synset.res_similarity.__doc__
def jcn_similarity(self, synset1, synset2, ic, verbose=False):
    # Thin delegate: calls ``synset1.jcn_similarity`` with ``synset2``,
    # then ``ic``, then ``verbose`` (positional); ``self`` is unused.
    # ``__doc__`` is assigned from ``Synset.jcn_similarity`` immediately
    # after this definition.
    return synset1.jcn_similarity(synset2, ic, verbose)
jcn_similarity.__doc__ = Synset.jcn_similarity.__doc__
def lin_similarity(self, synset1, synset2, ic, verbose=False):
    # Thin delegate: calls ``synset1.lin_similarity`` with ``synset2``,
    # then ``ic``, then ``verbose`` (positional); ``self`` is unused.
    # ``__doc__`` is assigned from ``Synset.lin_similarity`` immediately
    # after this definition.
    return synset1.lin_similarity(synset2, ic, verbose)
lin_similarity.__doc__ = Synset.lin_similarity.__doc__
#////////////////////////////////////////////////////////////
# Morphy
#////////////////////////////////////////////////////////////
# Morphy, adapted from Oliver Steele's pywordnet
def morphy(self, form, pos=None):
"""
Find a possible base form for the given form, with the given
part of speech, by checking WordNet's list of exceptional
forms, and by recursively stripping affixes for this part of
speech until a form in WordNet is found.
>>> from nltk.corpus import wordnet as wn
>>> print(wn.morphy('dogs'))
dog
>>> print(wn.morphy('churches'))
lch_similarity.__doc__ = Synset.lch_similarity.__doc__
def wup_similarity(synset1, synset2, verbose=False, simulate_root=True):
    # Function-form convenience wrapper: delegates to
    # ``synset1.wup_similarity`` with ``synset2`` and both flags passed
    # through positionally. The docstring is copied from
    # ``Synset.wup_similarity`` by the assignment that follows this def.
    return synset1.wup_similarity(synset2, verbose, simulate_root)
wup_similarity.__doc__ = Synset.wup_similarity.__doc__
def res_similarity(synset1, synset2, ic, verbose=False):
    # Function-form convenience wrapper around ``synset1.res_similarity``.
    # ``ic`` must be forwarded ahead of ``verbose``: the method-form wrapper
    # in this file calls ``res_similarity(synset2, ic, verbose)``, so omitting
    # ``ic`` would place ``verbose`` in the information-content slot.
    # The docstring is copied from ``Synset.res_similarity`` by the
    # assignment that follows this def.
    return synset1.res_similarity(synset2, ic, verbose)
res_similarity.__doc__ = Synset.res_similarity.__doc__
def jcn_similarity(synset1, synset2, ic, verbose=False):
    # Function-form convenience wrapper around ``synset1.jcn_similarity``.
    # ``ic`` must be forwarded ahead of ``verbose``: the method-form wrapper
    # in this file calls ``jcn_similarity(synset2, ic, verbose)``, so omitting
    # ``ic`` would place ``verbose`` in the information-content slot.
    # The docstring is copied from ``Synset.jcn_similarity`` by the
    # assignment that follows this def.
    return synset1.jcn_similarity(synset2, ic, verbose)
jcn_similarity.__doc__ = Synset.jcn_similarity.__doc__
def lin_similarity(synset1, synset2, ic, verbose=False):
    # Function-form convenience wrapper around ``synset1.lin_similarity``.
    # ``ic`` must be forwarded ahead of ``verbose``: the method-form wrapper
    # in this file calls ``lin_similarity(synset2, ic, verbose)``, so omitting
    # ``ic`` would place ``verbose`` in the information-content slot.
    # The docstring is copied from ``Synset.lin_similarity`` by the
    # assignment that follows this def.
    return synset1.lin_similarity(synset2, ic, verbose)
lin_similarity.__doc__ = Synset.lin_similarity.__doc__
def _lcs_ic(synset1, synset2, ic, verbose=False):
"""
Get the information content of the least common subsumer that has
the highest information content value. If two nodes have no
explicit common subsumer, assume that they share an artificial
root node that is the hypernym of all explicit roots.
:type synset1: Synset
:param synset1: First input synset.
return ic
######################################################################
# Similarity metrics
######################################################################
# TODO: Add in the option to manually add a new root node; this will be
# useful for verb similarity as there exist multiple verb taxonomies.
# More information about the metrics is available at
# http://marimba.d.umn.edu/similarity/measures.html
def path_similarity(synset1, synset2, verbose=False, simulate_root=True):
    # Function-form convenience wrapper: delegates to
    # ``synset1.path_similarity`` with ``synset2`` and both flags passed
    # through positionally. The docstring is copied from
    # ``Synset.path_similarity`` by the assignment that follows this def.
    return synset1.path_similarity(synset2, verbose, simulate_root)
path_similarity.__doc__ = Synset.path_similarity.__doc__
def lch_similarity(synset1, synset2, verbose=False, simulate_root=True):
    # Function-form convenience wrapper: delegates to
    # ``synset1.lch_similarity`` with ``synset2`` and both flags passed
    # through positionally. The docstring is copied from
    # ``Synset.lch_similarity`` by the assignment that follows this def.
    return synset1.lch_similarity(synset2, verbose, simulate_root)
lch_similarity.__doc__ = Synset.lch_similarity.__doc__
def wup_similarity(synset1, synset2, verbose=False, simulate_root=True):
    # Thin delegate: the computation lives on the synset object. Calls
    # ``synset1.wup_similarity`` with ``synset2`` and the two flags in the
    # same positional order. ``__doc__`` is assigned from
    # ``Synset.wup_similarity`` immediately after this definition.
    return synset1.wup_similarity(synset2, verbose, simulate_root)
wup_similarity.__doc__ = Synset.wup_similarity.__doc__
def res_similarity(synset1, synset2, ic, verbose=False):
    # Thin delegate to ``synset1.res_similarity``. The information-content
    # argument ``ic`` is forwarded before ``verbose``, matching the
    # method-form wrapper in this file (``res_similarity(synset2, ic,
    # verbose)``); leaving it out would hand ``verbose`` to the callee as
    # its ``ic``. ``__doc__`` is assigned from ``Synset.res_similarity``
    # immediately after this definition.
    return synset1.res_similarity(synset2, ic, verbose)
res_similarity.__doc__ = Synset.res_similarity.__doc__
# More information about the metrics is available at
# http://marimba.d.umn.edu/similarity/measures.html
def path_similarity(synset1, synset2, verbose=False, simulate_root=True):
    # Module-level shortcut so callers can write ``path_similarity(a, b)``
    # instead of ``a.path_similarity(b)``; arguments are handed to the
    # synset method unchanged and in order. ``__doc__`` is assigned from
    # ``Synset.path_similarity`` on the line after this definition.
    return synset1.path_similarity(synset2, verbose, simulate_root)
path_similarity.__doc__ = Synset.path_similarity.__doc__
def lch_similarity(synset1, synset2, verbose=False, simulate_root=True):
    # Module-level shortcut so callers can write ``lch_similarity(a, b)``
    # instead of ``a.lch_similarity(b)``; arguments are handed to the
    # synset method unchanged and in order. ``__doc__`` is assigned from
    # ``Synset.lch_similarity`` on the line after this definition.
    return synset1.lch_similarity(synset2, verbose, simulate_root)
lch_similarity.__doc__ = Synset.lch_similarity.__doc__
def wup_similarity(synset1, synset2, verbose=False, simulate_root=True):
    # Module-level shortcut so callers can write ``wup_similarity(a, b)``
    # instead of ``a.wup_similarity(b)``; arguments are handed to the
    # synset method unchanged and in order. ``__doc__`` is assigned from
    # ``Synset.wup_similarity`` on the line after this definition.
    return synset1.wup_similarity(synset2, verbose, simulate_root)
wup_similarity.__doc__ = Synset.wup_similarity.__doc__
def res_similarity(synset1, synset2, ic, verbose=False):
    # Module-level shortcut for ``synset1.res_similarity``. ``ic`` is passed
    # along before ``verbose`` so the callee receives the information-content
    # argument in its own slot, as the method-form wrapper in this file does
    # (``res_similarity(synset2, ic, verbose)``). ``__doc__`` is assigned
    # from ``Synset.res_similarity`` on the line after this definition.
    return synset1.res_similarity(synset2, ic, verbose)
res_similarity.__doc__ = Synset.res_similarity.__doc__
def jcn_similarity(synset1, synset2, ic, verbose=False):
    # Module-level shortcut for ``synset1.jcn_similarity``. ``ic`` is passed
    # along before ``verbose`` so the callee receives the information-content
    # argument in its own slot, as the method-form wrapper in this file does
    # (``jcn_similarity(synset2, ic, verbose)``). ``__doc__`` is assigned
    # from ``Synset.jcn_similarity`` on the line after this definition.
    return synset1.jcn_similarity(synset2, ic, verbose)
jcn_similarity.__doc__ = Synset.jcn_similarity.__doc__
def lin_similarity(synset1, synset2, ic, verbose=False):
    # Module-level shortcut for ``synset1.lin_similarity``. ``ic`` is passed
    # along before ``verbose`` so the callee receives the information-content
    # argument in its own slot, as the method-form wrapper in this file does
    # (``lin_similarity(synset2, ic, verbose)``). ``__doc__`` is assigned
    # from ``Synset.lin_similarity`` on the line after this definition.
    return synset1.lin_similarity(synset2, ic, verbose)
lin_similarity.__doc__ = Synset.lin_similarity.__doc__