def test_define(self):
    w = tb.Word("hack")
    synsets = w.get_synsets(wn.NOUN)
    definitions = w.define(wn.NOUN)
    assert_equal(len(synsets), len(definitions))
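The same relationship, checked outside the test harness. A minimal sketch, assuming textblob and the NLTK WordNet corpus are installed; define() yields one definition per synset, in the same order.

from textblob import Word
from textblob.wordnet import NOUN

w = Word("hack")
for synset, definition in zip(w.get_synsets(NOUN), w.define(NOUN)):
    print(synset.name(), "->", definition)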
def test_lemmatize(self):
    w = tb.Word("cars")
    assert_equal(w.lemmatize(), "car")
    w = tb.Word("wolves")
    assert_equal(w.lemmatize(), "wolf")
    w = tb.Word("went")
    assert_equal(w.lemmatize("v"), "go")    # WordNet tagset
    assert_equal(w.lemmatize("VBD"), "go")  # Penn Treebank tagset
def test_word_lists():
    animals = TextBlob("cat dog octopus ocropus")  # "ocropus" is deliberately misspelled
    pluralized_words = animals.words.pluralize()
    corrected_words = animals.correct()

    word_ocropus = Word('ocropus')
    word_ocropus_spellchecked = word_ocropus.spellcheck()

    word_mice = Word('mice')
    word_mice_lemmatized = word_mice.lemmatize()

    word_highest = Word('highest')
    word_highest_lemmatized = word_highest.lemmatize()

    # test WordNet similarities
    king_synsets = Word("king").get_synsets(pos=NOUN)
    king = Synset('king.n.01')
    queen = Synset('queen.n.02')
    man = Synset('man.n.01')
    wife = Synset('wife.n.01')
    woman = Synset('woman.n.01')
    octopus = Synset('octopus.n.01')

    kq_similarity = king.path_similarity(queen)
    km_similarity = king.path_similarity(man)
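The similarity calls above in isolation. path_similarity scores the shortest hypernym path between two synsets, giving a value in (0, 1], or None when no path exists; closely related senses score higher.

from textblob.wordnet import Synset

king = Synset('king.n.01')
queen = Synset('queen.n.02')
octopus = Synset('octopus.n.01')

print(king.path_similarity(queen))    # related senses: higher score
print(king.path_similarity(octopus))  # unrelated senses: lower score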
def suggest_synonyms(words, target_words):
    suggestions = []
    word_synonyms = [(Word(w[0]).get_synsets(pos=VERB), w[1]) for w in target_words]
    for w in words:
        found = False
        synset = (Word(w[0]).get_synsets(pos=VERB), w[1])
        if len(synset[0]):
            for synonym in [s for s in word_synonyms if len(s[0])]:
                similarity = synset[0][0].path_similarity(synonym[0][0])
                if similarity is None:  # no path between the two synsets
                    continue
                if similarity == 1.0:
                    found = True
                if 1.0 > similarity > 0.4 and not found:
                    suggestions.append((synset[0][0].name().split(".")[0],
                                        synonym[0][0].name().split(".")[0]))
    return suggestions
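A hypothetical invocation; suggest_synonyms expects both arguments as lists of (token, tag) pairs, per the w[0]/w[1] indexing above, and the values here are made up for illustration.

words = [('sprint', 'VB'), ('construct', 'VB')]
target_words = [('run', 'VB'), ('build', 'VB')]

for original, suggestion in suggest_synonyms(words, target_words):
    print(original, '->', suggestion)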
def __call__(self, x):
    """
    Predict

    :param x: Text to classify
    :return: Tuple of predicted label and class probabilities
    """
    # Analyze text
    text_blob = TextBlob(x)

    # For each forbidden word
    for word in text_blob.words:
        if Word(word.lower()).lemmatize() in self._forbidden_words:
            return 'neg', {'neg': 1.0, 'pos': 0.0}
        # end if
    # end for
    return 'pos', {'neg': 0.0, 'pos': 1.0}
# end __call__
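A self-contained version of the same idea; the class name and constructor are assumptions, since the source only shows __call__, and the lemma-based lookup means inflected forms still match.

from textblob import TextBlob, Word

class ForbiddenWordClassifier(object):  # hypothetical wrapper around __call__
    def __init__(self, forbidden_words):
        # store lemmas so that e.g. "spoilers" matches a forbidden "spoiler"
        self._forbidden_words = set(Word(w.lower()).lemmatize() for w in forbidden_words)

    def __call__(self, x):
        for word in TextBlob(x).words:
            if Word(word.lower()).lemmatize() in self._forbidden_words:
                return 'neg', {'neg': 1.0, 'pos': 0.0}
        return 'pos', {'neg': 0.0, 'pos': 1.0}

clf = ForbiddenWordClassifier(['spoiler'])
print(clf('This review is full of spoilers'))  # ('neg', {'neg': 1.0, 'pos': 0.0})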
def _get_synsets(string, pos=None):
    word = Word(string)
    if pos is None:
        synsets = word.synsets
    else:
        synsets = word.get_synsets(pos)
    return synsets
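Both paths through the helper; the POS constants come from textblob.wordnet, and Word must be in scope for _get_synsets to run.

from textblob import Word
from textblob.wordnet import VERB

print(_get_synsets("chop"))            # all synsets, any part of speech
print(_get_synsets("chop", pos=VERB))  # verb synsets only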
skype_usernames = []
for i in skype_indices:
    jmin = max(i - self.word_radius, 0)
    jmax = min(i + self.word_radius + 1, len(tokens))
    for j in list(range(jmin, i)) + list(range(i + 1, jmax)):
        token = tokens[j]
        if self.filth_cls.SKYPE_USERNAME.match(token):
            # This token is a valid Skype username. Most Skype usernames
            # appear to be misspelled words. Word.spellcheck does not
            # handle all-caps words very well, so we cast these to lower
            # case before checking whether the word is misspelled.
            if token.isupper():
                token = token.lower()
            word = textblob.Word(token)
            suggestions = word.spellcheck()
            corrected_word, score = suggestions[0]
            if score < 0.5:
                skype_usernames.append(token)

# replace all skype usernames
if skype_usernames:
    self.filth_cls.regex = re.compile('|'.join(skype_usernames))
else:
    self.filth_cls.regex = None

return super(SkypeDetector, self).iter_filth(text)
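The detector hinges on the spellcheck confidence score; a minimal illustration of that signal, with a made-up token (exact scores depend on the installed corpora).

import textblob

word = textblob.Word("ajohnson")  # hypothetical Skype-style handle
suggestions = word.spellcheck()   # list of (candidate, confidence) pairs
corrected_word, score = suggestions[0]
# a low top score means no dictionary word is a convincing correction,
# which is what flags the token as a probable username above
print(corrected_word, score)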
def check_min_max_semantic(self, dependency, jjs: str, sentence):
    """
    Args:
        dependency: dependency pair; dependency[1] holds the (max, min) keywords
        jjs: superlative adjective (Penn Treebank JJS tag) to check
        sentence: tagged TextBlob sentence
    Returns:
    """
    # probably better to change this to a tuple of keywords; removes the call to WordNet etc.
    max_w = dependency[1][0]
    min_w = dependency[1][1]
    jj_word = Word(jjs)
    max_word = Word(max_w)
    min_word = Word(min_w)
    len_tags = len(sentence.tags)
    new_tags = sentence.tags
    penultimate_word = new_tags[len_tags - 2]
    end_word = new_tags[len_tags - 1]
    for word in jj_word.synsets:
        for syn in min_word.synsets:
            if word == syn:
                for keyword in self.ANALYSIS_KEYWORDS:
                    if keyword == end_word[0].lower() and keyword == 'excess':
                        result = excess_controller(database_connection_uri(retrieve='retrieve'),
                                                   direction='smallest')
                        response = [
                            'SKU {} has the smallest excess value at {}{:,.2f}'.format(str(result[1]),
                                                                                       self.currency_symbol,
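The WordNet test at the core of the nested loops is just a synset intersection, which is one way to act on the "tuple of keywords" comment above; a compact sketch:

from textblob import Word

# "car" and "automobile" share the synset car.n.01, so this check fires;
# the same set intersection replaces the word == syn comparison above
if set(Word("car").synsets) & set(Word("automobile").synsets):
    print("the two words share a WordNet sense")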