Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def __get_n_grams(self, text, n_type, n):
    """Split *text* into n-grams of the requested kind.

    Args:
        text: The input string.
        n_type: ``"word"`` for word n-grams, ``"character"`` for character
            n-grams. Any other value returns the plain token list.
        n: Number of words (or characters) per n-gram.

    Returns:
        A list of strings:

        * ``"word"``: every run of ``n`` consecutive tokens, concatenated
          with no separator. If the text has fewer than ``n`` tokens, the
          untouched ``text`` is returned as the only element.
        * ``"character"``: whatever ``ngram.NGram(N=n).split(text)`` yields,
          or ``[text]`` when the text is not longer than ``n`` characters.
        * anything else: the tokens produced by ``self.__tokenize_input``.
    """
    if n_type == "character":
        # Check the length first so no n-grams are built only to be thrown
        # away; tokens are not needed on this path at all.
        if len(text) <= n:
            return [text]
        return list(ngram.NGram(N=n).split(text))

    # According to the original comment the tokenizer also drops stop words
    # (not verifiable from this block alone).
    tokens = list(self.__tokenize_input(text))

    if n_type != "word":
        # Unknown n-gram type: fall back to the plain tokens.
        return tokens

    # Return the text directly only if n is greater than the number of
    # words; exactly n tokens still form one valid n-gram.
    if len(tokens) < n:
        return [text]

    # zip over the token list shifted by 0..n-1 gives the sliding windows.
    return ["".join(gram) for gram in zip(*(tokens[i:] for i in range(n)))]