How to use pyahocorasick - 10 common examples

To help you get started, we’ve selected a few pyahocorasick examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github WojciechMula / pyahocorasick / py / unittests.py View on Github external
def get_test_automaton():
			"""Return a Trie filled with a fixed word list, after make_automaton().

			Every word is stored with itself as its associated value.
			"""
			trie = Trie()
			for word in "he her hers his she hi him man himan".split():
				trie.add_word(word, word)

			trie.make_automaton()
			return trie
github WojciechMula / pyahocorasick / py / unittests.py View on Github external
def testAddedWordShouldBeCountedAndAvailableForRetrieval(self):
		"""A single added word is counted by len() and returned by get()."""
		trie = Trie()
		trie.add_word('python', 'value')

		size = len(trie)
		stored = trie.get('python')
		self.assertEqual(size, 1)
		self.assertEqual(stored, 'value')
github WojciechMula / pyahocorasick / py / unittests.py View on Github external
def testItemsShouldReturnAllItemsAlreadyAddedToTheTrie(self):
		"""items() must yield every (word, value) pair added beforehand."""
		t = Trie()

		# Single source of truth for both the fixture and the assertions.
		expected = [
			('python', 1),
			('ada',    2),
			('perl',   3),
			('pascal', 4),
			('php',    5),
		]
		for word, value in expected:
			t.add_word(word, value)

		result = list(t.items())
		# assertEqual, not assertEquals: the latter is a deprecated alias
		# that was removed from unittest in Python 3.12.
		self.assertEqual(len(result), len(expected))
		for item in expected:
			self.assertIn(item, result)
github WojciechMula / pyahocorasick / py / unittests.py View on Github external
def testExistShouldDetectAddedWords(self):
		"""exists() is true for each word that has been added."""
		trie = Trie()
		added = ('python', 'ada')
		for word in added:
			trie.add_word(word, 'value')

		for word in added:
			self.assertTrue(trie.exists(word))
github WojciechMula / pyahocorasick / py / unittests.py View on Github external
def testEmptyTrieShouldNotContainsAnyWords(self):
		"""A freshly constructed Trie reports a length of zero."""
		empty = Trie()
		size = len(empty)
		self.assertEqual(size, 0)
github WojciechMula / pyahocorasick / py / unittests.py View on Github external
def testAddingExistingWordShouldReplaceAssociatedValue(self):
		"""Adding the same word twice keeps len() at one and overwrites the value."""
		trie = Trie()
		# First pass stores 'value'; second pass re-adds the word with 'other'.
		for value in ('value', 'other'):
			trie.add_word('python', value)
			self.assertEqual(len(trie), 1)
			self.assertEqual(trie.get('python'), value)
github WojciechMula / pyahocorasick / py / unittests.py View on Github external
def testGetUnknowWordWithDefaultValueShouldReturnDefault(self):
		"""get() on a word that was never added returns the supplied default."""
		trie = Trie()
		found = trie.get('python', 'default')
		self.assertEqual(found, 'default')
github WojciechMula / pyahocorasick / py / issue_21.py View on Github external
def test(case):
    """Run one ``iter_long`` case and fail if the result is not as expected.

    ``case`` is indexed with three keys: ``'words'`` (each word is added to
    the trie with itself as value), ``'input'`` (passed to ``iter_long``)
    and ``'expected'`` (the list the collected results are compared to).

    Raises:
        AssertionError: if the collected results differ from
            ``case['expected']``; both lists are printed first.
    """
    tree = pyahocorasick.Trie()
    for word in case['words']:
        tree.add_word(word, word)

    tree.make_automaton()

    actual = list(tree.iter_long(case['input']))

    if actual != case['expected']:
        print("ERROR:")
        print(actual)
        print(case['expected'])
        # Raise explicitly: an ``assert`` statement is stripped under
        # ``python -O``, which would let a failing case pass silently.
        raise AssertionError("iter_long result does not match expected")
github WojciechMula / pyahocorasick / py / exportdot.py View on Github external
writeln("\tnode%d -> node%d [label=\"%s\"]" % (nodeid, destid, label))

	# fail links
	for node in nodes:
		nodeid = id(node)
		failid = id(node.fail)

		if failid != pyahocorasick.nil:
			writeln("\tnode%d -> node%d [color=blue]" % (nodeid, failid))

	writeln("}")


if __name__ == '__main__':
	# Build a small trie; each word's value is its index in this tuple.
	A = pyahocorasick.Trie()
	for value, word in enumerate(("he", "her", "hers", "she", "cat", "shield")):
		A.add_word(word, value)

	# Export once before and once after make_automaton().
	with open('trie.dot', 'wt') as f:
		exportdot(A, f)

	A.make_automaton()

	with open('ahocorasick.dot', 'wt') as f:
		exportdot(A, f)
github WojciechMula / pyahocorasick / py / exportdot.py View on Github external
def walk(node):
		"""Yield ``node`` and every node reachable through ``children``.

		Pending nodes are taken from the end of a list, so the most recently
		discovered child is visited first. A child that compares equal to
		its own parent (a self-link) is not followed.
		"""
		pending = [node]
		while pending:
			node = pending.pop()
			yield node

			# values() works on Python 2 and 3; itervalues() exists only on
			# Python 2 and raises AttributeError on Python 3 dicts.
			for child in node.children.values():
				if child != node:
					pending.append(child)

	nodes = list(walk(trie.root))

	# nodes
	for node in nodes:
		if node.output != pyahocorasick.nil:
			writeln("\tnode%d [shape=doublecircle, label=\"\"]" % id(node))
		else:
			writeln("\tnode%d [shape=circle, label=\"\"]" % id(node))

	# trie edges
	for node in nodes:
		for letter, child in node.children.iteritems():
			nodeid = id(node)
			destid = id(child)
			if destid == id(trie.root):
				# do not show self-links of root node created during make_automaton
				continue

			if letter.isalnum():
				label = letter
			else:

pyahocorasick

pyahocorasick is a fast and memory efficient library for exact or approximate multi-pattern string search. With the ``ahocorasick.Automaton`` class, you can find multiple key string occurrences at once in some input text. You can use it as a plain dict-like Trie or convert a Trie to an automaton for efficient Aho-Corasick search. You can also pickle an automaton to disk for easy reuse of large automatons. Implemented in C and tested on Python 3.6+. Works on Linux, macOS and Windows. BSD-3-Clause license.

BSD-3-Clause
Latest version published 9 months ago

Package Health Score

78 / 100
Full package analysis

Popular pyahocorasick functions