Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def clean_html(input):
    """Sanitize user-supplied HTML, keeping only a small whitelist of tags.

    All other tags are stripped, leaving their inner text in place.  Bare
    URLs are linkified with rel="nofollow" via LinkifyFilter, and links
    that yield no text are dropped by EmptyLinkFilter.  See utils.test
    for examples.

    NOTE(review): the parameter shadows the builtin `input`; kept as-is
    because renaming would break keyword callers.
    """
    ok_tags = [u"a", u"img", u"strong", u"b", u"em", u"i", u"u", u"ul",
               u"li", u"p", u"br", u"blockquote", u"code"]
    ok_attributes = {u"a": [u"href", u"rel"], u"img": [u"src", u"alt", u"title"]}
    # If input contains a link in the format <http://...> then convert it to
    # "< http://... >", because otherwise the library recognizes it as a tag
    # and breaks the link.  A raw string is used for the pattern: the
    # original non-raw "\<...\S...\>" relied on invalid escape sequences,
    # which raise a DeprecationWarning/SyntaxWarning on modern Python.
    input = re.sub(r"\<(http\S+?)\>", r'< \1 >', input)
    cleaner = bleach.Cleaner(
        filters=[
            EmptyLinkFilter,
            partial(bleach.linkifier.LinkifyFilter, callbacks=[nofollow]),
        ],
        attributes=ok_attributes,
        tags=ok_tags,
        strip=True,  # strip disallowed tags instead of escaping them
    )
    return cleaner.clean(input)
def clean_html(input):
    """Sanitize user-supplied HTML, keeping only a small whitelist of tags.

    All other tags are stripped, leaving their inner text in place.  Bare
    URLs are linkified with rel="nofollow" via LinkifyFilter, and links
    that yield no text are dropped by EmptyLinkFilter.  See utils.test
    for examples.

    NOTE(review): this re-defines an identical `clean_html` earlier in the
    file; at import time this later definition wins.  The duplicate should
    probably be removed -- confirm with the file's history.
    """
    ok_tags = [u"a", u"img", u"strong", u"b", u"em", u"i", u"u", u"ul",
               u"li", u"p", u"br", u"blockquote", u"code"]
    ok_attributes = {u"a": [u"href", u"rel"], u"img": [u"src", u"alt", u"title"]}
    # If input contains a link in the format <http://...> then convert it to
    # "< http://... >", because otherwise the library recognizes it as a tag
    # and breaks the link.  A raw string is used for the pattern: the
    # original non-raw "\<...\S...\>" relied on invalid escape sequences,
    # which raise a DeprecationWarning/SyntaxWarning on modern Python.
    input = re.sub(r"\<(http\S+?)\>", r'< \1 >', input)
    cleaner = bleach.Cleaner(
        filters=[
            EmptyLinkFilter,
            partial(bleach.linkifier.LinkifyFilter, callbacks=[nofollow]),
        ],
        attributes=ok_attributes,
        tags=ok_tags,
        strip=True,  # strip disallowed tags instead of escaping them
    )
    return cleaner.clean(input)
def test_no_href_links(self):
    """An anchor with no href (a named anchor) passes through linkify unchanged."""
    html = '<a name="anchor">x</a>'
    result = linkify(html)
    assert result == html
def test_ports(data, expected_data):
    """URLs can contain port numbers."""
    template = '<a rel="nofollow" href="{0}">{0}</a>{1}'
    expected = template.format(*expected_data)
    assert linkify(data) == expected
def test_mangle_text():
    """We can muck with the inner text of a link."""

    def replace_text(attrs, new=False):
        # Callback that forces every link's visible text to 'bar'.
        attrs['_text'] = 'bar'
        return attrs

    linked = linkify(
        'http://ex.mp <a href="http://ex.mp/foo">foo</a>',
        callbacks=[replace_text],
    )
    assert linked == '<a href="http://ex.mp">bar</a> <a href="http://ex.mp/foo">bar</a>'
def test_has_nofollow_already(self):
    """Attrs that already carry rel="nofollow" come back unchanged."""
    attrs = {(None, 'href'): 'http://example.com', (None, 'rel'): 'nofollow'}
    result = nofollow(attrs)
    assert result == attrs
def test_mailto(self):
    """A mailto: link does not get a nofollow rel added."""
    attrs = {(None, 'href'): 'mailto:joe@example.com'}
    result = nofollow(attrs)
    assert result == attrs
def test_other_rel(self):
    """An existing rel value gets 'nofollow' appended, not replaced."""
    attrs = {(None, 'href'): 'http://example.com', (None, 'rel'): 'next'}
    expected = {
        (None, 'href'): 'http://example.com',
        (None, 'rel'): 'next nofollow',
    }
    assert nofollow(attrs) == expected
def test_bleach_html_parser(parser_args, data, expected):
    """Round-trip data through the parser/walker/serializer like clean() does."""
    parser_kwargs = {
        'tags': None,
        'strip': True,
        'consume_entities': True,
        **parser_args,  # per-case overrides win over the defaults
    }

    # Build a parser, walker, and serializer just like we do in clean()
    parser = html5lib_shim.BleachHTMLParser(**parser_kwargs)
    walker = html5lib_shim.getTreeWalker('etree')
    serializer = html5lib_shim.BleachHTMLSerializer(
        quote_attr_values='always',
        omit_optional_tags=False,
        escape_lt_in_attrs=True,
        resolve_entities=False,
        sanitize=False,
        alphabetical_attributes=False,
    )

    # Parse, walk, and then serialize the output
    dom = parser.parseFragment(data)
    assert serializer.render(walker(dom)) == expected
def test_convert_entities(data, expected):
    """convert_entities turns character references into the expected text."""
    converted = html5lib_shim.convert_entities(data)
    assert converted == expected