Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_bleach_html_parser(parser_args, data, expected):
    """Parse ``data`` as a fragment, serialize it again, and compare to ``expected``.

    ``parser_args`` is merged over the default ``BleachHTMLParser`` keyword
    arguments, so a test case can override or extend them.
    """
    # Defaults first; anything supplied in parser_args wins on key collisions.
    parser_kwargs = dict(tags=None, strip=True, consume_entities=True)
    parser_kwargs.update(parser_args)

    # Same parser -> walker -> serializer pipeline that clean() sets up.
    fragment_parser = html5lib_shim.BleachHTMLParser(**parser_kwargs)
    walk_tree = html5lib_shim.getTreeWalker('etree')
    serializer_options = {
        'quote_attr_values': 'always',
        'omit_optional_tags': False,
        'escape_lt_in_attrs': True,
        'resolve_entities': False,
        'sanitize': False,
        'alphabetical_attributes': False,
    }
    html_serializer = html5lib_shim.BleachHTMLSerializer(**serializer_options)

    # Round-trip the input: parse it, walk the resulting tree, render it back.
    fragment = fragment_parser.parseFragment(data)
    rendered = html_serializer.render(walk_tree(fragment))
    assert rendered == expected
def test_serializer(data, expected):
    """Round-trip ``data`` through parse/walk/serialize and compare to ``expected``.

    The parser runs with ``consume_entities=False`` and the serializer with
    ``resolve_entities=False``.
    """
    # Mirror the parser / walker / serializer setup used by clean().
    parser_options = {
        'tags': None,
        'strip': True,
        'consume_entities': False,
        'namespaceHTMLElements': False,
    }
    fragment_parser = html5lib_shim.BleachHTMLParser(**parser_options)
    tree_walker = html5lib_shim.getTreeWalker('etree')
    html_serializer = html5lib_shim.BleachHTMLSerializer(
        quote_attr_values='always',
        omit_optional_tags=False,
        escape_lt_in_attrs=True,
        resolve_entities=False,
        sanitize=False,
        alphabetical_attributes=False,
    )

    # Parse the fragment, walk the tree, then render the walked stream.
    parsed_fragment = fragment_parser.parseFragment(data)
    walked = tree_walker(parsed_fragment)
    output = html_serializer.render(walked)
    assert output == expected
"""
# Fragment of an initializer body; its `def` line and the opening of its
# docstring are outside this excerpt.
# Keep the caller-supplied linkify settings on the instance.
self.callbacks = callbacks
self.skip_tags = skip_tags
self.parse_email = parse_email
self.url_re = url_re
self.email_re = email_re
# Create a parser/tokenizer that allows all HTML tags and escapes
# anything not in that list.
# NOTE(review): `recognized_tags` is not defined in the visible lines --
# presumably a parameter or module-level value; confirm.
self.parser = html5lib_shim.BleachHTMLParser(
tags=recognized_tags,
strip=False,
consume_entities=True,
namespaceHTMLElements=False,
)
# Walker for the 'etree' tree type, used together with the serializer below.
self.walker = html5lib_shim.getTreeWalker('etree')
self.serializer = html5lib_shim.BleachHTMLSerializer(
quote_attr_values='always',
omit_optional_tags=False,
# linkify does not sanitize
sanitize=False,
# The serializer's attribute alphabetizing is switched off here.
# NOTE(review): the previous comment read "linkify alphabetizes", which
# does not match the False value -- presumably attribute ordering is
# handled elsewhere; confirm.
alphabetical_attributes=False,
)
"""
# Fragment of an initializer body; its `def` line and the opening of its
# docstring are outside this excerpt.
# Keep the caller-supplied tag/attribute/style/protocol settings on the
# instance.
self.tags = tags
self.attributes = attributes
self.styles = styles
self.protocols = protocols
self.strip = strip
self.strip_comments = strip_comments
# A falsy `filters` (e.g. None) is replaced by a new empty list.
self.filters = filters or []
# The parser receives the same tags and strip settings stored above.
# NOTE(review): consume_entities=False presumably keeps character entities
# unconverted while parsing -- confirm against html5lib_shim.
self.parser = html5lib_shim.BleachHTMLParser(
tags=self.tags,
strip=self.strip,
consume_entities=False,
namespaceHTMLElements=False
)
# Walker for the 'etree' tree type, used together with the serializer below.
self.walker = html5lib_shim.getTreeWalker('etree')
self.serializer = html5lib_shim.BleachHTMLSerializer(
quote_attr_values='always',
omit_optional_tags=False,
escape_lt_in_attrs=True,
# We want to leave entities as they are without escaping or
# resolving or expanding
resolve_entities=False,
# Bleach has its own sanitizer, so don't use the html5lib one
sanitize=False,
# Bleach sanitizer alphabetizes already, so don't use the html5lib one
alphabetical_attributes=False,
)