Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_convert_entities(data, expected):
    """Check that ``html5lib_shim.convert_entities(data)`` equals ``expected``."""
    converted = html5lib_shim.convert_entities(data)
    assert converted == expected
def sanitize_css(self, style):
"""Sanitizes css in style tags"""
# Convert entities in the style so that it can be parsed as CSS
style = html5lib_shim.convert_entities(style)
# Drop any url values before we do anything else
style = re.compile(r'url\s*\(\s*[^\s)]+?\s*\)\s*').sub(' ', style)
# The gauntlet of sanitization
# Validate the css in the style tag and if it's not valid, then drop
# the whole thing.
parts = style.split(';')
gauntlet = re.compile(
r"""^([-/:,#%.'"\s!\w]|\w-\w|'[\s\w]+'\s*|"[\s\w]+"|\([\d,%\.\s]+\))*$""",
flags=re.U
)
for part in parts:
if not gauntlet.match(part):
def sanitize_uri_value(self, value, allowed_protocols):
"""Checks a uri value to see if it's allowed
:arg value: the uri value to sanitize
:arg allowed_protocols: list of allowed protocols
:returns: allowed value or None
"""
# NOTE(willkg): This transforms the value into one that's easier to
# match and verify, but shouldn't get returned since it's vastly
# different than the original value.
# Convert all character entities in the value
new_value = html5lib_shim.convert_entities(value)
# Nix backtick, space characters, and control characters
new_value = re.sub(
r"[`\000-\040\177-\240\s]+",
'',
new_value
)
# Remove REPLACEMENT characters
new_value = new_value.replace('\ufffd', '')
# Lowercase it--this breaks the value, but makes it easier to match
# against
new_value = new_value.lower()
try: