Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
try:
kwargs[opt] = getattr(opts, opt)
except:
pass
if not kwargs['quote_char']:
del kwargs['quote_char']
if opts.sanitize:
kwargs["sanitize"] = True
tokens = treewalkers.getTreeWalker(opts.treebuilder)(document)
if sys.version_info[0] >= 3:
encoding = None
else:
encoding = "utf-8"
for text in serializer.HTMLSerializer(**kwargs).serialize(tokens, encoding=encoding):
sys.stdout.write(text)
if not text.endswith('\n'):
sys.stdout.write('\n')
if opts.error:
errList = []
for pos, errorcode, datavars in parser.errors:
errList.append("Line %i Col %i" % pos + " " + constants.E.get(errorcode, 'Unknown error "%s"' % errorcode) % datavars)
sys.stdout.write("\nParse errors:\n" + "\n".join(errList) + "\n")
def camoify(ctx, value):
    """Route the ``src`` of every ``<img>`` in ``value`` through ``request.camo_url``.

    ``ctx`` is queried with ``ctx.get("request")``; when that yields a falsy
    value the request comes from ``get_current_request()`` instead.
    ``value`` is parsed as an HTML document into a DOM tree, each ``<img>``
    with a non-empty ``src`` is rewritten, and the whole tree is serialized
    back to a single string, which is returned.

    NOTE(review): every non-empty ``src`` is handed to ``camo_url``; whether
    sources that are already HTTPS come back unchanged depends on that
    method, which is not visible here.
    """
    request = ctx.get("request") or get_current_request()

    # Build a DOM from the rendered markup so images can be edited in place.
    dom_builder = html5lib.treebuilders.getTreeBuilder("dom")
    document = html5lib.html5parser.HTMLParser(tree=dom_builder).parse(value)

    for img in document.getElementsByTagName("img"):
        original_src = img.getAttribute("src")
        if not original_src:
            # No (or empty) src attribute: nothing to rewrite.
            continue
        img.setAttribute("src", request.camo_url(original_src))

    # Walk the modified DOM and glue the serializer's chunks back together.
    walk = html5lib.treewalkers.getTreeWalker("dom")
    serializer = html5lib.serializer.HTMLSerializer()
    return "".join(serializer.serialize(walk(document)))
def run(self, text):
    """Parse ``text`` as an HTML fragment, filter it, and return the rendered result.

    If the sanitization did not need customizing this could simply be
    ``html5lib.serialize(parsed, sanitize=True)``. Instead the same steps
    are carried out by hand so that ``ForgeHTMLSanitizerFilter`` is applied
    in place of the standard sanitizer.
    """
    fragment = html5lib.parseFragment(text)
    etree_walker = html5lib.treewalkers.getTreeWalker("etree")
    # The custom step: wrap the token stream in our own sanitizer filter.
    token_stream = ForgeHTMLSanitizerFilter(etree_walker(fragment))
    return html5lib.serializer.HTMLSerializer().render(token_stream)
def _get_serializer(self, **options):
    """Return an ``HTMLSerializer`` configured with the defaults plus ``options``.

    The requested options are a copy of ``self._default_serializer_options``
    updated with ``options``. If a serializer is already stored on the
    instance and was built with equal options, it is returned as is;
    otherwise a new one is created and stored together with the options
    that produced it.
    """
    wanted = self._default_serializer_options.copy()
    wanted.update(options)

    # Cache hit: an existing serializer built from the very same options.
    if self._serializer and self._serializer_options == wanted:
        return self._serializer

    self._serializer = html5lib.serializer.HTMLSerializer(**wanted)
    self._serializer_options = wanted
    return self._serializer
"""
parser_kwargs = {}
serializer_kwargs = {}
if sanitize:
if HTMLSanitizer is None:
# new syntax as of 0.99999999/1.0b9 (Released on July 14, 2016)
serializer_kwargs["sanitize"] = True
else:
parser_kwargs["tokenizer"] = HTMLSanitizer
p = HTMLParser(tree=treebuilders.getTreeBuilder("dom"), **parser_kwargs)
dom_tree = p.parseFragment(input)
walker = treewalkers.getTreeWalker("dom")
stream = walker(dom_tree)
s = HTMLSerializer(omit_optional_tags=False, **serializer_kwargs)
return "".join(s.serialize(stream))