How to use the html5lib.serializer.HTMLSerializer function in html5lib

To help you get started, we’ve selected a few html5lib examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github web-platform-tests / wpt / tools / third_party / html5lib / parse.py View on Github external
try:
                    kwargs[opt] = getattr(opts, opt)
                except:
                    pass
            if not kwargs['quote_char']:
                del kwargs['quote_char']

            if opts.sanitize:
                kwargs["sanitize"] = True

            tokens = treewalkers.getTreeWalker(opts.treebuilder)(document)
            if sys.version_info[0] >= 3:
                encoding = None
            else:
                encoding = "utf-8"
            for text in serializer.HTMLSerializer(**kwargs).serialize(tokens, encoding=encoding):
                sys.stdout.write(text)
            if not text.endswith('\n'):
                sys.stdout.write('\n')
    if opts.error:
        errList = []
        for pos, errorcode, datavars in parser.errors:
            errList.append("Line %i Col %i" % pos + " " + constants.E.get(errorcode, 'Unknown error "%s"' % errorcode) % datavars)
        sys.stdout.write("\nParse errors:\n" + "\n".join(errList) + "\n")
github pypa / warehouse / warehouse / filters.py View on Github external
def camoify(ctx, value):
    request = ctx.get("request") or get_current_request()

    # Parse the rendered output and replace any inline images that don't point
    # to HTTPS with camouflaged images.
    tree_builder = html5lib.treebuilders.getTreeBuilder("dom")
    parser = html5lib.html5parser.HTMLParser(tree=tree_builder)
    dom = parser.parse(value)

    for element in dom.getElementsByTagName("img"):
        src = element.getAttribute("src")
        if src:
            element.setAttribute("src", request.camo_url(src))

    tree_walker = html5lib.treewalkers.getTreeWalker("dom")
    html_serializer = html5lib.serializer.HTMLSerializer()
    camoed = "".join(html_serializer.serialize(tree_walker(dom)))

    return camoed
github apache / allura / Allura / allura / lib / markdown_extensions.py View on Github external
def run(self, text):
        parsed = html5lib.parseFragment(text)

        # if we didn't have to customize our sanitization, could just do:
        # return html5lib.serialize(parsed, sanitize=True)

        # instead we do the same steps as that function,
        # but add our ForgeHTMLSanitizerFilter instead of sanitize=True which would use the standard one
        TreeWalker = html5lib.treewalkers.getTreeWalker("etree")
        walker = TreeWalker(parsed)
        walker = ForgeHTMLSanitizerFilter(walker)  # this is our custom step
        s = html5lib.serializer.HTMLSerializer()
        return s.render(walker)
github mdn / kuma / kuma / wiki / content.py View on Github external
def _get_serializer(self, **options):
        soptions = self._default_serializer_options.copy()
        soptions.update(options)
        if not (self._serializer and self._serializer_options == soptions):
            self._serializer = html5lib.serializer.HTMLSerializer(**soptions)
            self._serializer_options = soptions
        return self._serializer
github django-fluent / django-fluent-contents / fluent_contents / utils / html.py View on Github external
"""
    parser_kwargs = {}
    serializer_kwargs = {}
    if sanitize:
        if HTMLSanitizer is None:
            # new syntax as of 0.99999999/1.0b9 (Released on July 14, 2016)
            serializer_kwargs["sanitize"] = True
        else:
            parser_kwargs["tokenizer"] = HTMLSanitizer

    p = HTMLParser(tree=treebuilders.getTreeBuilder("dom"), **parser_kwargs)
    dom_tree = p.parseFragment(input)
    walker = treewalkers.getTreeWalker("dom")
    stream = walker(dom_tree)

    s = HTMLSerializer(omit_optional_tags=False, **serializer_kwargs)
    return "".join(s.serialize(stream))