Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# Fragment of a parser-conformance test helper (enclosing def is outside this
# view).  Decodes the test-fixture fields from bytes, parses the input, and
# builds a diff-style error message.  NOTE(review): names `p`, `input`,
# `expected`, `errors`, `innerHTML` come from the unseen enclosing scope.
if innerHTML:
# innerHTML arrives as bytes from the test file; decode to str for parseFragment.
innerHTML = str(innerHTML, "utf8")
if errors:
# Expected-error section of the fixture: one error description per line.
errors = str(errors, "utf8")
errors = errors.split("\n")
expected = str(expected, "utf8")
try:
if innerHTML:
# Fragment-parsing case: innerHTML names the context element.
document = p.parseFragment(io.BytesIO(input), innerHTML)
else:
try:
document = p.parse(io.BytesIO(input))
except constants.DataLossWarning:
# Known-lossy inputs are skipped rather than failed.
sys.stderr.write("Test input causes known dataloss, skipping")
return
except:
# NOTE(review): bare except hides even SystemExit/KeyboardInterrupt;
# the traceback is preserved in the failure message, but narrowing to
# `except Exception` would be safer — confirm with the test harness owner.
errorMsg = "\n".join(["\n\nInput:", str(input, "utf8"),
"\nExpected:", expected,
"\nTraceback:", traceback.format_exc()])
self.assertTrue(False, errorMsg)
# Serialize the resulting tree and normalize attribute order on both sides
# so the comparison is insensitive to attribute ordering.
output = convertTreeDump(p.tree.testSerializer(document))
output = attrlist.sub(sortattrs, output)
expected = convertExpected(expected)
expected = attrlist.sub(sortattrs, expected)
errorMsg = "\n".join(["\n\nInput:", str(input, "utf8"),
"\nExpected:", expected,
"\nReceived:", output])
def import_wiki(filename='wiki', hostname='localhost', port=8080):
    """Import a TiddlyWiki file into a TiddlyWeb-style server.

    Parses *filename* as HTML, finds the tiddler store (``div#storeArea``),
    creates the target recipe and bag on ``hostname:port``, then uploads
    each tiddler ``div`` found in the store.

    :param filename: path of the wiki file to read (UTF-8).
    :param hostname: server to upload to.
    :param port: server port.
    """
    # `with` guarantees the file handle is closed even if read() raises;
    # the original open()/read()/close() sequence leaked the handle on error.
    with codecs.open(filename, encoding='utf-8') as f:
        wikitext = f.read()
    parser = html5lib.HTMLParser(tree=treebuilders.getTreeBuilder('beautifulsoup'))
    soup = parser.parse(wikitext)
    # TiddlyWiki keeps all tiddlers as child divs of div#storeArea.
    store_area = soup.find('div', id='storeArea')
    divs = store_area.findAll('div')
    # Server-side containers must exist before tiddlers are pushed.
    _do_recipe(hostname, port)
    _do_bag(hostname, port)
    for tiddler in divs:
        _do_tiddler(hostname, port, tiddler)
:copyright: Copyright 2007-2019 by the Sphinx team, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
import re
# cElementTree was deprecated since 3.3 and removed in Python 3.9;
# plain ElementTree uses the C accelerator automatically when available.
import xml.etree.ElementTree as ElementTree
from hashlib import md5

import pytest
from html5lib import getTreeBuilder, HTMLParser

from test_build_html import flat_dict, tail_check, check_xpath

from sphinx.util.docutils import is_html5_writer_available

# Shared module-level parser.  namespaceHTMLElements=False keeps tag names
# un-namespaced (no '{http://www.w3.org/1999/xhtml}' prefix), which keeps the
# XPath expressions in the tests simple.
TREE_BUILDER = getTreeBuilder('etree', implementation=ElementTree)
HTML_PARSER = HTMLParser(TREE_BUILDER, namespaceHTMLElements=False)

# fname -> parsed etree; managed by the cached_etree_parse fixture.
etree_cache = {}
# NOTE(review): pytest marks applied to fixtures are ignored (and rejected by
# modern pytest); this skipif likely belongs on the tests, not here — confirm.
@pytest.mark.skipif(not is_html5_writer_available(), reason='HTML5 writer is not available')
@pytest.fixture(scope='module')
def cached_etree_parse():
    """Yield a parse(fname) callable that caches the most recent parsed tree.

    The cache holds at most one entry (it is cleared before each new insert)
    so repeated XPath checks against the same file skip re-parsing without
    accumulating every parsed document in memory.
    """
    def parse(fname):
        if fname in etree_cache:
            return etree_cache[fname]
        with (fname).open('rb') as fp:
            etree = HTML_PARSER.parse(fp)
        # Keep only the newest tree: bound memory across many test files.
        etree_cache.clear()
        etree_cache[fname] = etree
        return etree
    # BUG FIX: the original fixture never returned/yielded `parse`, so every
    # test received None.  Yield it, and clear the cache on module teardown.
    yield parse
    etree_cache.clear()
def _parse(content):
    """Parse an HTML fragment and return the first ``div`` element found.

    Used for individual tiddler divs: *content* is parsed leniently with the
    liberal XML parser and the leading ``div`` of the fragment is returned.
    """
    fragment_parser = html5lib.liberalxmlparser.XMLParser(
        tree=treebuilders.getTreeBuilder('beautifulsoup'))
    fragment = fragment_parser.parseFragment(content)
    return fragment.find('div')
parsers = {"html":lambda x:html5lib.parse(x, treebuilder="etree"),
"xhtml":lambda x:ElementTree.parse(x, XMLParser.XMLParser()),
def insertComment(self, data, parent=None):
    """Insert a comment node, buffering any comment seen before the root.

    Comments that arrive before the root element is inserted cannot be
    attached to the tree yet, so they are queued in ``initialComments``;
    afterwards insertion is delegated to the base TreeBuilder.
    """
    if self.rootInserted:
        # Tree has a root: normal insertion path.
        _base.TreeBuilder.insertComment(self, data, parent)
    else:
        # Pre-root comment: stash it for later emission.
        self.initialComments.append(data)
# Fragment of a command-line demo script (enclosing function is outside this
# view).  Copies parsed CLI options into serializer kwargs, serializes the
# document to stdout, and prints any parse errors.  NOTE(review): `opts`,
# `kwargs`, `opt`, `document`, `parser` come from the unseen enclosing scope.
try:
kwargs[opt] = getattr(opts, opt)
except:
# NOTE(review): bare except silently skips options missing from `opts`;
# `except AttributeError: pass` would express the intent precisely.
pass
# An empty quote_char means "let the serializer pick" — drop the key so the
# serializer's own default applies.
if not kwargs['quote_char']:
del kwargs['quote_char']
if opts.sanitize:
kwargs["sanitize"] = True
tokens = treewalkers.getTreeWalker(opts.treebuilder)(document)
if sys.version_info[0] >= 3:
# Python 3: serialize to str and let stdout handle encoding.
encoding = None
else:
# Python 2: serialize directly to utf-8 bytes.
encoding = "utf-8"
for text in serializer.HTMLSerializer(**kwargs).serialize(tokens, encoding=encoding):
sys.stdout.write(text)
if not text.endswith('\n'):
sys.stdout.write('\n')
if opts.error:
# Report parse errors collected by the parser: position, then the message
# template from constants.E filled with the error's data variables.
errList = []
for pos, errorcode, datavars in parser.errors:
errList.append("Line %i Col %i" % pos + " " + constants.E.get(errorcode, 'Unknown error "%s"' % errorcode) % datavars)
sys.stdout.write("\nParse errors:\n" + "\n".join(errList) + "\n")
def test_all_tokens(self):
    """Parse a small document with every registered tree type and check that
    each tree walker emits the expected token stream."""
    expected = [
        {'data': [], 'type': 'StartTag', 'name': 'html'},
        {'data': [], 'type': 'StartTag', 'name': 'head'},
        {'data': [], 'type': 'EndTag', 'name': 'head'},
        {'data': [], 'type': 'StartTag', 'name': 'body'},
        {'data': 'a', 'type': 'Characters'},
        {'data': [], 'type': 'StartTag', 'name': 'div'},
        {'data': 'b', 'type': 'Characters'},
        {'data': [], 'type': 'EndTag', 'name': 'div'},
        {'data': 'c', 'type': 'Characters'},
        {'data': [], 'type': 'EndTag', 'name': 'body'},
        {'data': [], 'type': 'EndTag', 'name': 'html'}
    ]
    for treeName, treeCls in treeTypes.items():
        p = html5parser.HTMLParser(tree=treeCls["builder"])
        document = p.parse("a<div>b</div>c")
        # Some tree types need an adapter before walking; default is identity.
        document = treeCls.get("adapter", lambda x: x)(document)
        output = treeCls["walker"](document)
        for expectedToken, outputToken in zip(expected, output):
            # assertEquals is a deprecated alias, removed in Python 3.12;
            # use the canonical assertEqual.
            self.assertEqual(expectedToken, outputToken)
def sanitize_html(stream):
    """Run *stream* through the sanitizing tokenizer and return it re-serialized
    as a single XML string (fragment-level parse, unsafe markup stripped)."""
    parser = html5parser.HTMLParser(tokenizer=sanitizer.HTMLSanitizer)
    fragment = parser.parseFragment(stream)
    return ''.join(node.toxml() for node in fragment.childNodes)
def sanitize_html(self,stream):
    """Run *stream* through the sanitizing tokenizer and return it re-serialized
    as a single XML string (fragment-level parse, unsafe markup stripped)."""
    parser = html5parser.HTMLParser(tokenizer=sanitizer.HTMLSanitizer)
    fragment = parser.parseFragment(stream)
    return ''.join(node.toxml() for node in fragment.childNodes)