Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_re_field_get_nothing_with_no_default():
    """Expect ``NothingMatchedError`` when nothing matches and no default is set.

    The field is built without a ``default`` argument, and the test requires
    that ``extract`` raises ``NothingMatchedError`` for this pattern.
    """
    field = RegexField(re_select="nothing to match.")
    try:
        field.extract(html=HTML)
    except NothingMatchedError:
        # Expected outcome: the failure is reported with the dedicated error type.
        pass
    else:
        # Without this branch the test would pass vacuously whenever
        # extract() returns normally instead of raising.
        raise AssertionError(
            "expected NothingMatchedError, but extract() returned normally"
        )
def test_re_field_with_named_groups():
    """Expect a mapping keyed by group name when the pattern uses named groups.

    The pattern captures the link target as ``href`` and the link label as
    ``text``; both keys are checked on the extracted result.
    """
    # The second group must be a complete ``(?P<text>...)`` group: the
    # assertions below look up ``result["text"]``, and a truncated ``(?P``
    # is not a valid regular expression.
    field = RegexField(
        re_select='<h1><a href="(?P<href>.*?)">(?P<text>.*?)</a></h1>'
    )
    result = field.extract(html=HTML)
    assert result["href"] == "https://github.com"
    assert result["text"] == "Github"
def test_re_field_in_dict_format_with_many():
    """Expect one name-keyed mapping per match when ``many=True`` is combined
    with named groups.

    Five matches are expected; the first and last are checked for both the
    ``href`` and ``text`` keys.
    """
    # ``text`` has to be a complete named group; the assertions below read
    # ``matches[i]["text"]`` and a truncated ``(?P`` does not compile.
    field = RegexField(
        re_select='<a href="(?P<href>.*?)" class="test_link">(?P<text>.*?)</a>',
        many=True,
    )
    matches = field.extract(html=HTML)
    assert len(matches) == 5
    assert matches[0]["href"] == "https://github.com/howie6879/"
    assert matches[0]["text"] == "hello1 github."
    assert matches[4]["href"] == "https://github.com/howie6879/"
    assert matches[4]["text"] == "hello5 github."
def test_re_field_with_default():
    """Expect ``extract`` to hand back the configured ``default`` for this pattern."""
    fallback_field = RegexField(
        re_select="nothing to match.",
        default="default value",
    )
    extracted = fallback_field.extract(html=HTML)
    assert extracted == "default value"
def test_re_field_with_many():
    """Expect five two-item results when ``many=True`` is used with two unnamed groups.

    Each checked result is unpacked into a link target and a link label.
    """
    link_pattern = '<a href="(.*?)" class="test_link">(.*?)</a>'
    link_field = RegexField(re_select=link_pattern, many=True)
    found = link_field.extract(html=HTML)
    assert len(found) == 5

    # Unpack the first and the last result before checking their contents.
    first_href, first_text = found[0]
    last_href, last_text = found[4]

    assert first_href == "https://github.com/howie6879/"
    assert first_text == "hello1 github."
    assert last_href == "https://github.com/howie6879/"
    assert last_text == "hello5 github."
def test_regex_field():
    """Check ``ruia.RegexField`` for a single match and for ``many=True``.

    The title pattern has two unnamed groups and its result is indexed by
    position. The tag pattern uses named groups with ``many=True`` and is
    expected to produce a list of three dicts.
    """
    title = ruia.RegexField(re_select='<div href="(.*?)" class="title">(.*?)</div>')
    title_match = title.extract(html=HTML)
    assert title_match[0] == "/"
    assert title_match[1] == "Ruia Documentation"

    # The second named group was truncated to ``(?P``, which is not a valid
    # pattern. NOTE(review): the group name ``text`` is inferred from the
    # sibling tests; only ``href`` is asserted here - confirm against upstream.
    tags = ruia.RegexField(
        re_select='<li href="(?P<href>.*?)" class="tag">(?P<text>.*?)</li>',
        many=True,
    )
    result = tags.extract(html=HTML)
    assert isinstance(result, list)
    assert len(result) == 3
    assert isinstance(result[0], dict)
    assert result[0]["href"] == "./easy.html"
def test_re_field_with_html_element():
    """Expect named-group extraction to work when ``html`` is ``html_etree``.

    Same pattern and expectations as the string-input named-group test, but
    the input passed to ``extract`` is the ``html_etree`` object.
    """
    # ``text`` must be a complete named group: ``result["text"]`` is asserted
    # below and a truncated ``(?P`` is not a valid regular expression.
    field = RegexField(
        re_select='<h1><a href="(?P<href>.*?)">(?P<text>.*?)</a></h1>'
    )
    result = field.extract(html=html_etree)
    assert result["href"] == "https://github.com"
    assert result["text"] == "Github"
def test_re_field_with_no_group():
    """Expect the full ``<title>`` element text for a pattern without capture groups."""
    title_field = RegexField(re_select="<title>.*?</title>")
    whole_match = title_field.extract(html=HTML)
    assert whole_match == "<title>ruia</title>"
# NOTE(review): a function named ``test_regex_field`` is also defined earlier
# in this file; this later definition rebinds the name, so only one of the two
# is visible at module level. Consider removing or renaming the duplicate.
def test_regex_field():
    """Check ``ruia.RegexField`` for a single match and for ``many=True``.

    The title pattern has two unnamed groups and its result is indexed by
    position. The tag pattern uses named groups with ``many=True`` and is
    expected to produce a list of three dicts.
    """
    title = ruia.RegexField(re_select='<div href="(.*?)" class="title">(.*?)</div>')
    title_match = title.extract(html=HTML)
    assert title_match[0] == "/"
    assert title_match[1] == "Ruia Documentation"

    # The second named group was truncated to ``(?P``, which is not a valid
    # pattern. NOTE(review): the group name ``text`` is inferred from the
    # sibling tests; only ``href`` is asserted here - confirm against upstream.
    tags = ruia.RegexField(
        re_select='<li href="(?P<href>.*?)" class="tag">(?P<text>.*?)</li>',
        many=True,
    )
    result = tags.extract(html=HTML)
    assert isinstance(result, list)
    assert len(result) == 3
    assert isinstance(result[0], dict)
    assert result[0]["href"] == "./easy.html"
def test_re_field_with_many_groups():
    """Expect a two-item result (link target, link label) for two unnamed groups."""
    heading_pattern = '<h1><a href="(.*?)">(.*?)</a></h1>'
    heading_field = RegexField(re_select=heading_pattern)
    link_target, link_label = heading_field.extract(html=HTML)
    assert link_target == "https://github.com"
    assert link_label == "Github"