Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_cannot_upload_while_adding(self):
    """Ensure the upload widget is absent from the creation form.

    The IssueSubmission must be saved before any upload takes place so
    that file chunks can be associated with the issue submission.
    """
    user = UserFactory()
    journal = JournalFactory(members=[user])
    AuthorizationFactory.create_can_manage_issue_subscriptions(user, journal)
    client = Client(logged_user=user)
    add_url = reverse(
        'userspace:journal:editor:add', kwargs={'journal_pk': journal.pk})
    response = client.get(add_url)
    root = etree.HTML(response.content)
    assert not root.cssselect('#id_submissions')
def test_cannot_upload_while_adding(self):
    """Verify the creation page renders without the file upload input.

    The IssueSubmission has to be saved before uploading so that file
    chunks can be tied to the issue submission.
    """
    self.client.login(username='david', password='top_secret')
    page = self.client.get(reverse('editor:add'))
    document = etree.HTML(page.content)
    upload_inputs = document.cssselect('#id_submission_file')
    self.assertFalse(
        upload_inputs,
        "The rendered template should not contain an id_submission_file input" # noqa
    )
@cachedmethod
def getLikelySeriesUrl(self, name):
    """Search the provider site for *name* and collect candidate series.

    Builds a list of ``{'title', 'url'}`` dicts from the search results
    page.  NOTE(review): this function appears truncated in this view —
    the trailing ``if not matches:`` branch has no visible body.
    """
    # Query string for the site's search endpoint.
    data = urlencode({ 'q': name })
    html = etree.HTML(urlopen(self.baseUrl + '/search.php', data).read())
    # Each search hit sits in a <div style=''>; its first <a> is the show link.
    matches = [ s.find('a') for s in html.findall(".//div[@style='']") ]
    # add baseUrl and remove year information
    result = []
    for match in matches:
        # href looks like '...-<id>.<ext>'; keep only the numeric series id.
        seriesID = int(match.get('href').split('-')[1].split('.')[0]) # remove potential season number
        seriesUrl = self.baseUrl + '/tvshow-%d.html' % seriesID
        title = match.text
        try:
            # Intended to strip a trailing ' (year)' suffix.
            # NOTE(review): str.find never raises; when '(' is absent, idx is
            # -2 and the last two characters are silently dropped — TODO confirm.
            idx = title.find('(') - 1
            title = title[:idx]
        except: pass
        result.append({ 'title': title, 'url': seriesUrl })
    if not matches:
def generate_payloads(self, html_code, parent=None):
    """Locate attribute-based injection points in *html_code* and generate
    a handler for each one found.

    NOTE(review): the trailing ``# </a>`` comment suggests a third branch
    (closing-tag entries) that is not visible in this view — the function
    may be truncated here.
    """
    e = []   # entries discovered by self.study()
    o = []   # original source lines, passed through to generateHandler()
    l = []   # working copy of the lines used for (re)parsing below
    for index, line in enumerate(html_code.splitlines(), 1):
        o.append(line)
        # "{1}".format(index, line) keeps only the line text; the index
        # placeholder is unused — presumably a leftover format string.
        l.append("{1}".format(index, line))
    tree = etree.HTML(decode_html("\n".join(l))).getroottree()
    # study() fills `e`; each entry carries at least 'type', 'lineno',
    # and 'identifier' (inferred from the accesses below).
    self.study(tree, entries=e, lines=l, parent=parent)
    for elem in e:
        # <a href="inject_point"></a>
        if elem['type'] == "attrval":
            # Re-parse just the offending line and match any attribute whose
            # *value* matches the identifier (case-insensitive EXSLT regex).
            found_node = etree.HTML(l[int(elem['lineno']) - 1]).xpath("//*[@*[re:test(., '{0}', 'i')]]".format(
                elem['identifier']), namespaces={'re': "http://exslt.org/regular-expressions"})
            # Only act on an unambiguous (single) match.
            if len(found_node) == 1:
                self.generateHandler(tree_node=tree, o=o, elem=elem)
        # <a>
        elif elem['type'] == "attrname":
            # Same as above, but the identifier is matched against the
            # attribute *name* instead of its value.
            found_node = etree.HTML(l[int(elem['lineno']) - 1]).xpath("//*[@*[re:test(name(.), '{0}', 'i')]]".format(
                elem['identifier']), namespaces={'re': "http://exslt.org/regular-expressions"})
            if len(found_node) == 1:
                self.generateHandler(tree_node=tree, o=o, elem=elem)
        # </a>
def html2xml(x, encoding='ascii'):
    """Coerce *x* into an lxml element tree.

    Objects already exposing ``write_c14n`` (lxml trees) are returned
    unchanged; anything else is converted to unicode text — decoded with
    *encoding* when it is a byte string — and parsed via ``etree.HTML``.
    Python 2 only: relies on ``basestring``/``unicode``.
    """
    if hasattr(x, 'write_c14n'): return x
    if not isinstance(x, basestring):
        # Prefer the object's own unicode conversion when it defines one.
        if hasattr(x, '__unicode__'): x = unicode(x)
        else: x = str(x)
    # Decode byte strings before handing them to the parser.
    if isinstance(x, str): x = unicode(x, encoding)
    return etree.HTML(x)
# NOTE(review): stray 'pass' — unreachable after the return above; the
# original indentation was lost, so its intended scope is unclear.
pass
# NOTE(review): fragment of a larger method (uses `self` and `url` defined
# outside this view); resolves a page URL to a direct image URL.
if "apod.nasa.gov" in url.netloc:
    try:
        file = urllib2.urlopen(url.geturl(), context=self.context)
        tree = etree.HTML(file.read())
        # The first <img> src on an APOD page is taken as the picture link.
        directUrl = tree.xpath('//img/@src')
        if len(directUrl):
            return "http://apod.nasa.gov/apod/" + directUrl[0]
    except:
        # Best effort: any failure falls through to the next matcher.
        pass
if "wikipedia.org" in url.netloc and "File:" in url.path:
    try:
        file = urllib2.urlopen(url.geturl(), context=self.context)
        tree = etree.HTML(file.read())
        # The 'fullMedia' div holds the link to the original full-size file.
        directUrl = tree.xpath('//div[@class="fullMedia"]/a/@href')[0]
        if len(directUrl):
            return "http:" + directUrl
    except:
        pass
# No matcher produced a direct URL.
return None
def requests_company_detail_data(company_id):
    """Fetch and parse the detail page for the company *company_id*.

    Raises:
        RequestsError: when the HTTP request fails at the transport level.
    """
    headers = generate_http_header()
    crawler_sleep()
    try:
        response = requests.get(
            url=constants.COMPANY_DETAIL_URL.format(company_id=company_id),
            headers=headers,
            cookies=Cookies.get_random_cookies(),
            allow_redirects=False,
            timeout=constants.TIMEOUT)
    except RequestException as e:
        logging.error(e)
        raise RequestsError(error_log=e)
    page = etree.HTML(response.text)
    # XPath selectors for each field scraped from the detail page.
    selectors = {
        'advantage': '//div[@id="tags_container"]//li/text()',
        'size': '//div[@id="basic_container"]//li[3]/span/text()',
        'address': '//p[@class="mlist_li_desc"]/text()',
        'introduce': '//span[@class="company_content"]//text()',
    }
    fields = {name: page.xpath(expr) for name, expr in selectors.items()}
    return format_tag(fields['advantage'], fields['address'], fields['size'],
                      fields['introduce'], company_id)
def get_api_url(wordpress_url):
    """Discover the WordPress REST API base URL for *wordpress_url*.

    Fetches the page and reads the ``<link rel="https://api.w.org/">``
    element that WordPress injects into every front-end page.

    Returns:
        The REST API base URL as a string.
    Raises:
        ValueError: when the page carries no REST API <link> element.
    """
    response = urllib2.urlopen(wordpress_url)
    try:
        data = etree.HTML(response.read())
    finally:
        # urlopen handles are not context managers in Python 2; close
        # explicitly so the socket is not leaked.
        response.close()
    links = data.xpath('//link[@rel="https://api.w.org/"]/@href')
    if not links:
        # Previously this crashed with an opaque IndexError on [0].
        raise ValueError(
            'no https://api.w.org/ <link> found at %s' % wordpress_url)
    u = links[0]
    # check if we have permalinks
    if 'rest_route' in u:
        print(' ! Warning, looks like permalinks are not enabled. This might not work!')
    return u
def get_captcha_info(self, pdf_captcha_response):
    """Extract captcha information from a PDF captcha response page.

    Args:
        pdf_captcha_response: PDF captcha response
    Returns:
        captcha_id: Captcha ID (or None when not found)
        captcha_img_url: Captcha image URL (or None when not found)
    """
    html = etree.HTML(pdf_captcha_response.content)
    imgs = html.xpath('//img[@id="captcha"]')
    ids = html.xpath('//input[@name="id"]')
    if not imgs or not ids:
        # Page does not carry both the captcha image and its id field.
        return None, None
    captcha_id = ids[0].attrib['value']
    src = imgs[0].attrib['src']
    if src.startswith('http'):
        # Already absolute.
        return captcha_id, src
    # Relative src: resolve against the response URL's origin.
    parsed = urlparse(pdf_captcha_response.url, scheme='http')
    return captcha_id, parsed.scheme + '://' + parsed.netloc + src
def _get_html(cls, html, url, html_etree, params, **kwargs):
    """Return an lxml tree from raw html, a URL fetch, or a prebuilt tree.

    Exactly one of *html*, *url*, *html_etree* is expected; raises
    ValueError when none of them is provided.
    """
    if html:
        return etree.HTML(html)
    if url:
        # Supply a default User-Agent unless the caller set real headers.
        if not kwargs.get('headers', None):
            kwargs['headers'] = {
                "User-Agent": get_random_user_agent()
            }
        response = requests.get(url, params, **kwargs)
        response.raise_for_status()
        raw = response.content
        # Decode with the detected charset rather than trusting defaults.
        detected = cchardet.detect(raw)
        return etree.HTML(raw.decode(detected['encoding']))
    if html_etree is not None:
        return html_etree
    raise ValueError("html(url or html_etree) is expected")