import pytest
import responses

# Import paths below are assumed from the refextract package layout.
from refextract import (
    extract_references_from_file,
    extract_references_from_url,
)
from refextract.references.errors import FullTextNotAvailableError


@responses.activate
def test_extract_references_from_url(pdf_files):
    with open(pdf_files[0], 'rb') as fd:
        url = "http://arxiv.org/pdf/1503.07589v1.pdf"
        responses.add(
            responses.GET,
            url,
            body=fd.read(),
            content_type='application/pdf'
        )

    r = extract_references_from_url(url)
    assert len(r) == 36

    with pytest.raises(FullTextNotAvailableError):
        url = "http://www.example.com"
        responses.add(
            responses.GET,
            url,
            body="File not found!",
            status=404,
            content_type='text/plain',
        )

        extract_references_from_url(url)


def test_extract_references_from_file(pdf_files):
    r = extract_references_from_file(pdf_files[0])
    assert 'texkey' in r[0]
    assert 'author' in r[0]
    assert len(r) == 36

    with pytest.raises(FullTextNotAvailableError):
        extract_references_from_file(pdf_files[0] + "error")


def extract_references_from_file(
    path,
    recid=None,
    reference_format=u"{title} {volume} ({year}) {page}",
    linker_callback=None,
    override_kbs_files=None,
):
    """Extract references from a local PDF file and return a list of parsed references.

    The default reference format is ``{title} {volume} ({year}) {page}``;
    you can change it by passing ``reference_format``:

    >>> extract_references_from_file(path, reference_format=u"{title},{volume},{page}")

    If you want to also link each reference to some other resource (like a
    record), you can provide a ``linker_callback`` function to be executed
    for every reference element found.
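
    For instance (an illustrative sketch rather than an excerpt from the
    library's documentation; the exact arguments passed to the callback
    depend on the refextract version in use, hence the ``*args`` signature):

    >>> def my_linker(*args):
    ...     return None  # e.g. look up and return a matching record here
    >>> extract_references_from_file(path, linker_callback=my_linker)
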
    To override KBs for journal names etc., use ``override_kbs_files``:

    >>> extract_references_from_file(path, override_kbs_files={'journals': 'my/path/to.kb'})
    """
    if not os.path.isfile(path):
        raise FullTextNotAvailableError(u"File not found: '{0}'".format(path))

    docbody = get_plaintext_document_body(path)
    reflines, dummy, dummy = extract_references_from_fulltext(docbody)
    if not reflines:
        # No reference lines found: retry with the original layout preserved.
        docbody = get_plaintext_document_body(path, keep_layout=True)
        reflines, dummy, dummy = extract_references_from_fulltext(docbody)

    parsed_refs, stats = parse_references(
        reflines,
        recid=recid,
        reference_format=reference_format,
        linker_callback=linker_callback,
        override_kbs_files=override_kbs_files,
    )

    if magic.from_file(path, mime=True) == "application/pdf":
        # For PDFs, texkeys found in the document metadata are attached to
        # the parsed references (one per reference) when the counts match.
        texkeys = extract_texkeys_from_pdf(path)
        if len(texkeys) == len(parsed_refs):
            parsed_refs = [dict(ref, texkey=[key])
                           for ref, key in zip(parsed_refs, texkeys)]

    return parsed_refs


def extract_references_from_url(url, headers=None, chunk_size=1024, **kwargs):
    """Extract references from the PDF file at the given URL."""
    # Download the document to a temporary file first.
    filename, filepath = mkstemp(
        suffix=u"_{0}".format(os.path.basename(url)),
    )
    os.close(filename)

    try:
        req = requests.get(
            url=url,
            headers=headers,
            stream=True
        )
        req.raise_for_status()
        with open(filepath, 'wb') as f:
            for chunk in req.iter_content(chunk_size):
                f.write(chunk)
        references = extract_references_from_file(filepath, **kwargs)
    except requests.exceptions.HTTPError as exc:
        raise FullTextNotAvailableError(f"URL not found: '{url}'") from exc
    finally:
        os.remove(filepath)

    return references
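

# A minimal usage sketch of the API defined above, based on the behaviour
# exercised by the tests at the top of this page (the arXiv URL and the
# 'author'/'texkey' keys come from the test assertions):
if __name__ == "__main__":
    try:
        refs = extract_references_from_url("http://arxiv.org/pdf/1503.07589v1.pdf")
    except FullTextNotAvailableError as exc:
        print(exc)
        refs = []

    for ref in refs:
        print(ref.get('author'), ref.get('texkey'))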