Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_no_title_via_brackets():
    """Check the section info returned for an untitled ``[n]``-marked list."""
    # NOTE(review): there is no comma between the last two literals, so Python
    # concatenates them into the single line "[1] Ref1[2] Ref2" -- confirm
    # whether two separate lines were intended.
    sect = get_reference_section_beginning([
        "Hello",
        "[1] Ref1"
        "[2] Ref2"
    ])
    assert sect == {
        'marker': '[1]',
        # Named groups need the (?P<name>...) form; a bare (?P...) is not a
        # valid pattern.
        # NOTE(review): the names left/marknum/right were restored after
        # markup stripping removed them -- confirm against the library's
        # marker regexes.
        'marker_pattern': u'(?P<mark>(?P<left>\\[)\\s*(?P<marknum>\\d+)\\s*(?P<right>\\]))',
        'start_line': 1,
        'title_string': None,
        'title_marker_same_line': False,
        'how_found_start': 2,
    }
def test_no_title_via_numbers2():
    """Exercise an untitled list whose bare-number markers sit on their own lines."""
    sect = get_reference_section_beginning([
        "Hello",
        "1",
        "Ref1",
        "(3)",
        "2",
        "Ref2",
    ])
    # NOTE(review): ``assert sect, {...}`` only checks that ``sect`` is truthy;
    # the dict is merely the assertion message. If an equality check was
    # intended this should read ``assert sect == {...}`` -- confirm the
    # expected values before tightening it.
    assert sect, {
        'marker': '1',
        # Named groups need the (?P<name>...) form; a bare (?P...) is not a
        # valid pattern.
        # NOTE(review): the names left/marknum/right were restored after
        # markup stripping removed them -- confirm against the library's
        # marker regexes.
        'marker_pattern': u'(?P<mark>(?P<left>)\\s*(?P<marknum>\\d+)\\s*(?P<right>))',
        'start_line': 1,
        'title_string': None,
        'title_marker_same_line': False,
        'how_found_start': 4,
    }
def test_no_title_via_dots():
    """Check the section info returned for an untitled ``n.``-marked list."""
    # NOTE(review): there is no comma between the last two literals, so Python
    # concatenates them into the single line "1. Ref12. Ref2" -- confirm
    # whether two separate lines were intended.
    sect = get_reference_section_beginning([
        "Hello",
        "1. Ref1"
        "2. Ref2"
    ])
    assert sect == {
        'marker': '1.',
        # Named groups need the (?P<name>...) form; a bare (?P...) is not a
        # valid pattern.
        # NOTE(review): the names left/marknum/right were restored after
        # markup stripping removed them -- confirm against the library's
        # marker regexes.
        'marker_pattern': u'(?P<mark>(?P<left>)\\s*(?P<marknum>\\d+)\\s*(?P<right>\\.))',
        'start_line': 1,
        'title_string': None,
        'title_marker_same_line': False,
        'how_found_start': 3,
    }
def test_no_section():
    """An empty input must yield no reference section at all."""
    assert get_reference_section_beginning("") is None
def test_no_title_via_numbers():
    """Check the section info returned for an untitled bare-number-marked list."""
    # NOTE(review): there is no comma between the last two literals, so Python
    # concatenates them into the single line "1 Ref12 Ref2" -- confirm
    # whether two separate lines were intended.
    sect = get_reference_section_beginning([
        "Hello",
        "1 Ref1"
        "2 Ref2"
    ])
    assert sect == {
        'marker': '1',
        # Named groups need the (?P<name>...) form; a bare (?P...) is not a
        # valid pattern.
        # NOTE(review): the names left/marknum/right were restored after
        # markup stripping removed them -- confirm against the library's
        # marker regexes.
        'marker_pattern': u'(?P<mark>(?P<left>)\\s*(?P<marknum>\\d+)\\s*(?P<right>))',
        'start_line': 1,
        'title_string': None,
        'title_marker_same_line': False,
        'how_found_start': 4,
    }
def test_simple():
    """Check the section info returned when a "References" title line is present."""
    sect = get_reference_section_beginning([
        "Hello",
        "References",
        "[1] Ref1"
    ])
    assert sect == {
        'marker': '[1]',
        # Named groups need the (?P<name>...) form; a bare (?P...) is not a
        # valid pattern.
        # NOTE(review): the name marknum was restored after markup stripping
        # removed it -- confirm against the library's marker regexes.
        'marker_pattern': u'\\s*(?P<mark>\\[\\s*(?P<marknum>\\d+)\\s*\\])',
        'start_line': 1,
        'title_string': 'References',
        'title_marker_same_line': False,
        'how_found_start': 1,
    }
>>> extract_references_from_string(path, reference_format="{title},{volume},{page}")
If you want to also link each reference to some other resource (like a record),
you can provide a linker_callback function to be executed for every reference
element found.
To override KBs for journal names etc., use ``override_kbs_files``:
>>> extract_references_from_string(path, override_kbs_files={'journals': 'my/path/to.kb'})
"""
docbody = source.split('\n')
if not is_only_references:
reflines, dummy, dummy = extract_references_from_fulltext(docbody)
else:
refs_info = get_reference_section_beginning(docbody)
if not refs_info:
refs_info, dummy = find_numeration_in_body(docbody)
refs_info['start_line'] = 0
refs_info['end_line'] = len(docbody) - 1,
reflines = rebuild_reference_lines(
docbody, refs_info['marker_pattern'])
parsed_refs, stats = parse_references(
reflines,
recid=recid,
reference_format=reference_format,
linker_callback=linker_callback,
override_kbs_files=override_kbs_files,
)
return parsed_refs
Return the extracted reference section as a list of strings, whereby each
string in the list is considered to be a single reference line.
E.g. a string could be something like:
'[19] Wilson, A. Unpublished (1986).'
@param fulltext: (list) of strings, whereby each string is a line of the
document.
@return: (list) of strings, where each string is an extracted reference
line.
"""
# Try to remove pagebreaks, headers, footers
fulltext = remove_page_boundary_lines(fulltext)
status = 0
# How ref section found flag
how_found_start = 0
# Find start of refs section
ref_sect_start = get_reference_section_beginning(fulltext)
if ref_sect_start is None:
# No References
refs = []
status = 4
LOGGER.debug(u"extract_references_from_fulltext: ref_sect_start is None")
else:
# If a reference section was found, however weak
ref_sect_end = \
find_end_of_reference_section(fulltext,
ref_sect_start["start_line"],
ref_sect_start["marker"],
ref_sect_start["marker_pattern"])
if ref_sect_end is None:
# No End to refs? Not safe to extract
refs = []