Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_tokenize_multiline_II():
# Make sure multiline string having no newlines have the end marker on
# same line
fundef = '''""""'''
token_list = _get_token_list(fundef)
assert token_list == [PythonToken(ERRORTOKEN, '""""', (1, 0), ''),
PythonToken(ENDMARKER, '', (1, 4), '')]
yield PythonToken(STRING, token, spos, prefix)
elif token in fstring_pattern_map: # The start of an fstring.
fstring_stack.append(FStringNode(fstring_pattern_map[token]))
yield PythonToken(FSTRING_START, token, spos, prefix)
elif is_identifier(initial): # ordinary name
if token in always_break_tokens:
fstring_stack.clear()
paren_level = 0
while True:
indent = indents.pop()
if indent > start:
yield PythonToken(DEDENT, '', spos, '')
else:
indents.append(indent)
break
yield PythonToken(NAME, token, spos, prefix)
elif initial == '\\' and line[start:] in ('\\\n', '\\\r\n'): # continued stmt
additional_prefix += prefix + line[start:]
break
else:
if token in '([{':
if fstring_stack:
fstring_stack[-1].open_parentheses(token)
else:
paren_level += 1
elif token in ')]}':
if fstring_stack:
fstring_stack[-1].close_parentheses(token)
else:
paren_level -= 1
elif token == ':' and fstring_stack \
and fstring_stack[-1].parentheses_count == 1:
(lnum, spos[1] + 1),
prefix=''
)
del fstring_stack[fstring_index:]
pos -= len(token) - end
continue
if (initial in numchars or # ordinary number
(initial == '.' and token != '.' and token != '...')):
yield PythonToken(NUMBER, token, spos, prefix)
elif initial in '\r\n':
if any(not f.allow_multiline() for f in fstring_stack):
fstring_stack.clear()
if not new_line and paren_level == 0 and not fstring_stack:
yield PythonToken(NEWLINE, token, spos, prefix)
else:
additional_prefix = prefix + token
new_line = True
elif initial == '#': # Comments
assert not token.endswith("\n")
additional_prefix = prefix + token
elif token in triple_quoted:
endprog = endpats[token]
endmatch = endprog.match(line, pos)
if endmatch: # all on one line
pos = endmatch.end(0)
token = line[start:pos]
yield PythonToken(STRING, token, spos, prefix)
else:
contstr_start = (lnum, start) # multiple lines
contstr = line[start:]
is_first_token = False
# In case of omitted_first_indent, it might not be dedented fully.
# However this is a sign for us that a dedent happened.
if typ == DEDENT \
or typ == ERROR_DEDENT and omitted_first_indent and len(indents) == 1:
indents.pop()
if omitted_first_indent and not indents:
# We are done here, only thing that can come now is an
# endmarker or another dedented code block.
typ, string, start_pos, prefix = next(tokens)
if '\n' in prefix:
prefix = re.sub(r'(<=\n)[^\n]+$', '', prefix)
else:
prefix = ''
yield PythonToken(ENDMARKER, '', (start_pos[0] + line_offset, 0), prefix)
break
elif typ == NEWLINE and start_pos[0] >= until_line:
yield PythonToken(typ, string, start_pos, prefix)
# Check if the parser is actually in a valid suite state.
if suite_or_file_input_is_valid(self._pgen_grammar, stack):
start_pos = start_pos[0] + 1, 0
while len(indents) > int(omitted_first_indent):
indents.pop()
yield PythonToken(DEDENT, '', start_pos, '')
yield PythonToken(ENDMARKER, '', start_pos, '')
break
else:
continue
yield PythonToken(typ, string, start_pos, prefix)
if new_line and paren_level == 0 and not fstring_stack:
for t in dedent_if_necessary(match.end()):
yield t
pos = match.end()
new_line = False
yield PythonToken(
ERRORTOKEN, line[pos], (lnum, pos),
additional_prefix + match.group(0)
)
additional_prefix = ''
pos += 1
continue
if (initial in numchars # ordinary number
or (initial == '.' and token != '.' and token != '...')):
yield PythonToken(NUMBER, token, spos, prefix)
elif pseudomatch.group(3) is not None: # ordinary name
if token in always_break_tokens and (fstring_stack or paren_level):
fstring_stack[:] = []
paren_level = 0
# We only want to dedent if the token is on a new line.
m = re.match(r'[ \f\t]*$', line[:start])
if m is not None:
for t in dedent_if_necessary(m.end()):
yield t
if is_identifier(token):
yield PythonToken(NAME, token, spos, prefix)
else:
for t in _split_illegal_unicode_name(token, spos, prefix):
yield t # yield from Python 2
elif initial in '\r\n':
if any(not f.allow_multiline() for f in fstring_stack):
break
elif initial in single_quoted or \
token[:2] in single_quoted or \
token[:3] in single_quoted:
if token[-1] == '\n': # continued string
contstr_start = lnum, start
endprog = (endpats.get(initial) or endpats.get(token[1])
or endpats.get(token[2]))
contstr = line[start:]
contline = line
break
else: # ordinary string
yield PythonToken(STRING, token, spos, prefix)
elif token in fstring_pattern_map: # The start of an fstring.
fstring_stack.append(FStringNode(fstring_pattern_map[token]))
yield PythonToken(FSTRING_START, token, spos, prefix)
elif is_identifier(initial): # ordinary name
if token in always_break_tokens:
fstring_stack.clear()
paren_level = 0
while True:
indent = indents.pop()
if indent > start:
yield PythonToken(DEDENT, '', spos, '')
else:
indents.append(indent)
break
yield PythonToken(NAME, token, spos, prefix)
elif initial == '\\' and line[start:] in ('\\\n', '\\\r\n'): # continued stmt
additional_prefix += prefix + line[start:]
break
else:
start -= 1
if start > indents[-1]:
yield PythonToken(INDENT, '', spos, '')
indents.append(start)
while start < indents[-1]:
if start > indents[-2]:
yield PythonToken(ERROR_DEDENT, '', (lnum, 0), '')
break
yield PythonToken(DEDENT, '', spos, '')
indents.pop()
if fstring_stack:
fstring_index, end = _check_fstring_ending(fstring_stack, token)
if fstring_index is not None:
if end != 0:
yield PythonToken(ERRORTOKEN, token[:end], spos, prefix)
yield PythonToken(
FSTRING_END,
fstring_stack[fstring_index].quote,
(lnum, spos[1] + 1),
prefix=''
)
del fstring_stack[fstring_index:]
pos -= len(token) - end
continue
if (initial in numchars or # ordinary number
(initial == '.' and token != '.' and token != '...')):
yield PythonToken(NUMBER, token, spos, prefix)
elif initial in '\r\n':
if any(not f.allow_multiline() for f in fstring_stack):
if '\n' in prefix:
prefix = re.sub(r'(<=\n)[^\n]+$', '', prefix)
else:
prefix = ''
yield PythonToken(ENDMARKER, '', (start_pos[0] + line_offset, 0), prefix)
break
elif typ == NEWLINE and start_pos[0] >= until_line:
yield PythonToken(typ, string, start_pos, prefix)
# Check if the parser is actually in a valid suite state.
if suite_or_file_input_is_valid(self._pgen_grammar, stack):
start_pos = start_pos[0] + 1, 0
while len(indents) > int(omitted_first_indent):
indents.pop()
yield PythonToken(DEDENT, '', start_pos, '')
yield PythonToken(ENDMARKER, '', start_pos, '')
break
else:
continue
yield PythonToken(typ, string, start_pos, prefix)
initial = token[0]
if new_line and initial not in '\r\n#':
new_line = False
if paren_level == 0 and not fstring_stack:
i = 0
while line[i] == '\f':
i += 1
# TODO don't we need to change spos as well?
start -= 1
if start > indents[-1]:
yield PythonToken(INDENT, '', spos, '')
indents.append(start)
while start < indents[-1]:
if start > indents[-2]:
yield PythonToken(ERROR_DEDENT, '', (lnum, 0), '')
break
yield PythonToken(DEDENT, '', spos, '')
indents.pop()
if fstring_stack:
fstring_index, end = _check_fstring_ending(fstring_stack, token)
if fstring_index is not None:
if end != 0:
yield PythonToken(ERRORTOKEN, token[:end], spos, prefix)
yield PythonToken(
FSTRING_END,
fstring_stack[fstring_index].quote,
(lnum, spos[1] + 1),
prefix=''
)
if start_pos[1] - len(prefix) == 0:
prefix = ''
yield PythonToken(
ENDMARKER, '',
start_pos,
prefix
)
break
elif typ == NEWLINE and token.start_pos[0] >= until_line:
was_newline = True
elif was_newline:
was_newline = False
if len(indents) == initial_indentation_count:
# Check if the parser is actually in a valid suite state.
if _suite_or_file_input_is_valid(self._pgen_grammar, stack):
yield PythonToken(ENDMARKER, '', token.start_pos, '')
break
if typ == NAME and token.string in ('class', 'def'):
self._keyword_token_indents[token.start_pos] = list(indents)
yield token