1161 lines
41 KiB
Python
1161 lines
41 KiB
Python
|
|
import re, collections, enum
|
||
|
|
|
||
|
|
from bashlex import flags, shutils, utils, errors, heredoc, state
|
||
|
|
|
||
|
|
# Character-class table mirroring bash's sh_syntaxtab: maps a character to
# the set of syntax classes it belongs to. Unknown characters map to an
# empty set (note: indexing inserts that empty set -- a defaultdict side
# effect the predicate helpers below rely on being harmless).
sh_syntaxtab = collections.defaultdict(set)

def _addsyntax(chars, symbol):
    '''Tag every character in chars as belonging to syntax class symbol.'''
    for ch in chars:
        sh_syntaxtab[ch].add(symbol)

# populate the table with the classes bash defines in syntax.h
for _chars, _symbol in (
        ('\\`$"\n', 'dquote'),      # special inside double quotes
        ('()<>;&|', 'meta'),        # shell metacharacters
        ('"`\'', 'quote'),          # quote characters
        ('$<>', 'exp'),             # expansion / redirection introducers
        ("()<>;&| \t\n", 'break'),  # word-break characters
):
    _addsyntax(_chars, _symbol)
|
||
|
|
|
||
|
|
def _shellblank(c):
|
||
|
|
return c in ' \t'
|
||
|
|
|
||
|
|
def _shellmeta(c):
    # True if c is a shell metacharacter: one of ( ) < > ; & |
    return 'meta' in sh_syntaxtab[c]
|
||
|
|
|
||
|
|
def _shellquote(c):
    # True if c is a quote character: " ` '
    return 'quote' in sh_syntaxtab[c]
|
||
|
|
|
||
|
|
def _shellexp(c):
    # True if c can introduce an expansion or process substitution: $ < >
    return 'exp' in sh_syntaxtab[c]
|
||
|
|
|
||
|
|
def _shellbreak(c):
    # True if c ends a word: a metacharacter, space, tab or newline
    return 'break' in sh_syntaxtab[c]
|
||
|
|
|
||
|
|
class tokentype(enum.Enum):
    '''Token types produced by the tokenizer.

    Members whose value is a string can be looked up directly from the
    operator text via tokentype(text) (see _readtoken, which does
    tokentype(character)); the integer values are arbitrary unique ids
    for types that are never looked up by value.
    '''
    IF = 1
    THEN = 2
    ELSE = 3
    ELIF = 4
    FI = 5
    CASE = 6
    ESAC = 7
    FOR = 8
    SELECT = 9
    WHILE = 10
    UNTIL = 11
    DO = 12
    DONE = 13
    FUNCTION = 14
    COPROC = 15
    COND_START = 16
    COND_END = 17
    # https://github.com/idank/bashlex/issues/20
    # COND_ERROR = 18
    IN = 19
    BANG = '!'
    TIME = 21
    TIMEOPT = 22
    TIMEIGN = 23
    WORD = 24
    ASSIGNMENT_WORD = 25
    REDIR_WORD = 26
    NUMBER = 27
    ARITH_CMD = 28
    ARITH_FOR_EXPRS = 29
    COND_CMD = 30
    AND_AND = '&&'
    OR_OR = '||'
    GREATER_GREATER = '>>'
    LESS_LESS = '<<'
    LESS_AND = '<&'
    LESS_LESS_LESS = '<<<'
    GREATER_AND = '>&'
    SEMI_SEMI = ';;'
    SEMI_AND = ';&'
    SEMI_SEMI_AND = ';;&'
    LESS_LESS_MINUS = '<<-'
    AND_GREATER = '&>'
    AND_GREATER_GREATER = '&>>'
    LESS_GREATER = '<>'
    GREATER_BAR = '>|'
    BAR_AND = '|&'
    LEFT_CURLY = 47
    RIGHT_CURLY = 48
    # '$end' is what yacc expects its EOF token to be named
    EOF = '$end'
    LEFT_PAREN = '('
    RIGHT_PAREN = ')'
    BAR = '|'
    SEMICOLON = ';'
    DASH = '-'
    NEWLINE = '\n'
    LESS = '<'
    GREATER = '>'
    AMPERSAND = '&'
|
||
|
|
|
||
|
|
# tokens after which a reserved word is acceptable (consulted by
# _reserved_word_acceptable); single-character command separators are
# matched by token *value*, the rest by token type
_reserved = {
    tokentype.AND_AND, tokentype.BANG, tokentype.BAR_AND, tokentype.DO,
    tokentype.DONE, tokentype.ELIF, tokentype.ELSE, tokentype.ESAC,
    tokentype.FI, tokentype.IF, tokentype.OR_OR, tokentype.SEMI_SEMI,
    tokentype.SEMI_AND, tokentype.SEMI_SEMI_AND, tokentype.THEN,
    tokentype.TIME, tokentype.TIMEOPT, tokentype.TIMEIGN, tokentype.COPROC,
    tokentype.UNTIL, tokentype.WHILE,
}

# add each separator character individually
_reserved.update('\n;()|&{}')
|
||
|
|
|
||
|
|
# word_token_alist
# maps a literal word to the reserved-word token type it produces when it
# appears in command position (mirrors bash/parse.y's word_token_alist)
valid_reserved_first_command = {
    "if" : tokentype.IF,
    "then" : tokentype.THEN,
    "else" : tokentype.ELSE,
    "elif" : tokentype.ELIF,
    "fi" : tokentype.FI,
    "case" : tokentype.CASE,
    "esac" : tokentype.ESAC,
    "for" : tokentype.FOR,
    "select" : tokentype.SELECT,
    "while" : tokentype.WHILE,
    "until" : tokentype.UNTIL,
    "do" : tokentype.DO,
    "done" : tokentype.DONE,
    "in" : tokentype.IN,
    "function" : tokentype.FUNCTION,
    "time" : tokentype.TIME,
    "{" : tokentype.LEFT_CURLY,
    "}" : tokentype.RIGHT_CURLY,
    "!" : tokentype.BANG,
    "[[" : tokentype.COND_START,
    "]]" : tokentype.COND_END,
    "coproc" : tokentype.COPROC
}
|
||
|
|
|
||
|
|
class MatchedPairError(errors.ParsingError):
    '''Raised when a matched pair (quotes, braces, parentheses...) is left
    unterminated; the error position is the tokenizer's current index.'''
    def __init__(self, startline, message, tokenizer):
        # TODO use startline?
        super(MatchedPairError, self).__init__(message,
                                               tokenizer.source,
                                               tokenizer._shell_input_line_index - 1)
|
||
|
|
|
||
|
|
# short aliases for the flag enums used throughout this module
wordflags = flags.word
parserflags = flags.parser
|
||
|
|
|
||
|
|
class token(object):
    '''A single token produced by the tokenizer.

    Carries the token type (a tokentype member, or None for the empty
    placeholder token), its value, optional [start, end) positions into the
    source, and a set of word flags.
    '''
    def __init__(self, type_, value, pos=None, flags=None):
        if type_ is not None:
            assert isinstance(type_, tokentype)

        self.ttype = type_
        self.value = value
        self.flags = set() if flags is None else flags

        if pos is None:
            self.lexpos = self.endlexpos = None
        else:
            self.lexpos, self.endlexpos = pos
            assert self.lexpos < self.endlexpos, (self.lexpos, self.endlexpos)

    @property
    def type(self):
        '''The token type name as the parser expects to see it.'''
        if not self.ttype:
            return None
        # make yacc see our EOF token as its own special one $end
        return '$end' if self.ttype == tokentype.EOF else self.ttype.name

    def __nonzero__(self):
        # the placeholder token(None, None) is falsy
        return self.ttype is not None or self.value is not None

    __bool__ = __nonzero__

    def __eq__(self, other):
        if not isinstance(other, token):
            return False
        return ((self.type, self.value, self.lexpos,
                 self.endlexpos, self.flags) ==
                (other.type, other.value, other.lexpos,
                 other.endlexpos, other.flags))

    def __repr__(self):
        parts = ['<', self.type]
        if self.lexpos is not None and self.endlexpos is not None:
            parts.append('@%d:%d' % (self.lexpos, self.endlexpos))
        if self.value:
            parts.append(' ')
            parts.append(repr(self.value))

        if self.flags:
            parts.append(' (%s)' % ' '.join(e.name for e in self.flags))
        parts.append('>')
        return ''.join(parts)

    def nopos(self):
        '''Return a copy of this token with position information dropped.'''
        return self.__class__(self.ttype, self.value, flags=self.flags)
|
||
|
|
|
||
|
|
# sentinel token returned when the input is exhausted
eoftoken = token(tokentype.EOF, None)
|
||
|
|
|
||
|
|
class tokenizer(object):
|
||
|
|
    def __init__(self, s, parserstate, strictmode=True, eoftoken=None,
                 lastreadtoken=None, tokenbeforethat=None, twotokensago=None):
        '''Tokenize the shell source string s.

        parserstate is a shared set of parser flags (mutated in place); the
        three *token arguments allow resuming with the lookbehind state of a
        previous tokenizer instance.
        '''
        self._shell_eof_token = eoftoken
        self._shell_input_line = s
        self._added_newline = False
        # make sure the input always ends with a newline, remembering that
        # we added it so positions/source can compensate
        if self._shell_input_line and self._shell_input_line[-1] != '\n':
            self._shell_input_line += '\n' # bash/parse.y L2431
            self._added_newline = True
        self._shell_input_line_index = 0
        # self._shell_input_line_terminator = None
        # three tokens of lookbehind, rotated by token()
        self._two_tokens_ago = twotokensago or token(None, None)
        self._token_before_that = tokenbeforethat or token(None, None)
        self._last_read_token = lastreadtoken or token(None, None)
        self._current_token = token(None, None)

        # This implements one-character lookahead/lookbehind across physical
        # input lines, to avoid something being lost because it's pushed back
        # with shell_ungetc when we're at the start of a line.
        self._eol_ungetc_lookahead = None

        # token waiting to be read
        self._token_to_read = None

        self._parserstate = parserstate
        self._line_number = 0
        self._open_brace_count = 0
        self._esacs_needed_count = 0

        # stack of currently open quoting/grouping delimiter characters
        self._dstack = []

        # a stack of positions to record the start and end of a token
        self._positions = []

        self._strictmode = strictmode

        # hack: the tokenizer needs access to the stack of redirection
        # nodes when it reads heredocs. this instance is shared between
        # the tokenizer and the parser, which also needs it
        self.redirstack = []
|
||
|
|
|
||
|
|
@property
|
||
|
|
def source(self):
|
||
|
|
if self._added_newline:
|
||
|
|
return self._shell_input_line[:-1]
|
||
|
|
return self._shell_input_line
|
||
|
|
|
||
|
|
    def __iter__(self):
        '''Yield tokens until EOF; the implicitly appended newline token,
        if any, is not yielded.'''
        while True:
            t = self.token()
            # we're finished when we see the eoftoken OR when we added a newline
            # to the input and we're there now
            if t is eoftoken or (self._added_newline and
                                 t.lexpos + 1 == len(self._shell_input_line)):
                break
            yield t
|
||
|
|
|
||
|
|
def _createtoken(self, type_, value, flags=None):
|
||
|
|
'''create a token with position information'''
|
||
|
|
pos = None
|
||
|
|
assert len(self._positions) >= 2, (type_, value)
|
||
|
|
p2 = self._positions.pop()
|
||
|
|
p1 = self._positions.pop()
|
||
|
|
pos = [p1, p2]
|
||
|
|
return token(type_, value, pos, flags)
|
||
|
|
|
||
|
|
    def token(self):
        '''Read and return the next token, rotating the lookbehind tokens.'''
        # shift the three-token lookbehind window one token to the left
        self._two_tokens_ago, self._token_before_that, self._last_read_token = \
            self._token_before_that, self._last_read_token, self._current_token

        self._current_token = self._readtoken()
        # _readtoken may return a bare tokentype member; wrap it into a token
        # carrying the positions recorded while it was read
        if isinstance(self._current_token, tokentype):
            self._recordpos()
            self._current_token = self._createtoken(self._current_token,
                                                    self._current_token.value)

        # if the caller designated a special eof token type, translate it
        if (self._parserstate & parserflags.EOFTOKEN and
            self._current_token.ttype == self._shell_eof_token):
            self._current_token = eoftoken
            # bash/parse.y L2626
            self._parserstate.discard(parserflags.EOFTOKEN)

        return self._current_token
|
||
|
|
|
||
|
|
    def _readtoken(self):
        '''Read a single token: an operator, a newline, or (delegating to
        _readtokenword) a word/number.

        May return either a token instance or a bare tokentype member; the
        caller (token()) wraps the latter with position information.
        '''
        character = None
        peek_char = None

        # a token may have been pushed back by the parser
        if self._token_to_read is not None:
            t = self._token_to_read
            self._token_to_read = None
            return t

        # bashlex/parse.y L2989 COND_COMMAND

        # skip leading blanks
        character = self._getc(True)
        while character is not None and _shellblank(character):
            character = self._getc(True)

        if character is None:
            return eoftoken

        # a comment runs to the end of the line; treat it as the newline
        if character == '#':
            self._discard_until('\n')
            self._getc(False)
            character = '\n'

        # record the start position of this token
        self._recordpos(1)

        if character == '\n':
            # bashlex/parse.y L3034 ALIAS
            # pending heredoc bodies start after the line that opened them
            heredoc.gatherheredocuments(self)

            self._parserstate.discard(parserflags.ASSIGNOK)
            return tokentype(character)

        # inside [[ ]] everything is read as a word
        if self._parserstate & parserflags.REGEXP:
            return self._readtokenword(character)

        # operator tokens: peek one character to disambiguate e.g.
        # '<' vs '<<' vs '<<<'
        if _shellmeta(character) and not (self._parserstate & parserflags.DBLPAREN):
            self._parserstate.discard(parserflags.ASSIGNOK)
            peek_char = self._getc(True)

            both = character
            if peek_char:
                both += peek_char
            if character == peek_char:
                if character == '<':
                    peek_char = self._getc()
                    if peek_char == '-':
                        return tokentype.LESS_LESS_MINUS
                    elif peek_char == '<':
                        return tokentype.LESS_LESS_LESS
                    else:
                        self._ungetc(peek_char)
                        return tokentype.LESS_LESS
                elif character == '>':
                    return tokentype.GREATER_GREATER
                elif character == ';':
                    # ';;' ends a case pattern; ';;&' also falls through
                    self._parserstate |= parserflags.CASEPAT
                    # bashlex/parse.y L3085 ALIAS

                    peek_char = self._getc()
                    if peek_char == '&':
                        return tokentype.SEMI_SEMI_AND
                    else:
                        self._ungetc(peek_char)
                        return tokentype.SEMI_SEMI
                elif character == '&':
                    return tokentype.AND_AND
                elif character == '|':
                    return tokentype.OR_OR
            # bashlex/parse.y L3105
            elif both == '<&':
                return tokentype.LESS_AND
            elif both == '>&':
                return tokentype.GREATER_AND
            elif both == '<>':
                return tokentype.LESS_GREATER
            elif both == '>|':
                return tokentype.GREATER_BAR
            elif both == '&>':
                peek_char = self._getc()
                if peek_char == '>':
                    return tokentype.AND_GREATER_GREATER
                else:
                    self._ungetc(peek_char)
                    return tokentype.AND_GREATER
            elif both == '|&':
                return tokentype.BAR_AND
            elif both == ';&':
                return tokentype.SEMI_AND

            # single-character operator; push the peeked character back
            self._ungetc(peek_char)
            # 'name ()' may open a function definition body
            if character == ')' and self._last_read_token.value == '(' and self._token_before_that.ttype == tokentype.WORD:
                self._parserstate.add(parserflags.ALLOWOPNBRC)
                # bashlex/parse.y L3155

            # track subshell / case-pattern parentheses
            if character == '(' and not self._parserstate & parserflags.CASEPAT:
                self._parserstate.add(parserflags.SUBSHELL)
            elif self._parserstate & parserflags.CASEPAT and character == ')':
                self._parserstate.discard(parserflags.CASEPAT)
            elif self._parserstate & parserflags.SUBSHELL and character == ')':
                self._parserstate.discard(parserflags.SUBSHELL)

            # anything except process substitution '<(' / '>(' is an operator
            if character not in '<>' or peek_char != '(':
                return tokentype(character)

        # a lone '-' directly after <& or >& (e.g. 2>&-) closes the fd
        if character == '-' and (self._last_read_token.ttype == tokentype.LESS_AND or self._last_read_token.ttype == tokentype.GREATER_AND):
            return tokentype(character)

        return self._readtokenword(character)
|
||
|
|
|
||
|
|
    def _readtokenword(self, c):
        '''Read characters starting at c until a word boundary, handling
        quoting and $-expansions, and return the resulting token (WORD,
        NUMBER, ASSIGNMENT_WORD, REDIR_WORD or a reserved word).'''
        # mutable state shared with the nested handlers below; a dict is
        # used so the closures can rebind values on both python 2 and 3
        d = {}
        d['all_digit_token'] = c.isdigit()
        d['dollar_present'] = d['quoted'] = d['pass_next_character'] = d['compound_assignment'] = False

        # characters accumulated for the word so far
        tokenword = []

        def handleshellquote():
            # consume a complete quoted string opened by quote char c
            self._push_delimiter(c)
            try:
                ttok = self._parse_matched_pair(c, c, c, parsingcommand=(c == '`'))
            finally:
                self._pop_delimiter()

            tokenword.append(c)
            tokenword.extend(ttok)
            d['all_digit_token'] = False
            d['quoted'] = True
            if not d['dollar_present']:
                d['dollar_present'] = c == '"' and '$' in ttok

        def handleshellexp():
            # handle $(..) ${..} $[..] $'..' $".." $$ and <(..)/>(..);
            # returns True when the character turned out NOT to start an
            # expansion (falls through to plain-character handling)
            peek_char = self._getc()
            if peek_char == '(' or (c == '$' and peek_char in '{['):
                # try:
                if peek_char == '{':
                    ttok = self._parse_matched_pair(cd, '{', '}', firstclose=True, dolbrace=True)
                elif peek_char == '(':
                    self._push_delimiter(peek_char)
                    ttok = self._parse_comsub(cd, '(', ')', parsingcommand=True)
                    self._pop_delimiter()
                else:
                    ttok = self._parse_matched_pair(cd, '[', ']')
                # except MatchedPairError:
                #     return -1

                tokenword.append(c)
                tokenword.append(peek_char)
                tokenword.extend(ttok)
                d['dollar_present'] = True
                d['all_digit_token'] = False

                # goto next_character
            elif c == '$' and peek_char in '\'"':
                # $'...' (ANSI-C quoting) and $"..." (locale translation)
                self._push_delimiter(peek_char)
                try:
                    ttok = self._parse_matched_pair(peek_char, peek_char, peek_char,
                                                    allowesc=(peek_char == "'"))
                # except MatchedPairError:
                #     return -1
                finally:
                    self._pop_delimiter()

                #if peek_char == "'":
                #    # XXX ansiexpand
                #    ttok = shutils.single_quote(ttok)
                #else:
                #    ttok = shutils.double_quote(ttok)

                tokenword.append(c)
                tokenword.append(peek_char)
                tokenword.extend(ttok)
                d['quoted'] = True
                d['all_digit_token'] = False

                # goto next_character
            elif c == '$' and peek_char == '$':
                tokenword.append('$')
                tokenword.append('$')
                d['dollar_present'] = True
                d['all_digit_token'] = False

                # goto next_character
            else:
                self._ungetc(peek_char)
                return True

        # bashlex/parse.y L4699 ARRAY_VARS

        def handleescapedchar():
            # append c to the word verbatim, updating the digit/$ trackers
            tokenword.append(c)
            d['all_digit_token'] &= c.isdigit()
            if not d['dollar_present']:
                d['dollar_present'] = c == '$'

        while True:
            if c is None:
                break

            if d['pass_next_character']:
                # previous character was an escaping backslash
                d['pass_next_character'] = False
                handleescapedchar()
                # goto escaped_character
            else:
                cd = self._current_delimiter()
                gotonext = False
                if c == '\\':
                    peek_char = self._getc(False)

                    if peek_char == '\n':
                        # line continuation: swallow the backslash-newline
                        c = '\n'
                        gotonext = True
                        # goto next_character
                    else:
                        self._ungetc(peek_char)

                        # backslash escapes when unquoted, inside backquotes,
                        # or before a dquote-special character inside "..."
                        if (cd is None or cd == '`' or
                            (cd == '"' and peek_char is not None and
                             'dquote' in sh_syntaxtab[peek_char])):
                            d['pass_next_character'] = True
                            d['quoted'] = True

                        handleescapedchar()
                        gotonext = True
                        # goto got_character
                elif _shellquote(c):
                    handleshellquote()
                    gotonext = True
                    # goto next_character
                # bashlex/parse.y L4542
                # bashlex/parse.y L4567
                elif _shellexp(c):
                    gotonext = not handleshellexp()
                # bashlex/parse.y L4699
                if not gotonext:
                    if _shellbreak(c):
                        # word boundary: push the terminator back for the
                        # next _readtoken call
                        self._ungetc(c)
                        break
                    else:
                        handleescapedchar()

            # got_character
            # got_escaped_character

            # tokenword.append(c)
            # all_digit_token &= c.isdigit()
            # if not dollar_present:
            #     dollar_present = c == '$'

            # next_character
            cd = self._current_delimiter()
            c = self._getc(cd != "'" and not d['pass_next_character'])

        # got_token
        self._recordpos()

        tokenword = ''.join(tokenword)

        # a pure number directly next to a redirection is a file descriptor
        if d['all_digit_token'] and (c in '<>' or self._last_read_token.ttype in (tokentype.LESS_AND, tokentype.GREATER_AND)) and shutils.legal_number(tokenword):
            return self._createtoken(tokentype.NUMBER, int(tokenword))

        # bashlex/parse.y L4811
        specialtokentype = self._specialcasetokens(tokenword)
        if specialtokentype:
            return self._createtoken(specialtokentype, tokenword)

        # an unquoted plain word in command position may be a reserved word
        if not d['dollar_present'] and not d['quoted'] and self._reserved_word_acceptable(self._last_read_token):
            if tokenword in valid_reserved_first_command:
                ttype = valid_reserved_first_command[tokenword]
                ps = self._parserstate
                if ps & parserflags.CASEPAT and ttype != tokentype.ESAC:
                    pass
                elif ttype == tokentype.TIME and not self._time_command_acceptable():
                    pass
                elif ttype == tokentype.ESAC:
                    ps.discard(parserflags.CASEPAT)
                    ps.discard(parserflags.CASESTMT)
                elif ttype == tokentype.CASE:
                    ps.add(parserflags.CASESTMT)
                elif ttype == tokentype.COND_END:
                    ps.discard(parserflags.CONDCMD)
                    ps.discard(parserflags.CONDEXPR)
                elif ttype == tokentype.COND_START:
                    ps.add(parserflags.CONDCMD)
                elif ttype == tokentype.LEFT_CURLY:
                    self._open_brace_count += 1
                elif ttype == tokentype.RIGHT_CURLY and self._open_brace_count:
                    self._open_brace_count -= 1
                return self._createtoken(ttype, tokenword)

        tokenword = self._createtoken(tokentype.WORD, tokenword, utils.typedset(wordflags))
        if d['dollar_present']:
            tokenword.flags.add(wordflags.HASDOLLAR)
        if d['quoted']:
            tokenword.flags.add(wordflags.QUOTED)
        # NOTE(review): tokenword is a token object here, so tokenword[-1]
        # would raise -- unreachable since compound_assignment is never set
        # to True in this port; confirm before enabling compound assignments
        if d['compound_assignment'] and tokenword[-1] == ')':
            tokenword.flags.add(wordflags.COMPASSIGN)
        if self._is_assignment(tokenword.value, bool(self._parserstate & parserflags.COMPASSIGN)):
            tokenword.flags.add(wordflags.ASSIGNMENT)
            if self._assignment_acceptable(self._last_read_token):
                tokenword.flags.add(wordflags.NOSPLIT)
        if self._parserstate & parserflags.COMPASSIGN:
            tokenword.flags.add(wordflags.NOGLOB)

        # bashlex/parse.y L4865
        if self._command_token_position(self._last_read_token):
            pass

        # {name}>file style varassign redirection
        if tokenword.value[0] == '{' and tokenword.value[-1] == '}' and c in '<>':
            if shutils.legal_identifier(tokenword.value[1:]):
                # XXX is this needed?
                tokenword.value = tokenword.value[1:]
                tokenword.ttype = tokentype.REDIR_WORD

            return tokenword

        # both ASSIGNMENT and NOSPLIT present -> a real assignment word
        if len(tokenword.flags & set([wordflags.ASSIGNMENT, wordflags.NOSPLIT])) == 2:
            tokenword.ttype = tokentype.ASSIGNMENT_WORD

        if self._last_read_token.ttype == tokentype.FUNCTION:
            self._parserstate.add(parserflags.ALLOWOPNBRC)
            self._function_dstart = self._line_number
        elif self._last_read_token.ttype in (tokentype.CASE, tokentype.SELECT, tokentype.FOR):
            pass # bashlex/parse.y L4907

        return tokenword
|
||
|
|
|
||
|
|
    def _parse_comsub(self, doublequotes, open, close, parsingcommand=False,
                      dquote=False, firstclose=False):
        '''Read the body of a command substitution $(...), tracking enough
        shell syntax (comments, case statements, heredocs) to tell which
        close characters really terminate it. Returns the consumed text,
        including the final close character.'''
        # arithmetic $(( )) is handled as a plain matched pair
        peekc = self._getc(False)
        self._ungetc(peekc)

        if peekc == '(':
            return self._parse_matched_pair(doublequotes, open, close)

        count = 1
        dollarok = True

        # only track case/comments when substituting an actual command
        checkcase = bool(parsingcommand and (doublequotes is None or doublequotes not in "'\"") and not dquote)
        checkcomment = checkcase

        startlineno = self._line_number
        heredelim = ''
        stripdoc = insideheredoc = insidecomment = insideword = insidecase = False
        readingheredocdelim = False
        wasdollar = passnextchar = False
        reservedwordok = True
        lexfirstind = -1
        lexrwlen = 0

        ret = ''

        while count:
            c = self._getc(doublequotes != "'" and not insidecomment and not passnextchar)

            if c is None:
                raise MatchedPairError(startlineno, 'unexpected EOF while looking for matching %r' % close, self)

            # bashlex/parse.y L3571
            if c == '\n':
                if readingheredocdelim and heredelim:
                    # delimiter line ended; the heredoc body starts here
                    readingheredocdelim = False
                    insideheredoc = True
                    lexfirstind = len(ret) + 1
                elif insideheredoc:
                    # was the line we just passed the closing delimiter?
                    tind = lexfirstind
                    while stripdoc and ret[tind] == '\t':
                        tind += 1
                    if ret[tind:] == heredelim:
                        stripdoc = insideheredoc = False
                        heredelim = ''
                        lexfirstind = -1
                    else:
                        lexfirstind = len(ret) + 1
            # bashlex/parse.y L3599
            if insideheredoc and c == close and count == 1:
                # the delimiter may sit directly before the close character
                tind = lexfirstind
                while stripdoc and ret[tind] == '\t':
                    tind += 1
                if ret[tind:] == heredelim:
                    stripdoc = insideheredoc = False
                    heredelim = ''
                    lexfirstind = -1

            # comment and heredoc bodies are copied through verbatim
            if insidecomment or insideheredoc:
                ret += c

                if insidecomment and c == '\n':
                    insidecomment = False

                continue

            if passnextchar:
                passnextchar = False
                # XXX is this needed?
                # if doublequotes != "'" and c == '\n':
                #     if ret:
                #         ret = ret[:-1]
                # else:
                #     ret += c
                ret += c
                continue

            # track whether we're inside a word and how far into it
            if _shellbreak(c):
                insideword = False
            else:
                if insideword:
                    lexwlen += 1
                else:
                    insideword = True
                    lexwlen = 0

            if _shellblank(c) and not readingheredocdelim and not lexrwlen:
                ret += c
                continue

            # bashlex/parse.y L3686
            if readingheredocdelim:
                if lexfirstind == -1 and not _shellbreak(c):
                    lexfirstind = len(ret)
                elif lexfirstind >= 0 and not passnextchar and _shellbreak(c):
                    if not heredelim:
                        nestret = ret[lexfirstind:]
                        heredelim = shutils.removequotes(nestret)
                    if c == '\n':
                        insideheredoc = True
                        readingheredocdelim = False
                        lexfirstind = len(ret) + 1
                    else:
                        lexfirstind = -1

            # after a command separator a reserved word is acceptable again
            if not reservedwordok and checkcase and not insidecomment and (_shellmeta(c) or c == '\n'):
                ret += c
                peekc = self._getc(True)
                if c == peekc and c in '&|;':
                    ret += peekc
                    reservedwordok = True
                    lexrwlen = 0
                    continue
                elif c == '\n' or c in '&|;':
                    self._ungetc(peekc)
                    reservedwordok = True
                    lexrwlen = 0
                    continue
                elif c is None:
                    raise MatchedPairError(startlineno, 'unexpected EOF while looking for matching %r' % close, self) # pragma: no coverage
                else:
                    ret = ret[:-1]
                    self._ungetc(peekc)

            # bashlex/parse.y L3761
            if reservedwordok:
                if c.islower():
                    ret += c
                    lexrwlen += 1
                    continue
                elif lexrwlen == 4 and _shellbreak(c):
                    # the 4-letter words 'case'/'esac' toggle pattern mode
                    if ret[-4:] == 'case':
                        insidecase = True
                    elif ret[-4:] == 'esac':
                        insidecase = False
                    reservedwordok = False
                elif (checkcomment and c == '#' and (lexrwlen == 0 or
                                                     (insideword and lexwlen == 0))):
                    pass
                elif (not insidecase and (_shellblank(c) or c == '\n') and
                      lexrwlen == 2 and ret[-2:] == 'do'):
                    lexrwlen = 0
                elif insidecase and c != '\n':
                    reservedwordok = False
                elif not _shellbreak(c):
                    reservedwordok = False

            # heredoc redirection operator << / <<-
            if not insidecomment and checkcase and c == '<':
                ret += c
                peekc = self._getc(True)
                if peekc is None:
                    raise MatchedPairError(startlineno, 'unexpected EOF while looking for matching %r' % close, self)
                if peekc == c:
                    ret += peekc
                    peekc = self._getc(True)
                    if peekc is None:
                        raise MatchedPairError(startlineno, 'unexpected EOF while looking for matching %r' % close, self)
                    elif peekc == '-':
                        # <<- strips leading tabs from the heredoc body
                        ret += peekc
                        stripdoc = True
                    else:
                        self._ungetc(peekc)

                    if peekc != '<':
                        readingheredocdelim = True
                        lexfirstind = -1

                    continue
                else:
                    c = peekc
            elif checkcomment and not insidecomment and c == '#' and ((reservedwordok
                    and lexrwlen == 0) or insideword or lexwlen == 0):
                insidecomment = True

            # count the open/close pair, but not inside a case pattern
            if c == close and not insidecase:
                count -= 1
            elif not firstclose and not insidecase and c == open:
                count += 1

            ret += c

            if count == 0:
                break

            if c == '\\':
                passnextchar = True

            # bashlex/parse.y L3897
            if _shellquote(c):
                self._push_delimiter(c)
                try:
                    if wasdollar and c == "'":
                        # $'...' allows backslash escapes
                        nestret = self._parse_matched_pair(c, c, c,
                                                           allowesc=True,
                                                           dquote=True)
                    else:
                        nestret = self._parse_matched_pair(c, c, c,
                                                           dquote=True)
                finally:
                    self._pop_delimiter()

                # XXX is this necessary?
                # if wasdollar and c == "'" and not rdquote:
                #     if not rdquote:
                #         nestret = shutils.single_quote(nestret)
                #     ret = ret[:-2]
                # elif wasdollar and c == '"' and not rdquote:
                #     nestret = shutils.double_quote(nestret)
                #     ret = ret[:-2]

                ret += nestret
            # check for $(), $[], or ${} inside command substitution
            elif wasdollar and c in '({[':
                if not insidecase and open == c:
                    count -= 1
                if c == '(':
                    nestret = self._parse_comsub(None, '(', ')',
                                                 parsingcommand=True,
                                                 dquote=False)
                elif c == '{':
                    nestret = self._parse_matched_pair(None, '{', '}',
                                                       firstclose=True,
                                                       dolbrace=True,
                                                       dquote=True)
                elif c == '[':
                    nestret = self._parse_matched_pair(None, '[', ']',
                                                       dquote=True)

                ret += nestret

            wasdollar = c == '$'

        return ret
|
||
|
|
|
||
|
|
def _parse_matched_pair(self, doublequotes, open, close, parsingcommand=False, allowesc=False, dquote=False, firstclose=False, dolbrace=False, arraysub=False):
|
||
|
|
count = 1
|
||
|
|
dolbracestate = ''
|
||
|
|
if dolbrace:
|
||
|
|
dolbracestate = 'param'
|
||
|
|
|
||
|
|
insidecomment = False
|
||
|
|
lookforcomments = False
|
||
|
|
sawdollar = False
|
||
|
|
|
||
|
|
if parsingcommand and doublequotes not in "`'\"" and dquote:
|
||
|
|
lookforcomments = True
|
||
|
|
|
||
|
|
rdquote = True if doublequotes == '"' else dquote
|
||
|
|
passnextchar = False
|
||
|
|
startlineno = self._line_number
|
||
|
|
|
||
|
|
ret = ''
|
||
|
|
|
||
|
|
def handledollarword():
|
||
|
|
if open == c:
|
||
|
|
count -= 1
|
||
|
|
|
||
|
|
# bashlex/parse.y L3486
|
||
|
|
if c == '(':
|
||
|
|
return self._parse_comsub(None, '(', ')',
|
||
|
|
parsingcommand=True,
|
||
|
|
dquote=False)
|
||
|
|
elif c == '{':
|
||
|
|
return self._parse_matched_pair(None, '{', '}',
|
||
|
|
firstclose=True,
|
||
|
|
dquote=rdquote,
|
||
|
|
dolbrace=True)
|
||
|
|
elif c == '[':
|
||
|
|
return self._parse_matched_pair(None, '[', ']', dquote=rdquote)
|
||
|
|
else:
|
||
|
|
assert False # pragma: no cover
|
||
|
|
|
||
|
|
while count:
|
||
|
|
c = self._getc(doublequotes != "'" and not passnextchar)
|
||
|
|
if c is None:
|
||
|
|
raise MatchedPairError(startlineno, 'unexpected EOF while looking for matching %r' % close, self)
|
||
|
|
|
||
|
|
# bashlex/parse.y L3285
|
||
|
|
# if c == '\n':
|
||
|
|
# continue
|
||
|
|
|
||
|
|
if insidecomment:
|
||
|
|
ret += c
|
||
|
|
if c == '\n':
|
||
|
|
insidecomment = False
|
||
|
|
continue
|
||
|
|
elif lookforcomments and not insidecomment and c == '#' and (not ret
|
||
|
|
or ret[-1] == '\n' or _shellblank(ret[-1])):
|
||
|
|
insidecomment = True
|
||
|
|
|
||
|
|
# last char was backslash
|
||
|
|
if passnextchar:
|
||
|
|
passnextchar = False
|
||
|
|
#if doublequotes != "'" and c == '\n':
|
||
|
|
# if ret:
|
||
|
|
# ret = ret[:-1]
|
||
|
|
# continue
|
||
|
|
ret += c
|
||
|
|
continue
|
||
|
|
elif c == close:
|
||
|
|
count -= 1
|
||
|
|
elif open != close and sawdollar and open == '{' and c == open:
|
||
|
|
count += 1
|
||
|
|
elif not firstclose and c == open:
|
||
|
|
count += 1
|
||
|
|
|
||
|
|
ret += c
|
||
|
|
if count == 0:
|
||
|
|
break
|
||
|
|
|
||
|
|
if open == "'":
|
||
|
|
if allowesc and c == "\\":
|
||
|
|
passnextchar = True
|
||
|
|
continue
|
||
|
|
if c == "\\":
|
||
|
|
passnextchar = True
|
||
|
|
if dolbrace:
|
||
|
|
if dolbracestate == 'param':
|
||
|
|
if len(ret) > 1:
|
||
|
|
dd = {'%' : 'quote', '#' : 'quote', '/' : 'quote2', '^' : 'quote',
|
||
|
|
',' : 'quote'}
|
||
|
|
if c in dd:
|
||
|
|
dolbracestate = dd[c]
|
||
|
|
elif c in '#%^,~:-=?+/':
|
||
|
|
dolbracestate = 'op'
|
||
|
|
if dolbracestate == 'op' and c in '#%^,~:-=?+/':
|
||
|
|
dolbracestate = 'word'
|
||
|
|
|
||
|
|
if dolbracestate not in 'quote2' and dquote and dolbrace and c == "'":
|
||
|
|
continue
|
||
|
|
|
||
|
|
if open != close:
|
||
|
|
if _shellquote(c):
|
||
|
|
self._push_delimiter(c)
|
||
|
|
try:
|
||
|
|
if sawdollar and "'":
|
||
|
|
nestret = self._parse_matched_pair(c, c, c, parsingcommand=parsingcommand, allowesc=True, dquote=dquote, firstclose=firstclose, dolbrace=dolbrace)
|
||
|
|
else:
|
||
|
|
nestret = self._parse_matched_pair(c, c, c, parsingcommand=parsingcommand, allowesc=allowesc, dquote=dquote, firstclose=firstclose, dolbrace=dolbrace)
|
||
|
|
finally:
|
||
|
|
self._pop_delimiter()
|
||
|
|
|
||
|
|
# bashlex/parse.y L3419
|
||
|
|
if sawdollar and c == "'":
|
||
|
|
pass
|
||
|
|
elif sawdollar and c == '"':
|
||
|
|
ret = ret[:-2] # back up before the $"
|
||
|
|
|
||
|
|
ret += nestret
|
||
|
|
elif arraysub and sawdollar and c in '({[':
|
||
|
|
# goto parse_dollar_word
|
||
|
|
ret += handledollarword()
|
||
|
|
elif open == '"' and c == '`':
|
||
|
|
ret += self._parse_matched_pair(None, '`', '`', parsingcommand=parsingcommand, allowesc=allowesc, dquote=dquote, firstclose=firstclose, dolbrace=dolbrace)
|
||
|
|
elif open != '`' and sawdollar and c in '({[':
|
||
|
|
ret += handledollarword()
|
||
|
|
|
||
|
|
sawdollar = c == '$'
|
||
|
|
|
||
|
|
return ret
|
||
|
|
|
||
|
|
|
||
|
|
def _is_assignment(self, value, iscompassign):
|
||
|
|
c = value[0]
|
||
|
|
|
||
|
|
def legalvariablechar(x):
|
||
|
|
return x.isalnum() or x == '_'
|
||
|
|
|
||
|
|
if not c.isalpha() and c != '_':
|
||
|
|
return
|
||
|
|
|
||
|
|
for i, c in enumerate(value):
|
||
|
|
if c == '=':
|
||
|
|
return i
|
||
|
|
|
||
|
|
# bash/general.c L289
|
||
|
|
if c == '+' and i + 1 < len(value) and value[i+1] == '=':
|
||
|
|
return i+1
|
||
|
|
|
||
|
|
if not legalvariablechar(c):
|
||
|
|
return False
|
||
|
|
|
||
|
|
def _command_token_position(self, token):
    '''check whether the given token sits where a simple command may start'''
    # tokens that separate case-statement clauses never start a command
    caseseptokens = (tokentype.SEMI_SEMI,
                     tokentype.SEMI_AND,
                     tokentype.SEMI_SEMI_AND)
    return (token.ttype == tokentype.ASSIGNMENT_WORD or
            self._parserstate & parserflags.REDIRLIST or
            (token.ttype not in caseseptokens and
             self._reserved_word_acceptable(token)))
def _assignment_acceptable(self, token):
    '''an assignment word is acceptable where a command may start, as long
    as we are not currently parsing a case pattern list'''
    in_case_pattern = self._parserstate & parserflags.CASEPAT
    return self._command_token_position(token) and not in_case_pattern
def _time_command_acceptable(self):
    '''stub for bash/parse.y time_command_acceptable()

    always returns None (falsy) — presumably this gates recognition of
    the TIME reserved word, which this port never special-cases;
    NOTE(review): confirm against the caller before relying on this
    '''
    pass
def _reserved_word_acceptable(self, tok):
    '''decide whether a reserved word may be recognized after token tok'''
    if not tok:
        return True
    if tok.ttype in _reserved or tok.value in _reserved:
        return True

    # bash/parse.y L4955 cOPROCESS_SUPPORT: the word naming a function
    # being defined may also be followed by a reserved word
    return (self._last_read_token.ttype == tokentype.WORD and
            self._token_before_that.ttype == tokentype.FUNCTION)
def _pop_delimiter(self):
|
||
|
|
self._dstack.pop()
|
||
|
|
|
||
|
|
def _push_delimiter(self, c):
|
||
|
|
self._dstack.append(c)
|
||
|
|
|
||
|
|
def _current_delimiter(self):
|
||
|
|
if self._dstack:
|
||
|
|
return self._dstack[-1]
|
||
|
|
|
||
|
|
def _ungetc(self, c):
|
||
|
|
if (self._shell_input_line and self._shell_input_line_index
|
||
|
|
and self._shell_input_line_index <= len(self._shell_input_line)):
|
||
|
|
self._shell_input_line_index -= 1
|
||
|
|
else:
|
||
|
|
self._eol_ungetc_lookahead = c
|
||
|
|
|
||
|
|
def _getc(self, remove_quoted_newline=True):
|
||
|
|
if self._eol_ungetc_lookahead is not None:
|
||
|
|
c = self._eol_ungetc_lookahead
|
||
|
|
self._eol_ungetc_lookahead = None
|
||
|
|
return c
|
||
|
|
|
||
|
|
# bash/parse.y L2220
|
||
|
|
|
||
|
|
while True:
|
||
|
|
if self._shell_input_line_index < len(self._shell_input_line):
|
||
|
|
c = self._shell_input_line[self._shell_input_line_index]
|
||
|
|
self._shell_input_line_index += 1
|
||
|
|
else:
|
||
|
|
c = None
|
||
|
|
|
||
|
|
if c == '\\' and remove_quoted_newline and self._shell_input_line[self._shell_input_line_index] == '\n':
|
||
|
|
self._line_number += 1
|
||
|
|
# skip past the newline
|
||
|
|
self._shell_input_line_index += 1
|
||
|
|
continue
|
||
|
|
else:
|
||
|
|
return c
|
||
|
|
|
||
|
|
#if c is None and self._shell_input_line_terminator is None:
|
||
|
|
# if self._shell_input_line_index != 0:
|
||
|
|
# return '\n'
|
||
|
|
# else:
|
||
|
|
# return None
|
||
|
|
|
||
|
|
#return c
|
||
|
|
|
||
|
|
def _discard_until(self, character):
|
||
|
|
c = self._getc(False)
|
||
|
|
while c is not None and c != character:
|
||
|
|
c = self._getc(False)
|
||
|
|
if c is not None:
|
||
|
|
self._ungetc(c)
|
||
|
|
|
||
|
|
def _recordpos(self, relativeoffset=0):
|
||
|
|
'''record the current index of the tokenizer into the positions stack
|
||
|
|
while adding relativeoffset from it'''
|
||
|
|
self._positions.append(self._shell_input_line_index - relativeoffset)
|
||
|
|
|
||
|
|
def readline(self, removequotenewline):
    '''read characters up to and including the next newline and return
    them as a single string

    returns None when end of input is reached before any character is
    read; a missing final newline is synthesized.  when
    removequotenewline is true, backslash-newline pairs are dropped as
    line continuations instead of being copied into the result
    '''
    chars = []
    escaped = False
    while True:
        c = self._getc()
        if c is None:
            if not chars:
                return None
            c = '\n'

        if escaped:
            escaped = False
            chars.append(c)
        elif removequotenewline and c == '\\':
            nextc = self._getc()
            if nextc == '\n':
                # line continuation: drop the backslash and the newline
                self._line_number += 1
                continue
            self._ungetc(nextc)
            escaped = True
            chars.append(c)
        else:
            chars.append(c)

        if c == '\n':
            return ''.join(chars)
def _peekc(self, *args):
|
||
|
|
peek_char = self._getc(*args)
|
||
|
|
# only unget if we actually read something
|
||
|
|
if peek_char is not None:
|
||
|
|
self._ungetc(peek_char)
|
||
|
|
return peek_char
|
||
|
|
|
||
|
|
def _specialcasetokens(self, tokstr):
|
||
|
|
if (self._last_read_token.ttype == tokentype.WORD and
|
||
|
|
self._token_before_that.ttype in (tokentype.FOR,
|
||
|
|
tokentype.CASE,
|
||
|
|
tokentype.SELECT) and
|
||
|
|
tokstr == 'in'):
|
||
|
|
if self._token_before_that.ttype == tokentype.CASE:
|
||
|
|
self._parserstate.add(parserflags.CASEPAT)
|
||
|
|
self._esacs_needed_count += 1
|
||
|
|
return tokentype.IN
|
||
|
|
|
||
|
|
if (self._last_read_token.ttype == tokentype.WORD and
|
||
|
|
self._token_before_that.ttype in (tokentype.FOR, tokentype.SELECT) and
|
||
|
|
tokstr == 'do'):
|
||
|
|
return tokentype.DO
|
||
|
|
|
||
|
|
if self._esacs_needed_count:
|
||
|
|
self._esacs_needed_count -= 1
|
||
|
|
if tokstr == 'esac':
|
||
|
|
self._parserstate.discard(parserflags.CASEPAT)
|
||
|
|
return tokentype.ESAC
|
||
|
|
|
||
|
|
if self._parserstate & parserflags.ALLOWOPNBRC:
|
||
|
|
self._parserstate.discard(parserflags.ALLOWOPNBRC)
|
||
|
|
if tokstr == '{':
|
||
|
|
self._open_brace_count += 1
|
||
|
|
# bash/parse.y L2887
|
||
|
|
return tokentype.LEFT_CURLY
|
||
|
|
|
||
|
|
if (self._last_read_token.ttype == tokentype.ARITH_FOR_EXPRS and
|
||
|
|
tokstr == 'do'):
|
||
|
|
return tokentype.DO
|
||
|
|
|
||
|
|
if (self._last_read_token.ttype == tokentype.ARITH_FOR_EXPRS and
|
||
|
|
tokstr == '{'):
|
||
|
|
self._open_brace_count += 1
|
||
|
|
return tokentype.LEFT_CURLY
|
||
|
|
|
||
|
|
if (self._open_brace_count and
|
||
|
|
self._reserved_word_acceptable(self._last_read_token) and
|
||
|
|
tokstr == '}'):
|
||
|
|
self._open_brace_count -= 1
|
||
|
|
return tokentype.RIGHT_CURLY
|
||
|
|
|
||
|
|
if self._last_read_token.ttype == tokentype.TIME and tokstr == '-p':
|
||
|
|
return tokentype.TIMEOPT
|
||
|
|
|
||
|
|
if self._last_read_token.ttype == tokentype.TIMEOPT and tokstr == '--':
|
||
|
|
return tokentype.TIMEIGN
|
||
|
|
|
||
|
|
if self._parserstate & parserflags.CONDEXPR and tokstr == ']]':
|
||
|
|
return tokentype.COND_END
|