1161 lines
41 KiB
Python
1161 lines
41 KiB
Python
|
|
import re, collections, enum
|
||
|
|
|
||
|
|
from bashlex import flags, shutils, utils, errors, heredoc, state
|
||
|
|
|
||
|
|
# Character-class table mirroring bash's sh_syntaxtab: maps a character to
# the set of syntax classes it belongs to. Unknown characters map to an
# empty set (note: indexing inserts that empty set -- a defaultdict side
# effect the predicate helpers below rely on being harmless).
sh_syntaxtab = collections.defaultdict(set)

def _addsyntax(chars, symbol):
    '''Tag every character in chars as belonging to syntax class symbol.'''
    for ch in chars:
        sh_syntaxtab[ch].add(symbol)

# populate the table with the classes bash defines in syntax.h
for _chars, _symbol in (
        ('\\`$"\n', 'dquote'),      # special inside double quotes
        ('()<>;&|', 'meta'),        # shell metacharacters
        ('"`\'', 'quote'),          # quote characters
        ('$<>', 'exp'),             # expansion / redirection introducers
        ("()<>;&| \t\n", 'break'),  # word-break characters
):
    _addsyntax(_chars, _symbol)
|
||
|
|
|
||
|
|
def _shellblank(c):
|
||
|
|
return c in ' \t'
|
||
|
|
|
||
|
|
def _shellmeta(c):
    # True if c is a shell metacharacter: one of ( ) < > ; & |
    return 'meta' in sh_syntaxtab[c]
|
||
|
|
|
||
|
|
def _shellquote(c):
    # True if c is a quote character: " ` '
    return 'quote' in sh_syntaxtab[c]
|
||
|
|
|
||
|
|
def _shellexp(c):
    # True if c can introduce an expansion or process substitution: $ < >
    return 'exp' in sh_syntaxtab[c]
|
||
|
|
|
||
|
|
def _shellbreak(c):
    # True if c ends a word: a metacharacter, space, tab or newline
    return 'break' in sh_syntaxtab[c]
|
||
|
|
|
||
|
|
class tokentype(enum.Enum):
    '''Token types produced by the tokenizer.

    Members whose value is a string can be looked up directly from the
    operator text via tokentype(text) (see _readtoken, which does
    tokentype(character)); the integer values are arbitrary unique ids
    for types that are never looked up by value.
    '''
    IF = 1
    THEN = 2
    ELSE = 3
    ELIF = 4
    FI = 5
    CASE = 6
    ESAC = 7
    FOR = 8
    SELECT = 9
    WHILE = 10
    UNTIL = 11
    DO = 12
    DONE = 13
    FUNCTION = 14
    COPROC = 15
    COND_START = 16
    COND_END = 17
    # https://github.com/idank/bashlex/issues/20
    # COND_ERROR = 18
    IN = 19
    BANG = '!'
    TIME = 21
    TIMEOPT = 22
    TIMEIGN = 23
    WORD = 24
    ASSIGNMENT_WORD = 25
    REDIR_WORD = 26
    NUMBER = 27
    ARITH_CMD = 28
    ARITH_FOR_EXPRS = 29
    COND_CMD = 30
    AND_AND = '&&'
    OR_OR = '||'
    GREATER_GREATER = '>>'
    LESS_LESS = '<<'
    LESS_AND = '<&'
    LESS_LESS_LESS = '<<<'
    GREATER_AND = '>&'
    SEMI_SEMI = ';;'
    SEMI_AND = ';&'
    SEMI_SEMI_AND = ';;&'
    LESS_LESS_MINUS = '<<-'
    AND_GREATER = '&>'
    AND_GREATER_GREATER = '&>>'
    LESS_GREATER = '<>'
    GREATER_BAR = '>|'
    BAR_AND = '|&'
    LEFT_CURLY = 47
    RIGHT_CURLY = 48
    # '$end' is what yacc expects its EOF token to be named
    EOF = '$end'
    LEFT_PAREN = '('
    RIGHT_PAREN = ')'
    BAR = '|'
    SEMICOLON = ';'
    DASH = '-'
    NEWLINE = '\n'
    LESS = '<'
    GREATER = '>'
    AMPERSAND = '&'
|
||
|
|
|
||
|
|
# tokens after which a reserved word is acceptable (consulted by
# _reserved_word_acceptable); single-character command separators are
# matched by token *value*, the rest by token type
_reserved = {
    tokentype.AND_AND, tokentype.BANG, tokentype.BAR_AND, tokentype.DO,
    tokentype.DONE, tokentype.ELIF, tokentype.ELSE, tokentype.ESAC,
    tokentype.FI, tokentype.IF, tokentype.OR_OR, tokentype.SEMI_SEMI,
    tokentype.SEMI_AND, tokentype.SEMI_SEMI_AND, tokentype.THEN,
    tokentype.TIME, tokentype.TIMEOPT, tokentype.TIMEIGN, tokentype.COPROC,
    tokentype.UNTIL, tokentype.WHILE,
}

# add each separator character individually
_reserved.update('\n;()|&{}')
|
||
|
|
|
||
|
|
# word_token_alist
# maps a literal word to the reserved-word token type it produces when it
# appears in command position (mirrors bash/parse.y's word_token_alist)
valid_reserved_first_command = {
    "if" : tokentype.IF,
    "then" : tokentype.THEN,
    "else" : tokentype.ELSE,
    "elif" : tokentype.ELIF,
    "fi" : tokentype.FI,
    "case" : tokentype.CASE,
    "esac" : tokentype.ESAC,
    "for" : tokentype.FOR,
    "select" : tokentype.SELECT,
    "while" : tokentype.WHILE,
    "until" : tokentype.UNTIL,
    "do" : tokentype.DO,
    "done" : tokentype.DONE,
    "in" : tokentype.IN,
    "function" : tokentype.FUNCTION,
    "time" : tokentype.TIME,
    "{" : tokentype.LEFT_CURLY,
    "}" : tokentype.RIGHT_CURLY,
    "!" : tokentype.BANG,
    "[[" : tokentype.COND_START,
    "]]" : tokentype.COND_END,
    "coproc" : tokentype.COPROC
}
|
||
|
|
|
||
|
|
class MatchedPairError(errors.ParsingError):
    '''Raised when a matched pair (quotes, braces, parentheses...) is left
    unterminated; the error position is the tokenizer's current index.'''
    def __init__(self, startline, message, tokenizer):
        # TODO use startline?
        super(MatchedPairError, self).__init__(message,
                                               tokenizer.source,
                                               tokenizer._shell_input_line_index - 1)
|
||
|
|
|
||
|
|
# short aliases for the flag enums used throughout this module
wordflags = flags.word
parserflags = flags.parser
|
||
|
|
|
||
|
|
class token(object):
    '''A single token produced by the tokenizer.

    Carries the token type (a tokentype member, or None for the empty
    placeholder token), its value, optional [start, end) positions into the
    source, and a set of word flags.
    '''
    def __init__(self, type_, value, pos=None, flags=None):
        if type_ is not None:
            assert isinstance(type_, tokentype)

        self.ttype = type_
        self.value = value
        self.flags = set() if flags is None else flags

        if pos is None:
            self.lexpos = self.endlexpos = None
        else:
            self.lexpos, self.endlexpos = pos
            assert self.lexpos < self.endlexpos, (self.lexpos, self.endlexpos)

    @property
    def type(self):
        '''The token type name as the parser expects to see it.'''
        if not self.ttype:
            return None
        # make yacc see our EOF token as its own special one $end
        return '$end' if self.ttype == tokentype.EOF else self.ttype.name

    def __nonzero__(self):
        # the placeholder token(None, None) is falsy
        return self.ttype is not None or self.value is not None

    __bool__ = __nonzero__

    def __eq__(self, other):
        if not isinstance(other, token):
            return False
        return ((self.type, self.value, self.lexpos,
                 self.endlexpos, self.flags) ==
                (other.type, other.value, other.lexpos,
                 other.endlexpos, other.flags))

    def __repr__(self):
        parts = ['<', self.type]
        if self.lexpos is not None and self.endlexpos is not None:
            parts.append('@%d:%d' % (self.lexpos, self.endlexpos))
        if self.value:
            parts.append(' ')
            parts.append(repr(self.value))

        if self.flags:
            parts.append(' (%s)' % ' '.join(e.name for e in self.flags))
        parts.append('>')
        return ''.join(parts)

    def nopos(self):
        '''Return a copy of this token with position information dropped.'''
        return self.__class__(self.ttype, self.value, flags=self.flags)
|
||
|
|
|
||
|
|
# sentinel token returned when the input is exhausted
eoftoken = token(tokentype.EOF, None)
|
||
|
|
|
||
|
|
class tokenizer(object):
|
||
|
|
    def __init__(self, s, parserstate, strictmode=True, eoftoken=None,
                 lastreadtoken=None, tokenbeforethat=None, twotokensago=None):
        '''Tokenize the shell source string s.

        parserstate is a shared set of parser flags (mutated in place); the
        three *token arguments allow resuming with the lookbehind state of a
        previous tokenizer instance.
        '''
        self._shell_eof_token = eoftoken
        self._shell_input_line = s
        self._added_newline = False
        # make sure the input always ends with a newline, remembering that
        # we added it so positions/source can compensate
        if self._shell_input_line and self._shell_input_line[-1] != '\n':
            self._shell_input_line += '\n' # bash/parse.y L2431
            self._added_newline = True
        self._shell_input_line_index = 0
        # self._shell_input_line_terminator = None
        # three tokens of lookbehind, rotated by token()
        self._two_tokens_ago = twotokensago or token(None, None)
        self._token_before_that = tokenbeforethat or token(None, None)
        self._last_read_token = lastreadtoken or token(None, None)
        self._current_token = token(None, None)

        # This implements one-character lookahead/lookbehind across physical
        # input lines, to avoid something being lost because it's pushed back
        # with shell_ungetc when we're at the start of a line.
        self._eol_ungetc_lookahead = None

        # token waiting to be read
        self._token_to_read = None

        self._parserstate = parserstate
        self._line_number = 0
        self._open_brace_count = 0
        self._esacs_needed_count = 0

        # stack of currently open quoting/grouping delimiter characters
        self._dstack = []

        # a stack of positions to record the start and end of a token
        self._positions = []

        self._strictmode = strictmode

        # hack: the tokenizer needs access to the stack of redirection
        # nodes when it reads heredocs. this instance is shared between
        # the tokenizer and the parser, which also needs it
        self.redirstack = []
|
||
|
|
|
||
|
|
@property
|
||
|
|
def source(self):
|
||
|
|
if self._added_newline:
|
||
|
|
return self._shell_input_line[:-1]
|
||
|
|
return self._shell_input_line
|
||
|
|
|
||
|
|
    def __iter__(self):
        '''Yield tokens until EOF; the implicitly appended newline token,
        if any, is not yielded.'''
        while True:
            t = self.token()
            # we're finished when we see the eoftoken OR when we added a newline
            # to the input and we're there now
            if t is eoftoken or (self._added_newline and
                                 t.lexpos + 1 == len(self._shell_input_line)):
                break
            yield t
|
||
|
|
|
||
|
|
def _createtoken(self, type_, value, flags=None):
|
||
|
|
'''create a token with position information'''
|
||
|
|
pos = None
|
||
|
|
assert len(self._positions) >= 2, (type_, value)
|
||
|
|
p2 = self._positions.pop()
|
||
|
|
p1 = self._positions.pop()
|
||
|
|
pos = [p1, p2]
|
||
|
|
return token(type_, value, pos, flags)
|
||
|
|
|
||
|
|
    def token(self):
        '''Read and return the next token, rotating the lookbehind tokens.'''
        # shift the three-token lookbehind window one token to the left
        self._two_tokens_ago, self._token_before_that, self._last_read_token = \
            self._token_before_that, self._last_read_token, self._current_token

        self._current_token = self._readtoken()
        # _readtoken may return a bare tokentype member; wrap it into a token
        # carrying the positions recorded while it was read
        if isinstance(self._current_token, tokentype):
            self._recordpos()
            self._current_token = self._createtoken(self._current_token,
                                                    self._current_token.value)

        # if the caller designated a special eof token type, translate it
        if (self._parserstate & parserflags.EOFTOKEN and
            self._current_token.ttype == self._shell_eof_token):
            self._current_token = eoftoken
            # bash/parse.y L2626
            self._parserstate.discard(parserflags.EOFTOKEN)

        return self._current_token
|
||
|
|
|
||
|
|
    def _readtoken(self):
        '''Read a single token: an operator, a newline, or (delegating to
        _readtokenword) a word/number.

        May return either a token instance or a bare tokentype member; the
        caller (token()) wraps the latter with position information.
        '''
        character = None
        peek_char = None

        # a token may have been pushed back by the parser
        if self._token_to_read is not None:
            t = self._token_to_read
            self._token_to_read = None
            return t

        # bashlex/parse.y L2989 COND_COMMAND

        # skip leading blanks
        character = self._getc(True)
        while character is not None and _shellblank(character):
            character = self._getc(True)

        if character is None:
            return eoftoken

        # a comment runs to the end of the line; treat it as the newline
        if character == '#':
            self._discard_until('\n')
            self._getc(False)
            character = '\n'

        # record the start position of this token
        self._recordpos(1)

        if character == '\n':
            # bashlex/parse.y L3034 ALIAS
            # pending heredoc bodies start after the line that opened them
            heredoc.gatherheredocuments(self)

            self._parserstate.discard(parserflags.ASSIGNOK)
            return tokentype(character)

        # inside [[ ]] everything is read as a word
        if self._parserstate & parserflags.REGEXP:
            return self._readtokenword(character)

        # operator tokens: peek one character to disambiguate e.g.
        # '<' vs '<<' vs '<<<'
        if _shellmeta(character) and not (self._parserstate & parserflags.DBLPAREN):
            self._parserstate.discard(parserflags.ASSIGNOK)
            peek_char = self._getc(True)

            both = character
            if peek_char:
                both += peek_char
            if character == peek_char:
                if character == '<':
                    peek_char = self._getc()
                    if peek_char == '-':
                        return tokentype.LESS_LESS_MINUS
                    elif peek_char == '<':
                        return tokentype.LESS_LESS_LESS
                    else:
                        self._ungetc(peek_char)
                        return tokentype.LESS_LESS
                elif character == '>':
                    return tokentype.GREATER_GREATER
                elif character == ';':
                    # ';;' ends a case pattern; ';;&' also falls through
                    self._parserstate |= parserflags.CASEPAT
                    # bashlex/parse.y L3085 ALIAS

                    peek_char = self._getc()
                    if peek_char == '&':
                        return tokentype.SEMI_SEMI_AND
                    else:
                        self._ungetc(peek_char)
                        return tokentype.SEMI_SEMI
                elif character == '&':
                    return tokentype.AND_AND
                elif character == '|':
                    return tokentype.OR_OR
            # bashlex/parse.y L3105
            elif both == '<&':
                return tokentype.LESS_AND
            elif both == '>&':
                return tokentype.GREATER_AND
            elif both == '<>':
                return tokentype.LESS_GREATER
            elif both == '>|':
                return tokentype.GREATER_BAR
            elif both == '&>':
                peek_char = self._getc()
                if peek_char == '>':
                    return tokentype.AND_GREATER_GREATER
                else:
                    self._ungetc(peek_char)
                    return tokentype.AND_GREATER
            elif both == '|&':
                return tokentype.BAR_AND
            elif both == ';&':
                return tokentype.SEMI_AND

            # single-character operator; push the peeked character back
            self._ungetc(peek_char)
            # 'name ()' may open a function definition body
            if character == ')' and self._last_read_token.value == '(' and self._token_before_that.ttype == tokentype.WORD:
                self._parserstate.add(parserflags.ALLOWOPNBRC)
                # bashlex/parse.y L3155

            # track subshell / case-pattern parentheses
            if character == '(' and not self._parserstate & parserflags.CASEPAT:
                self._parserstate.add(parserflags.SUBSHELL)
            elif self._parserstate & parserflags.CASEPAT and character == ')':
                self._parserstate.discard(parserflags.CASEPAT)
            elif self._parserstate & parserflags.SUBSHELL and character == ')':
                self._parserstate.discard(parserflags.SUBSHELL)

            # anything except process substitution '<(' / '>(' is an operator
            if character not in '<>' or peek_char != '(':
                return tokentype(character)

        # a lone '-' directly after <& or >& (e.g. 2>&-) closes the fd
        if character == '-' and (self._last_read_token.ttype == tokentype.LESS_AND or self._last_read_token.ttype == tokentype.GREATER_AND):
            return tokentype(character)

        return self._readtokenword(character)
|
||
|
|
|
||
|
|
    def _readtokenword(self, c):
        '''Read characters starting at c until a word boundary, handling
        quoting and $-expansions, and return the resulting token (WORD,
        NUMBER, ASSIGNMENT_WORD, REDIR_WORD or a reserved word).'''
        # mutable state shared with the nested handlers below; a dict is
        # used so the closures can rebind values on both python 2 and 3
        d = {}
        d['all_digit_token'] = c.isdigit()
        d['dollar_present'] = d['quoted'] = d['pass_next_character'] = d['compound_assignment'] = False

        # characters accumulated for the word so far
        tokenword = []

        def handleshellquote():
            # consume a complete quoted string opened by quote char c
            self._push_delimiter(c)
            try:
                ttok = self._parse_matched_pair(c, c, c, parsingcommand=(c == '`'))
            finally:
                self._pop_delimiter()

            tokenword.append(c)
            tokenword.extend(ttok)
            d['all_digit_token'] = False
            d['quoted'] = True
            if not d['dollar_present']:
                d['dollar_present'] = c == '"' and '$' in ttok

        def handleshellexp():
            # handle $(..) ${..} $[..] $'..' $".." $$ and <(..)/>(..);
            # returns True when the character turned out NOT to start an
            # expansion (falls through to plain-character handling)
            peek_char = self._getc()
            if peek_char == '(' or (c == '$' and peek_char in '{['):
                # try:
                if peek_char == '{':
                    ttok = self._parse_matched_pair(cd, '{', '}', firstclose=True, dolbrace=True)
                elif peek_char == '(':
                    self._push_delimiter(peek_char)
                    ttok = self._parse_comsub(cd, '(', ')', parsingcommand=True)
                    self._pop_delimiter()
                else:
                    ttok = self._parse_matched_pair(cd, '[', ']')
                # except MatchedPairError:
                #     return -1

                tokenword.append(c)
                tokenword.append(peek_char)
                tokenword.extend(ttok)
                d['dollar_present'] = True
                d['all_digit_token'] = False

                # goto next_character
            elif c == '$' and peek_char in '\'"':
                # $'...' (ANSI-C quoting) and $"..." (locale translation)
                self._push_delimiter(peek_char)
                try:
                    ttok = self._parse_matched_pair(peek_char, peek_char, peek_char,
                                                    allowesc=(peek_char == "'"))
                # except MatchedPairError:
                #     return -1
                finally:
                    self._pop_delimiter()

                #if peek_char == "'":
                #    # XXX ansiexpand
                #    ttok = shutils.single_quote(ttok)
                #else:
                #    ttok = shutils.double_quote(ttok)

                tokenword.append(c)
                tokenword.append(peek_char)
                tokenword.extend(ttok)
                d['quoted'] = True
                d['all_digit_token'] = False

                # goto next_character
            elif c == '$' and peek_char == '$':
                tokenword.append('$')
                tokenword.append('$')
                d['dollar_present'] = True
                d['all_digit_token'] = False

                # goto next_character
            else:
                self._ungetc(peek_char)
                return True

        # bashlex/parse.y L4699 ARRAY_VARS

        def handleescapedchar():
            # append c to the word verbatim, updating the digit/$ trackers
            tokenword.append(c)
            d['all_digit_token'] &= c.isdigit()
            if not d['dollar_present']:
                d['dollar_present'] = c == '$'

        while True:
            if c is None:
                break

            if d['pass_next_character']:
                # previous character was an escaping backslash
                d['pass_next_character'] = False
                handleescapedchar()
                # goto escaped_character
            else:
                cd = self._current_delimiter()
                gotonext = False
                if c == '\\':
                    peek_char = self._getc(False)

                    if peek_char == '\n':
                        # line continuation: swallow the backslash-newline
                        c = '\n'
                        gotonext = True
                        # goto next_character
                    else:
                        self._ungetc(peek_char)

                        # backslash escapes when unquoted, inside backquotes,
                        # or before a dquote-special character inside "..."
                        if (cd is None or cd == '`' or
                            (cd == '"' and peek_char is not None and
                             'dquote' in sh_syntaxtab[peek_char])):
                            d['pass_next_character'] = True
                            d['quoted'] = True

                        handleescapedchar()
                        gotonext = True
                        # goto got_character
                elif _shellquote(c):
                    handleshellquote()
                    gotonext = True
                    # goto next_character
                # bashlex/parse.y L4542
                # bashlex/parse.y L4567
                elif _shellexp(c):
                    gotonext = not handleshellexp()
                # bashlex/parse.y L4699
                if not gotonext:
                    if _shellbreak(c):
                        # word boundary: push the terminator back for the
                        # next _readtoken call
                        self._ungetc(c)
                        break
                    else:
                        handleescapedchar()

            # got_character
            # got_escaped_character

            # tokenword.append(c)
            # all_digit_token &= c.isdigit()
            # if not dollar_present:
            #     dollar_present = c == '$'

            # next_character
            cd = self._current_delimiter()
            c = self._getc(cd != "'" and not d['pass_next_character'])

        # got_token
        self._recordpos()

        tokenword = ''.join(tokenword)

        # a pure number directly next to a redirection is a file descriptor
        if d['all_digit_token'] and (c in '<>' or self._last_read_token.ttype in (tokentype.LESS_AND, tokentype.GREATER_AND)) and shutils.legal_number(tokenword):
            return self._createtoken(tokentype.NUMBER, int(tokenword))

        # bashlex/parse.y L4811
        specialtokentype = self._specialcasetokens(tokenword)
        if specialtokentype:
            return self._createtoken(specialtokentype, tokenword)

        # an unquoted plain word in command position may be a reserved word
        if not d['dollar_present'] and not d['quoted'] and self._reserved_word_acceptable(self._last_read_token):
            if tokenword in valid_reserved_first_command:
                ttype = valid_reserved_first_command[tokenword]
                ps = self._parserstate
                if ps & parserflags.CASEPAT and ttype != tokentype.ESAC:
                    pass
                elif ttype == tokentype.TIME and not self._time_command_acceptable():
                    pass
                elif ttype == tokentype.ESAC:
                    ps.discard(parserflags.CASEPAT)
                    ps.discard(parserflags.CASESTMT)
                elif ttype == tokentype.CASE:
                    ps.add(parserflags.CASESTMT)
                elif ttype == tokentype.COND_END:
                    ps.discard(parserflags.CONDCMD)
                    ps.discard(parserflags.CONDEXPR)
                elif ttype == tokentype.COND_START:
                    ps.add(parserflags.CONDCMD)
                elif ttype == tokentype.LEFT_CURLY:
                    self._open_brace_count += 1
                elif ttype == tokentype.RIGHT_CURLY and self._open_brace_count:
                    self._open_brace_count -= 1
                return self._createtoken(ttype, tokenword)

        tokenword = self._createtoken(tokentype.WORD, tokenword, utils.typedset(wordflags))
        if d['dollar_present']:
            tokenword.flags.add(wordflags.HASDOLLAR)
        if d['quoted']:
            tokenword.flags.add(wordflags.QUOTED)
        # NOTE(review): tokenword is a token object here, so tokenword[-1]
        # would raise -- unreachable since compound_assignment is never set
        # to True in this port; confirm before enabling compound assignments
        if d['compound_assignment'] and tokenword[-1] == ')':
            tokenword.flags.add(wordflags.COMPASSIGN)
        if self._is_assignment(tokenword.value, bool(self._parserstate & parserflags.COMPASSIGN)):
            tokenword.flags.add(wordflags.ASSIGNMENT)
            if self._assignment_acceptable(self._last_read_token):
                tokenword.flags.add(wordflags.NOSPLIT)
        if self._parserstate & parserflags.COMPASSIGN:
            tokenword.flags.add(wordflags.NOGLOB)

        # bashlex/parse.y L4865
        if self._command_token_position(self._last_read_token):
            pass

        # {name}>file style varassign redirection
        if tokenword.value[0] == '{' and tokenword.value[-1] == '}' and c in '<>':
            if shutils.legal_identifier(tokenword.value[1:]):
                # XXX is this needed?
                tokenword.value = tokenword.value[1:]
                tokenword.ttype = tokentype.REDIR_WORD

            return tokenword

        # both ASSIGNMENT and NOSPLIT present -> a real assignment word
        if len(tokenword.flags & set([wordflags.ASSIGNMENT, wordflags.NOSPLIT])) == 2:
            tokenword.ttype = tokentype.ASSIGNMENT_WORD

        if self._last_read_token.ttype == tokentype.FUNCTION:
            self._parserstate.add(parserflags.ALLOWOPNBRC)
            self._function_dstart = self._line_number
        elif self._last_read_token.ttype in (tokentype.CASE, tokentype.SELECT, tokentype.FOR):
            pass # bashlex/parse.y L4907

        return tokenword
|
||
|
|
|
||
|
|
    def _parse_comsub(self, doublequotes, open, close, parsingcommand=False,
                      dquote=False, firstclose=False):
        '''Read the body of a command substitution $(...), tracking enough
        shell syntax (comments, case statements, heredocs) to tell which
        close characters really terminate it. Returns the consumed text,
        including the final close character.'''
        # arithmetic $(( )) is handled as a plain matched pair
        peekc = self._getc(False)
        self._ungetc(peekc)

        if peekc == '(':
            return self._parse_matched_pair(doublequotes, open, close)

        count = 1
        dollarok = True

        # only track case/comments when substituting an actual command
        checkcase = bool(parsingcommand and (doublequotes is None or doublequotes not in "'\"") and not dquote)
        checkcomment = checkcase

        startlineno = self._line_number
        heredelim = ''
        stripdoc = insideheredoc = insidecomment = insideword = insidecase = False
        readingheredocdelim = False
        wasdollar = passnextchar = False
        reservedwordok = True
        lexfirstind = -1
        lexrwlen = 0

        ret = ''

        while count:
            c = self._getc(doublequotes != "'" and not insidecomment and not passnextchar)

            if c is None:
                raise MatchedPairError(startlineno, 'unexpected EOF while looking for matching %r' % close, self)

            # bashlex/parse.y L3571
            if c == '\n':
                if readingheredocdelim and heredelim:
                    # delimiter line ended; the heredoc body starts here
                    readingheredocdelim = False
                    insideheredoc = True
                    lexfirstind = len(ret) + 1
                elif insideheredoc:
                    # was the line we just passed the closing delimiter?
                    tind = lexfirstind
                    while stripdoc and ret[tind] == '\t':
                        tind += 1
                    if ret[tind:] == heredelim:
                        stripdoc = insideheredoc = False
                        heredelim = ''
                        lexfirstind = -1
                    else:
                        lexfirstind = len(ret) + 1
            # bashlex/parse.y L3599
            if insideheredoc and c == close and count == 1:
                # the delimiter may sit directly before the close character
                tind = lexfirstind
                while stripdoc and ret[tind] == '\t':
                    tind += 1
                if ret[tind:] == heredelim:
                    stripdoc = insideheredoc = False
                    heredelim = ''
                    lexfirstind = -1

            # comment and heredoc bodies are copied through verbatim
            if insidecomment or insideheredoc:
                ret += c

                if insidecomment and c == '\n':
                    insidecomment = False

                continue

            if passnextchar:
                passnextchar = False
                # XXX is this needed?
                # if doublequotes != "'" and c == '\n':
                #     if ret:
                #         ret = ret[:-1]
                # else:
                #     ret += c
                ret += c
                continue

            # track whether we're inside a word and how far into it
            if _shellbreak(c):
                insideword = False
            else:
                if insideword:
                    lexwlen += 1
                else:
                    insideword = True
                    lexwlen = 0

            if _shellblank(c) and not readingheredocdelim and not lexrwlen:
                ret += c
                continue

            # bashlex/parse.y L3686
            if readingheredocdelim:
                if lexfirstind == -1 and not _shellbreak(c):
                    lexfirstind = len(ret)
                elif lexfirstind >= 0 and not passnextchar and _shellbreak(c):
                    if not heredelim:
                        nestret = ret[lexfirstind:]
                        heredelim = shutils.removequotes(nestret)
                    if c == '\n':
                        insideheredoc = True
                        readingheredocdelim = False
                        lexfirstind = len(ret) + 1
                    else:
                        lexfirstind = -1

            # after a command separator a reserved word is acceptable again
            if not reservedwordok and checkcase and not insidecomment and (_shellmeta(c) or c == '\n'):
                ret += c
                peekc = self._getc(True)
                if c == peekc and c in '&|;':
                    ret += peekc
                    reservedwordok = True
                    lexrwlen = 0
                    continue
                elif c == '\n' or c in '&|;':
                    self._ungetc(peekc)
                    reservedwordok = True
                    lexrwlen = 0
                    continue
                elif c is None:
                    raise MatchedPairError(startlineno, 'unexpected EOF while looking for matching %r' % close, self) # pragma: no coverage
                else:
                    ret = ret[:-1]
                    self._ungetc(peekc)

            # bashlex/parse.y L3761
            if reservedwordok:
                if c.islower():
                    ret += c
                    lexrwlen += 1
                    continue
                elif lexrwlen == 4 and _shellbreak(c):
                    # the 4-letter words 'case'/'esac' toggle pattern mode
                    if ret[-4:] == 'case':
                        insidecase = True
                    elif ret[-4:] == 'esac':
                        insidecase = False
                    reservedwordok = False
                elif (checkcomment and c == '#' and (lexrwlen == 0 or
                                                     (insideword and lexwlen == 0))):
                    pass
                elif (not insidecase and (_shellblank(c) or c == '\n') and
                      lexrwlen == 2 and ret[-2:] == 'do'):
                    lexrwlen = 0
                elif insidecase and c != '\n':
                    reservedwordok = False
                elif not _shellbreak(c):
                    reservedwordok = False

            # heredoc redirection operator << / <<-
            if not insidecomment and checkcase and c == '<':
                ret += c
                peekc = self._getc(True)
                if peekc is None:
                    raise MatchedPairError(startlineno, 'unexpected EOF while looking for matching %r' % close, self)
                if peekc == c:
                    ret += peekc
                    peekc = self._getc(True)
                    if peekc is None:
                        raise MatchedPairError(startlineno, 'unexpected EOF while looking for matching %r' % close, self)
                    elif peekc == '-':
                        # <<- strips leading tabs from the heredoc body
                        ret += peekc
                        stripdoc = True
                    else:
                        self._ungetc(peekc)

                    if peekc != '<':
                        readingheredocdelim = True
                        lexfirstind = -1

                    continue
                else:
                    c = peekc
            elif checkcomment and not insidecomment and c == '#' and ((reservedwordok
                    and lexrwlen == 0) or insideword or lexwlen == 0):
                insidecomment = True

            # count the open/close pair, but not inside a case pattern
            if c == close and not insidecase:
                count -= 1
            elif not firstclose and not insidecase and c == open:
                count += 1

            ret += c

            if count == 0:
                break

            if c == '\\':
                passnextchar = True

            # bashlex/parse.y L3897
            if _shellquote(c):
                self._push_delimiter(c)
                try:
                    if wasdollar and c == "'":
                        # $'...' allows backslash escapes
                        nestret = self._parse_matched_pair(c, c, c,
                                                           allowesc=True,
                                                           dquote=True)
                    else:
                        nestret = self._parse_matched_pair(c, c, c,
                                                           dquote=True)
                finally:
                    self._pop_delimiter()

                # XXX is this necessary?
                # if wasdollar and c == "'" and not rdquote:
                #     if not rdquote:
                #         nestret = shutils.single_quote(nestret)
                #     ret = ret[:-2]
                # elif wasdollar and c == '"' and not rdquote:
                #     nestret = shutils.double_quote(nestret)
                #     ret = ret[:-2]

                ret += nestret
            # check for $(), $[], or ${} inside command substitution
            elif wasdollar and c in '({[':
                if not insidecase and open == c:
                    count -= 1
                if c == '(':
                    nestret = self._parse_comsub(None, '(', ')',
                                                 parsingcommand=True,
                                                 dquote=False)
                elif c == '{':
                    nestret = self._parse_matched_pair(None, '{', '}',
                                                       firstclose=True,
                                                       dolbrace=True,
                                                       dquote=True)
                elif c == '[':
                    nestret = self._parse_matched_pair(None, '[', ']',
                                                       dquote=True)

                ret += nestret

            wasdollar = c == '$'

        return ret
|
||
|
|
|
||
|
|
def _parse_matched_pair(self, doublequotes, open, close, parsingcommand=False, allowesc=False, dquote=False, firstclose=False, dolbrace=False, arraysub=False):
|
||
|
|
count = 1
|
||
|
|
dolbracestate = ''
|
||
|
|
if dolbrace:
|
||
|
|
dolbracestate = 'param'
|
||
|
|
|
||
|
|
insidecomment = False
|
||
|
|
lookforcomments = False
|
||
|
|
sawdollar = False
|
||
|
|
|
||
|
|
if parsingcommand and doublequotes not in "`'\"" and dquote:
|
||
|
|
lookforcomments = True
|
||
|
|
|
||
|
|
rdquote = True if doublequotes == '"' else dquote
|
||
|
|
passnextchar = False
|
||
|
|
startlineno = self._line_number
|
||
|
|
|
||
|
|
ret = ''
|
||
|
|
|
||
|
|
def handledollarword():
|
||
|
|
if open == c:
|
||
|
|
count -= 1
|
||
|
|
|
||
|
|
# bashlex/parse.y L3486
|
||
|
|
if c == '(':
|
||
|
|
return self._parse_comsub(None, '(', ')',
|
||
|
|
parsingcommand=True,
|
||
|
|
dquote=False)
|
||
|
|
elif c == '{':
|
||
|
|
return self._parse_matched_pair(None, '{', '}',
|
||
|
|
firstclose=True,
|
||
|
|
dquote=rdquote,
|
||
|
|
dolbrace=True)
|
||
|
|
elif c == '[':
|
||
|
|
return self._parse_matched_pair(None, '[', ']', dquote=rdquote)
|
||
|
|
else:
|
||
|
|
assert False # pragma: no cover
|
||
|
|
|
||
|
|
while count:
|
||
|
|
c = self._getc(doublequotes != "'" and not passnextchar)
|
||
|
|
if c is None:
|
||
|
|
raise MatchedPairError(startlineno, 'unexpected EOF while looking for matching %r' % close, self)
|
||
|
|
|
||
|
|
# bashlex/parse.y L3285
|
||
|
|
# if c == '\n':
|
||
|
|
# continue
|
||
|
|
|
||
|
|
if insidecomment:
|
||
|
|
ret += c
|
||
|
|
if c == '\n':
|
||
|
|
insidecomment = False
|
||
|
|
continue
|
||
|
|
elif lookforcomments and not insidecomment and c == '#' and (not ret
|
||
|
|
or ret[-1] == '\n' or _shellblank(ret[-1])):
|
||
|
|
insidecomment = True
|
||
|
|
|
||
|
|
# last char was backslash
|
||
|
|
if passnextchar:
|
||
|
|
passnextchar = False
|
||
|
|
#if doublequotes != "'" and c == '\n':
|
||
|
|
# if ret:
|
||
|
|
# ret = ret[:-1]
|
||
|
|
# continue
|
||
|
|
ret += c
|
||
|
|
continue
|
||
|
|
elif c == close:
|
||
|
|
count -= 1
|
||
|
|
elif open != close and sawdollar and open == '{' and c == open:
|
||
|
|
count += 1
|
||
|
|
elif not firstclose and c == open:
|
||
|
|
count += 1
|
||
|
|
|
||
|
|
ret += c
|
||
|
|
if count == 0:
|
||
|
|
break
|
||
|
|
|
||
|
|
if open == "'":
|
||
|
|
if allowesc and c == "\\":
|
||
|
|
passnextchar = True
|
||
|
|
continue
|
||
|
|
if c == "\\":
|
||
|
|
passnextchar = True
|
||
|
|
if dolbrace:
|
||
|
|
if dolbracestate == 'param':
|
||
|
|
if len(ret) > 1:
|
||
|
|
dd = {'%' : 'quote', '#' : 'quote', '/' : 'quote2', '^' : 'quote',
|
||
|
|
',' : 'quote'}
|
||
|
|
if c in dd:
|
||
|
|
dolbracestate = dd[c]
|
||
|
|
elif c in '#%^,~:-=?+/':
|
||
|
|
dolbracestate = 'op'
|
||
|
|
if dolbracestate == 'op' and c in '#%^,~:-=?+/':
|
||
|
|
dolbracestate = 'word'
|
||
|
|
|
||
|
|
if dolbracestate not in 'quote2' and dquote and dolbrace and c == "'":
|
||
|
|
continue
|
||
|
|
|
||
|
|
if open != close:
|
||
|
|
if _shellquote(c):
|
||
|
|
self._push_delimiter(c)
|
||
|
|
try:
|
||
|
|
if sawdollar and "'":
|
||
|
|
nestret = self._parse_matched_pair(c, c, c, parsingcommand=parsingcommand, allowesc=True, dquote=dquote, firstclose=firstclose, dolbrace=dolbrace)
|
||
|
|
else:
|
||
|
|
nestret = self._parse_matched_pair(c, c, c, parsingcommand=parsingcommand, allowesc=allowesc, dquote=dquote, firstclose=firstclose, dolbrace=dolbrace)
|
||
|
|
finally:
|
||
|
|
self._pop_delimiter()
|
||
|
|
|
||
|
|
# bashlex/parse.y L3419
|
||
|
|
if sawdollar and c == "'":
|
||
|
|
pass
|
||
|
|
elif sawdollar and c == '"':
|
||
|
|
ret = ret[:-2] # back up before the $"
|
||
|
|
|
||
|
|
ret += nestret
|
||
|
|
elif arraysub and sawdollar and c in '({[':
|
||
|
|
# goto parse_dollar_word
|
||
|
|
ret += handledollarword()
|
||
|
|
elif open == '"' and c == '`':
|
||
|
|
ret += self._parse_matched_pair(None, '`', '`', parsingcommand=parsingcommand, allowesc=allowesc, dquote=dquote, firstclose=firstclose, dolbrace=dolbrace)
|
||
|
|
elif open != '`' and sawdollar and c in '({[':
|
||
|
|
ret += handledollarword()
|
||
|
|
|
||
|
|
sawdollar = c == '$'
|
||
|
|
|
||
|
|
return ret
|
||
|
|
|
||
|
|
|
||
|
|
def _is_assignment(self, value, iscompassign):
|
||
|
|
c = value[0]
|
||
|
|
|
||
|
|
def legalvariablechar(x):
|
||
|
|
return x.isalnum() or x == '_'
|
||
|
|
|
||
|
|
if not c.isalpha() and c != '_':
|
||
|
|
return
|
||
|
|
|
||
|
|
for i, c in enumerate(value):
|
||
|
|
if c == '=':
|
||
|
|
return i
|
||
|
|
|
||
|
|
# bash/general.c L289
|
||
|
|
if c == '+' and i + 1 < len(value) and value[i+1] == '=':
|
||
|
|
return i+1
|
||
|
|
|
||
|
|
if not legalvariablechar(c):
|
||
|
|
return False
|
||
|
|
|
||
|
|
def _command_token_position(self, token):
    '''check whether the given token sits where a simple command may start'''
    # tokens that separate case-statement clauses never start a command
    caseseptokens = (tokentype.SEMI_SEMI,
                     tokentype.SEMI_AND,
                     tokentype.SEMI_SEMI_AND)
    return (token.ttype == tokentype.ASSIGNMENT_WORD or
            self._parserstate & parserflags.REDIRLIST or
            (token.ttype not in caseseptokens and
             self._reserved_word_acceptable(token)))
def _assignment_acceptable(self, token):
    '''an assignment word is acceptable where a command may start, as long
    as we are not currently parsing a case pattern list'''
    in_case_pattern = self._parserstate & parserflags.CASEPAT
    return self._command_token_position(token) and not in_case_pattern
def _time_command_acceptable(self):
    '''stub for bash/parse.y time_command_acceptable()

    always returns None (falsy) — presumably this gates recognition of
    the TIME reserved word, which this port never special-cases;
    NOTE(review): confirm against the caller before relying on this
    '''
    pass
def _reserved_word_acceptable(self, tok):
    '''decide whether a reserved word may be recognized after token tok'''
    if not tok:
        return True
    if tok.ttype in _reserved or tok.value in _reserved:
        return True

    # bash/parse.y L4955 cOPROCESS_SUPPORT: the word naming a function
    # being defined may also be followed by a reserved word
    return (self._last_read_token.ttype == tokentype.WORD and
            self._token_before_that.ttype == tokentype.FUNCTION)
def _pop_delimiter(self):
|
||
|
|
self._dstack.pop()
|
||
|
|
|
||
|
|
def _push_delimiter(self, c):
|
||
|
|
self._dstack.append(c)
|
||
|
|
|
||
|
|
def _current_delimiter(self):
|
||
|
|
if self._dstack:
|
||
|
|
return self._dstack[-1]
|
||
|
|
|
||
|
|
def _ungetc(self, c):
|
||
|
|
if (self._shell_input_line and self._shell_input_line_index
|
||
|
|
and self._shell_input_line_index <= len(self._shell_input_line)):
|
||
|
|
self._shell_input_line_index -= 1
|
||
|
|
else:
|
||
|
|
self._eol_ungetc_lookahead = c
|
||
|
|
|
||
|
|
def _getc(self, remove_quoted_newline=True):
|
||
|
|
if self._eol_ungetc_lookahead is not None:
|
||
|
|
c = self._eol_ungetc_lookahead
|
||
|
|
self._eol_ungetc_lookahead = None
|
||
|
|
return c
|
||
|
|
|
||
|
|
# bash/parse.y L2220
|
||
|
|
|
||
|
|
while True:
|
||
|
|
if self._shell_input_line_index < len(self._shell_input_line):
|
||
|
|
c = self._shell_input_line[self._shell_input_line_index]
|
||
|
|
self._shell_input_line_index += 1
|
||
|
|
else:
|
||
|
|
c = None
|
||
|
|
|
||
|
|
if c == '\\' and remove_quoted_newline and self._shell_input_line[self._shell_input_line_index] == '\n':
|
||
|
|
self._line_number += 1
|
||
|
|
# skip past the newline
|
||
|
|
self._shell_input_line_index += 1
|
||
|
|
continue
|
||
|
|
else:
|
||
|
|
return c
|
||
|
|
|
||
|
|
#if c is None and self._shell_input_line_terminator is None:
|
||
|
|
# if self._shell_input_line_index != 0:
|
||
|
|
# return '\n'
|
||
|
|
# else:
|
||
|
|
# return None
|
||
|
|
|
||
|
|
#return c
|
||
|
|
|
||
|
|
def _discard_until(self, character):
|
||
|
|
c = self._getc(False)
|
||
|
|
while c is not None and c != character:
|
||
|
|
c = self._getc(False)
|
||
|
|
if c is not None:
|
||
|
|
self._ungetc(c)
|
||
|
|
|
||
|
|
def _recordpos(self, relativeoffset=0):
|
||
|
|
'''record the current index of the tokenizer into the positions stack
|
||
|
|
while adding relativeoffset from it'''
|
||
|
|
self._positions.append(self._shell_input_line_index - relativeoffset)
|
||
|
|
|
||
|
|
def readline(self, removequotenewline):
    '''read characters up to and including the next newline and return
    them as a single string

    returns None when end of input is reached before any character is
    read; a missing final newline is synthesized.  when
    removequotenewline is true, backslash-newline pairs are dropped as
    line continuations instead of being copied into the result
    '''
    chars = []
    escaped = False
    while True:
        c = self._getc()
        if c is None:
            if not chars:
                return None
            c = '\n'

        if escaped:
            escaped = False
            chars.append(c)
        elif removequotenewline and c == '\\':
            nextc = self._getc()
            if nextc == '\n':
                # line continuation: drop the backslash and the newline
                self._line_number += 1
                continue
            self._ungetc(nextc)
            escaped = True
            chars.append(c)
        else:
            chars.append(c)

        if c == '\n':
            return ''.join(chars)
def _peekc(self, *args):
|
||
|
|
peek_char = self._getc(*args)
|
||
|
|
# only unget if we actually read something
|
||
|
|
if peek_char is not None:
|
||
|
|
self._ungetc(peek_char)
|
||
|
|
return peek_char
|
||
|
|
|
||
|
|
def _specialcasetokens(self, tokstr):
|
||
|
|
if (self._last_read_token.ttype == tokentype.WORD and
|
||
|
|
self._token_before_that.ttype in (tokentype.FOR,
|
||
|
|
tokentype.CASE,
|
||
|
|
tokentype.SELECT) and
|
||
|
|
tokstr == 'in'):
|
||
|
|
if self._token_before_that.ttype == tokentype.CASE:
|
||
|
|
self._parserstate.add(parserflags.CASEPAT)
|
||
|
|
self._esacs_needed_count += 1
|
||
|
|
return tokentype.IN
|
||
|
|
|
||
|
|
if (self._last_read_token.ttype == tokentype.WORD and
|
||
|
|
self._token_before_that.ttype in (tokentype.FOR, tokentype.SELECT) and
|
||
|
|
tokstr == 'do'):
|
||
|
|
return tokentype.DO
|
||
|
|
|
||
|
|
if self._esacs_needed_count:
|
||
|
|
self._esacs_needed_count -= 1
|
||
|
|
if tokstr == 'esac':
|
||
|
|
self._parserstate.discard(parserflags.CASEPAT)
|
||
|
|
return tokentype.ESAC
|
||
|
|
|
||
|
|
if self._parserstate & parserflags.ALLOWOPNBRC:
|
||
|
|
self._parserstate.discard(parserflags.ALLOWOPNBRC)
|
||
|
|
if tokstr == '{':
|
||
|
|
self._open_brace_count += 1
|
||
|
|
# bash/parse.y L2887
|
||
|
|
return tokentype.LEFT_CURLY
|
||
|
|
|
||
|
|
if (self._last_read_token.ttype == tokentype.ARITH_FOR_EXPRS and
|
||
|
|
tokstr == 'do'):
|
||
|
|
return tokentype.DO
|
||
|
|
|
||
|
|
if (self._last_read_token.ttype == tokentype.ARITH_FOR_EXPRS and
|
||
|
|
tokstr == '{'):
|
||
|
|
self._open_brace_count += 1
|
||
|
|
return tokentype.LEFT_CURLY
|
||
|
|
|
||
|
|
if (self._open_brace_count and
|
||
|
|
self._reserved_word_acceptable(self._last_read_token) and
|
||
|
|
tokstr == '}'):
|
||
|
|
self._open_brace_count -= 1
|
||
|
|
return tokentype.RIGHT_CURLY
|
||
|
|
|
||
|
|
if self._last_read_token.ttype == tokentype.TIME and tokstr == '-p':
|
||
|
|
return tokentype.TIMEOPT
|
||
|
|
|
||
|
|
if self._last_read_token.ttype == tokentype.TIMEOPT and tokstr == '--':
|
||
|
|
return tokentype.TIMEIGN
|
||
|
|
|
||
|
|
if self._parserstate & parserflags.CONDEXPR and tokstr == ']]':
|
||
|
|
return tokentype.COND_END
|