import os, copy

from bashlex import yacc, tokenizer, state, ast, subst, flags, errors, heredoc

def _partsspan(parts):
    return parts[0].pos[0], parts[-1].pos[1]

tokens = [e.name for e in tokenizer.tokentype]
precedence = (
    ('left', 'AMPERSAND', 'SEMICOLON', 'NEWLINE', 'EOF'),
    ('left', 'AND_AND', 'OR_OR'),
    ('right', 'BAR', 'BAR_AND')
)

def handleNotImplemented(p, type):
    if len(p) == 2:
        raise NotImplementedError('type = {%s}, token = {%s}' % (type, p[1]))
    else:
        raise NotImplementedError('type = {%s}, token = {%s}, parts = {%s}' % (type, p[1], p[2]))

def handleAssert(p, test):
    if not test:
        raise AssertionError('token = {%s}' % p[1])

def p_inputunit(p):
    '''inputunit : simple_list simple_list_terminator
                 | NEWLINE
                 | error NEWLINE
                 | EOF'''
    # XXX
    if p.lexer._parserstate & flags.parser.CMDSUBST:
        p.lexer._parserstate.add(flags.parser.EOFTOKEN)

    if isinstance(p[1], ast.node):
        p[0] = p[1]
        # accept right here in case the input contains more lines that are
        # not part of the current command
        p.accept()

def p_word_list(p):
    '''word_list : WORD
                 | word_list WORD'''
    parserobj = p.context
    if len(p) == 2:
        p[0] = [_expandword(parserobj, p.slice[1])]
    else:
        p[0] = p[1]
        p[0].append(_expandword(parserobj, p.slice[2]))

def p_redirection_heredoc(p):
    '''redirection : LESS_LESS WORD
                   | NUMBER LESS_LESS WORD
                   | REDIR_WORD LESS_LESS WORD
                   | LESS_LESS_MINUS WORD
                   | NUMBER LESS_LESS_MINUS WORD
                   | REDIR_WORD LESS_LESS_MINUS WORD'''
    parserobj = p.context
    assert isinstance(parserobj, _parser)

    output = ast.node(kind='word', word=p[len(p)-1], parts=[],
                      pos=p.lexspan(len(p)-1))
    if len(p) == 3:
        p[0] = ast.node(kind='redirect', input=None, type=p[1], heredoc=None,
                        output=output, pos=(p.lexpos(1), p.endlexpos(2)))
    else:
        p[0] = ast.node(kind='redirect', input=p[1], type=p[2], heredoc=None,
                        output=output, pos=(p.lexpos(1), p.endlexpos(3)))

    if p.slice[len(p)-2].ttype == tokenizer.tokentype.LESS_LESS:
        parserobj.redirstack.append((p[0], False))
    else:
        parserobj.redirstack.append((p[0], True))

def p_redirection(p):
    '''redirection : GREATER WORD
                   | LESS WORD
                   | NUMBER GREATER WORD
                   | NUMBER LESS WORD
                   | REDIR_WORD GREATER WORD
                   | REDIR_WORD LESS WORD
                   | GREATER_GREATER WORD
                   | NUMBER GREATER_GREATER WORD
                   | REDIR_WORD GREATER_GREATER WORD
                   | GREATER_BAR WORD
                   | NUMBER GREATER_BAR WORD
                   | REDIR_WORD GREATER_BAR WORD
                   | LESS_GREATER WORD
                   | NUMBER LESS_GREATER WORD
                   | REDIR_WORD LESS_GREATER WORD
                   | LESS_LESS_LESS WORD
                   | NUMBER LESS_LESS_LESS WORD
                   | REDIR_WORD LESS_LESS_LESS WORD
                   | LESS_AND NUMBER
                   | NUMBER LESS_AND NUMBER
                   | REDIR_WORD LESS_AND NUMBER
                   | GREATER_AND NUMBER
                   | NUMBER GREATER_AND NUMBER
                   | REDIR_WORD GREATER_AND NUMBER
                   | LESS_AND WORD
                   | NUMBER LESS_AND WORD
                   | REDIR_WORD LESS_AND WORD
                   | GREATER_AND WORD
                   | NUMBER GREATER_AND WORD
                   | REDIR_WORD GREATER_AND WORD
                   | GREATER_AND DASH
                   | NUMBER GREATER_AND DASH
                   | REDIR_WORD GREATER_AND DASH
                   | LESS_AND DASH
                   | NUMBER LESS_AND DASH
                   | REDIR_WORD LESS_AND DASH
                   | AND_GREATER WORD
                   | AND_GREATER_GREATER WORD'''
    parserobj = p.context
    if len(p) == 3:
        output = p[2]
        if p.slice[2].ttype == tokenizer.tokentype.WORD:
            output = _expandword(parserobj, p.slice[2])
        p[0] = ast.node(kind='redirect', input=None, type=p[1], heredoc=None,
                        output=output, pos=(p.lexpos(1), p.endlexpos(2)))
    else:
        output = p[3]
        if p.slice[3].ttype == tokenizer.tokentype.WORD:
            output = _expandword(parserobj, p.slice[3])
        p[0] = ast.node(kind='redirect', input=p[1], type=p[2], heredoc=None,
                        output=output, pos=(p.lexpos(1), p.endlexpos(3)))

def _expandword(parser, tokenword):
    if parser._expansionlimit == -1:
        # we enter this branch in the following conditions:
        # - currently parsing a substitution as a result of an expansion
        # - the previous expansion had limit == 0
        #
        # this means that this node is a descendant of a substitution in an
        # unexpanded word and will be filtered in the limit == 0 condition below
        #
        # (the reason we even expand when limit == 0 is to get quote removal)
        node = ast.node(kind='word', word=tokenword,
                        pos=(tokenword.lexpos, tokenword.endlexpos), parts=[])
        return node
    else:
        quoted = bool(tokenword.flags & flags.word.QUOTED)
        doublequoted = quoted and tokenword.value[0] == '"'

        # TODO set qheredocument
        parts, expandedword = subst._expandwordinternal(parser,
                                                        tokenword, 0,
                                                        doublequoted, 0, 0)

        # limit reached, don't include substitutions (still expanded to get
        # quote removal though)
        if parser._expansionlimit == 0:
            parts = [node for node in parts if 'substitution' not in node.kind]

        node = ast.node(kind='word', word=expandedword,
                        pos=(tokenword.lexpos, tokenword.endlexpos), parts=parts)
        return node

def p_simple_command_element(p):
    '''simple_command_element : WORD
                              | ASSIGNMENT_WORD
                              | redirection'''
    if isinstance(p[1], ast.node):
        p[0] = [p[1]]
        return

    parserobj = p.context
    p[0] = [_expandword(parserobj, p.slice[1])]

    # change the word node to an assignment if necessary
    if p.slice[1].ttype == tokenizer.tokentype.ASSIGNMENT_WORD:
        p[0][0].kind = 'assignment'

def p_redirection_list(p):
    '''redirection_list : redirection
                        | redirection_list redirection'''
    if len(p) == 2:
        p[0] = [p[1]]
    else:
        p[0] = p[1]
        p[0].append(p[2])

def p_simple_command(p):
    '''simple_command : simple_command_element
                      | simple_command simple_command_element'''

    p[0] = p[1]
    if len(p) == 3:
        p[0].extend(p[2])

def p_command(p):
    '''command : simple_command
               | shell_command
               | shell_command redirection_list
               | function_def
               | coproc'''
    if isinstance(p[1], ast.node):
        p[0] = p[1]
        if len(p) == 3:
            handleAssert(p, p[0].kind == 'compound')
            p[0].redirects.extend(p[2])
            handleAssert(p, p[0].pos[0] < p[0].redirects[-1].pos[1])
            p[0].pos = (p[0].pos[0], p[0].redirects[-1].pos[1])
    else:
        p[0] = ast.node(kind='command', parts=p[1], pos=_partsspan(p[1]))

def p_shell_command(p):
    '''shell_command : for_command
                     | case_command
                     | WHILE compound_list DO compound_list DONE
                     | UNTIL compound_list DO compound_list DONE
                     | select_command
                     | if_command
                     | subshell
                     | group_command
                     | arith_command
                     | cond_command
                     | arith_for_command'''
    if len(p) == 2:
        p[0] = p[1]
    else:
        # while or until
        handleAssert(p, p[2].kind == 'list')

        parts = _makeparts(p)
        kind = parts[0].word
        assert kind in ('while', 'until')
        p[0] = ast.node(kind='compound',
                        redirects=[],
                        list=[ast.node(kind=kind, parts=parts, pos=_partsspan(parts))],
                        pos=_partsspan(parts))

    handleAssert(p, p[0].kind == 'compound')

def _makeparts(p):
    parts = []
    for i in range(1, len(p)):
        if isinstance(p[i], ast.node):
            parts.append(p[i])
        elif isinstance(p[i], list):
            parts.extend(p[i])
        elif isinstance(p.slice[i], tokenizer.token):
            if p.slice[i].ttype == tokenizer.tokentype.WORD:
                parserobj = p.context
                parts.append(_expandword(parserobj, p.slice[i]))
            else:
                parts.append(ast.node(kind='reservedword', word=p[i],
                                      pos=p.lexspan(i)))
        else:
            pass

    return parts

def p_for_command(p):
    '''for_command : FOR WORD newline_list DO compound_list DONE
                   | FOR WORD newline_list LEFT_CURLY compound_list RIGHT_CURLY
                   | FOR WORD SEMICOLON newline_list DO compound_list DONE
                   | FOR WORD SEMICOLON newline_list LEFT_CURLY compound_list RIGHT_CURLY
                   | FOR WORD newline_list IN word_list list_terminator newline_list DO compound_list DONE
                   | FOR WORD newline_list IN word_list list_terminator newline_list LEFT_CURLY compound_list RIGHT_CURLY
                   | FOR WORD newline_list IN list_terminator newline_list DO compound_list DONE
                   | FOR WORD newline_list IN list_terminator newline_list LEFT_CURLY compound_list RIGHT_CURLY'''
    parts = _makeparts(p)
    # find the operatornode that we might have there due to
    # list_terminator/newline_list and convert it to a reservedword so its
    # considered as part of the for loop
    for i, part in enumerate(parts):
        if part.kind == 'operator' and part.op == ';':
            parts[i] = ast.node(kind='reservedword', word=';', pos=part.pos)
            break # there could be only one in there...

    p[0] = ast.node(kind='compound',
                    redirects=[],
                    list=[ast.node(kind='for', parts=parts, pos=_partsspan(parts))],
                    pos=_partsspan(parts))

def p_arith_for_command(p):
    '''arith_for_command : FOR ARITH_FOR_EXPRS list_terminator newline_list DO compound_list DONE
                         | FOR ARITH_FOR_EXPRS list_terminator newline_list LEFT_CURLY compound_list RIGHT_CURLY
                         | FOR ARITH_FOR_EXPRS DO compound_list DONE
                         | FOR ARITH_FOR_EXPRS LEFT_CURLY compound_list RIGHT_CURLY'''
    handleNotImplemented(p, 'arithmetic for')

def p_select_command(p):
    '''select_command : SELECT WORD newline_list DO list DONE
                      | SELECT WORD newline_list LEFT_CURLY list RIGHT_CURLY
                      | SELECT WORD SEMICOLON newline_list DO list DONE
                      | SELECT WORD SEMICOLON newline_list LEFT_CURLY list RIGHT_CURLY
                      | SELECT WORD newline_list IN word_list list_terminator newline_list DO list DONE
                      | SELECT WORD newline_list IN word_list list_terminator newline_list LEFT_CURLY list RIGHT_CURLY'''
    handleNotImplemented(p, 'select command')

def p_case_command(p):
    '''case_command : CASE WORD newline_list IN newline_list ESAC
                    | CASE WORD newline_list IN case_clause_sequence newline_list ESAC
                    | CASE WORD newline_list IN case_clause ESAC'''
    handleNotImplemented(p, 'case command')

def p_function_def(p):
    '''function_def : WORD LEFT_PAREN RIGHT_PAREN newline_list function_body
                    | FUNCTION WORD LEFT_PAREN RIGHT_PAREN newline_list function_body
                    | FUNCTION WORD newline_list function_body'''
    parts = _makeparts(p)
    body = parts[-1]
    name = parts[ast.findfirstkind(parts, 'word')]

    p[0] = ast.node(kind='function', name=name, body=body, parts=parts,
                    pos=_partsspan(parts))

def p_function_body(p):
    '''function_body : shell_command
                     | shell_command redirection_list'''
    handleAssert(p, p[1].kind == 'compound')

    p[0] = p[1]
    if len(p) == 3:
        p[0].redirects.extend(p[2])
        handleAssert(p, p[0].pos[0] < p[0].redirects[-1].pos[1])
        p[0].pos = (p[0].pos[0], p[0].redirects[-1].pos[1])

def p_subshell(p):
    '''subshell : LEFT_PAREN compound_list RIGHT_PAREN'''
    lparen = ast.node(kind='reservedword', word=p[1], pos=p.lexspan(1))
    rparen = ast.node(kind='reservedword', word=p[3], pos=p.lexspan(3))
    parts = [lparen, p[2], rparen]
    p[0] = ast.node(kind='compound', list=parts, redirects=[],
                    pos=_partsspan(parts))

def p_coproc(p):
    '''coproc : COPROC shell_command
              | COPROC shell_command redirection_list
              | COPROC WORD shell_command
              | COPROC WORD shell_command redirection_list
              | COPROC simple_command'''
    handleNotImplemented(p, 'coproc')

def p_if_command(p):
    '''if_command : IF compound_list THEN compound_list FI
                  | IF compound_list THEN compound_list ELSE compound_list FI
                  | IF compound_list THEN compound_list elif_clause FI'''
    # we currently don't distinguish the various lists that make up the
    # command, because it's not needed later on. if there will be a need
    # we can always add different nodes for elif/else.
    parts = _makeparts(p)
    p[0] = ast.node(kind='compound',
                    redirects=[],
                    list=[ast.node(kind='if', parts=parts, pos=_partsspan(parts))],
                    pos=_partsspan(parts))

def p_group_command(p):
    '''group_command : LEFT_CURLY compound_list RIGHT_CURLY'''
    lcurly = ast.node(kind='reservedword', word=p[1], pos=p.lexspan(1))
    rcurly = ast.node(kind='reservedword', word=p[3], pos=p.lexspan(3))
    parts = [lcurly, p[2], rcurly]
    p[0] = ast.node(kind='compound', list=parts, redirects=[],
                    pos=_partsspan(parts))

def p_arith_command(p):
    '''arith_command : ARITH_CMD'''
    handleNotImplemented(p, 'arithmetic command')

def p_cond_command(p):
    '''cond_command : COND_START COND_CMD COND_END'''
    handleNotImplemented(p, 'cond command')

def p_elif_clause(p):
    '''elif_clause : ELIF compound_list THEN compound_list
                   | ELIF compound_list THEN compound_list ELSE compound_list
                   | ELIF compound_list THEN compound_list elif_clause'''
    parts = []
    for i in range(1, len(p)):
        if isinstance(p[i], ast.node):
            parts.append(p[i])
        else:
            parts.append(ast.node(kind='reservedword', word=p[i], pos=p.lexspan(i)))
    p[0] = parts

def p_case_clause(p):
    '''case_clause : pattern_list
                   | case_clause_sequence pattern_list'''
    handleNotImplemented(p, 'case clause')

def p_pattern_list(p):
    '''pattern_list : newline_list pattern RIGHT_PAREN compound_list
                    | newline_list pattern RIGHT_PAREN newline_list
                    | newline_list LEFT_PAREN pattern RIGHT_PAREN compound_list
                    | newline_list LEFT_PAREN pattern RIGHT_PAREN newline_list'''
    handleNotImplemented(p, 'pattern list')

def p_case_clause_sequence(p):
    '''case_clause_sequence : pattern_list SEMI_SEMI
                            | case_clause_sequence pattern_list SEMI_SEMI
                            | pattern_list SEMI_AND
                            | case_clause_sequence pattern_list SEMI_AND
                            | pattern_list SEMI_SEMI_AND
                            | case_clause_sequence pattern_list SEMI_SEMI_AND'''
    handleNotImplemented(p, 'case clause')

def p_pattern(p):
    '''pattern : WORD
               | pattern BAR WORD'''
    handleNotImplemented(p, 'pattern')

def p_list(p):
    '''list : newline_list list0'''
    p[0] = p[2]

def p_compound_list(p):
    '''compound_list : list
                     | newline_list list1'''
    if len(p) == 2:
        p[0] = p[1]
    else:
        parts = p[2]
        if len(parts) > 1:
            p[0] = ast.node(kind='list', parts=parts, pos=_partsspan(parts))
        else:
            p[0] = parts[0]

def p_list0(p):
    '''list0 : list1 NEWLINE newline_list
             | list1 AMPERSAND newline_list
             | list1 SEMICOLON newline_list'''
    parts = p[1]
    if len(parts) > 1 or p.slice[2].ttype != tokenizer.tokentype.NEWLINE:
        parts.append(ast.node(kind='operator', op=p[2], pos=p.lexspan(2)))
        p[0] = ast.node(kind='list', parts=parts, pos=_partsspan(parts))
    else:
        p[0] = parts[0]

def p_list1(p):
    '''list1 : list1 AND_AND newline_list list1
             | list1 OR_OR newline_list list1
             | list1 AMPERSAND newline_list list1
             | list1 SEMICOLON newline_list list1
             | list1 NEWLINE newline_list list1
             | pipeline_command'''
    if len(p) == 2:
        p[0] = [p[1]]
    else:
        p[0] = p[1]
        # XXX newline
        p[0].append(ast.node(kind='operator', op=p[2], pos=p.lexspan(2)))
        p[0].extend(p[len(p) - 1])

def p_simple_list_terminator(p):
    '''simple_list_terminator : NEWLINE
                              | EOF'''
    pass

def p_list_terminator(p):
    '''list_terminator : NEWLINE
                       | SEMICOLON
                       | EOF'''
    if p[1] == ';':
        p[0] = ast.node(kind='operator', op=';', pos=p.lexspan(1))

def p_newline_list(p):
    '''newline_list : empty
                    | newline_list NEWLINE'''
    pass

def p_simple_list(p):
    '''simple_list : simple_list1
                   | simple_list1 AMPERSAND
                   | simple_list1 SEMICOLON'''
    tok = p.lexer
    heredoc.gatherheredocuments(tok)

    if len(p) == 3 or len(p[1]) > 1:
        parts = p[1]
        if len(p) == 3:
            parts.append(ast.node(kind='operator', op=p[2], pos=p.lexspan(2)))
        p[0] = ast.node(kind='list', parts=parts, pos=_partsspan(parts))
    else:
        assert len(p[1]) == 1
        p[0] = p[1][0]

    if (len(p) == 2 and p.lexer._parserstate & flags.parser.CMDSUBST and
            p.lexer._current_token.nopos() == p.lexer._shell_eof_token):
        # accept the input
        p.accept()

def p_simple_list1(p):
    '''simple_list1 : simple_list1 AND_AND newline_list simple_list1
                    | simple_list1 OR_OR newline_list simple_list1
                    | simple_list1 AMPERSAND simple_list1
                    | simple_list1 SEMICOLON simple_list1
                    | pipeline_command'''
    if len(p) == 2:
        p[0] = [p[1]]
    else:
        p[0] = p[1]
        p[0].append(ast.node(kind='operator', op=p[2], pos=p.lexspan(2)))
        p[0].extend(p[len(p) - 1])

def p_pipeline_command(p):
    '''pipeline_command : pipeline
                        | BANG pipeline_command
                        | timespec pipeline_command
                        | timespec list_terminator
                        | BANG list_terminator'''
    if len(p) == 2:
        if len(p[1]) == 1:
            p[0] = p[1][0]
        else:
            p[0] = ast.node(kind='pipeline', parts=p[1],
                            pos=(p[1][0].pos[0], p[1][-1].pos[1]))
    else:
        # XXX timespec
        node = ast.node(kind='reservedword', word='!', pos=p.lexspan(1))
        if p[2].kind == 'pipeline':
            p[0] = p[2]
            p[0].parts.insert(0, node)
            p[0].pos = (p[0].parts[0].pos[0], p[0].parts[-1].pos[1])
        else:
            p[0] = ast.node(kind='pipeline', parts=[node, p[2]],
                            pos=(node.pos[0], p[2].pos[1]))

def p_pipeline(p):
    '''pipeline : pipeline BAR newline_list pipeline
                | pipeline BAR_AND newline_list pipeline
                | command'''
    if len(p) == 2:
        p[0] = [p[1]]
    else:
        p[0] = p[1]
        p[0].append(ast.node(kind='pipe', pipe=p[2], pos=p.lexspan(2)))
        p[0].extend(p[len(p) - 1])

def p_timespec(p):
    '''timespec : TIME
                | TIME TIMEOPT
                | TIME TIMEOPT TIMEIGN'''
    handleNotImplemented(p, 'time command')

def p_empty(p):
    '''empty :'''
    pass

def p_error(p):
    assert isinstance(p, tokenizer.token)

    if p.ttype == tokenizer.tokentype.EOF:
        raise errors.ParsingError('unexpected EOF',
                                  p.lexer.source,
                                  len(p.lexer.source))
    else:
        raise errors.ParsingError('unexpected token %r' % p.value,
                                  p.lexer.source, p.lexpos)

yaccparser = yacc.yacc(outputdir=os.path.dirname(__file__),         
                        debug=False)

# some hack to fix yacc's reduction on command substitutions:
# which state to fix is derived from static transition tables
# as states are changeable among python versions and architectures
# the only state that is considered fixed is the initial state: 0
def get_correction_states():
    reduce = yaccparser.goto[0]['simple_list'] #~10
    state2 = yaccparser.action[reduce]['NEWLINE'] #63
    state1 = yaccparser.goto[reduce]['simple_list_terminator'] #~10
    return state1, state2

def get_correction_rightparen_states():
    state1 = yaccparser.goto[0]['pipeline_command']
    state2 = yaccparser.goto[0]['simple_list1'] #11
    state_temp = yaccparser.action[state2]['SEMICOLON'] #65
    state3 = yaccparser.goto[state_temp]['simple_list1']
    return state1, state2, state3

for tt in tokenizer.tokentype:
    states = get_correction_states()
    yaccparser.action[states[0]][tt.name] = -1
    yaccparser.action[states[1]][tt.name] = -141

states = get_correction_rightparen_states()
yaccparser.action[states[0]]['RIGHT_PAREN'] = -155
yaccparser.action[states[1]]['RIGHT_PAREN'] = -148
yaccparser.action[states[2]]['RIGHT_PAREN'] = -154

def parsesingle(s, strictmode=True, expansionlimit=None, convertpos=False):
    '''like parse, but only consumes a single top level node, e.g. parsing
    'a\nb' will only return a node for 'a', leaving b unparsed'''
    p = _parser(s, strictmode=strictmode, expansionlimit=expansionlimit)
    tree = p.parse()
    if convertpos:
        ast.posconverter(s).visit(tree)
    return tree

def parse(s, strictmode=True, expansionlimit=None, convertpos=False):
    '''parse the input string, returning a list of nodes

    top level node kinds are:

    - command - a simple command
    - pipeline - a series of simple commands
    - list - a series of one or more pipelines
    - compound - contains constructs for { list; }, (list), if, for..

    leafs are word nodes (which in turn can also contain any of the
    aforementioned nodes due to command substitutions).

    when strictmode is set to False, we will:
    - skip reading a heredoc if we're at the end of the input

    expansionlimit is used to limit the amount of recursive parsing done due to
    command substitutions found during word expansion.
    '''
    p = _parser(s, strictmode=strictmode, expansionlimit=expansionlimit)
    parts = [p.parse()]

    class endfinder(ast.nodevisitor):
        def __init__(self):
            self.end = -1
        def visitheredoc(self, node, value):
            self.end = node.pos[1]

    # find the 'real' end incase we have a heredoc in there
    ef = _endfinder()
    ef.visit(parts[-1])
    index = max(parts[-1].pos[1], ef.end) + 1
    while index < len(s):
        part = _parser(s[index:], strictmode=strictmode).parse()

        if not isinstance(part, ast.node):
            break

        ast.posshifter(index).visit(part)
        parts.append(part)
        ef = _endfinder()
        ef.visit(parts[-1])
        index = max(parts[-1].pos[1], ef.end) + 1

    if convertpos:
        for tree in parts:
            ast.posconverter(s).visit(tree)

    return parts

def split(s):
    '''a utility function that mimics shlex.split but handles more
    complex shell constructs such as command substitutions inside words

    >>> list(split('a b"c"\\'d\\''))
    ['a', 'bcd']
    >>> list(split('a "b $(c)" $(d) \\'$(e)\\''))
    ['a', 'b $(c)', '$(d)', '$(e)']
    >>> list(split('a b\\n'))
    ['a', 'b', '\\n']
    '''
    p = _parser(s)
    for t in p.tok:
        if t.ttype == tokenizer.tokentype.WORD:
            quoted = bool(t.flags & flags.word.QUOTED)
            doublequoted = quoted and t.value[0] == '"'
            parts, expandedword = subst._expandwordinternal(p, t, 0,
                                                            doublequoted, 0, 0)
            yield expandedword
        else:
            yield s[t.lexpos:t.endlexpos]

class _parser(object):
    '''
    this class is mainly used to provide context to the productions
    when we're in the middle of parsing. as a hack, we shove it into the
    YaccProduction context attribute to make it accessible.
    '''
    def __init__(self, s, strictmode=True, expansionlimit=None, tokenizerargs=None):
        assert expansionlimit is None or isinstance(expansionlimit, int)

        self.s = s
        self._strictmode = strictmode
        self._expansionlimit = expansionlimit

        if tokenizerargs is None:
            tokenizerargs = {}
        self.parserstate = tokenizerargs.pop('parserstate', state.parserstate())

        self.tok = tokenizer.tokenizer(s,
                                       parserstate=self.parserstate,
                                       strictmode=strictmode,
                                       **tokenizerargs)

        self.redirstack = self.tok.redirstack

    def parse(self):
        # yacc.yacc returns a parser object that is not reentrant, it has
        # some mutable state. we make a shallow copy of it so no
        # state spills over to the next call to parse on it
        theparser = copy.copy(yaccparser)
        tree = theparser.parse(lexer=self.tok, context=self)

        return tree

class _endfinder(ast.nodevisitor):
    '''helper class to find the "real" end pos of a node that contains
    a heredoc. this is a hack because heredoc aren't really part of any node
    since they don't always follow the end of a node and might appear on
    a different line'''
    def __init__(self):
        self.end = -1
    def visitheredoc(self, node, value):
        self.end = node.pos[1]