382 lines
13 KiB
Python
382 lines
13 KiB
Python
import copy
|
|
|
|
from bashlex import ast, flags, tokenizer, errors
|
|
|
|
def _recursiveparse(parserobj, base, sindex, tokenizerargs=None):
|
|
# TODO: fix this hack that prevents mutual import
|
|
from bashlex import parser
|
|
|
|
tok = parserobj.tok
|
|
|
|
if tokenizerargs is None:
|
|
tokenizerargs = {'parserstate' : copy.copy(tok._parserstate),
|
|
'lastreadtoken' : tok._last_read_token,
|
|
'tokenbeforethat' : tok._token_before_that,
|
|
'twotokensago' : tok._two_tokens_ago}
|
|
|
|
string = base[sindex:]
|
|
newlimit = parserobj._expansionlimit
|
|
if newlimit is not None:
|
|
newlimit -= 1
|
|
p = parser._parser(string, tokenizerargs=tokenizerargs,
|
|
expansionlimit=newlimit)
|
|
node = p.parse()
|
|
|
|
endp = node.pos[1]
|
|
_adjustpositions(node, sindex, len(base))
|
|
|
|
return node, endp
|
|
|
|
def _parsedolparen(parserobj, base, sindex):
|
|
copiedps = copy.copy(parserobj.parserstate)
|
|
copiedps.add(flags.parser.CMDSUBST)
|
|
copiedps.add(flags.parser.EOFTOKEN)
|
|
string = base[sindex:]
|
|
|
|
tokenizerargs = {'eoftoken' : tokenizer.token(tokenizer.tokentype.RIGHT_PAREN, ')'),
|
|
'parserstate' : copiedps,
|
|
'lastreadtoken' : parserobj.tok._last_read_token,
|
|
'tokenbeforethat' : parserobj.tok._token_before_that,
|
|
'twotokensago' : parserobj.tok._two_tokens_ago}
|
|
|
|
node, endp = _recursiveparse(parserobj, base, sindex, tokenizerargs)
|
|
|
|
if string[endp] != ')':
|
|
while endp > 0 and string[endp-1] == '\n':
|
|
endp -= 1
|
|
|
|
return node, sindex + endp
|
|
|
|
def _extractcommandsubst(parserobj, string, sindex, sxcommand=False):
|
|
if string[sindex] == '(':
|
|
raise NotImplementedError('arithmetic expansion')
|
|
#return _extractdelimitedstring(parserobj, string, sindex, '$(', '(', '(', sxcommand=True)
|
|
else:
|
|
node, si = _parsedolparen(parserobj, string, sindex)
|
|
si += 1
|
|
return ast.node(kind='commandsubstitution', command=node, pos=(sindex-2, si)), si
|
|
|
|
def _extractprocesssubst(parserobj, string, sindex):
|
|
#return _extractdelimitedstring(tok, string, sindex, starter, '(', ')', sxcommand=True)
|
|
node, si = _parsedolparen(parserobj, string, sindex)
|
|
return node, si + 1
|
|
|
|
#def _extractdelimitedstring(parserobj, string, sindex, opener, altopener, closer,
|
|
# sxcommand=False):
|
|
# parts = []
|
|
# incomment = False
|
|
# passchar = False
|
|
# nestinglevel = 1
|
|
# i = sindex
|
|
|
|
# while nestinglevel:
|
|
# if i >= len(string):
|
|
# break
|
|
# c = string[i]
|
|
# if incomment:
|
|
# if c == '\n':
|
|
# incomment = False
|
|
# i += 1
|
|
# continue
|
|
# elif passchar:
|
|
# passchar = False
|
|
# i += 1
|
|
# continue
|
|
|
|
# if sxcommand and c == '#' and (i == 0 or string[i-1] == '\n' or
|
|
# tokenizer._shellblank(string[i-1])):
|
|
# incomment = True
|
|
# i += 1
|
|
# continue
|
|
|
|
# if c == '\\':
|
|
# passchar = True
|
|
# i += 1
|
|
# continue
|
|
|
|
# if sxcommand and string[i:i+2] == '$(':
|
|
# si = i + 2
|
|
# node, si = _extractcommandsubst(parserobj, string, si, sxcommand=sxcommand)
|
|
# parts.append(node)
|
|
# i = si + 1
|
|
# continue
|
|
|
|
# if string.startswith(opener, i):
|
|
# si = i + len(opener)
|
|
# nodes, si = _extractdelimitedstring(parserobj, string, si, opener, altopener,
|
|
# closer, sxcommand=sxcommand)
|
|
# parts.extend(nodes)
|
|
# i = si + 1
|
|
# continue
|
|
|
|
# if string.startswith(altopener, i):
|
|
# si = i + len(altopener)
|
|
# nodes, si = _extractdelimitedstring(parserobj, string, si, altopener, altopener,
|
|
# closer, sxcommand=sxcommand)
|
|
# parts.extend(nodes)
|
|
# i = si + 1
|
|
# continue
|
|
|
|
# # 1327
|
|
# if string.startswith(closer, i):
|
|
# i += len(closer) - 1
|
|
# nestinglevel -= 1
|
|
# if nestinglevel == 0:
|
|
# break
|
|
|
|
# if c == '`':
|
|
# si = i + 1
|
|
# t = _stringextract(string, si, '`', sxcommand=sxcommand)
|
|
# i = si + 1
|
|
# continue
|
|
|
|
# if c in "'\"":
|
|
# si = i +1
|
|
# if c == '"':
|
|
# i = _skipsinglequoted(string, si)
|
|
# else:
|
|
# i = _skipdoublequoted(string, si)
|
|
# continue
|
|
|
|
# i += 1
|
|
|
|
# if i == len(string) and nestinglevel:
|
|
# raise errors.ParsingError('bad substitution: no closing %r in %s' % (closer, string))
|
|
|
|
# return parts, i
|
|
|
|
def _paramexpand(parserobj, string, sindex):
|
|
node = None
|
|
zindex = sindex + 1
|
|
c = string[zindex] if zindex < len(string) else None
|
|
if c and c in '0123456789$#?-!*@':
|
|
# XXX 7685
|
|
node = ast.node(kind='parameter', value=c,
|
|
pos=(sindex, zindex+1))
|
|
elif c == '{':
|
|
# XXX 7863
|
|
# TODO not start enough, doesn't consider escaping
|
|
zindex = string.find('}', zindex + 1)
|
|
node = ast.node(kind='parameter', value=string[sindex+2:zindex],
|
|
pos=(sindex, zindex+1))
|
|
# TODO
|
|
# return _parameterbraceexpand(string, zindex)
|
|
elif c == '(':
|
|
return _extractcommandsubst(parserobj, string, zindex + 1)
|
|
elif c == '[':
|
|
raise NotImplementedError('arithmetic substitution')
|
|
#return _extractarithmeticsubst(string, zindex + 1)
|
|
else:
|
|
tindex = zindex
|
|
for zindex in range(tindex, len(string) + 1):
|
|
if zindex == len(string):
|
|
break
|
|
if not string[zindex].isalnum() and not string[zindex] == '_':
|
|
break
|
|
temp1 = string[sindex:zindex]
|
|
if temp1:
|
|
return (ast.node(kind='parameter', value=temp1[1:], pos=(sindex, zindex)),
|
|
zindex)
|
|
|
|
if zindex < len(string):
|
|
zindex += 1
|
|
|
|
return node, zindex
|
|
|
|
def _adjustpositions(node_, base, endlimit):
|
|
class v(ast.nodevisitor):
|
|
def visitnode(self, node):
|
|
assert node.pos[1] + base <= endlimit
|
|
node.pos = (node.pos[0] + base, node.pos[1] + base)
|
|
visitor = v()
|
|
visitor.visit(node_)
|
|
|
|
def _expandwordinternal(parserobj, wordtoken, qheredocument, qdoublequotes, quoted, isexp):
|
|
# bash/subst.c L8132
|
|
istring = ''
|
|
parts = []
|
|
tindex = [0]
|
|
sindex = [0]
|
|
string = wordtoken.value
|
|
def nextchar():
|
|
sindex[0] += 1
|
|
if sindex[0] < len(string):
|
|
return string[sindex[0]]
|
|
def peekchar():
|
|
if sindex[0]+1 < len(string):
|
|
return string[sindex[0]+1]
|
|
|
|
while True:
|
|
if sindex[0] == len(string):
|
|
break
|
|
# goto finished_with_string
|
|
c = string[sindex[0]]
|
|
if c in '<>':
|
|
if (nextchar() != '(' or qheredocument or qdoublequotes or
|
|
(wordtoken.flags & set([flags.word.DQUOTE, flags.word.NOPROCSUB]))):
|
|
sindex[0] -= 1
|
|
|
|
# goto add_character
|
|
sindex[0] += 1
|
|
istring += c
|
|
else:
|
|
tindex = sindex[0] + 1
|
|
|
|
node, sindex[0] = _extractprocesssubst(parserobj, string, tindex)
|
|
|
|
parts.append(ast.node(kind='processsubstitution', command=node,
|
|
pos=(tindex - 2, sindex[0])))
|
|
istring += string[tindex - 2:sindex[0]]
|
|
# goto dollar_add_string
|
|
# TODO
|
|
# elif c == '=':
|
|
# pass
|
|
# elif c == ':':
|
|
# pass
|
|
elif c == '~':
|
|
if (wordtoken.flags & set([flags.word.NOTILDE, flags.word.DQUOTE]) or
|
|
(sindex[0] > 0 and not (wordtoken.flags & flags.word.NOTILDE)) or
|
|
qdoublequotes or qheredocument):
|
|
wordtoken.flags.clear()
|
|
wordtoken.flags.add(flags.word.ITILDE)
|
|
sindex[0] += 1
|
|
istring += c
|
|
else:
|
|
stopatcolon = wordtoken.flags & set([flags.word.ASSIGNRHS,
|
|
flags.word.ASSIGNMENT,
|
|
flags.word.TILDEEXP])
|
|
expand = True
|
|
for i in range(sindex[0], len(string)):
|
|
r = string[i]
|
|
if r == '/':
|
|
break
|
|
if r in "\\'\"":
|
|
expand = False
|
|
break
|
|
if stopatcolon and r == ':':
|
|
break
|
|
else:
|
|
# go one past the end if we didn't exit early
|
|
i += 1
|
|
|
|
if i > sindex[0] and expand:
|
|
node = ast.node(kind='tilde', value=string[sindex[0]:i],
|
|
pos=(sindex[0], i))
|
|
parts.append(node)
|
|
istring += string[sindex[0]:i]
|
|
sindex[0] = i
|
|
|
|
elif c == '$' and len(string) > 1:
|
|
tindex = sindex[0]
|
|
node, sindex[0] = _paramexpand(parserobj, string, sindex[0])
|
|
if node:
|
|
parts.append(node)
|
|
istring += string[tindex:sindex[0]]
|
|
elif c == '`':
|
|
tindex = sindex[0]
|
|
# bare instance of ``
|
|
if nextchar() == '`':
|
|
sindex[0] += 1
|
|
istring += '``'
|
|
else:
|
|
x = _stringextract(string, sindex[0], "`")
|
|
if x == -1:
|
|
raise errors.ParsingError('bad substitution: no closing "`" '
|
|
'in %s' % string)
|
|
else:
|
|
if wordtoken.flags & flags.word.NOCOMSUB:
|
|
pass
|
|
else:
|
|
sindex[0] = x
|
|
|
|
word = string[tindex+1:sindex[0]]
|
|
command, ttindex = _recursiveparse(parserobj, word, 0)
|
|
_adjustpositions(command, tindex+1, len(string))
|
|
ttindex += 1 # ttindex is on the closing char
|
|
|
|
# assert sindex[0] == ttindex
|
|
# go one past the closing `
|
|
sindex[0] += 1
|
|
|
|
node = ast.node(kind='commandsubstitution',
|
|
command=command,
|
|
pos=(tindex, sindex[0]))
|
|
parts.append(node)
|
|
istring += string[tindex:sindex[0]]
|
|
|
|
elif c == '\\':
|
|
istring += string[sindex[0]+1:sindex[0]+2]
|
|
sindex[0] += 2
|
|
elif c == '"':
|
|
sindex[0] += 1
|
|
continue
|
|
|
|
# 8513
|
|
#if qdoublequotes or qheredocument:
|
|
# sindex[0] += 1
|
|
#else:
|
|
# tindex = sindex[0] + 1
|
|
# parts, sindex[0] = _stringextractdoublequoted(string, sindex[0])
|
|
# if tindex == 1 and sindex[0] == len(string):
|
|
# quotedstate = 'wholly'
|
|
# else:
|
|
# quotedstate = 'partially'
|
|
|
|
elif c == "'":
|
|
# entire string surronded by single quotes, no expansion is
|
|
# going to happen
|
|
if sindex[0] == 0 and string[-1] == "'":
|
|
return [], string[1:-1]
|
|
|
|
# check if we're inside double quotes
|
|
if not qdoublequotes:
|
|
# look for the closing ', we know we have one or otherwise
|
|
# this wouldn't tokenize due to unmatched '
|
|
tindex = sindex[0]
|
|
sindex[0] = string.find("'", sindex[0]) + 1
|
|
|
|
istring += string[tindex+1:sindex[0]-1]
|
|
else:
|
|
# this is a single quote inside double quotes, add it
|
|
istring += c
|
|
sindex[0] += 1
|
|
else:
|
|
istring += string[sindex[0]:sindex[0]+1]
|
|
sindex[0] += 1
|
|
|
|
if parts:
|
|
class v(ast.nodevisitor):
|
|
def visitnode(self, node):
|
|
assert node.pos[1] + wordtoken.lexpos <= wordtoken.endlexpos
|
|
node.pos = (node.pos[0] + wordtoken.lexpos,
|
|
node.pos[1] + wordtoken.lexpos)
|
|
visitor = v()
|
|
for node in parts:
|
|
visitor.visit(node)
|
|
|
|
return parts, istring
|
|
|
|
def _stringextract(string, sindex, charlist, sxvarname=False):
|
|
found = False
|
|
i = sindex
|
|
while i < len(string):
|
|
c = string[i]
|
|
if c == '\\':
|
|
if i + 1 < len(string):
|
|
i += 1
|
|
else:
|
|
break
|
|
elif sxvarname and c == '[':
|
|
ni = _skipsubscript(string, i, 0)
|
|
if string[ni] == ']':
|
|
i = ni
|
|
elif c in charlist:
|
|
found = True
|
|
break
|
|
else:
|
|
i += 1
|
|
if found:
|
|
return i
|
|
else:
|
|
return -1
|