aboutsummaryrefslogtreecommitdiffstats
path: root/meta
ModeNameSize
-rw-r--r--COPYING.GPLv217987logstatsplain
-rw-r--r--COPYING.MIT1035logstatsplain
d---------classes8094logstatsplain
d---------conf838logstatsplain
d---------files335logstatsplain
d---------lib60logstatsplain
d---------recipes-bsp728logstatsplain
d---------recipes-connectivity811logstatsplain
d---------recipes-core1408logstatsplain
d---------recipes-devtools3273logstatsplain
d---------recipes-extended2908logstatsplain
d---------recipes-gnome673logstatsplain
d---------recipes-graphics1769logstatsplain
d---------recipes-kernel675logstatsplain
d---------recipes-lsb464logstatsplain
d---------recipes-multimedia777logstatsplain
d---------recipes-rt102logstatsplain
d---------recipes-sato883logstatsplain
d---------recipes-support2540logstatsplain
-rw-r--r--recipes.txt1333logstatsplain
d---------site1613logstatsplain
(self, buf, result, eof): if not buf: raise NeedMore() if buf[0]=='\n': #Remove line continuations result[:] = ['', '', ''] elif self._is_escapable(buf[0]): result[-1] += buf[0] result += [''] else: #Keep as such result[:] = ['', '\\'+buf[0], ''] return 1, True def _parse_dquote(self, buf, result, eof): if not buf: raise NeedMore() pos, sep = find_chars(buf, '$\\`"') if pos==-1: raise NeedMore() result[-1] += ''.join(buf[:pos]) if sep=='"': result += ['"'] return pos+1, True else: #Keep everything until the separator and defer processing return pos, False def _parse_command(self, buf, result, eof): if not buf: raise NeedMore() chars = '$\\`"\'' if result[0] == '$(': chars += ')' pos, sep = find_chars(buf, chars) if pos == -1: raise NeedMore() result[-1] += ''.join(buf[:pos]) if (result[0]=='$(' and sep==')') or (result[0]=='`' and sep=='`'): result += [sep] return pos+1, True else: return pos, False def _parse_parameter(self, buf, result, eof): if not buf: raise NeedMore() pos, sep = find_chars(buf, '$\\`"\'}') if pos==-1: raise NeedMore() result[-1] += ''.join(buf[:pos]) if sep=='}': result += [sep] return pos+1, True else: return pos, False def _parse_dollar(self, buf, result, eof): sep = result[0] if sep=='$': if not buf: #TODO: handle empty $ raise NeedMore() if buf[0]=='(': if len(buf)==1: raise NeedMore() if buf[1]=='(': result[0] = '$((' buf[:2] = [] else: result[0] = '$(' buf[:1] = [] elif buf[0]=='{': result[0] = '${' buf[:1] = [] else: if buf[0] in self.SPECIAL_CHARSET: result[-1] = buf[0] read = 1 else: for read,c in enumerate(buf): if c not in self.NAME_CHARSET: break else: if not eof: raise NeedMore() read += 1 result[-1] += ''.join(buf[0:read]) if not result[-1]: result[:] = ['', result[0], ''] else: result += [''] return read,True sep = result[0] if sep=='$(': parsefunc = self._parse_command elif sep=='${': parsefunc = self._parse_parameter else: raise NotImplementedError(sep) pos, closed = parsefunc(buf, result, eof) return pos, closed def _parse(self, eof): buf = self._buffer stack = self._stack recurse = False while 1: if not stack or recurse: if not buf: raise NeedMore() if buf[0] not in ('"\\`$\''): raise ShellSyntaxError('Invalid quoted string sequence') stack.append([buf[0], '']) buf[:1] = [] recurse = False result = stack[-1] if result[0]=="'": parsefunc = self._parse_squote elif result[0]=='\\': parsefunc = self._parse_bquote elif result[0]=='"': parsefunc = self._parse_dquote elif result[0]=='`': parsefunc = self._parse_command elif result[0][0]=='$': parsefunc = self._parse_dollar else: raise NotImplementedError() read, closed = parsefunc(buf, result, eof) buf[:read] = [] if closed: if len(stack)>1: #Merge in parent expression parsed = stack.pop() stack[-1] += [parsed] stack[-1] += [''] else: break else: recurse = True def normalize_wordtree(wtree): """Fold back every literal sequence (delimited with empty strings) into parent sequence. """ def normalize(wtree): result = [] for part in wtree[1:-1]: if isinstance(part, list): part = normalize(part) if part[0]=='': #Move the part content back at current level result += part[1:-1] continue elif not part: #Remove empty strings continue result.append(part) if not result: result = [''] return [wtree[0]] + result + [wtree[-1]] return normalize(wtree) def make_wordtree(token, here_document=False): """Parse a delimited token and return a tree similar to the ones returned by WordLexer. token may contain any combinations of expansion/quoted fields and non-ones. """ tree = [''] remaining = token delimiters = '\\$`' if not here_document: delimiters += '\'"' while 1: pos, sep = find_chars(remaining, delimiters) if pos==-1: tree += [remaining, ''] return normalize_wordtree(tree) tree.append(remaining[:pos]) remaining = remaining[pos:] try: result, remaining = WordLexer(heredoc = here_document).add(remaining, True) except NeedMore: raise ShellSyntaxError('Invalid token "%s"') tree.append(result) def wordtree_as_string(wtree): """Rewrite an expression tree generated by make_wordtree as string.""" def visit(node, output): for child in node: if isinstance(child, list): visit(child, output) else: output.append(child) output = [] visit(wtree, output) return ''.join(output) def unquote_wordtree(wtree): """Fold the word tree while removing quotes everywhere. Other expansion sequences are joined as such. """ def unquote(wtree): unquoted = [] if wtree[0] in ('', "'", '"', '\\'): wtree = wtree[1:-1] for part in wtree: if isinstance(part, list): part = unquote(part) unquoted.append(part) return ''.join(unquoted) return unquote(wtree) class HereDocLexer: """HereDocLexer delimits whatever comes from the here-document starting newline not included to the closing delimiter line included. """ def __init__(self, op, delim): assert op in ('<<', '<<-') if not delim: raise ShellSyntaxError('invalid here document delimiter %s' % str(delim)) self._op = op self._delim = delim self._buffer = [] self._token = [] def add(self, data, eof): """If the here-document was delimited, return a tuple (content, remaining). Raise NeedMore() otherwise. """ self._buffer += list(data) self._parse(eof) token = ''.join(self._token) remaining = ''.join(self._buffer) self._token, self._remaining = [], [] return token, remaining def _parse(self, eof): while 1: #Look for first unescaped newline. Quotes may be ignored escaped = False for i,c in enumerate(self._buffer): if escaped: escaped = False elif c=='\\': escaped = True elif c=='\n': break else: i = -1 if i==-1 or self._buffer[i]!='\n': if not eof: raise NeedMore() #No more data, maybe the last line is closing delimiter line = ''.join(self._buffer) eol = '' self._buffer[:] = [] else: line = ''.join(self._buffer[:i]) eol = self._buffer[i] self._buffer[:i+1] = [] if self._op=='<<-': line = line.lstrip('\t') if line==self._delim: break self._token += [line, eol] if i==-1: break class Token: #TODO: check this is still in use OPERATOR = 'OPERATOR' WORD = 'WORD' def __init__(self): self.value = '' self.type = None def __getitem__(self, key): #Behave like a two elements tuple if key==0: return self.type if key==1: return self.value raise IndexError(key) class HereDoc: def __init__(self, op, name=None): self.op = op self.name = name self.pendings = [] TK_COMMA = 'COMMA' TK_AMPERSAND = 'AMP' TK_OP = 'OP' TK_TOKEN = 'TOKEN' TK_COMMENT = 'COMMENT' TK_NEWLINE = 'NEWLINE' TK_IONUMBER = 'IO_NUMBER' TK_ASSIGNMENT = 'ASSIGNMENT_WORD' TK_HERENAME = 'HERENAME' class Lexer: """Main lexer. Call add() until the script AST is returned. """ # Here-document handling makes the whole thing more complex because they basically # force tokens to be reordered: here-content must come right after the operator # and the here-document name, while some other tokens might be following the # here-document expression on the same line. # # So, here-doc states are basically: # *self._state==ST_NORMAL # - self._heredoc.op is None: no here-document # - self._heredoc.op is not None but name is: here-document operator matched, # waiting for the document name/delimiter # - self._heredoc.op and name are not None: here-document is ready, following # tokens are being stored and will be pushed again when the document is # completely parsed. # *self._state==ST_HEREDOC # - The here-document is being delimited by self._herelexer. Once it is done # the content is pushed in front of the pending token list then all these # tokens are pushed once again. ST_NORMAL = 'ST_NORMAL' ST_OP = 'ST_OP' ST_BACKSLASH = 'ST_BACKSLASH' ST_QUOTED = 'ST_QUOTED' ST_COMMENT = 'ST_COMMENT' ST_HEREDOC = 'ST_HEREDOC' #Match end of backquote strings RE_BACKQUOTE_END = re.compile(r'(?<!\\)(`)') def __init__(self, parent_state = None): self._input = [] self._pos = 0 self._token = '' self._type = TK_TOKEN self._state = self.ST_NORMAL self._parent_state = parent_state self._wordlexer = None self._heredoc = HereDoc(None) self._herelexer = None ### Following attributes are not used for delimiting token and can safely ### be changed after here-document detection (see _push_toke) # Count the number of tokens following a 'For' reserved word. Needed to # return an 'In' reserved word if it comes in third place. self._for_count = None def add(self, data, eof=False): """Feed the lexer with data. When eof is set to True, returns unconsumed data or raise if the lexer is in the middle of a delimiting operation. Raise NeedMore otherwise. """ self._input += list(data) self._parse(eof) self._input[:self._pos] = [] return ''.join(self._input) def _parse(self, eof): while self._state: if self._pos>=len(self._input): if not eof: raise NeedMore() elif self._state not in (self.ST_OP, self.ST_QUOTED, self.ST_HEREDOC): #Delimit the current token and leave cleanly self._push_token('') break else: #Let the sublexer handle the eof themselves pass if self._state==self.ST_NORMAL: self._parse_normal() elif self._state==self.ST_COMMENT: self._parse_comment() elif self._state==self.ST_OP: self._parse_op(eof) elif self._state==self.ST_QUOTED: self._parse_quoted(eof) elif self._state==self.ST_HEREDOC: self._parse_heredoc(eof) else: assert False, "Unknown state " + str(self._state) if self._heredoc.op is not None: raise ShellSyntaxError('missing here-document delimiter') def _parse_normal(self): c = self._input[self._pos] if c=='\n': self._push_token(c) self._token = c self._type = TK_NEWLINE self._push_token('') self._pos += 1 elif c in ('\\', '\'', '"', '`', '$'): self._state = self.ST_QUOTED elif is_partial_op(c): self._push_token(c) self._type = TK_OP self._token += c self._pos += 1 self._state = self.ST_OP elif is_blank(c): self._push_token(c) #Discard blanks self._pos += 1 elif self._token: self._token += c self._pos += 1 elif c=='#': self._state = self.ST_COMMENT self._type = TK_COMMENT self._pos += 1 else: self._pos += 1 self._token += c def _parse_op(self, eof): assert self._token while 1: if self._pos>=len(self._input): if not eof: raise NeedMore() c = '' else: c = self._input[self._pos] op = self._token + c if c and is_partial_op(op): #Still parsing an operator self._token = op self._pos += 1 else: #End of operator self._push_token(c) self._state = self.ST_NORMAL break def _parse_comment(self): while 1: if self._pos>=len(self._input): raise NeedMore() c = self._input[self._pos] if c=='\n': #End of comment, do not consume the end of line self._state = self.ST_NORMAL break else: self._token += c self._pos += 1 def _parse_quoted(self, eof): """Precondition: the starting backquote/dollar is still in the input queue.""" if not self._wordlexer: self._wordlexer = WordLexer() if self._pos<len(self._input): #Transfer input queue character into the subparser input = self._input[self._pos:] self._pos += len(input) wtree, remaining = self._wordlexer.add(input, eof) self._wordlexer = None self._token += wordtree_as_string(wtree) #Put unparsed character back in the input queue if remaining: self._input[self._pos:self._pos] = list(remaining) self._state = self.ST_NORMAL def _parse_heredoc(self, eof): assert not self._token if self._herelexer is None: self._herelexer = HereDocLexer(self._heredoc.op, self._heredoc.name) if self._pos<len(self._input): #Transfer input queue character into the subparser input = self._input[self._pos:] self._pos += len(input) self._token, remaining = self._herelexer.add(input, eof) #Reset here-document state self._herelexer = None heredoc, self._heredoc = self._heredoc, HereDoc(None) if remaining: self._input[self._pos:self._pos] = list(remaining) self._state = self.ST_NORMAL #Push pending tokens heredoc.pendings[:0] = [(self._token, self._type, heredoc.name)] for token, type, delim in heredoc.pendings: self._token = token self._type = type self._push_token(delim) def _push_token(self, delim): if not self._token: return 0 if self._heredoc.op is not None: if self._heredoc.name is None: #Here-document name if self._type!=TK_TOKEN: raise ShellSyntaxError("expecting here-document name, got '%s'" % self._token) self._heredoc.name = unquote_wordtree(make_wordtree(self._token)) self._type = TK_HERENAME else: #Capture all tokens until the newline starting the here-document if self._type==TK_NEWLINE: assert self._state==self.ST_NORMAL self._state = self.ST_HEREDOC self._heredoc.pendings.append((self._token, self._type, delim)) self._token = '' self._type = TK_TOKEN return 1 # BEWARE: do not change parser state from here to the end of the function: # when parsing between an here-document operator to the end of the line # tokens are stored in self._heredoc.pendings. Therefore, they will not # reach the section below. #Check operators if self._type==TK_OP: #False positive because of partial op matching op = is_op(self._token) if not op: self._type = TK_TOKEN else: #Map to the specific operator self._type = op if self._token in ('<<', '<<-'): #Done here rather than in _parse_op because there is no need #to change the parser state since we are still waiting for #the here-document name if self._heredoc.op is not None: raise ShellSyntaxError("syntax error near token '%s'" % self._token) assert self._heredoc.op is None self._heredoc.op = self._token if self._type==TK_TOKEN: if '=' in self._token and not delim: if self._token.startswith('='): #Token is a WORD... a TOKEN that is. pass else: prev = self._token[:self._token.find('=')] if is_name(prev): self._type = TK_ASSIGNMENT else: #Just a token (unspecified) pass else: reserved = get_reserved(self._token) if reserved is not None: if reserved=='In' and self._for_count!=2: #Sorry, not a reserved word after all pass else: self._type = reserved if reserved in ('For', 'Case'): self._for_count = 0 elif are_digits(self._token) and delim in ('<', '>'): #Detect IO_NUMBER self._type = TK_IONUMBER elif self._token==';': self._type = TK_COMMA elif self._token=='&': self._type = TK_AMPERSAND elif self._type==TK_COMMENT: #Comments are not part of sh grammar, ignore them self._token = '' self._type = TK_TOKEN return 0 if self._for_count is not None: #Track token count in 'For' expression to detect 'In' reserved words. #Can only be in third position, no need to go beyond self._for_count += 1 if self._for_count==3: self._for_count = None self.on_token((self._token, self._type)) self._token = '' self._type = TK_TOKEN return 1 def on_token(self, token): raise NotImplementedError tokens = [ TK_TOKEN, # To silence yacc unused token warnings # TK_COMMENT, TK_NEWLINE, TK_IONUMBER, TK_ASSIGNMENT, TK_HERENAME, ] #Add specific operators tokens += _OPERATORS.values() #Add reserved words tokens += _RESERVEDS.values() class PLYLexer(Lexer): """Bridge Lexer and PLY lexer interface.""" def __init__(self): Lexer.__init__(self) self._tokens = [] self._current = 0 self.lineno = 0 def on_token(self, token): value, type = token self.lineno = 0 t = lex.LexToken() t.value = value t.type = type t.lexer = self t.lexpos = 0 t.lineno = 0 self._tokens.append(t) def is_empty(self): return not bool(self._tokens) #PLY compliant interface def token(self): if self._current>=len(self._tokens): return None t = self._tokens[self._current] self._current += 1 return t def get_tokens(s): """Parse the input string and return a tuple (tokens, unprocessed) where tokens is a list of parsed tokens and unprocessed is the part of the input string left untouched by the lexer. """ lexer = PLYLexer() untouched = lexer.add(s, True) tokens = [] while 1: token = lexer.token() if token is None: break tokens.append(token) tokens = [(t.value, t.type) for t in tokens] return tokens, untouched