summaryrefslogtreecommitdiffstats
path: root/bitbake/lib/ply/lex.py
diff options
context:
space:
mode:
Diffstat (limited to 'bitbake/lib/ply/lex.py')
-rw-r--r--bitbake/lib/ply/lex.py1058
1 files changed, 0 insertions, 1058 deletions
diff --git a/bitbake/lib/ply/lex.py b/bitbake/lib/ply/lex.py
deleted file mode 100644
index 267ec100fc..0000000000
--- a/bitbake/lib/ply/lex.py
+++ /dev/null
@@ -1,1058 +0,0 @@
-# -----------------------------------------------------------------------------
-# ply: lex.py
-#
-# Copyright (C) 2001-2009,
-# David M. Beazley (Dabeaz LLC)
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are
-# met:
-#
-# * Redistributions of source code must retain the above copyright notice,
-# this list of conditions and the following disclaimer.
-# * Redistributions in binary form must reproduce the above copyright notice,
-# this list of conditions and the following disclaimer in the documentation
-# and/or other materials provided with the distribution.
-# * Neither the name of the David Beazley or Dabeaz LLC may be used to
-# endorse or promote products derived from this software without
-# specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-# -----------------------------------------------------------------------------
-
-__version__ = "3.3"
-__tabversion__ = "3.2" # Version of table file used
-
-import re, sys, types, copy, os
-
-# This tuple contains known string types
-try:
- # Python 2.6
- StringTypes = (types.StringType, types.UnicodeType)
-except AttributeError:
- # Python 3.0
- StringTypes = (str, bytes)
-
-# Extract the code attribute of a function. Different implementations
-# are for Python 2/3 compatibility.
-
-if sys.version_info[0] < 3:
- def func_code(f):
- return f.func_code
-else:
- def func_code(f):
- return f.__code__
-
-# This regular expression is used to match valid token names
-_is_identifier = re.compile(r'^[a-zA-Z0-9_]+$')
-
-# Exception thrown when invalid token encountered and no default error
-# handler is defined.
-
-class LexError(Exception):
- def __init__(self,message,s):
- self.args = (message,)
- self.text = s
-
-# Token class. This class is used to represent the tokens produced.
-class LexToken(object):
- def __str__(self):
- return "LexToken(%s,%r,%d,%d)" % (self.type,self.value,self.lineno,self.lexpos)
- def __repr__(self):
- return str(self)
-
-# This object is a stand-in for a logging object created by the
-# logging module.
-
-class PlyLogger(object):
- def __init__(self,f):
- self.f = f
- def critical(self,msg,*args,**kwargs):
- self.f.write((msg % args) + "\n")
-
- def warning(self,msg,*args,**kwargs):
- self.f.write("WARNING: "+ (msg % args) + "\n")
-
- def error(self,msg,*args,**kwargs):
- self.f.write("ERROR: " + (msg % args) + "\n")
-
- info = critical
- debug = critical
-
-# Null logger is used when no output is generated. Does nothing.
-class NullLogger(object):
- def __getattribute__(self,name):
- return self
- def __call__(self,*args,**kwargs):
- return self
-
-# -----------------------------------------------------------------------------
-# === Lexing Engine ===
-#
-# The following Lexer class implements the lexer runtime. There are only
-# a few public methods and attributes:
-#
-# input() - Store a new string in the lexer
-# token() - Get the next token
-# clone() - Clone the lexer
-#
-# lineno - Current line number
-# lexpos - Current position in the input string
-# -----------------------------------------------------------------------------
-
-class Lexer:
- def __init__(self):
- self.lexre = None # Master regular expression. This is a list of
- # tuples (re,findex) where re is a compiled
- # regular expression and findex is a list
- # mapping regex group numbers to rules
- self.lexretext = None # Current regular expression strings
- self.lexstatere = {} # Dictionary mapping lexer states to master regexs
- self.lexstateretext = {} # Dictionary mapping lexer states to regex strings
- self.lexstaterenames = {} # Dictionary mapping lexer states to symbol names
- self.lexstate = "INITIAL" # Current lexer state
- self.lexstatestack = [] # Stack of lexer states
- self.lexstateinfo = None # State information
- self.lexstateignore = {} # Dictionary of ignored characters for each state
- self.lexstateerrorf = {} # Dictionary of error functions for each state
- self.lexreflags = 0 # Optional re compile flags
- self.lexdata = None # Actual input data (as a string)
- self.lexpos = 0 # Current position in input text
- self.lexlen = 0 # Length of the input text
- self.lexerrorf = None # Error rule (if any)
- self.lextokens = None # List of valid tokens
- self.lexignore = "" # Ignored characters
- self.lexliterals = "" # Literal characters that can be passed through
- self.lexmodule = None # Module
- self.lineno = 1 # Current line number
- self.lexoptimize = 0 # Optimized mode
-
- def clone(self,object=None):
- c = copy.copy(self)
-
- # If the object parameter has been supplied, it means we are attaching the
- # lexer to a new object. In this case, we have to rebind all methods in
- # the lexstatere and lexstateerrorf tables.
-
- if object:
- newtab = { }
- for key, ritem in self.lexstatere.items():
- newre = []
- for cre, findex in ritem:
- newfindex = []
- for f in findex:
- if not f or not f[0]:
- newfindex.append(f)
- continue
- newfindex.append((getattr(object,f[0].__name__),f[1]))
- newre.append((cre,newfindex))
- newtab[key] = newre
- c.lexstatere = newtab
- c.lexstateerrorf = { }
- for key, ef in self.lexstateerrorf.items():
- c.lexstateerrorf[key] = getattr(object,ef.__name__)
- c.lexmodule = object
- return c
-
- # ------------------------------------------------------------
- # writetab() - Write lexer information to a table file
- # ------------------------------------------------------------
- def writetab(self,tabfile,outputdir=""):
- if isinstance(tabfile,types.ModuleType):
- return
- basetabfilename = tabfile.split(".")[-1]
- filename = os.path.join(outputdir,basetabfilename)+".py"
- tf = open(filename,"w")
- tf.write("# %s.py. This file automatically created by PLY (version %s). Don't edit!\n" % (tabfile,__version__))
- tf.write("_tabversion = %s\n" % repr(__version__))
- tf.write("_lextokens = %s\n" % repr(self.lextokens))
- tf.write("_lexreflags = %s\n" % repr(self.lexreflags))
- tf.write("_lexliterals = %s\n" % repr(self.lexliterals))
- tf.write("_lexstateinfo = %s\n" % repr(self.lexstateinfo))
-
- tabre = { }
- # Collect all functions in the initial state
- initial = self.lexstatere["INITIAL"]
- initialfuncs = []
- for part in initial:
- for f in part[1]:
- if f and f[0]:
- initialfuncs.append(f)
-
- for key, lre in self.lexstatere.items():
- titem = []
- for i in range(len(lre)):
- titem.append((self.lexstateretext[key][i],_funcs_to_names(lre[i][1],self.lexstaterenames[key][i])))
- tabre[key] = titem
-
- tf.write("_lexstatere = %s\n" % repr(tabre))
- tf.write("_lexstateignore = %s\n" % repr(self.lexstateignore))
-
- taberr = { }
- for key, ef in self.lexstateerrorf.items():
- if ef:
- taberr[key] = ef.__name__
- else:
- taberr[key] = None
- tf.write("_lexstateerrorf = %s\n" % repr(taberr))
- tf.close()
-
- # ------------------------------------------------------------
- # readtab() - Read lexer information from a tab file
- # ------------------------------------------------------------
- def readtab(self,tabfile,fdict):
- if isinstance(tabfile,types.ModuleType):
- lextab = tabfile
- else:
- if sys.version_info[0] < 3:
- exec("import %s as lextab" % tabfile)
- else:
- env = { }
- exec("import %s as lextab" % tabfile, env,env)
- lextab = env['lextab']
-
- if getattr(lextab,"_tabversion","0.0") != __version__:
- raise ImportError("Inconsistent PLY version")
-
- self.lextokens = lextab._lextokens
- self.lexreflags = lextab._lexreflags
- self.lexliterals = lextab._lexliterals
- self.lexstateinfo = lextab._lexstateinfo
- self.lexstateignore = lextab._lexstateignore
- self.lexstatere = { }
- self.lexstateretext = { }
- for key,lre in lextab._lexstatere.items():
- titem = []
- txtitem = []
- for i in range(len(lre)):
- titem.append((re.compile(lre[i][0],lextab._lexreflags | re.VERBOSE),_names_to_funcs(lre[i][1],fdict)))
- txtitem.append(lre[i][0])
- self.lexstatere[key] = titem
- self.lexstateretext[key] = txtitem
- self.lexstateerrorf = { }
- for key,ef in lextab._lexstateerrorf.items():
- self.lexstateerrorf[key] = fdict[ef]
- self.begin('INITIAL')
-
- # ------------------------------------------------------------
- # input() - Push a new string into the lexer
- # ------------------------------------------------------------
- def input(self,s):
- # Pull off the first character to see if s looks like a string
- c = s[:1]
- if not isinstance(c,StringTypes):
- raise ValueError("Expected a string")
- self.lexdata = s
- self.lexpos = 0
- self.lexlen = len(s)
-
- # ------------------------------------------------------------
- # begin() - Changes the lexing state
- # ------------------------------------------------------------
- def begin(self,state):
- if not state in self.lexstatere:
- raise ValueError("Undefined state")
- self.lexre = self.lexstatere[state]
- self.lexretext = self.lexstateretext[state]
- self.lexignore = self.lexstateignore.get(state,"")
- self.lexerrorf = self.lexstateerrorf.get(state,None)
- self.lexstate = state
-
- # ------------------------------------------------------------
- # push_state() - Changes the lexing state and saves old on stack
- # ------------------------------------------------------------
- def push_state(self,state):
- self.lexstatestack.append(self.lexstate)
- self.begin(state)
-
- # ------------------------------------------------------------
- # pop_state() - Restores the previous state
- # ------------------------------------------------------------
- def pop_state(self):
- self.begin(self.lexstatestack.pop())
-
- # ------------------------------------------------------------
- # current_state() - Returns the current lexing state
- # ------------------------------------------------------------
- def current_state(self):
- return self.lexstate
-
- # ------------------------------------------------------------
- # skip() - Skip ahead n characters
- # ------------------------------------------------------------
- def skip(self,n):
- self.lexpos += n
-
- # ------------------------------------------------------------
- # opttoken() - Return the next token from the Lexer
- #
- # Note: This function has been carefully implemented to be as fast
- # as possible. Don't make changes unless you really know what
- # you are doing
- # ------------------------------------------------------------
- def token(self):
- # Make local copies of frequently referenced attributes
- lexpos = self.lexpos
- lexlen = self.lexlen
- lexignore = self.lexignore
- lexdata = self.lexdata
-
- while lexpos < lexlen:
- # This code provides some short-circuit code for whitespace, tabs, and other ignored characters
- if lexdata[lexpos] in lexignore:
- lexpos += 1
- continue
-
- # Look for a regular expression match
- for lexre,lexindexfunc in self.lexre:
- m = lexre.match(lexdata,lexpos)
- if not m: continue
-
- # Create a token for return
- tok = LexToken()
- tok.value = m.group()
- tok.lineno = self.lineno
- tok.lexpos = lexpos
-
- i = m.lastindex
- func,tok.type = lexindexfunc[i]
-
- if not func:
- # If no token type was set, it's an ignored token
- if tok.type:
- self.lexpos = m.end()
- return tok
- else:
- lexpos = m.end()
- break
-
- lexpos = m.end()
-
- # If token is processed by a function, call it
-
- tok.lexer = self # Set additional attributes useful in token rules
- self.lexmatch = m
- self.lexpos = lexpos
-
- newtok = func(tok)
-
- # Every function must return a token, if nothing, we just move to next token
- if not newtok:
- lexpos = self.lexpos # This is here in case user has updated lexpos.
- lexignore = self.lexignore # This is here in case there was a state change
- break
-
- # Verify type of the token. If not in the token map, raise an error
- if not self.lexoptimize:
- if not newtok.type in self.lextokens:
- raise LexError("%s:%d: Rule '%s' returned an unknown token type '%s'" % (
- func_code(func).co_filename, func_code(func).co_firstlineno,
- func.__name__, newtok.type),lexdata[lexpos:])
-
- return newtok
- else:
- # No match, see if in literals
- if lexdata[lexpos] in self.lexliterals:
- tok = LexToken()
- tok.value = lexdata[lexpos]
- tok.lineno = self.lineno
- tok.type = tok.value
- tok.lexpos = lexpos
- self.lexpos = lexpos + 1
- return tok
-
- # No match. Call t_error() if defined.
- if self.lexerrorf:
- tok = LexToken()
- tok.value = self.lexdata[lexpos:]
- tok.lineno = self.lineno
- tok.type = "error"
- tok.lexer = self
- tok.lexpos = lexpos
- self.lexpos = lexpos
- newtok = self.lexerrorf(tok)
- if lexpos == self.lexpos:
- # Error method didn't change text position at all. This is an error.
- raise LexError("Scanning error. Illegal character '%s'" % (lexdata[lexpos]), lexdata[lexpos:])
- lexpos = self.lexpos
- if not newtok: continue
- return newtok
-
- self.lexpos = lexpos
- raise LexError("Illegal character '%s' at index %d" % (lexdata[lexpos],lexpos), lexdata[lexpos:])
-
- self.lexpos = lexpos + 1
- if self.lexdata is None:
- raise RuntimeError("No input string given with input()")
- return None
-
- # Iterator interface
- def __iter__(self):
- return self
-
- def next(self):
- t = self.token()
- if t is None:
- raise StopIteration
- return t
-
- __next__ = next
-
-# -----------------------------------------------------------------------------
-# ==== Lex Builder ===
-#
-# The functions and classes below are used to collect lexing information
-# and build a Lexer object from it.
-# -----------------------------------------------------------------------------
-
-# -----------------------------------------------------------------------------
-# get_caller_module_dict()
-#
-# This function returns a dictionary containing all of the symbols defined within
-# a caller further down the call stack. This is used to get the environment
-# associated with the yacc() call if none was provided.
-# -----------------------------------------------------------------------------
-
-def get_caller_module_dict(levels):
- try:
- raise RuntimeError
- except RuntimeError:
- e,b,t = sys.exc_info()
- f = t.tb_frame
- while levels > 0:
- f = f.f_back
- levels -= 1
- ldict = f.f_globals.copy()
- if f.f_globals != f.f_locals:
- ldict.update(f.f_locals)
-
- return ldict
-
-# -----------------------------------------------------------------------------
-# _funcs_to_names()
-#
-# Given a list of regular expression functions, this converts it to a list
-# suitable for output to a table file
-# -----------------------------------------------------------------------------
-
-def _funcs_to_names(funclist,namelist):
- result = []
- for f,name in zip(funclist,namelist):
- if f and f[0]:
- result.append((name, f[1]))
- else:
- result.append(f)
- return result
-
-# -----------------------------------------------------------------------------
-# _names_to_funcs()
-#
-# Given a list of regular expression function names, this converts it back to
-# functions.
-# -----------------------------------------------------------------------------
-
-def _names_to_funcs(namelist,fdict):
- result = []
- for n in namelist:
- if n and n[0]:
- result.append((fdict[n[0]],n[1]))
- else:
- result.append(n)
- return result
-
-# -----------------------------------------------------------------------------
-# _form_master_re()
-#
-# This function takes a list of all of the regex components and attempts to
-# form the master regular expression. Given limitations in the Python re
-# module, it may be necessary to break the master regex into separate expressions.
-# -----------------------------------------------------------------------------
-
-def _form_master_re(relist,reflags,ldict,toknames):
- if not relist: return []
- regex = "|".join(relist)
- try:
- lexre = re.compile(regex,re.VERBOSE | reflags)
-
- # Build the index to function map for the matching engine
- lexindexfunc = [ None ] * (max(lexre.groupindex.values())+1)
- lexindexnames = lexindexfunc[:]
-
- for f,i in lexre.groupindex.items():
- handle = ldict.get(f,None)
- if type(handle) in (types.FunctionType, types.MethodType):
- lexindexfunc[i] = (handle,toknames[f])
- lexindexnames[i] = f
- elif handle is not None:
- lexindexnames[i] = f
- if f.find("ignore_") > 0:
- lexindexfunc[i] = (None,None)
- else:
- lexindexfunc[i] = (None, toknames[f])
-
- return [(lexre,lexindexfunc)],[regex],[lexindexnames]
- except Exception:
- m = int(len(relist)/2)
- if m == 0: m = 1
- llist, lre, lnames = _form_master_re(relist[:m],reflags,ldict,toknames)
- rlist, rre, rnames = _form_master_re(relist[m:],reflags,ldict,toknames)
- return llist+rlist, lre+rre, lnames+rnames
-
-# -----------------------------------------------------------------------------
-# def _statetoken(s,names)
-#
-# Given a declaration name s of the form "t_" and a dictionary whose keys are
-# state names, this function returns a tuple (states,tokenname) where states
-# is a tuple of state names and tokenname is the name of the token. For example,
-# calling this with s = "t_foo_bar_SPAM" might return (('foo','bar'),'SPAM')
-# -----------------------------------------------------------------------------
-
-def _statetoken(s,names):
- nonstate = 1
- parts = s.split("_")
- for i in range(1,len(parts)):
- if not parts[i] in names and parts[i] != 'ANY': break
- if i > 1:
- states = tuple(parts[1:i])
- else:
- states = ('INITIAL',)
-
- if 'ANY' in states:
- states = tuple(names)
-
- tokenname = "_".join(parts[i:])
- return (states,tokenname)
-
-
-# -----------------------------------------------------------------------------
-# LexerReflect()
-#
-# This class represents information needed to build a lexer as extracted from a
-# user's input file.
-# -----------------------------------------------------------------------------
-class LexerReflect(object):
- def __init__(self,ldict,log=None,reflags=0):
- self.ldict = ldict
- self.error_func = None
- self.tokens = []
- self.reflags = reflags
- self.stateinfo = { 'INITIAL' : 'inclusive'}
- self.files = {}
- self.error = 0
-
- if log is None:
- self.log = PlyLogger(sys.stderr)
- else:
- self.log = log
-
- # Get all of the basic information
- def get_all(self):
- self.get_tokens()
- self.get_literals()
- self.get_states()
- self.get_rules()
-
- # Validate all of the information
- def validate_all(self):
- self.validate_tokens()
- self.validate_literals()
- self.validate_rules()
- return self.error
-
- # Get the tokens map
- def get_tokens(self):
- tokens = self.ldict.get("tokens",None)
- if not tokens:
- self.log.error("No token list is defined")
- self.error = 1
- return
-
- if not isinstance(tokens,(list, tuple)):
- self.log.error("tokens must be a list or tuple")
- self.error = 1
- return
-
- if not tokens:
- self.log.error("tokens is empty")
- self.error = 1
- return
-
- self.tokens = tokens
-
- # Validate the tokens
- def validate_tokens(self):
- terminals = {}
- for n in self.tokens:
- if not _is_identifier.match(n):
- self.log.error("Bad token name '%s'",n)
- self.error = 1
- if n in terminals:
- self.log.warning("Token '%s' multiply defined", n)
- terminals[n] = 1
-
- # Get the literals specifier
- def get_literals(self):
- self.literals = self.ldict.get("literals","")
-
- # Validate literals
- def validate_literals(self):
- try:
- for c in self.literals:
- if not isinstance(c,StringTypes) or len(c) > 1:
- self.log.error("Invalid literal %s. Must be a single character", repr(c))
- self.error = 1
- continue
-
- except TypeError:
- self.log.error("Invalid literals specification. literals must be a sequence of characters")
- self.error = 1
-
- def get_states(self):
- self.states = self.ldict.get("states",None)
- # Build statemap
- if self.states:
- if not isinstance(self.states,(tuple,list)):
- self.log.error("states must be defined as a tuple or list")
- self.error = 1
- else:
- for s in self.states:
- if not isinstance(s,tuple) or len(s) != 2:
- self.log.error("Invalid state specifier %s. Must be a tuple (statename,'exclusive|inclusive')",repr(s))
- self.error = 1
- continue
- name, statetype = s
- if not isinstance(name,StringTypes):
- self.log.error("State name %s must be a string", repr(name))
- self.error = 1
- continue
- if not (statetype == 'inclusive' or statetype == 'exclusive'):
- self.log.error("State type for state %s must be 'inclusive' or 'exclusive'",name)
- self.error = 1
- continue
- if name in self.stateinfo:
- self.log.error("State '%s' already defined",name)
- self.error = 1
- continue
- self.stateinfo[name] = statetype
-
- # Get all of the symbols with a t_ prefix and sort them into various
- # categories (functions, strings, error functions, and ignore characters)
-
- def get_rules(self):
- tsymbols = [f for f in self.ldict if f[:2] == 't_' ]
-
- # Now build up a list of functions and a list of strings
-
- self.toknames = { } # Mapping of symbols to token names
- self.funcsym = { } # Symbols defined as functions
- self.strsym = { } # Symbols defined as strings
- self.ignore = { } # Ignore strings by state
- self.errorf = { } # Error functions by state
-
- for s in self.stateinfo:
- self.funcsym[s] = []
- self.strsym[s] = []
-
- if len(tsymbols) == 0:
- self.log.error("No rules of the form t_rulename are defined")
- self.error = 1
- return
-
- for f in tsymbols:
- t = self.ldict[f]
- states, tokname = _statetoken(f,self.stateinfo)
- self.toknames[f] = tokname
-
- if hasattr(t,"__call__"):
- if tokname == 'error':
- for s in states:
- self.errorf[s] = t
- elif tokname == 'ignore':
- line = func_code(t).co_firstlineno
- file = func_code(t).co_filename
- self.log.error("%s:%d: Rule '%s' must be defined as a string",file,line,t.__name__)
- self.error = 1
- else:
- for s in states:
- self.funcsym[s].append((f,t))
- elif isinstance(t, StringTypes):
- if tokname == 'ignore':
- for s in states:
- self.ignore[s] = t
- if "\\" in t:
- self.log.warning("%s contains a literal backslash '\\'",f)
-
- elif tokname == 'error':
- self.log.error("Rule '%s' must be defined as a function", f)
- self.error = 1
- else:
- for s in states:
- self.strsym[s].append((f,t))
- else:
- self.log.error("%s not defined as a function or string", f)
- self.error = 1
-
- # Sort the functions by line number
- for f in self.funcsym.values():
- if sys.version_info[0] < 3:
- f.sort(lambda x,y: cmp(func_code(x[1]).co_firstlineno,func_code(y[1]).co_firstlineno))
- else:
- # Python 3.0
- f.sort(key=lambda x: func_code(x[1]).co_firstlineno)
-
- # Sort the strings by regular expression length
- for s in self.strsym.values():
- if sys.version_info[0] < 3:
- s.sort(lambda x,y: (len(x[1]) < len(y[1])) - (len(x[1]) > len(y[1])))
- else:
- # Python 3.0
- s.sort(key=lambda x: len(x[1]),reverse=True)
-
- # Validate all of the t_rules collected
- def validate_rules(self):
- for state in self.stateinfo:
- # Validate all rules defined by functions
-
-
-
- for fname, f in self.funcsym[state]:
- line = func_code(f).co_firstlineno
- file = func_code(f).co_filename
- self.files[file] = 1
-
- tokname = self.toknames[fname]
- if isinstance(f, types.MethodType):
- reqargs = 2
- else:
- reqargs = 1
- nargs = func_code(f).co_argcount
- if nargs > reqargs:
- self.log.error("%s:%d: Rule '%s' has too many arguments",file,line,f.__name__)
- self.error = 1
- continue
-
- if nargs < reqargs:
- self.log.error("%s:%d: Rule '%s' requires an argument", file,line,f.__name__)
- self.error = 1
- continue
-
- if not f.__doc__:
- self.log.error("%s:%d: No regular expression defined for rule '%s'",file,line,f.__name__)
- self.error = 1
- continue
-
- try:
- c = re.compile("(?P<%s>%s)" % (fname,f.__doc__), re.VERBOSE | self.reflags)
- if c.match(""):
- self.log.error("%s:%d: Regular expression for rule '%s' matches empty string", file,line,f.__name__)
- self.error = 1
- except re.error:
- _etype, e, _etrace = sys.exc_info()
- self.log.error("%s:%d: Invalid regular expression for rule '%s'. %s", file,line,f.__name__,e)
- if '#' in f.__doc__:
- self.log.error("%s:%d. Make sure '#' in rule '%s' is escaped with '\\#'",file,line, f.__name__)
- self.error = 1
-
- # Validate all rules defined by strings
- for name,r in self.strsym[state]:
- tokname = self.toknames[name]
- if tokname == 'error':
- self.log.error("Rule '%s' must be defined as a function", name)
- self.error = 1
- continue
-
- if not tokname in self.tokens and tokname.find("ignore_") < 0:
- self.log.error("Rule '%s' defined for an unspecified token %s",name,tokname)
- self.error = 1
- continue
-
- try:
- c = re.compile("(?P<%s>%s)" % (name,r),re.VERBOSE | self.reflags)
- if (c.match("")):
- self.log.error("Regular expression for rule '%s' matches empty string",name)
- self.error = 1
- except re.error:
- _etype, e, _etrace = sys.exc_info()
- self.log.error("Invalid regular expression for rule '%s'. %s",name,e)
- if '#' in r:
- self.log.error("Make sure '#' in rule '%s' is escaped with '\\#'",name)
- self.error = 1
-
- if not self.funcsym[state] and not self.strsym[state]:
- self.log.error("No rules defined for state '%s'",state)
- self.error = 1
-
- # Validate the error function
- efunc = self.errorf.get(state,None)
- if efunc:
- f = efunc
- line = func_code(f).co_firstlineno
- file = func_code(f).co_filename
- self.files[file] = 1
-
- if isinstance(f, types.MethodType):
- reqargs = 2
- else:
- reqargs = 1
- nargs = func_code(f).co_argcount
- if nargs > reqargs:
- self.log.error("%s:%d: Rule '%s' has too many arguments",file,line,f.__name__)
- self.error = 1
-
- if nargs < reqargs:
- self.log.error("%s:%d: Rule '%s' requires an argument", file,line,f.__name__)
- self.error = 1
-
- for f in self.files:
- self.validate_file(f)
-
-
- # -----------------------------------------------------------------------------
- # validate_file()
- #
- # This checks to see if there are duplicated t_rulename() functions or strings
- # in the parser input file. This is done using a simple regular expression
- # match on each line in the given file.
- # -----------------------------------------------------------------------------
-
- def validate_file(self,filename):
- import os.path
- base,ext = os.path.splitext(filename)
- if ext != '.py': return # No idea what the file is. Return OK
-
- try:
- f = open(filename)
- lines = f.readlines()
- f.close()
- except IOError:
- return # Couldn't find the file. Don't worry about it
-
- fre = re.compile(r'\s*def\s+(t_[a-zA-Z_0-9]*)\(')
- sre = re.compile(r'\s*(t_[a-zA-Z_0-9]*)\s*=')
-
- counthash = { }
- linen = 1
- for l in lines:
- m = fre.match(l)
- if not m:
- m = sre.match(l)
- if m:
- name = m.group(1)
- prev = counthash.get(name)
- if not prev:
- counthash[name] = linen
- else:
- self.log.error("%s:%d: Rule %s redefined. Previously defined on line %d",filename,linen,name,prev)
- self.error = 1
- linen += 1
-
-# -----------------------------------------------------------------------------
-# lex(module)
-#
-# Build all of the regular expression rules from definitions in the supplied module
-# -----------------------------------------------------------------------------
-def lex(module=None,object=None,debug=0,optimize=0,lextab="lextab",reflags=0,nowarn=0,outputdir="", debuglog=None, errorlog=None):
- global lexer
- ldict = None
- stateinfo = { 'INITIAL' : 'inclusive'}
- lexobj = Lexer()
- lexobj.lexoptimize = optimize
- global token,input
-
- if errorlog is None:
- errorlog = PlyLogger(sys.stderr)
-
- if debug:
- if debuglog is None:
- debuglog = PlyLogger(sys.stderr)
-
- # Get the module dictionary used for the lexer
- if object: module = object
-
- if module:
- _items = [(k,getattr(module,k)) for k in dir(module)]
- ldict = dict(_items)
- else:
- ldict = get_caller_module_dict(2)
-
- # Collect parser information from the dictionary
- linfo = LexerReflect(ldict,log=errorlog,reflags=reflags)
- linfo.get_all()
- if not optimize:
- if linfo.validate_all():
- raise SyntaxError("Can't build lexer")
-
- if optimize and lextab:
- try:
- lexobj.readtab(lextab,ldict)
- token = lexobj.token
- input = lexobj.input
- lexer = lexobj
- return lexobj
-
- except ImportError:
- pass
-
- # Dump some basic debugging information
- if debug:
- debuglog.info("lex: tokens = %r", linfo.tokens)
- debuglog.info("lex: literals = %r", linfo.literals)
- debuglog.info("lex: states = %r", linfo.stateinfo)
-
- # Build a dictionary of valid token names
- lexobj.lextokens = { }
- for n in linfo.tokens:
- lexobj.lextokens[n] = 1
-
- # Get literals specification
- if isinstance(linfo.literals,(list,tuple)):
- lexobj.lexliterals = type(linfo.literals[0])().join(linfo.literals)
- else:
- lexobj.lexliterals = linfo.literals
-
- # Get the stateinfo dictionary
- stateinfo = linfo.stateinfo
-
- regexs = { }
- # Build the master regular expressions
- for state in stateinfo:
- regex_list = []
-
- # Add rules defined by functions first
- for fname, f in linfo.funcsym[state]:
- line = func_code(f).co_firstlineno
- file = func_code(f).co_filename
- regex_list.append("(?P<%s>%s)" % (fname,f.__doc__))
- if debug:
- debuglog.info("lex: Adding rule %s -> '%s' (state '%s')",fname,f.__doc__, state)
-
- # Now add all of the simple rules
- for name,r in linfo.strsym[state]:
- regex_list.append("(?P<%s>%s)" % (name,r))
- if debug:
- debuglog.info("lex: Adding rule %s -> '%s' (state '%s')",name,r, state)
-
- regexs[state] = regex_list
-
- # Build the master regular expressions
-
- if debug:
- debuglog.info("lex: ==== MASTER REGEXS FOLLOW ====")
-
- for state in regexs:
- lexre, re_text, re_names = _form_master_re(regexs[state],reflags,ldict,linfo.toknames)
- lexobj.lexstatere[state] = lexre
- lexobj.lexstateretext[state] = re_text
- lexobj.lexstaterenames[state] = re_names
- if debug:
- for i in range(len(re_text)):
- debuglog.info("lex: state '%s' : regex[%d] = '%s'",state, i, re_text[i])
-
- # For inclusive states, we need to add the regular expressions from the INITIAL state
- for state,stype in stateinfo.items():
- if state != "INITIAL" and stype == 'inclusive':
- lexobj.lexstatere[state].extend(lexobj.lexstatere['INITIAL'])
- lexobj.lexstateretext[state].extend(lexobj.lexstateretext['INITIAL'])
- lexobj.lexstaterenames[state].extend(lexobj.lexstaterenames['INITIAL'])
-
- lexobj.lexstateinfo = stateinfo
- lexobj.lexre = lexobj.lexstatere["INITIAL"]
- lexobj.lexretext = lexobj.lexstateretext["INITIAL"]
- lexobj.lexreflags = reflags
-
- # Set up ignore variables
- lexobj.lexstateignore = linfo.ignore
- lexobj.lexignore = lexobj.lexstateignore.get("INITIAL","")
-
- # Set up error functions
- lexobj.lexstateerrorf = linfo.errorf
- lexobj.lexerrorf = linfo.errorf.get("INITIAL",None)
- if not lexobj.lexerrorf:
- errorlog.warning("No t_error rule is defined")
-
- # Check state information for ignore and error rules
- for s,stype in stateinfo.items():
- if stype == 'exclusive':
- if not s in linfo.errorf:
- errorlog.warning("No error rule is defined for exclusive state '%s'", s)
- if not s in linfo.ignore and lexobj.lexignore:
- errorlog.warning("No ignore rule is defined for exclusive state '%s'", s)
- elif stype == 'inclusive':
- if not s in linfo.errorf:
- linfo.errorf[s] = linfo.errorf.get("INITIAL",None)
- if not s in linfo.ignore:
- linfo.ignore[s] = linfo.ignore.get("INITIAL","")
-
- # Create global versions of the token() and input() functions
- token = lexobj.token
- input = lexobj.input
- lexer = lexobj
-
- # If in optimize mode, we write the lextab
- if lextab and optimize:
- lexobj.writetab(lextab,outputdir)
-
- return lexobj
-
-# -----------------------------------------------------------------------------
-# runmain()
-#
-# This runs the lexer as a main program
-# -----------------------------------------------------------------------------
-
-def runmain(lexer=None,data=None):
- if not data:
- try:
- filename = sys.argv[1]
- f = open(filename)
- data = f.read()
- f.close()
- except IndexError:
- sys.stdout.write("Reading from standard input (type EOF to end):\n")
- data = sys.stdin.read()
-
- if lexer:
- _input = lexer.input
- else:
- _input = input
- _input(data)
- if lexer:
- _token = lexer.token
- else:
- _token = token
-
- while 1:
- tok = _token()
- if not tok: break
- sys.stdout.write("(%s,%r,%d,%d)\n" % (tok.type, tok.value, tok.lineno,tok.lexpos))
-
-# -----------------------------------------------------------------------------
-# @TOKEN(regex)
-#
-# This decorator function can be used to set the regex expression on a function
-# when its docstring might need to be set in an alternative way
-# -----------------------------------------------------------------------------
-
-def TOKEN(r):
- def set_doc(f):
- if hasattr(r,"__call__"):
- f.__doc__ = r.__doc__
- else:
- f.__doc__ = r
- return f
- return set_doc
-
-# Alternative spelling of the TOKEN decorator
-Token = TOKEN
-