diff options
author | Holger Hans Peter Freyther <zecke@selfish.org> | 2005-07-10 18:29:35 +0000 |
---|---|---|
committer | Holger Hans Peter Freyther <zecke@selfish.org> | 2005-07-10 18:29:35 +0000 |
commit | f2ff6edbddbc8be602e2513c90f4048c20e40705 (patch) | |
tree | 00dddbc5f5ca1e3a3d6ff0cb8ecf171556d4661b | |
parent | 67afbf5e1de58ee9584ccb55eca941762ccf8a0c (diff) | |
download | bitbake-f2ff6edbddbc8be602e2513c90f4048c20e40705.tar.gz |
Start with the excellent lexer and lemon Grammar from
Marc Singer and turn it into a C module (after pybison
was a total failure).
Lexer Changes:
Do not allow unquoted R_VALUE
Grammar Changes:
FILES_${PN} () {
}
T_VARIABLE is a legal start of a procedure
-rw-r--r-- | lib/bb/parse/parse_c/bitbakeparser.l | 313 | ||||
-rw-r--r-- | lib/bb/parse/parse_c/bitbakeparser.y | 168 |
2 files changed, 481 insertions, 0 deletions
diff --git a/lib/bb/parse/parse_c/bitbakeparser.l b/lib/bb/parse/parse_c/bitbakeparser.l new file mode 100644 index 000000000..be749ea81 --- /dev/null +++ b/lib/bb/parse/parse_c/bitbakeparser.l @@ -0,0 +1,313 @@ +/* bbf.flex + + written by Marc Singer + 6 January 2005 + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + USA. + + DESCRIPTION + ----------- + + flex lexer specification for a BitBake input file parser. + + Unfortunately, flex doesn't welcome comments within the rule sets. + I say unfortunately because this lexer is unreasonably complex and + comments would make the code much easier to comprehend. + + The BitBake grammar is not regular. In order to interpret all + of the available input files, the lexer maintains much state as it + parses. There are places where this lexer will emit tokens that + are invalid. The parser will tend to catch these. + + The lexer requires C++ at the moment. The only reason for this has + to do with a very small amount of managed state. Producing a C + lexer should be a reasonably easy task as long as the %reentrant + option is used. + + + NOTES + ----- + + o RVALUES. There are three kinds of RVALUES. There are unquoted + values, double quote enclosed strings, and single quote + strings. 
Quoted strings may contain unescaped quotes (of either + type), *and* any type may span more than one line by using a + continuation '\' at the end of the line. This requires us to + recognize all types of values with a single expression. + Moreover, the only reason to quote a value is to include + trailing or leading whitespace. Whitespace within a value is + preserved, ugh. + + o CLASSES. C_ patterns define classes. Classes ought not include + a repetition operator, instead letting the reference to the class + define the repetition count. + + C_SS - symbol start + C_SB - symbol body + C_SP - whitespace + +*/ + +%option never-interactive +%option yylineno +%option noyywrap +%option reentrant stack + + +%{ + +#include "standard.h" +#include "token.h" +#include "filestack.h" +#include "lexer.h" +#include "arguments.h" +#include <ctype.h> + +extern void *bbparseAlloc(void *(*mallocProc)(size_t)); +extern void bbparseFree(void *p, void (*freeProc)(void*)); +extern void *bbparseAlloc(void *(*mallocProc)(size_t)); +extern void *bbparse(void*, int, token_t, lex_t*); +extern void bbparseTrace(FILE *TraceFILE, char *zTracePrompt); + +//static const char* rgbInput; +//static size_t cbInput; + + +int lineError; +int errorParse; + +enum { + errorNone = 0, + errorUnexpectedInput, + errorUnsupportedFeature, +}; + +#define YY_EXTRA_TYPE lex_t* + + /* Read from buffer */ +#define YY_INPUT(rgb,result,cbMax) \ + { if (cbMax > yyextra->cbInput) cbMax = yyextra->cbInput; \ + memcpy (rgb, yyextra->rgbInput, cbMax); \ + yyextra->cbInput -= cbMax; yyextra->rgbInput += cbMax; \ + result = cbMax ? 
cbMax : YY_NULL; } + +//#define YY_DECL static size_t yylex () + +#define ERROR(e) \ + do { lineError = yylineno; errorParse = e; yyterminate (); } while (0) + +static const char* fixup_escapes (const char* sz); + +%} + + +C_SP [ \t] +COMMENT #.*\n +OP_ASSIGN "=" +OP_IMMEDIATE ":=" +OP_PREPEND "=+" +OP_APPEND "+=" +OP_COND "?=" +B_OPEN "{" +B_CLOSE "}" + +K_ADDTASK "addtask" +K_ADDHANDLER "addhandler" +K_AFTER "after" +K_BEFORE "before" +K_DEF "def" +K_INCLUDE "include" +K_INHERIT "inherit" +K_PYTHON "python" +K_FAKEROOT "fakeroot" +K_EXPORT "export" +K_EXPORT_FUNC "EXPORT_FUNCTIONS" + +STRING \"([^\n\r]|"\\\n")*\" +SSTRING \'([^\n\r]|"\\\n")*\' +VALUE ([^'" \t\n])|([^'" \t\n]([^\n]|(\\\n))*[^'" \t\n]) + +C_SS [a-zA-Z_] +C_SB [a-zA-Z0-9_+-.] +REF $\{{C_SS}{C_SB}*\} +SYMBOL {C_SS}{C_SB}* +VARIABLE $?{C_SS}({C_SB}*|{REF})*(\[[a-zA-Z0-9_]*\])? +FILENAME ([a-zA-Z_./]|{REF})(([-+a-zA-Z0-9_./]*)|{REF})* + +PROC \({C_SP}*\) + +%s S_DEF +%s S_DEF_ARGS +%s S_DEF_BODY +%s S_FUNC +%s S_INCLUDE +%s S_INHERIT +%s S_PROC +%s S_RVALUE +%s S_TASK + +%% + +{OP_APPEND} { BEGIN S_RVALUE; + yyextra->accept (T_OP_APPEND); } +{OP_PREPEND} { BEGIN S_RVALUE; + yyextra->accept (T_OP_PREPEND); } +{OP_IMMEDIATE} { BEGIN S_RVALUE; + yyextra->accept (T_OP_IMMEDIATE); } +{OP_ASSIGN} { BEGIN S_RVALUE; + yyextra->accept (T_OP_ASSIGN); } +{OP_COND} { BEGIN S_RVALUE; + yyextra->accept (T_OP_COND); } + +<S_RVALUE>\\\n{C_SP}* { } +<S_RVALUE>{STRING} { BEGIN INITIAL; + size_t cb = yyleng; + while (cb && isspace (yytext[cb - 1])) + --cb; + yytext[cb - 1] = 0; + yyextra->accept (T_STRING, yytext + 1); } +<S_RVALUE>{SSTRING} { BEGIN INITIAL; + size_t cb = yyleng; + while (cb && isspace (yytext[cb - 1])) + --cb; + yytext[cb - 1] = 0; + yyextra->accept (T_STRING, yytext + 1); } + +<S_RVALUE>{VALUE} { ERROR (errorUnexpectedInput); } +<S_RVALUE>{C_SP}*\n+ { BEGIN INITIAL; + yyextra->accept (T_STRING, NULL); } + +{K_INCLUDE} { BEGIN S_INCLUDE; + yyextra->accept (T_INCLUDE); } +{K_INHERIT} { BEGIN S_INHERIT; + 
yyextra->accept (T_INHERIT); } +{K_ADDTASK} { BEGIN S_TASK; + yyextra->accept (T_ADDTASK); } +{K_ADDHANDLER} { yyextra->accept (T_ADDHANDLER); } +{K_EXPORT_FUNC} { BEGIN S_FUNC; + yyextra->accept (T_EXPORT_FUNC); } +<S_TASK>{K_BEFORE} { yyextra->accept (T_BEFORE); } +<S_TASK>{K_AFTER} { yyextra->accept (T_AFTER); } +<INITIAL>{K_EXPORT} { yyextra->accept (T_EXPORT); } + +<INITIAL>{K_FAKEROOT} { yyextra->accept (T_FAKEROOT); } +<INITIAL>{K_PYTHON} { yyextra->accept (T_PYTHON); } +{PROC}{C_SP}*{B_OPEN}{C_SP}*\n* { BEGIN S_PROC; + yyextra->accept (T_PROC_OPEN); } +<S_PROC>{B_CLOSE}{C_SP}*\n* { BEGIN INITIAL; + yyextra->accept (T_PROC_CLOSE); } +<S_PROC>([^}][^\n]*)?\n* { yyextra->accept (T_PROC_BODY, yytext); } + +{K_DEF} { BEGIN S_DEF; } +<S_DEF>{SYMBOL} { BEGIN S_DEF_ARGS; + yyextra->accept (T_SYMBOL, yytext); } +<S_DEF_ARGS>[^\n:]*: { yyextra->accept (T_DEF_ARGS, yytext); } +<S_DEF_ARGS>{C_SP}*\n { BEGIN S_DEF_BODY; } +<S_DEF_BODY>{C_SP}+[^\n]*\n { yyextra->accept (T_DEF_BODY, yytext); } +<S_DEF_BODY>\n { yyextra->accept (T_DEF_BODY, yytext); } +<S_DEF_BODY>. { BEGIN INITIAL; unput (yytext[0]); } + +{COMMENT} { } + +<INITIAL>{SYMBOL} { yyextra->accept (T_SYMBOL, yytext); } +<INITIAL>{VARIABLE} { yyextra->accept (T_VARIABLE, yytext); } + +<S_TASK>{SYMBOL} { yyextra->accept (T_TSYMBOL, yytext); } +<S_FUNC>{SYMBOL} { yyextra->accept (T_FSYMBOL, yytext); } +<S_INHERIT>{SYMBOL} { yyextra->accept (T_ISYMBOL, yytext); } +<S_INCLUDE>{FILENAME} { BEGIN INITIAL; + yyextra->accept (T_ISYMBOL, yytext); } + +<S_TASK>\n { BEGIN INITIAL; } +<S_FUNC>\n { BEGIN INITIAL; } +<S_INHERIT>\n { BEGIN INITIAL; } + +[ \t\r\n] /* Insignificant whitespace */ + +. 
{ ERROR (errorUnexpectedInput); } + + /* Check for premature termination */ +<<EOF>> { return T_EOF; } + +%% + +void lex_t::accept (int token, const char* sz) +{ + token_t t; + memset (&t, 0, sizeof (t)); + if (sz) { + t.sz = new char [strlen (sz) + 1]; + strcpy (t.sz, sz); + } + struct yyguts_t * yyg = (struct yyguts_t*)scanner; + extern char* token_name[]; + extern char* state_name[]; + if (arguments.show_tokens) + printf (" <%s> %s %-*.*s\n", + state_name[(yyg->yy_start - 1)/2], token_name[token], + 60, 60, sz ? sz : ""); + parse (parser, token, t, this); +} + +int lex_t::line (void) +{ + return yyget_lineno (scanner); +} + +const char* lex_t::filename (void) +{ + return mf->m_szPath; +} + +void parse (MappedFile* mf) +{ + void* parser = bbparseAlloc (malloc); + yyscan_t scanner; + lex_t lex; + + yylex_init (&scanner); +// printf ("scanner create %p\n", scanner); + + lex.parser = parser; + lex.scanner = scanner; + lex.mf = mf; + lex.rgbInput = mf->m_rgb; + lex.cbInput = mf->m_cb; + lex.parse = bbparse; + yyset_extra (&lex, scanner); + +#if defined (USE_TRACE_LOG) + FILE* fp = fopen ("trace.log", "a+"); + bbparseTrace (fp, ""); +#endif + + int result = yylex (scanner); + + lex.accept (0); + bbparseTrace (NULL, NULL);; + + if (result != T_EOF) + WARNING ("premature end of file\n"); +// printf ("scanner release %p\n", scanner); + yylex_destroy (scanner); + +// printf ("yylex return %d\n", result); + +#if defined (USE_TRACE_LOG) + fclose (fp); +#endif + bbparseFree (parser, free); +} diff --git a/lib/bb/parse/parse_c/bitbakeparser.y b/lib/bb/parse/parse_c/bitbakeparser.y new file mode 100644 index 000000000..b4cd73fb6 --- /dev/null +++ b/lib/bb/parse/parse_c/bitbakeparser.y @@ -0,0 +1,168 @@ +/* bbp.lemon + + written by Marc Singer + 6 January 2005 + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at 
your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + USA. + + DESCRIPTION + ----------- + + lemon parser specification file for a BitBake input file parser. + + Most of the interesting shenanigans are done in the lexer. The + BitBake grammar is not regular. In order to emit tokens that + the parser can properly interpret in LALR fashion, the lexer + manages the interpretation state. This is why there are ISYMBOLs, + SYMBOLS, and TSYMBOLS. + + This parser was developed by reading the limited available + documentation for BitBake and by analyzing the available BB files. + There is no assertion of correctness to be made about this parser. + +*/ + +%token_type {token_t} +%name bbparse +%token_prefix T_ +%extra_argument {lex_t* lex} + +%include { +#include <iostream> +#include "standard.h" +} + + +%token_destructor { $$.release_this (); } + +%syntax_error { printf ("%s:%d: syntax error\n", + lex->filename (), lex->line ()); } + +program ::= statements. + +statements ::= statements statement. +statements ::= . + +variable(r) ::= SYMBOL(s). + { r.sz = s.sz; s.sz = NULL; + s.release_this (); } +variable(r) ::= VARIABLE(v). + { char* sz = e_interpolate (v.sz); + /* token text is allocated with new char[] by the lexer, so it must be released with delete [] */ + if (sz) { r.sz = sz; delete [] v.sz; } + else { r.sz = v.sz; } + v.sz = NULL; + v.release_this (); } + +statement ::= EXPORT variable(s) OP_ASSIGN STRING(v). + { e_assign (s.sz, v.sz); e_export (s.sz); + s.release_this (); v.release_this (); } +statement ::= EXPORT variable(s) OP_IMMEDIATE STRING(v). 
+ { e_immediate (s.sz, v.sz); e_export (s.sz); + s.release_this (); v.release_this (); } + +statement ::= EXPORT variable(s) OP_COND STRING(v). + { e_cond (s.sz, v.sz); e_export (s.sz); + s.release_this (); v.release_this (); } + +statement ::= variable(s) OP_ASSIGN STRING(v). + { e_assign (s.sz, v.sz); + s.release_this (); v.release_this (); } +statement ::= variable(s) OP_PREPEND STRING(v). + { e_prepend (s.sz, v.sz); + s.release_this (); v.release_this (); } +statement ::= variable(s) OP_APPEND STRING(v). + { e_append (s.sz, v.sz); + s.release_this (); v.release_this (); } +statement ::= variable(s) OP_IMMEDIATE STRING(v). + { e_immediate (s.sz, v.sz); + s.release_this (); v.release_this (); } +statement ::= variable(s) OP_COND STRING(v). + { e_cond (s.sz, v.sz); + s.release_this (); v.release_this (); } + +task ::= TSYMBOL(t) BEFORE TSYMBOL(b) AFTER TSYMBOL(a). + { e_addtask (t.sz, b.sz, a.sz); + t.release_this (); b.release_this (); a.release_this (); } +task ::= TSYMBOL(t) AFTER TSYMBOL(a) BEFORE TSYMBOL(b). + { e_addtask (t.sz, b.sz, a.sz); + t.release_this (); a.release_this (); b.release_this (); } +task ::= TSYMBOL(t). + { e_addtask (t.sz, NULL, NULL); + t.release_this ();} +task ::= TSYMBOL(t) BEFORE TSYMBOL(b). + { e_addtask (t.sz, b.sz, NULL); + t.release_this (); b.release_this (); } +task ::= TSYMBOL(t) AFTER TSYMBOL(a). + { e_addtask (t.sz, NULL, a.sz); + t.release_this (); a.release_this (); } +tasks ::= tasks task. +tasks ::= task. +statement ::= ADDTASK tasks. + +statement ::= ADDHANDLER SYMBOL(s). + { e_addhandler (s.sz); s.release_this (); } + +func ::= FSYMBOL(f). { e_export_func (f.sz); f.release_this (); } +funcs ::= funcs func. +funcs ::= func. +statement ::= EXPORT_FUNC funcs. + +inherit ::= ISYMBOL(i). { e_inherit (i.sz); i.release_this (); } +inherits ::= inherits inherit. +inherits ::= inherit. +statement ::= INHERIT inherits. + +statement ::= INCLUDE ISYMBOL(i). 
+ { e_include (i.sz); i.release_this (); } + +proc_body(r) ::= proc_body(l) PROC_BODY(b). + { /* concatenate body lines */ + size_t cb = (l.sz ? strlen (l.sz) : 0) + strlen (b.sz) + 1; + r.sz = new char[cb]; + *r.sz = 0; + if (l.sz) strcat (r.sz, l.sz); + strcat (r.sz, b.sz); + l.release_this (); + b.release_this (); + } +proc_body(b) ::= . { b.sz = 0; } +statement ::= variable(p) PROC_OPEN proc_body(b) PROC_CLOSE. + { e_proc (p.sz, b.sz); + p.release_this (); b.release_this (); } +statement ::= PYTHON SYMBOL(p) PROC_OPEN proc_body(b) PROC_CLOSE. + { e_proc_python (p.sz, b.sz); + p.release_this (); b.release_this (); } +statement ::= PYTHON PROC_OPEN proc_body(b) PROC_CLOSE. + { e_proc_python (NULL, b.sz); + b.release_this (); } + +statement ::= FAKEROOT SYMBOL(p) PROC_OPEN proc_body(b) PROC_CLOSE. + { e_proc_fakeroot (p.sz, b.sz); + p.release_this (); b.release_this (); } + +def_body(r) ::= def_body(l) DEF_BODY(b). + { /* concatenate body lines */ + size_t cb = (l.sz ? strlen (l.sz) : 0) + strlen (b.sz); + r.sz = new char[cb + 1]; + *r.sz = 0; + if (l.sz) strcat (r.sz, l.sz); + strcat (r.sz, b.sz); + l.release_this (); b.release_this (); + } +def_body(b) ::= . { b.sz = 0; } +statement ::= SYMBOL(p) DEF_ARGS(a) def_body(b). + { e_def (p.sz, a.sz, b.sz); + p.release_this(); a.release_this (); b.release_this (); } |