diff options
author | Jeremy Kerr <jk@ozlabs.org> | 2014-12-01 11:47:16 +0800 |
---|---|---|
committer | Jeremy Kerr <jk@ozlabs.org> | 2014-12-01 12:09:53 +0800 |
commit | 00a881a1adb6f656049939adecb9bf9edd29a658 (patch) | |
tree | 20fb132fd91c7ce718244ab9884efe0859e7eef0 /discover/grub2/grub2-lexer.l | |
parent | 46fbb1e1e33b130284ec211f9c647268ff709ba7 (diff) | |
download | talos-petitboot-00a881a1adb6f656049939adecb9bf9edd29a658.tar.gz talos-petitboot-00a881a1adb6f656049939adecb9bf9edd29a658.zip |
discover/grub2: improve handling of word & delimiter tokens
Currently, the delimiter token handling is a little fragile: we try to
ignore non-inter-word delimiters in the lexer with a selective set of
regexes on the possible delimiter characters.
This means we don't need to handle potential delimiters in every grammar
rule, but there are other situations (not regex-able) where we may see
delimiters, and this will cause a parse error.
Instead of relying on the regex behaviour, we have an 'inter_word' flag,
which is set when we see the first word token, and cleared when we see
an end-of-line token. We only emit TOKEN_DELIM when this flag is set.
This means that we only get the delim tokens when they're required -
when we're looking for word separators (because WORD DELIM WORD is
distinct from WORD WORD - e.g. "linux /vmlinux" and "x$var").
We add a few new tests for the "menuentry" and "if" syntax, with
different delimiter configurations.
Signed-off-by: Jeremy Kerr <jk@ozlabs.org>
Diffstat (limited to 'discover/grub2/grub2-lexer.l')
-rw-r--r-- | discover/grub2/grub2-lexer.l | 21 |
1 files changed, 14 insertions, 7 deletions
diff --git a/discover/grub2/grub2-lexer.l b/discover/grub2/grub2-lexer.l index 066af60..52575e3 100644 --- a/discover/grub2/grub2-lexer.l +++ b/discover/grub2/grub2-lexer.l @@ -22,14 +22,13 @@ void yyerror(struct grub2_parser *parser, const char *fmt, ...); %x dqstring WORD [^{}|&$;<> \t\n'"#]+ +DELIM [ \t]+ VARNAME ([[:alpha:]][_[:alnum:]]*|[0-9]|[\?@\*#]) %% - /* discard leading & trailing whitespace, but keep inter-word delimeters */ -^[ \t]+ ; -[ \t]+$ ; -[ \t]+ return TOKEN_DELIM; + /* discard whitespace, unless we're looking for inter-word delimiters */ +{DELIM} { if (yyget_extra(yyscanner)->inter_word) return TOKEN_DELIM; } /* reserved words */ "[[" return TOKEN_LDSQBRACKET; @@ -56,6 +55,7 @@ VARNAME ([[:alpha:]][_[:alnum:]]*|[0-9]|[\?@\*#]) /* anything that's not a metachar: return as a plain word */ {WORD} { yylval->word = create_word_text(yyget_extra(yyscanner), yytext); + yyget_extra(yyscanner)->inter_word = true; return TOKEN_WORD; } @@ -68,6 +68,7 @@ VARNAME ([[:alpha:]][_[:alnum:]]*|[0-9]|[\?@\*#]) yytext++; yylval->word = create_word_var(yyget_extra(yyscanner), yytext, true); + yyget_extra(yyscanner)->inter_word = true; return TOKEN_WORD; } @@ -80,6 +81,7 @@ VARNAME ([[:alpha:]][_[:alnum:]]*|[0-9]|[\?@\*#]) } <sqstring>[^']+ { yylval->word = create_word_text(yyget_extra(yyscanner), yytext); + yyget_extra(yyscanner)->inter_word = true; return TOKEN_WORD; } @@ -92,6 +94,7 @@ VARNAME ([[:alpha:]][_[:alnum:]]*|[0-9]|[\?@\*#]) } <dqstring>([^"\$]|\\\")+ { yylval->word = create_word_text(yyget_extra(yyscanner), yytext); + yyget_extra(yyscanner)->inter_word = true; return TOKEN_WORD; } <dqstring>\${VARNAME} | @@ -103,17 +106,21 @@ VARNAME ([[:alpha:]][_[:alnum:]]*|[0-9]|[\?@\*#]) yytext++; yylval->word = create_word_var(yyget_extra(yyscanner), yytext, false); + yyget_extra(yyscanner)->inter_word = true; return TOKEN_WORD; } /* blocks */ -"{" return '{'; -"}" return '}'; +"{" { yyget_extra(yyscanner)->inter_word = false; return '{'; } +"}" { 
yyget_extra(yyscanner)->inter_word = false; return '}'; } /* end-of-line */ -[ \t]*(;|\n)[ \t]* return TOKEN_EOL; +[ \t]*(;|\n)[ \t]* { + yyget_extra(yyscanner)->inter_word = false; + return TOKEN_EOL; + } /* strip comments */ #.* ; |