From 00a881a1adb6f656049939adecb9bf9edd29a658 Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Mon, 1 Dec 2014 11:47:16 +0800 Subject: discover/grub2: improve handling of word & delimiter tokens Currently, the delimiter token handling is a little fragile: we try to ignore non-inter-word delimiters in the lexer with a selective set of regexes on the possible delimiter characters. This means we don't need to handle potential delimiters in every grammar rule, but there are other situations (not regex-able) where we may see delimiters, and this will cause a parse error. Instead of relying on the regex behaviour, we have an 'inter_word' flag, which is set when we see the first word token, and cleared when we see an end-of-line token. We only emit TOKEN_DELIM when this flag is set. This means that we only get the delim tokens when they're required - when we're looking for word separators (because WORD DELIM WORD is distinct from WORD WORD - eg "linux /vmlinux" and "x$var"). We add a few new tests for the "menuentry" and "if" syntax, with different delimiter configurations. 
Signed-off-by: Jeremy Kerr --- discover/grub2/grub2-lexer.l | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) (limited to 'discover/grub2/grub2-lexer.l') diff --git a/discover/grub2/grub2-lexer.l b/discover/grub2/grub2-lexer.l index 066af60..52575e3 100644 --- a/discover/grub2/grub2-lexer.l +++ b/discover/grub2/grub2-lexer.l @@ -22,14 +22,13 @@ void yyerror(struct grub2_parser *parser, const char *fmt, ...); %x dqstring WORD [^{}|&$;<> \t\n'"#]+ +DELIM [ \t]+ VARNAME ([[:alpha:]][_[:alnum:]]*|[0-9]|[\?@\*#]) %% - /* discard leading & trailing whitespace, but keep inter-word delimeters */ -^[ \t]+ ; -[ \t]+$ ; -[ \t]+ return TOKEN_DELIM; + /* discard whitespace, unless we're looking for inter-word delimiters */ +{DELIM} { if (yyget_extra(yyscanner)->inter_word) return TOKEN_DELIM; } /* reserved words */ "[[" return TOKEN_LDSQBRACKET; @@ -56,6 +55,7 @@ VARNAME ([[:alpha:]][_[:alnum:]]*|[0-9]|[\?@\*#]) /* anything that's not a metachar: return as a plain word */ {WORD} { yylval->word = create_word_text(yyget_extra(yyscanner), yytext); + yyget_extra(yyscanner)->inter_word = true; return TOKEN_WORD; } @@ -68,6 +68,7 @@ VARNAME ([[:alpha:]][_[:alnum:]]*|[0-9]|[\?@\*#]) yytext++; yylval->word = create_word_var(yyget_extra(yyscanner), yytext, true); + yyget_extra(yyscanner)->inter_word = true; return TOKEN_WORD; } @@ -80,6 +81,7 @@ VARNAME ([[:alpha:]][_[:alnum:]]*|[0-9]|[\?@\*#]) } [^']+ { yylval->word = create_word_text(yyget_extra(yyscanner), yytext); + yyget_extra(yyscanner)->inter_word = true; return TOKEN_WORD; } @@ -92,6 +94,7 @@ VARNAME ([[:alpha:]][_[:alnum:]]*|[0-9]|[\?@\*#]) } ([^"\$]|\\\")+ { yylval->word = create_word_text(yyget_extra(yyscanner), yytext); + yyget_extra(yyscanner)->inter_word = true; return TOKEN_WORD; } \${VARNAME} | @@ -103,17 +106,21 @@ VARNAME ([[:alpha:]][_[:alnum:]]*|[0-9]|[\?@\*#]) yytext++; yylval->word = create_word_var(yyget_extra(yyscanner), yytext, false); + yyget_extra(yyscanner)->inter_word = true; return 
TOKEN_WORD; } /* blocks */ -"{" return '{'; -"}" return '}'; +"{" { yyget_extra(yyscanner)->inter_word = false; return '{'; } +"}" { yyget_extra(yyscanner)->inter_word = false; return '}'; } /* end-of-line */ -[ \t]*(;|\n)[ \t]* return TOKEN_EOL; +[ \t]*(;|\n)[ \t]* { + yyget_extra(yyscanner)->inter_word = false; + return TOKEN_EOL; + } /* strip comments */ #.* ; -- cgit v1.2.1