summaryrefslogtreecommitdiffstats
path: root/discover/grub2/grub2-lexer.l
diff options
context:
space:
mode:
authorJeremy Kerr <jk@ozlabs.org>2014-12-01 11:47:16 +0800
committerJeremy Kerr <jk@ozlabs.org>2014-12-01 12:09:53 +0800
commit00a881a1adb6f656049939adecb9bf9edd29a658 (patch)
tree20fb132fd91c7ce718244ab9884efe0859e7eef0 /discover/grub2/grub2-lexer.l
parent46fbb1e1e33b130284ec211f9c647268ff709ba7 (diff)
downloadtalos-petitboot-00a881a1adb6f656049939adecb9bf9edd29a658.tar.gz
talos-petitboot-00a881a1adb6f656049939adecb9bf9edd29a658.zip
discover/grub2: improve handling of word & delimiter tokens
Currently, the delimiter token handling is a little fragile: we try to ignore non-inter-word delimiters in the lexer with a selective set of regexes on the possible delimiter characters. This means we don't need to handle potential delimiters in every grammar rule, but there are other situations (not regex-able) where we may see delimiters, and this will cause a parse error. Instead of relying on the regex behaviour, we have an 'inter_word' flag, which is set when we see the first word token, and cleared when we see an end-of-line token. We only emit TOKEN_DELIM when this flag is set. This means that we only get the delim tokens when they're required - when we're looking for word separators (because WORD DELIM WORD is distinct from WORD WORD - eg "linux /vmlinux" and "x$var"). We add a few new tests for the "menuentry" and "if" syntax, with different delimiter configurations. Signed-off-by: Jeremy Kerr <jk@ozlabs.org>
Diffstat (limited to 'discover/grub2/grub2-lexer.l')
-rw-r--r--discover/grub2/grub2-lexer.l21
1 files changed, 14 insertions, 7 deletions
diff --git a/discover/grub2/grub2-lexer.l b/discover/grub2/grub2-lexer.l
index 066af60..52575e3 100644
--- a/discover/grub2/grub2-lexer.l
+++ b/discover/grub2/grub2-lexer.l
@@ -22,14 +22,13 @@ void yyerror(struct grub2_parser *parser, const char *fmt, ...);
%x dqstring
WORD [^{}|&$;<> \t\n'"#]+
+DELIM [ \t]+
VARNAME ([[:alpha:]][_[:alnum:]]*|[0-9]|[\?@\*#])
%%
- /* discard leading & trailing whitespace, but keep inter-word delimeters */
-^[ \t]+ ;
-[ \t]+$ ;
-[ \t]+ return TOKEN_DELIM;
+ /* discard whitespace, unless we're looking for inter-word delimiters */
+{DELIM} { if (yyget_extra(yyscanner)->inter_word) return TOKEN_DELIM; }
/* reserved words */
"[[" return TOKEN_LDSQBRACKET;
@@ -56,6 +55,7 @@ VARNAME ([[:alpha:]][_[:alnum:]]*|[0-9]|[\?@\*#])
/* anything that's not a metachar: return as a plain word */
{WORD} {
yylval->word = create_word_text(yyget_extra(yyscanner), yytext);
+ yyget_extra(yyscanner)->inter_word = true;
return TOKEN_WORD;
}
@@ -68,6 +68,7 @@ VARNAME ([[:alpha:]][_[:alnum:]]*|[0-9]|[\?@\*#])
yytext++;
yylval->word = create_word_var(yyget_extra(yyscanner), yytext,
true);
+ yyget_extra(yyscanner)->inter_word = true;
return TOKEN_WORD;
}
@@ -80,6 +81,7 @@ VARNAME ([[:alpha:]][_[:alnum:]]*|[0-9]|[\?@\*#])
}
<sqstring>[^']+ {
yylval->word = create_word_text(yyget_extra(yyscanner), yytext);
+ yyget_extra(yyscanner)->inter_word = true;
return TOKEN_WORD;
}
@@ -92,6 +94,7 @@ VARNAME ([[:alpha:]][_[:alnum:]]*|[0-9]|[\?@\*#])
}
<dqstring>([^"\$]|\\\")+ {
yylval->word = create_word_text(yyget_extra(yyscanner), yytext);
+ yyget_extra(yyscanner)->inter_word = true;
return TOKEN_WORD;
}
<dqstring>\${VARNAME} |
@@ -103,17 +106,21 @@ VARNAME ([[:alpha:]][_[:alnum:]]*|[0-9]|[\?@\*#])
yytext++;
yylval->word = create_word_var(yyget_extra(yyscanner), yytext,
false);
+ yyget_extra(yyscanner)->inter_word = true;
return TOKEN_WORD;
}
/* blocks */
-"{" return '{';
-"}" return '}';
+"{" { yyget_extra(yyscanner)->inter_word = false; return '{'; }
+"}" { yyget_extra(yyscanner)->inter_word = false; return '}'; }
/* end-of-line */
-[ \t]*(;|\n)[ \t]* return TOKEN_EOL;
+[ \t]*(;|\n)[ \t]* {
+ yyget_extra(yyscanner)->inter_word = false;
+ return TOKEN_EOL;
+ }
/* strip comments */
#.* ;
OpenPOWER on IntegriCloud