From 00a881a1adb6f656049939adecb9bf9edd29a658 Mon Sep 17 00:00:00 2001
From: Jeremy Kerr <jk@ozlabs.org>
Date: Mon, 1 Dec 2014 11:47:16 +0800
Subject: discover/grub2: improve handling of word & delimiter tokens

Currently, the delimiter token handling is a little fragile: we try to
ignore non-inter-word delimiters in the lexer with a selective set of
regexes on the possible delimiter characters.

This means we don't need to handle potential delimiters in every grammar
rule, but there are other situations (not regex-able) where we may see
delimiters, and this will cause a parse error.

Instead of relying on the regex behaviour, we have an 'inter_word' flag,
which is set when we see the first word token, and cleared when we see
an end-of-line token or a block brace ('{' or '}'). We only emit
TOKEN_DELIM when this flag is set.

This means that we only get the delim tokens when they're required -
when we're looking for word separators (because WORD DELIM WORD is
distinct from WORD WORD - e.g. "linux /vmlinux" and "x$var").

We add a few new tests for the "menuentry" and "if" syntax, with
different delimiter configurations.

Signed-off-by: Jeremy Kerr <jk@ozlabs.org>
---
 discover/grub2/grub2-lexer.l | 21 ++++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

(limited to 'discover/grub2/grub2-lexer.l')
diff --git a/discover/grub2/grub2-lexer.l b/discover/grub2/grub2-lexer.l
index 066af60..52575e3 100644
--- a/discover/grub2/grub2-lexer.l
+++ b/discover/grub2/grub2-lexer.l
@@ -22,14 +22,13 @@ void yyerror(struct grub2_parser *parser, const char *fmt, ...);
 %x dqstring
 
 WORD	[^{}|&$;<> \t\n'"#]+
+DELIM	[ \t]+
 VARNAME ([[:alpha:]][_[:alnum:]]*|[0-9]|[\?@\*#])
 
 %%
 
- /* discard leading & trailing whitespace, but keep inter-word delimeters */
-^[ \t]+	;
-[ \t]+$	;
-[ \t]+	return TOKEN_DELIM;
+ /* discard whitespace, unless we're looking for inter-word delimiters */
+{DELIM}	     { if (yyget_extra(yyscanner)->inter_word) return TOKEN_DELIM; }
 
  /* reserved words */
 "[["         return TOKEN_LDSQBRACKET;
@@ -56,6 +55,7 @@ VARNAME ([[:alpha:]][_[:alnum:]]*|[0-9]|[\?@\*#])
  /* anything that's not a metachar: return as a plain word */
 {WORD}	{
 		yylval->word = create_word_text(yyget_extra(yyscanner), yytext);
+		yyget_extra(yyscanner)->inter_word = true;
 		return TOKEN_WORD;
 	}
 
@@ -68,6 +68,7 @@ VARNAME ([[:alpha:]][_[:alnum:]]*|[0-9]|[\?@\*#])
 		yytext++;
 		yylval->word = create_word_var(yyget_extra(yyscanner), yytext,
 						true);
+		yyget_extra(yyscanner)->inter_word = true;
 		return TOKEN_WORD;
 	}
 
@@ -80,6 +81,7 @@ VARNAME ([[:alpha:]][_[:alnum:]]*|[0-9]|[\?@\*#])
 	}
 <sqstring>[^']+ {
 		yylval->word = create_word_text(yyget_extra(yyscanner), yytext);
+		yyget_extra(yyscanner)->inter_word = true;
 		return TOKEN_WORD;
 	}
 
@@ -92,6 +94,7 @@ VARNAME ([[:alpha:]][_[:alnum:]]*|[0-9]|[\?@\*#])
 	}
 <dqstring>([^"\$]|\\\")+ {
 		yylval->word = create_word_text(yyget_extra(yyscanner), yytext);
+		yyget_extra(yyscanner)->inter_word = true;
 		return TOKEN_WORD;
 	}
 <dqstring>\${VARNAME} |
@@ -103,17 +106,21 @@ VARNAME ([[:alpha:]][_[:alnum:]]*|[0-9]|[\?@\*#])
 		yytext++;
 		yylval->word = create_word_var(yyget_extra(yyscanner), yytext,
 						false);
+		yyget_extra(yyscanner)->inter_word = true;
 		return TOKEN_WORD;
 	}
 
 
 
  /* blocks */
-"{"	return '{';
-"}"	return '}';
+"{"	{ yyget_extra(yyscanner)->inter_word = false; return '{'; }
+"}"	{ yyget_extra(yyscanner)->inter_word = false; return '}'; }
 
  /* end-of-line */
-[ \t]*(;|\n)[ \t]*	return TOKEN_EOL;
+[ \t]*(;|\n)[ \t]*	{
+		yyget_extra(yyscanner)->inter_word = false;
+		return TOKEN_EOL;
+	}
 
  /* strip comments */
 #.*	;
-- 
cgit v1.2.1