Fix a nasty bug inside StringLiteralParser:

1. We would assume that the length of the string literal token was at least 2.
2. We would allocate a buffer with size length-2.

And when the stars aligned (one of which would be an invalid source location due to a stale PCH), the length would be 0 and we would try to allocate a 4GB buffer. Add checks for this corner case and a bunch of asserts. (We really, really should have had an assert for 1.) Note that there's no test case, since I couldn't get one (it was a major PITA to reproduce); maybe later. llvm-svn: 131492
author: Argyrios Kyrtzidis <akyrtzi@gmail.com> 2011-05-17 22:09:56 +0000
committer: Argyrios Kyrtzidis <akyrtzi@gmail.com> 2011-05-17 22:09:56 +0000
commit: 8b7252a8b3612b79adcd2d17c09bd6f1ab3dcca3 (patch)
tree: c169cbf616f3a298db1a6255c6a5b8a94b37c09e
parent: f15eac1110cc963cabda42759e6eb9d6f077f30a (diff)
download: bcm5719-llvm-8b7252a8b3612b79adcd2d17c09bd6f1ab3dcca3.tar.gz
bcm5719-llvm-8b7252a8b3612b79adcd2d17c09bd6f1ab3dcca3.zip
2 files changed, 23 insertions, 4 deletions
diff --git a/clang/include/clang/Lex/LiteralSupport.h b/clang/include/clang/Lex/LiteralSupport.h
index dcaf4457cfa..ec3d9c58d67 100644
--- a/clang/include/clang/Lex/LiteralSupport.h
+++ b/clang/include/clang/Lex/LiteralSupport.h
@@ -156,7 +156,9 @@ public:
   StringLiteralParser(const Token *StringToks, unsigned NumStringToks,
                       const SourceManager &sm, const LangOptions &features,
                       const TargetInfo &target, Diagnostic *diags = 0)
-    : SM(sm), Features(features), Target(target), Diags(diags) {
+    : SM(sm), Features(features), Target(target), Diags(diags),
+      MaxTokenLength(0), SizeBound(0), wchar_tByteWidth(0),
+      ResultPtr(ResultBuf.data()), hadError(false), AnyWide(false), Pascal(false) {
     init(StringToks, NumStringToks);
   }
     
@@ -165,8 +167,8 @@ public:
   bool AnyWide;
   bool Pascal;
 
-  const char *GetString() { return &ResultBuf[0]; }
-  unsigned GetStringLength() const { return ResultPtr-&ResultBuf[0]; }
+  const char *GetString() { return ResultBuf.data(); }
+  unsigned GetStringLength() const { return ResultPtr-ResultBuf.data(); }
 
   unsigned GetNumStringChars() const {
     if (AnyWide)
diff --git a/clang/lib/Lex/LiteralSupport.cpp b/clang/lib/Lex/LiteralSupport.cpp
index 37e7bf4d628..2c96c4d4ee2 100644
--- a/clang/lib/Lex/LiteralSupport.cpp
+++ b/clang/lib/Lex/LiteralSupport.cpp
@@ -840,16 +840,27 @@ StringLiteralParser::
 StringLiteralParser(const Token *StringToks, unsigned NumStringToks,
                     Preprocessor &PP, bool Complain)
   : SM(PP.getSourceManager()), Features(PP.getLangOptions()),
-    Target(PP.getTargetInfo()), Diags(Complain ? &PP.getDiagnostics() : 0) {
+    Target(PP.getTargetInfo()), Diags(Complain ? &PP.getDiagnostics() : 0),
+    MaxTokenLength(0), SizeBound(0), wchar_tByteWidth(0),
+    ResultPtr(ResultBuf.data()), hadError(false), AnyWide(false), Pascal(false) {
   init(StringToks, NumStringToks);
 }
 
 void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){
+  // The literal token may have come from an invalid source location (e.g. due
+  // to a PCH error), in which case the token length will be 0.
+  if (NumStringToks == 0 || StringToks[0].getLength() < 2) {
+    hadError = true;
+    return;
+  }
+
   // Scan all of the string portions, remember the max individual token length,
   // computing a bound on the concatenated string length, and see whether any
   // piece is a wide-string.  If any of the string portions is a wide-string
   // literal, the result is a wide-string literal [C99 6.4.5p4].
+  assert(NumStringToks && "expected at least one token");
   MaxTokenLength = StringToks[0].getLength();
+  assert(StringToks[0].getLength() >= 2 && "literal token is invalid!");
   SizeBound = StringToks[0].getLength()-2;  // -2 for "".
   AnyWide = StringToks[0].is(tok::wide_string_literal);
 
@@ -858,8 +869,14 @@ void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){
   // Implement Translation Phase #6: concatenation of string literals
   /// (C99 5.1.1.2p1).  The common case is only one string fragment.
   for (unsigned i = 1; i != NumStringToks; ++i) {
+    if (StringToks[i].getLength() < 2) {
+      hadError = true;
+      return;
+    }
+
     // The string could be shorter than this if it needs cleaning, but this is a
     // reasonable bound, which is all we need.
+    assert(StringToks[i].getLength() >= 2 && "literal token is invalid!");
     SizeBound += StringToks[i].getLength()-2;  // -2 for "".
 
     // Remember maximum string piece length.
author	Argyrios Kyrtzidis <akyrtzi@gmail.com>	2011-05-17 22:09:56 +0000
committer	Argyrios Kyrtzidis <akyrtzi@gmail.com>	2011-05-17 22:09:56 +0000
commit	8b7252a8b3612b79adcd2d17c09bd6f1ab3dcca3 (patch)
tree	c169cbf616f3a298db1a6255c6a5b8a94b37c09e
parent	f15eac1110cc963cabda42759e6eb9d6f077f30a (diff)
download	bcm5719-llvm-8b7252a8b3612b79adcd2d17c09bd6f1ab3dcca3.tar.gz bcm5719-llvm-8b7252a8b3612b79adcd2d17c09bd6f1ab3dcca3.zip