19 files changed, 378 insertions, 49 deletions
diff --git a/clang/AST/ASTStreamer.cpp b/clang/AST/ASTStreamer.cpp
index bd192040d7b..7db53f3ae6f 100644
--- a/clang/AST/ASTStreamer.cpp
+++ b/clang/AST/ASTStreamer.cpp
@@ -19,7 +19,7 @@ using namespace clang;
 
 /// Interface to the Builder.cpp file.
 ///
-Action *CreateASTBuilderActions(bool FullLocInfo);
+Action *CreateASTBuilderActions(Preprocessor &PP, bool FullLocInfo);
 
 
 namespace {
@@ -27,7 +27,7 @@ namespace {
     Parser P;
   public:
     ASTStreamer(Preprocessor &PP, unsigned MainFileID, bool FullLocInfo)
-      : P(PP, *CreateASTBuilderActions(FullLocInfo)) {
+      : P(PP, *CreateASTBuilderActions(PP, FullLocInfo)) {
       PP.EnterSourceFile(MainFileID, 0, true);
       
       // Initialize the parser.
diff --git a/clang/AST/Expr.cpp b/clang/AST/Expr.cpp
index 46051574491..f00301c759f 100644
--- a/clang/AST/Expr.cpp
+++ b/clang/AST/Expr.cpp
@@ -43,6 +43,28 @@ void FloatingConstant::dump_impl() const {
   std::cerr << "1.0";
 }
 
+
+
+StringExpr::StringExpr(const char *strData, unsigned byteLength, bool Wide)
+  : ByteLength(byteLength), isWide(Wide) {
+  // Copy the bytes, NUL-terminated for dump_impl.  OPTIMIZE: co-allocate.
+  char *AStrData = new char[byteLength+1];
+  memcpy(AStrData, strData, byteLength);
+  AStrData[byteLength] = 0;
+  StrData = AStrData;
+}
+
+StringExpr::~StringExpr() {
+  delete[] StrData;  // Releases the array allocated by the constructor.
+}
+
+void StringExpr::dump_impl() const {
+  if (isWide) std::cerr << 'L';          // Wide strings get an 'L' prefix.
+  std::cerr << '"' << StrData << '"';    // Streamed as a C string (to NUL).
+}
+
+
+
 void ParenExpr::dump_impl() const {
   std::cerr << "'('";
   Val->dump();
diff --git a/clang/AST/Sema.cpp b/clang/AST/Sema.cpp
index 06eed86d442..cae97fc2bd6 100644
--- a/clang/AST/Sema.cpp
+++ b/clang/AST/Sema.cpp
@@ -18,18 +18,23 @@
 #include "clang/Parse/Scope.h"
 #include "clang/Lex/IdentifierTable.h"
 #include "clang/Lex/LexerToken.h"
-#include "llvm/Support/Visibility.h"
+#include "clang/Lex/Preprocessor.h"
+#include "llvm/Support/Compiler.h"
 using namespace llvm;
 using namespace clang;
 
 /// ASTBuilder
 namespace {
 class VISIBILITY_HIDDEN ASTBuilder : public Action {
+  Preprocessor &PP;
+  
   /// FullLocInfo - If this is true, the ASTBuilder constructs AST Nodes that
   /// capture maximal location information for each source-language construct.
   bool FullLocInfo;
 public:
-  ASTBuilder(bool fullLocInfo) : FullLocInfo(fullLocInfo) {}
+  ASTBuilder(Preprocessor &pp, bool fullLocInfo)
+    : PP(pp), FullLocInfo(fullLocInfo) {}
+  
   //===--------------------------------------------------------------------===//
   // Symbol table tracking callbacks.
   //
@@ -47,6 +52,9 @@ public:
   virtual ExprResult ParseFloatingConstant(const LexerToken &Tok);
   virtual ExprResult ParseParenExpr(SourceLocation L, SourceLocation R,
                                     ExprTy *Val);
+  virtual ExprResult ParseStringExpr(const char *StrData, unsigned StrLen,
+                                     bool isWide,
+                                     const LexerToken *Toks, unsigned NumToks);
   
   // Binary/Unary Operators.  'Tok' is the token for the operator.
   virtual ExprResult ParseUnaryOp(const LexerToken &Tok, ExprTy *Input);
@@ -166,6 +174,26 @@ Action::ExprResult ASTBuilder::ParseParenExpr(SourceLocation L,
   return new ParenExpr(L, R, (Expr*)Val);
 }
 
+/// ParseStringExpr - This accepts a string after semantic analysis. This string
+/// may be the result of string concatenation ([C99 5.1.1.2, translation phase
+/// #6]), so it may come from multiple tokens.
+/// 
+Action::ExprResult ASTBuilder::
+ParseStringExpr(const char *StrData, unsigned StrLen, bool isWide,
+                const LexerToken *Toks, unsigned NumToks) {
+  assert(NumToks && "Must have at least one string!");
+  
+  // Without full location info a bare StringExpr is all we build.
+  if (!FullLocInfo)
+    return new StringExpr(StrData, StrLen, isWide);
+  // Otherwise also record the location of every pre-concatenation token.
+  SmallVector<SourceLocation, 4> TokLoc;
+  for (const LexerToken *T = Toks, *E = Toks+NumToks; T != E; ++T)
+    TokLoc.push_back(T->getLocation());
+  return new StringExprLOC(StrData, StrLen, isWide, &TokLoc[0], TokLoc.size());
+}
+
+
 // Unary Operators.  'Tok' is the token for the operator.
 Action::ExprResult ASTBuilder::ParseUnaryOp(const LexerToken &Tok,
                                             ExprTy *Input) {
@@ -326,8 +354,8 @@ Action::ExprResult ASTBuilder::ParseConditionalOp(SourceLocation QuestionLoc,
 
 /// Interface to the Builder.cpp file.
 ///
-Action *CreateASTBuilderActions(bool FullLocInfo) {
-  return new ASTBuilder(FullLocInfo);
+Action *CreateASTBuilderActions(Preprocessor &PP, bool FullLocInfo) {
+  return new ASTBuilder(PP, FullLocInfo);
 }
 
 
diff --git a/clang/Lex/Lexer.cpp b/clang/Lex/Lexer.cpp
index c686d918262..4bceedd4124 100644
--- a/clang/Lex/Lexer.cpp
+++ b/clang/Lex/Lexer.cpp
@@ -444,7 +444,7 @@ void Lexer::LexNumericConstant(LexerToken &Result, const char *CurPtr) {
 
 /// LexStringLiteral - Lex the remainder of a string literal, after having lexed
 /// either " or L".
-void Lexer::LexStringLiteral(LexerToken &Result, const char *CurPtr) {
+void Lexer::LexStringLiteral(LexerToken &Result, const char *CurPtr, bool Wide){
   const char *NulCharacter = 0; // Does this string contain the \0 character?
   
   char C = getAndAdvanceChar(CurPtr, Result);
@@ -468,7 +468,7 @@ void Lexer::LexStringLiteral(LexerToken &Result, const char *CurPtr) {
   // If a nul character existed in the string, warn about it.
   if (NulCharacter) Diag(NulCharacter, diag::null_in_string);
 
-  Result.SetKind(tok::string_literal);
+  Result.SetKind(Wide ? tok::wide_string_literal : tok::string_literal);
 
   // Update the location of the token as well as the BufferPtr instance var.
   FormTokenWithChars(Result, CurPtr);
@@ -1104,7 +1104,8 @@ LexNextToken:
 
     // Wide string literal.
     if (Char == '"')
-      return LexStringLiteral(Result, ConsumeChar(CurPtr, SizeTmp, Result));
+      return LexStringLiteral(Result, ConsumeChar(CurPtr, SizeTmp, Result),
+                              true);
 
     // Wide character constant.
     if (Char == '\'')
@@ -1143,7 +1144,7 @@ LexNextToken:
   case '"':
     // Notify MIOpt that we read a non-whitespace/non-comment token.
     MIOpt.ReadToken();
-    return LexStringLiteral(Result, CurPtr);
+    return LexStringLiteral(Result, CurPtr, false);
 
   // C99 6.4.6: Punctuators.
   case '?':
diff --git a/clang/Lex/MacroExpander.cpp b/clang/Lex/MacroExpander.cpp
index 423eb9ba360..1a64e36a10d 100644
--- a/clang/Lex/MacroExpander.cpp
+++ b/clang/Lex/MacroExpander.cpp
@@ -161,8 +161,9 @@ static LexerToken StringifyArgument(const LexerToken *ArgToks,
     
     // If this is a string or character constant, escape the token as specified
     // by 6.10.3.2p2.
-    if (Tok.getKind() == tok::string_literal ||  // "foo" and L"foo".
-        Tok.getKind() == tok::char_constant) {   // 'x' and L'x'.
+    if (Tok.getKind() == tok::string_literal ||      // "foo"
+        Tok.getKind() == tok::wide_string_literal || // L"foo"
+        Tok.getKind() == tok::char_constant) {       // 'x' and L'x'.
       Result += Lexer::Stringify(PP.getSpelling(Tok));
     } else {
       // Otherwise, just append the token.
diff --git a/clang/Lex/Pragma.cpp b/clang/Lex/Pragma.cpp
index 64b04ccfc60..2fbf9cc6c09 100644
--- a/clang/Lex/Pragma.cpp
+++ b/clang/Lex/Pragma.cpp
@@ -96,7 +96,8 @@ void Preprocessor::Handle_Pragma(LexerToken &Tok) {
 
   // Read the '"..."'.
   Lex(Tok);
-  if (Tok.getKind() != tok::string_literal)
+  if (Tok.getKind() != tok::string_literal &&
+      Tok.getKind() != tok::wide_string_literal)
     return Diag(PragmaLoc, diag::err__Pragma_malformed);
   
   // Remember the string.
diff --git a/clang/Lex/Preprocessor.cpp b/clang/Lex/Preprocessor.cpp
index 702af741369..d8d3c42536b 100644
--- a/clang/Lex/Preprocessor.cpp
+++ b/clang/Lex/Preprocessor.cpp
@@ -444,7 +444,6 @@ void Preprocessor::EnterSourceFileWithLexer(Lexer *TheLexer,
 /// tokens from it instead of the current buffer.
 void Preprocessor::EnterMacro(LexerToken &Tok, MacroArgs *Args) {
   IdentifierInfo *Identifier = Tok.getIdentifierInfo();
-  MacroInfo &MI = *Identifier->getMacroInfo();
   IncludeMacroStack.push_back(IncludeStackInfo(CurLexer, CurDirLookup,
                                                CurMacroExpander));
   CurLexer     = 0;
@@ -1492,7 +1491,8 @@ void Preprocessor::HandleIdentSCCSDirective(LexerToken &Tok) {
   Lex(StrTok);
   
   // If the token kind isn't a string, it's a malformed directive.
-  if (StrTok.getKind() != tok::string_literal)
+  if (StrTok.getKind() != tok::string_literal &&
+      StrTok.getKind() != tok::wide_string_literal)
     return Diag(StrTok, diag::err_pp_malformed_ident);
   
   // Verify that there is nothing after the string, other than EOM.
diff --git a/clang/Parse/ParseExpr.cpp b/clang/Parse/ParseExpr.cpp
index 54284d0d790..3b641e7c6d7 100644
--- a/clang/Parse/ParseExpr.cpp
+++ b/clang/Parse/ParseExpr.cpp
@@ -22,6 +22,8 @@
 #include "clang/Parse/Parser.h"
 #include "clang/Basic/Diagnostic.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Config/Alloca.h"
 using namespace llvm;
 using namespace clang;
 
@@ -490,6 +492,7 @@ Parser::ExprResult Parser::ParseCastExpression(bool isUnaryExpression) {
     // These can be followed by postfix-expr pieces.
     return ParsePostfixExpressionSuffix(Res);
   case tok::string_literal:    // primary-expression: string-literal
+  case tok::wide_string_literal:
     Res = ParseStringLiteralExpression();
     if (Res.isInvalid) return Res;
     // This can be followed by postfix-expr pieces (e.g. "foo"[1]).
@@ -809,24 +812,6 @@ Parser::ExprResult Parser::ParseBuiltinPrimaryExpression() {
   return ParsePostfixExpressionSuffix(Res);
 }
 
-/// ParseStringLiteralExpression - This handles the various token types that
-/// form string literals, and also handles string concatenation [C99 5.1.1.2,
-/// translation phase #6].
-///
-///       primary-expression: [C99 6.5.1]
-///         string-literal
-Parser::ExprResult Parser::ParseStringLiteralExpression() {
-  assert(isTokenStringLiteral() && "Not a string literal!");
-  ConsumeStringToken();
-  
-  // String concat.  Note that keywords like __func__ and __FUNCTION__ aren't
-  // considered to be strings.
-  while (isTokenStringLiteral())
-    ConsumeStringToken();
-  // TODO: Build AST for string literals.
-  return ExprResult(false);
-}
-
 
 /// ParseParenExpression - This parses the unit that starts with a '(' token,
 /// based on what is allowed by ExprType.  The actual thing parsed is returned
@@ -906,3 +891,223 @@ Parser::ExprResult Parser::ParseParenExpression(ParenParseOption &ExprType,
   
   return Result;
 }
+
+/// HexDigitValue - Map a hexadecimal digit character (either case) onto its
+/// numeric value in the range 0-15; any other character yields -1.
+static int HexDigitValue(char C) {
+  const bool IsDec = C >= '0' && C <= '9';
+  const bool IsLow = C >= 'a' && C <= 'f', IsUp = C >= 'A' && C <= 'F';
+  if (!IsDec && !IsLow && !IsUp) return -1;
+  return IsDec ? C-'0' : (IsLow ? C-'a' : C-'A')+10;
+}
+
+/// ParseStringLiteralExpression - This handles the various token types that
+/// form string literals, and also handles string concatenation [C99 5.1.1.2,
+/// translation phase #6].
+///
+///       primary-expression: [C99 6.5.1]
+///         string-literal
+Parser::ExprResult Parser::ParseStringLiteralExpression() {
+  assert(isTokenStringLiteral() && "Not a string literal!");
+  
+  // String concat.  Note that keywords like __func__ and __FUNCTION__ are not
+  // considered to be strings for concatenation purposes.
+  SmallVector<LexerToken, 4> StringToks;
+  
+  // While we're looking at all of the string portions, remember the max
+  // individual token length, computing a bound on the concatenated string
+  // length, and see whether any piece is a wide-string.  If any of the string
+  // portions is a wide-string literal, the result is also a wide-string literal
+  // [C99 6.4.5p4].
+  unsigned SizeBound = 0, MaxTokenLength = 0;
+  bool AnyWide = false;
+  do {
+    // The string could be shorter than this if it needs cleaning, but this is a
+    // reasonable bound, which is all we need.
+    SizeBound += Tok.getLength()-2;  // -2 for "".
+    
+    // Find maximum string piece length.
+    if (Tok.getLength() > MaxTokenLength) 
+      MaxTokenLength = Tok.getLength();
+    
+    // Remember if we see any wide strings.
+    AnyWide |= Tok.getKind() == tok::wide_string_literal;
+    
+    // Remember the string token.
+    StringToks.push_back(Tok);
+    ConsumeStringToken();
+  } while (isTokenStringLiteral());
+  
+  // Include space for the null terminator.
+  ++SizeBound;
+  
+  // TODO: K&R warning: "traditional C rejects string constant concatenation"
+  
+  // FIXME: Size of wchar_t should not be hardcoded!
+  unsigned wchar_tByteWidth = 4;
+  
+  // The output buffer size needs to be large enough to hold wide characters.
+  // This is a worst-case assumption which basically corresponds to L"" "long".
+  if (AnyWide)
+    SizeBound *= wchar_tByteWidth;
+  
+  // Create a temporary buffer to hold the result string data.  If it is "big",
+  // use malloc, otherwise use alloca.
+  char *ResultBuf;
+  if (SizeBound > 512)
+    ResultBuf = (char*)malloc(SizeBound);
+  else
+    ResultBuf = (char*)alloca(SizeBound);
+  
+  // Likewise, but for each string piece.
+  char *TokenBuf;
+  if (MaxTokenLength > 512)
+    TokenBuf = (char*)malloc(MaxTokenLength);
+  else
+    TokenBuf = (char*)alloca(MaxTokenLength);
+  
+  // Loop over all the strings, getting their spelling, and expanding them to
+  // wide strings as appropriate.
+  char *ResultPtr = ResultBuf;   // Next byte to fill in.
+  
+  for (unsigned i = 0, e = StringToks.size(); i != e; ++i) {
+    const char *ThisTokBuf = TokenBuf;
+    // Get the spelling of the token, which eliminates trigraphs, etc.  We know
+    // that ThisTokBuf points to a buffer that is big enough for the whole token
+    // and 'spelled' tokens can only shrink.
+    unsigned ThisTokLen = PP.getSpelling(StringToks[i], ThisTokBuf);
+    const char *ThisTokEnd = ThisTokBuf+ThisTokLen-1;  // Skip end quote.
+    
+    // TODO: Input character set mapping support.
+    
+    // Skip L marker for wide strings.
+    if (ThisTokBuf[0] == 'L') ++ThisTokBuf;
+    
+    assert(ThisTokBuf[0] == '"' && "Expected quote, lexer broken?");
+    ++ThisTokBuf;
+    
+    while (ThisTokBuf != ThisTokEnd) {
+      // Is this a span of non-escape characters?
+      if (ThisTokBuf[0] != '\\') {
+        const char *InStart = ThisTokBuf;
+        do {
+          ++ThisTokBuf;
+        } while (ThisTokBuf != ThisTokEnd && ThisTokBuf[0] != '\\');
+        
+        // Copy the character span over.
+        unsigned Len = ThisTokBuf-InStart;
+        if (!AnyWide) {
+          memcpy(ResultPtr, InStart, Len);
+          ResultPtr += Len;
+        } else {
+          // Note: our internal rep of wide char tokens is always little-endian.
+          for (; Len; --Len, ++InStart) {
+            *ResultPtr++ = InStart[0];
+            // Pad the rest of the wide character with zero bytes.
+            for (unsigned B = 1; B != wchar_tByteWidth; ++B)
+              *ResultPtr++ = 0;
+          }
+        }
+        continue;
+      }
+      
+      // Otherwise, this is an escape character.  Skip the '\' char.
+      ++ThisTokBuf;
+      
+      // We know that this character can't be off the end of the buffer, because
+      // that would have been \", which would not have been the end of string.
+      unsigned ResultChar = (unsigned char)*ThisTokBuf++;
+      switch (ResultChar) {
+      // These map to themselves.
+      case '\\': case '\'': case '"': case '?': break;
+        
+      // These have fixed mappings.
+      case 'a':
+        // TODO: K&R: the meaning of '\\a' is different in traditional C
+        ResultChar = 7;
+        break;
+      case 'b':
+        ResultChar = 8;
+        break;
+      case 'e':
+        PP.Diag(StringToks[i], diag::ext_nonstandard_escape, "e");
+        ResultChar = 27;
+        break;
+      case 'f':
+        ResultChar = 12;
+        break;
+      case 'n':
+        ResultChar = 10;
+        break;
+      case 'r':
+        ResultChar = 13;
+        break;
+      case 't':
+        ResultChar = 9;
+        break;
+      case 'v':
+        ResultChar = 11;
+        break;
+        
+      //case 'u': case 'U':  // FIXME: UCNs.
+      case 'x': // Hex escape.
+        if (ThisTokBuf == ThisTokEnd ||
+            (ResultChar = HexDigitValue(*ThisTokBuf)) == ~0U) {
+          PP.Diag(StringToks[i], diag::err_hex_escape_no_digits);
+          ResultChar = 0;
+          break;
+        }
+        ++ThisTokBuf; // Consumed one hex digit.
+        
+        assert(0 && "hex escape: unimp!");
+        break;
+      case '0': case '1': case '2': case '3':
+      case '4': case '5': case '6': case '7':
+        // Octal escapes.
+        assert(0 && "octal escape: unimp!");
+        break;
+        
+      // Otherwise, these are not valid escapes.
+      case '(': case '{': case '[': case '%':
+        // GCC accepts these as extensions.  We warn about them as such though.
+        if (!PP.getLangOptions().NoExtensions) {
+          PP.Diag(StringToks[i], diag::ext_nonstandard_escape,
+                  std::string()+(char)ResultChar);
+          break;
+        }
+        // FALL THROUGH.
+      default:
+        if (isgraph(ResultChar)) {
+          PP.Diag(StringToks[i], diag::ext_unknown_escape,
+                  std::string()+(char)ResultChar);
+        } else {
+          PP.Diag(StringToks[i], diag::ext_unknown_escape,
+                  "x"+utohexstr(ResultChar));
+        }
+      }
+
+      // Emit the char: one byte for narrow strings, little-endian wchar_t else.
+      for (unsigned B = 0, NB = AnyWide ? wchar_tByteWidth : 1; B != NB; ++B)
+        *ResultPtr++ = ResultChar >> B*8;
+    }
+  }
+  
+  // Add zero terminator: one byte, or a whole wchar_t for wide strings.  Leave
+  // ResultPtr at the end of the payload so the length below excludes it.
+  *ResultPtr = 0;
+  if (AnyWide)
+    for (unsigned B = 1; B != wchar_tByteWidth; ++B)
+      ResultPtr[B] = 0;
+  
+  // Hand this off to the Actions.
+  ExprResult Res = Actions.ParseStringExpr(ResultBuf, ResultPtr-ResultBuf,
+                                           AnyWide,
+                                           &StringToks[0], StringToks.size());
+  
+  // If either buffer was heap allocated, release it now.
+  if (MaxTokenLength > 512) free(TokenBuf);
+  if (SizeBound > 512) free(ResultBuf);
+  
+  return Res;
+}
+
diff --git a/clang/Parse/ParseStmt.cpp b/clang/Parse/ParseStmt.cpp
index b676d46a437..063e86b9ad8 100644
--- a/clang/Parse/ParseStmt.cpp
+++ b/clang/Parse/ParseStmt.cpp
@@ -597,7 +597,7 @@ void Parser::ParseAsmStatement() {
   if (Tok.getKind() == tok::colon) {
     ConsumeToken();
     
-    if (Tok.getKind() == tok::string_literal) {
+    if (isTokenStringLiteral()) {
       // Parse the asm-string list for clobbers.
       while (1) {
         ParseAsmStringLiteral();
@@ -629,7 +629,7 @@ void Parser::ParseAsmOperandsOpt() {
   ConsumeToken();
   
   // 'asm-operands' isn't present?
-  if (Tok.getKind() != tok::string_literal && Tok.getKind() != tok::l_square)
+  if (!isTokenStringLiteral() && Tok.getKind() != tok::l_square)
     return;
   
   while (1) {
diff --git a/clang/Parse/Parser.cpp b/clang/Parse/Parser.cpp
index c74c634632c..1a210ce172b 100644
--- a/clang/Parse/Parser.cpp
+++ b/clang/Parse/Parser.cpp
@@ -152,6 +152,7 @@ bool Parser::SkipUntil(tok::TokenKind T, bool StopAtSemi, bool DontConsume) {
       break;
       
     case tok::string_literal:
+    case tok::wide_string_literal:
       ConsumeStringToken();
       break;
     case tok::semi:
@@ -405,7 +406,7 @@ void Parser::ParseFunctionDefinition(Declarator &D) {
 ///         string-literal
 ///
 void Parser::ParseAsmStringLiteral() {
-  if (Tok.getKind() != tok::string_literal) {
+  if (!isTokenStringLiteral()) {
     Diag(Tok, diag::err_expected_string_literal);
     return;
   }
diff --git a/clang/Sema/ASTStreamer.cpp b/clang/Sema/ASTStreamer.cpp
index bd192040d7b..7db53f3ae6f 100644
--- a/clang/Sema/ASTStreamer.cpp
+++ b/clang/Sema/ASTStreamer.cpp
@@ -19,7 +19,7 @@ using namespace clang;
 
 /// Interface to the Builder.cpp file.
 ///
-Action *CreateASTBuilderActions(bool FullLocInfo);
+Action *CreateASTBuilderActions(Preprocessor &PP, bool FullLocInfo);
 
 
 namespace {
@@ -27,7 +27,7 @@ namespace {
     Parser P;
   public:
     ASTStreamer(Preprocessor &PP, unsigned MainFileID, bool FullLocInfo)
-      : P(PP, *CreateASTBuilderActions(FullLocInfo)) {
+      : P(PP, *CreateASTBuilderActions(PP, FullLocInfo)) {
       PP.EnterSourceFile(MainFileID, 0, true);
       
       // Initialize the parser.
diff --git a/clang/Sema/Sema.cpp b/clang/Sema/Sema.cpp
index 06eed86d442..cae97fc2bd6 100644
--- a/clang/Sema/Sema.cpp
+++ b/clang/Sema/Sema.cpp
@@ -18,18 +18,23 @@
 #include "clang/Parse/Scope.h"
 #include "clang/Lex/IdentifierTable.h"
 #include "clang/Lex/LexerToken.h"
-#include "llvm/Support/Visibility.h"
+#include "clang/Lex/Preprocessor.h"
+#include "llvm/Support/Compiler.h"
 using namespace llvm;
 using namespace clang;
 
 /// ASTBuilder
 namespace {
 class VISIBILITY_HIDDEN ASTBuilder : public Action {
+  Preprocessor &PP;
+  
   /// FullLocInfo - If this is true, the ASTBuilder constructs AST Nodes that
   /// capture maximal location information for each source-language construct.
   bool FullLocInfo;
 public:
-  ASTBuilder(bool fullLocInfo) : FullLocInfo(fullLocInfo) {}
+  ASTBuilder(Preprocessor &pp, bool fullLocInfo)
+    : PP(pp), FullLocInfo(fullLocInfo) {}
+  
   //===--------------------------------------------------------------------===//
   // Symbol table tracking callbacks.
   //
@@ -47,6 +52,9 @@ public:
   virtual ExprResult ParseFloatingConstant(const LexerToken &Tok);
   virtual ExprResult ParseParenExpr(SourceLocation L, SourceLocation R,
                                     ExprTy *Val);
+  virtual ExprResult ParseStringExpr(const char *StrData, unsigned StrLen,
+                                     bool isWide,
+                                     const LexerToken *Toks, unsigned NumToks);
   
   // Binary/Unary Operators.  'Tok' is the token for the operator.
   virtual ExprResult ParseUnaryOp(const LexerToken &Tok, ExprTy *Input);
@@ -166,6 +174,26 @@ Action::ExprResult ASTBuilder::ParseParenExpr(SourceLocation L,
   return new ParenExpr(L, R, (Expr*)Val);
 }
 
+/// ParseStringExpr - This accepts a string after semantic analysis. This string
+/// may be the result of string concatenation ([C99 5.1.1.2, translation phase
+/// #6]), so it may come from multiple tokens.
+/// 
+Action::ExprResult ASTBuilder::
+ParseStringExpr(const char *StrData, unsigned StrLen, bool isWide,
+                const LexerToken *Toks, unsigned NumToks) {
+  assert(NumToks && "Must have at least one string!");
+  
+  // Without full location info a bare StringExpr is all we build.
+  if (!FullLocInfo)
+    return new StringExpr(StrData, StrLen, isWide);
+  // Otherwise also record the location of every pre-concatenation token.
+  SmallVector<SourceLocation, 4> TokLoc;
+  for (const LexerToken *T = Toks, *E = Toks+NumToks; T != E; ++T)
+    TokLoc.push_back(T->getLocation());
+  return new StringExprLOC(StrData, StrLen, isWide, &TokLoc[0], TokLoc.size());
+}
+
+
 // Unary Operators.  'Tok' is the token for the operator.
 Action::ExprResult ASTBuilder::ParseUnaryOp(const LexerToken &Tok,
                                             ExprTy *Input) {
@@ -326,8 +354,8 @@ Action::ExprResult ASTBuilder::ParseConditionalOp(SourceLocation QuestionLoc,
 
 /// Interface to the Builder.cpp file.
 ///
-Action *CreateASTBuilderActions(bool FullLocInfo) {
-  return new ASTBuilder(FullLocInfo);
+Action *CreateASTBuilderActions(Preprocessor &PP, bool FullLocInfo) {
+  return new ASTBuilder(PP, FullLocInfo);
 }
 
 
diff --git a/clang/clang.xcodeproj/project.pbxproj b/clang/clang.xcodeproj/project.pbxproj
index 728f768c664..260cb9e1495 100644
--- a/clang/clang.xcodeproj/project.pbxproj
+++ b/clang/clang.xcodeproj/project.pbxproj
@@ -424,7 +424,7 @@
 		1DEB923208733DC60010E9CD /* Debug */ = {
 			isa = XCBuildConfiguration;
 			buildSettings = {
-				ARCHS = ppc;
+				ARCHS = i386;
 				COPY_PHASE_STRIP = NO;
 				GCC_CW_ASM_SYNTAX = NO;
 				GCC_DYNAMIC_NO_PIC = NO;
@@ -460,7 +460,7 @@
 		1DEB923308733DC60010E9CD /* Release */ = {
 			isa = XCBuildConfiguration;
 			buildSettings = {
-				ARCHS = ppc;
+				ARCHS = i386;
 				GCC_CW_ASM_SYNTAX = NO;
 				GCC_ENABLE_CPP_EXCEPTIONS = NO;
 				GCC_ENABLE_CPP_RTTI = NO;
diff --git a/clang/include/clang/AST/Expr.h b/clang/include/clang/AST/Expr.h
index 472e889ca8e..a7cdd150e58 100644
--- a/clang/include/clang/AST/Expr.h
+++ b/clang/include/clang/AST/Expr.h
@@ -15,7 +15,7 @@
 #define LLVM_CLANG_AST_EXPR_H
 
 #include "clang/Basic/SourceLocation.h"
-#include <cassert>
+#include "llvm/ADT/SmallVector.h"
 
 namespace llvm {
 namespace clang {
@@ -64,6 +64,26 @@ public:
   virtual void dump_impl() const;
 };
 
+class StringExpr : public Expr {
+  const char *StrData;   // Owned heap copy of the bytes (see dtor).
+  unsigned ByteLength;   // Byte count passed to the constructor.
+  bool isWide;           // Set for wide strings; dump_impl prints an 'L'.
+public:
+  StringExpr(const char *strData, unsigned byteLength, bool Wide);
+  virtual ~StringExpr();
+  virtual void dump_impl() const;
+};
+
+class StringExprLOC : public StringExpr {
+  // Locations of the string tokens, in input order, before concatenation.
+  SmallVector<SourceLocation, 4> Locs;
+public:
+  StringExprLOC(const char *StrData, unsigned ByteLength, bool isWide,
+                SourceLocation *L, unsigned NumLocs)
+    : StringExpr(StrData, ByteLength, isWide), Locs(L, L+NumLocs) {
+  }  // Locs holds its own copy of the NumLocs entries starting at L.
+};
+
 /// ParenExpr - This represents a parethesized expression, e.g. "(1)".  This
 /// AST node is only formed if full location information is requested.
 class ParenExpr : public Expr {
diff --git a/clang/include/clang/Basic/DiagnosticKinds.def b/clang/include/clang/Basic/DiagnosticKinds.def
index 25d842106de..2dcf0467d83 100644
--- a/clang/include/clang/Basic/DiagnosticKinds.def
+++ b/clang/include/clang/Basic/DiagnosticKinds.def
@@ -342,6 +342,18 @@ DIAG(err_expected_asm_operand, ERROR,
 DIAG(err_matching, ERROR,
      "to match this '%s'")
 
+//===----------------------------------------------------------------------===//
+// Semantic Analysis
+//===----------------------------------------------------------------------===//
+
+DIAG(ext_nonstandard_escape, EXTENSION,
+     "use of non-standard escape character '\\%s'")
+DIAG(ext_unknown_escape, EXTENSION,
+     "unknown escape sequence '\\%s'")
+
+DIAG(err_hex_escape_no_digits, ERROR,
+     "\\x used with no following hex digits")
+
 DIAG(err_typename_requires_specqual, ERROR,
      "type name requires a specifier or qualifier")
 DIAG(err_typename_invalid_storageclass, ERROR,
diff --git a/clang/include/clang/Basic/TokenKinds.def b/clang/include/clang/Basic/TokenKinds.def
index f7f20609ff8..886ad8faae5 100644
--- a/clang/include/clang/Basic/TokenKinds.def
+++ b/clang/include/clang/Basic/TokenKinds.def
@@ -42,7 +42,8 @@ TOK(numeric_constant)    // 0x123
 TOK(char_constant)       // 'a'   L'b'
 
 // C99 6.4.5: String Literals.
-TOK(string_literal)      // "foo"  L"foo"
+TOK(string_literal)      // "foo"
+TOK(wide_string_literal) // L"foo"
 TOK(angle_string_literal)// <foo>
 
 // C99 6.4.6: Punctuators.
diff --git a/clang/include/clang/Lex/Lexer.h b/clang/include/clang/Lex/Lexer.h
index c507222fa85..7ef310e957a 100644
--- a/clang/include/clang/Lex/Lexer.h
+++ b/clang/include/clang/Lex/Lexer.h
@@ -329,7 +329,7 @@ private:
   // Helper functions to lex the remainder of a token of the specific type.
   void LexIdentifier         (LexerToken &Result, const char *CurPtr);
   void LexNumericConstant    (LexerToken &Result, const char *CurPtr);
-  void LexStringLiteral      (LexerToken &Result, const char *CurPtr);
+  void LexStringLiteral      (LexerToken &Result, const char *CurPtr,bool Wide);
   void LexAngledStringLiteral(LexerToken &Result, const char *CurPtr);
   void LexCharConstant       (LexerToken &Result, const char *CurPtr);
   bool LexEndOfFile          (LexerToken &Result, const char *CurPtr);
diff --git a/clang/include/clang/Parse/Action.h b/clang/include/clang/Parse/Action.h
index 6e40195605b..74f7d83367d 100644
--- a/clang/include/clang/Parse/Action.h
+++ b/clang/include/clang/Parse/Action.h
@@ -94,11 +94,19 @@ public:
   virtual ExprResult ParseSimplePrimaryExpr(const LexerToken &Tok) { return 0; }
   virtual ExprResult ParseIntegerConstant(const LexerToken &Tok) { return 0; }
   virtual ExprResult ParseFloatingConstant(const LexerToken &Tok) { return 0; }
-
   virtual ExprResult ParseParenExpr(SourceLocation L, SourceLocation R,
                                     ExprTy *Val) {
     return Val;  // Default impl returns operand.
   }
+  
+  /// ParseStringExpr - The (null terminated) string data is specified with
+  /// StrData+StrLen.  isWide is true if this is a wide string. The Toks/NumToks
+  /// array exposes the input tokens to provide location information.
+  virtual ExprResult ParseStringExpr(const char *StrData, unsigned StrLen,
+                                     bool isWide,
+                                     const LexerToken *Toks, unsigned NumToks) {
+    return 0;  // Default impl returns 0, like the constant callbacks above.
+  }
 
   // Postfix Expressions.
   virtual ExprResult ParsePostfixUnaryOp(const LexerToken &Tok, ExprTy *Input) {
diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h
index 5667d996e0c..edb980b424d 100644
--- a/clang/include/clang/Parse/Parser.h
+++ b/clang/include/clang/Parse/Parser.h
@@ -89,7 +89,8 @@ private:
   /// isTokenStringLiteral - True if this token is a string-literal.
   ///
   bool isTokenStringLiteral() const {
-    return Tok.getKind() == tok::string_literal;
+    return Tok.getKind() == tok::string_literal ||
+           Tok.getKind() == tok::wide_string_literal;
   }
   
   /// ConsumeToken - Consume the current 'peek token' and lex the next one.