diff options
| -rw-r--r-- | clang/AST/ASTStreamer.cpp | 4 | ||||
| -rw-r--r-- | clang/AST/Expr.cpp | 22 | ||||
| -rw-r--r-- | clang/AST/Sema.cpp | 36 | ||||
| -rw-r--r-- | clang/Lex/Lexer.cpp | 9 | ||||
| -rw-r--r-- | clang/Lex/MacroExpander.cpp | 5 | ||||
| -rw-r--r-- | clang/Lex/Pragma.cpp | 3 | ||||
| -rw-r--r-- | clang/Lex/Preprocessor.cpp | 4 | ||||
| -rw-r--r-- | clang/Parse/ParseExpr.cpp | 241 | ||||
| -rw-r--r-- | clang/Parse/ParseStmt.cpp | 4 | ||||
| -rw-r--r-- | clang/Parse/Parser.cpp | 3 | ||||
| -rw-r--r-- | clang/Sema/ASTStreamer.cpp | 4 | ||||
| -rw-r--r-- | clang/Sema/Sema.cpp | 36 | ||||
| -rw-r--r-- | clang/clang.xcodeproj/project.pbxproj | 4 | ||||
| -rw-r--r-- | clang/include/clang/AST/Expr.h | 22 | ||||
| -rw-r--r-- | clang/include/clang/Basic/DiagnosticKinds.def | 12 | ||||
| -rw-r--r-- | clang/include/clang/Basic/TokenKinds.def | 3 | ||||
| -rw-r--r-- | clang/include/clang/Lex/Lexer.h | 2 | ||||
| -rw-r--r-- | clang/include/clang/Parse/Action.h | 10 | ||||
| -rw-r--r-- | clang/include/clang/Parse/Parser.h | 3 |
19 files changed, 378 insertions, 49 deletions
diff --git a/clang/AST/ASTStreamer.cpp b/clang/AST/ASTStreamer.cpp index bd192040d7b..7db53f3ae6f 100644 --- a/clang/AST/ASTStreamer.cpp +++ b/clang/AST/ASTStreamer.cpp @@ -19,7 +19,7 @@ using namespace clang; /// Interface to the Builder.cpp file. /// -Action *CreateASTBuilderActions(bool FullLocInfo); +Action *CreateASTBuilderActions(Preprocessor &PP, bool FullLocInfo); namespace { @@ -27,7 +27,7 @@ namespace { Parser P; public: ASTStreamer(Preprocessor &PP, unsigned MainFileID, bool FullLocInfo) - : P(PP, *CreateASTBuilderActions(FullLocInfo)) { + : P(PP, *CreateASTBuilderActions(PP, FullLocInfo)) { PP.EnterSourceFile(MainFileID, 0, true); // Initialize the parser. diff --git a/clang/AST/Expr.cpp b/clang/AST/Expr.cpp index 46051574491..f00301c759f 100644 --- a/clang/AST/Expr.cpp +++ b/clang/AST/Expr.cpp @@ -43,6 +43,28 @@ void FloatingConstant::dump_impl() const { std::cerr << "1.0"; } + + +StringExpr::StringExpr(const char *strData, unsigned byteLength, bool Wide) { + // OPTIMIZE: could allocate this appended to the StringExpr. + char *AStrData = new char[byteLength]; + memcpy(AStrData, strData, byteLength); + StrData = AStrData; + ByteLength = byteLength; + isWide = Wide; +} + +StringExpr::~StringExpr() { + delete[] StrData; +} + +void StringExpr::dump_impl() const { + if (isWide) std::cerr << 'L'; + std::cerr << '"' << StrData << '"'; +} + + + void ParenExpr::dump_impl() const { std::cerr << "'('"; Val->dump(); diff --git a/clang/AST/Sema.cpp b/clang/AST/Sema.cpp index 06eed86d442..cae97fc2bd6 100644 --- a/clang/AST/Sema.cpp +++ b/clang/AST/Sema.cpp @@ -18,18 +18,23 @@ #include "clang/Parse/Scope.h" #include "clang/Lex/IdentifierTable.h" #include "clang/Lex/LexerToken.h" -#include "llvm/Support/Visibility.h" +#include "clang/Lex/Preprocessor.h" +#include "llvm/Support/Compiler.h" using namespace llvm; using namespace clang; /// ASTBuilder namespace { class VISIBILITY_HIDDEN ASTBuilder : public Action { + Preprocessor &PP; + /// FullLocInfo - If this is true, the ASTBuilder constructs AST Nodes that /// capture maximal location information for each source-language construct. bool FullLocInfo; public: - ASTBuilder(bool fullLocInfo) : FullLocInfo(fullLocInfo) {} + ASTBuilder(Preprocessor &pp, bool fullLocInfo) + : PP(pp), FullLocInfo(fullLocInfo) {} + //===--------------------------------------------------------------------===// // Symbol table tracking callbacks. // @@ -47,6 +52,9 @@ public: virtual ExprResult ParseFloatingConstant(const LexerToken &Tok); virtual ExprResult ParseParenExpr(SourceLocation L, SourceLocation R, ExprTy *Val); + virtual ExprResult ParseStringExpr(const char *StrData, unsigned StrLen, + bool isWide, + const LexerToken *Toks, unsigned NumToks); // Binary/Unary Operators. 'Tok' is the token for the operator. virtual ExprResult ParseUnaryOp(const LexerToken &Tok, ExprTy *Input); @@ -166,6 +174,26 @@ Action::ExprResult ASTBuilder::ParseParenExpr(SourceLocation L, return new ParenExpr(L, R, (Expr*)Val); } +/// ParseStringExpr - This accepts a string after semantic analysis. This string +/// may be the result of string concatenation ([C99 5.1.1.2, translation phase +/// #6]), so it may come from multiple tokens. +/// +Action::ExprResult ASTBuilder:: +ParseStringExpr(const char *StrData, unsigned StrLen, bool isWide, + const LexerToken *Toks, unsigned NumToks) { + assert(NumToks && "Must have at least one string!"); + + if (!FullLocInfo) + return new StringExpr(StrData, StrLen, isWide); + else { + SmallVector<SourceLocation, 4> Locs; + for (unsigned i = 0; i != NumToks; ++i) + Locs.push_back(Toks[i].getLocation()); + return new StringExprLOC(StrData, StrLen, isWide, &Locs[0], Locs.size()); + } +} + + // Unary Operators. 'Tok' is the token for the operator. Action::ExprResult ASTBuilder::ParseUnaryOp(const LexerToken &Tok, ExprTy *Input) { @@ -326,8 +354,8 @@ Action::ExprResult ASTBuilder::ParseConditionalOp(SourceLocation QuestionLoc, /// Interface to the Builder.cpp file. /// -Action *CreateASTBuilderActions(bool FullLocInfo) { - return new ASTBuilder(FullLocInfo); +Action *CreateASTBuilderActions(Preprocessor &PP, bool FullLocInfo) { + return new ASTBuilder(PP, FullLocInfo); } diff --git a/clang/Lex/Lexer.cpp b/clang/Lex/Lexer.cpp index c686d918262..4bceedd4124 100644 --- a/clang/Lex/Lexer.cpp +++ b/clang/Lex/Lexer.cpp @@ -444,7 +444,7 @@ void Lexer::LexNumericConstant(LexerToken &Result, const char *CurPtr) { /// LexStringLiteral - Lex the remainder of a string literal, after having lexed /// either " or L". -void Lexer::LexStringLiteral(LexerToken &Result, const char *CurPtr) { +void Lexer::LexStringLiteral(LexerToken &Result, const char *CurPtr, bool Wide){ const char *NulCharacter = 0; // Does this string contain the \0 character? char C = getAndAdvanceChar(CurPtr, Result); @@ -468,7 +468,7 @@ void Lexer::LexStringLiteral(LexerToken &Result, const char *CurPtr) { // If a nul character existed in the string, warn about it. if (NulCharacter) Diag(NulCharacter, diag::null_in_string); - Result.SetKind(tok::string_literal); + Result.SetKind(Wide ? tok::wide_string_literal : tok::string_literal); // Update the location of the token as well as the BufferPtr instance var. FormTokenWithChars(Result, CurPtr); @@ -1104,7 +1104,8 @@ LexNextToken: // Wide string literal. if (Char == '"') - return LexStringLiteral(Result, ConsumeChar(CurPtr, SizeTmp, Result)); + return LexStringLiteral(Result, ConsumeChar(CurPtr, SizeTmp, Result), + true); // Wide character constant. if (Char == '\'') @@ -1143,7 +1144,7 @@ LexNextToken: case '"': // Notify MIOpt that we read a non-whitespace/non-comment token. MIOpt.ReadToken(); - return LexStringLiteral(Result, CurPtr); + return LexStringLiteral(Result, CurPtr, false); // C99 6.4.6: Punctuators. case '?': diff --git a/clang/Lex/MacroExpander.cpp b/clang/Lex/MacroExpander.cpp index 423eb9ba360..1a64e36a10d 100644 --- a/clang/Lex/MacroExpander.cpp +++ b/clang/Lex/MacroExpander.cpp @@ -161,8 +161,9 @@ static LexerToken StringifyArgument(const LexerToken *ArgToks, // If this is a string or character constant, escape the token as specified // by 6.10.3.2p2. - if (Tok.getKind() == tok::string_literal || // "foo" and L"foo". - Tok.getKind() == tok::char_constant) { // 'x' and L'x'. + if (Tok.getKind() == tok::string_literal || // "foo" + Tok.getKind() == tok::wide_string_literal || // L"foo" + Tok.getKind() == tok::char_constant) { // 'x' and L'x'. Result += Lexer::Stringify(PP.getSpelling(Tok)); } else { // Otherwise, just append the token. diff --git a/clang/Lex/Pragma.cpp b/clang/Lex/Pragma.cpp index 64b04ccfc60..2fbf9cc6c09 100644 --- a/clang/Lex/Pragma.cpp +++ b/clang/Lex/Pragma.cpp @@ -96,7 +96,8 @@ void Preprocessor::Handle_Pragma(LexerToken &Tok) { // Read the '"..."'. Lex(Tok); - if (Tok.getKind() != tok::string_literal) + if (Tok.getKind() != tok::string_literal && + Tok.getKind() != tok::wide_string_literal) return Diag(PragmaLoc, diag::err__Pragma_malformed); // Remember the string. diff --git a/clang/Lex/Preprocessor.cpp b/clang/Lex/Preprocessor.cpp index 702af741369..d8d3c42536b 100644 --- a/clang/Lex/Preprocessor.cpp +++ b/clang/Lex/Preprocessor.cpp @@ -444,7 +444,6 @@ void Preprocessor::EnterSourceFileWithLexer(Lexer *TheLexer, /// tokens from it instead of the current buffer. void Preprocessor::EnterMacro(LexerToken &Tok, MacroArgs *Args) { IdentifierInfo *Identifier = Tok.getIdentifierInfo(); - MacroInfo &MI = *Identifier->getMacroInfo(); IncludeMacroStack.push_back(IncludeStackInfo(CurLexer, CurDirLookup, CurMacroExpander)); CurLexer = 0; @@ -1492,7 +1491,8 @@ void Preprocessor::HandleIdentSCCSDirective(LexerToken &Tok) { Lex(StrTok); // If the token kind isn't a string, it's a malformed directive. - if (StrTok.getKind() != tok::string_literal) + if (StrTok.getKind() != tok::string_literal && + StrTok.getKind() != tok::wide_string_literal) return Diag(StrTok, diag::err_pp_malformed_ident); // Verify that there is nothing after the string, other than EOM. diff --git a/clang/Parse/ParseExpr.cpp b/clang/Parse/ParseExpr.cpp index 54284d0d790..3b641e7c6d7 100644 --- a/clang/Parse/ParseExpr.cpp +++ b/clang/Parse/ParseExpr.cpp @@ -22,6 +22,8 @@ #include "clang/Parse/Parser.h" #include "clang/Basic/Diagnostic.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Config/Alloca.h" using namespace llvm; using namespace clang; @@ -490,6 +492,7 @@ Parser::ExprResult Parser::ParseCastExpression(bool isUnaryExpression) { // These can be followed by postfix-expr pieces. return ParsePostfixExpressionSuffix(Res); case tok::string_literal: // primary-expression: string-literal + case tok::wide_string_literal: Res = ParseStringLiteralExpression(); if (Res.isInvalid) return Res; // This can be followed by postfix-expr pieces (e.g. "foo"[1]). @@ -809,24 +812,6 @@ Parser::ExprResult Parser::ParseBuiltinPrimaryExpression() { return ParsePostfixExpressionSuffix(Res); } -/// ParseStringLiteralExpression - This handles the various token types that -/// form string literals, and also handles string concatenation [C99 5.1.1.2, -/// translation phase #6]. -/// -/// primary-expression: [C99 6.5.1] -/// string-literal -Parser::ExprResult Parser::ParseStringLiteralExpression() { - assert(isTokenStringLiteral() && "Not a string literal!"); - ConsumeStringToken(); - - // String concat. Note that keywords like __func__ and __FUNCTION__ aren't - // considered to be strings. - while (isTokenStringLiteral()) - ConsumeStringToken(); - // TODO: Build AST for string literals. - return ExprResult(false); -} - /// ParseParenExpression - This parses the unit that starts with a '(' token, /// based on what is allowed by ExprType. The actual thing parsed is returned @@ -906,3 +891,223 @@ Parser::ExprResult Parser::ParseParenExpression(ParenParseOption &ExprType, return Result; } + +/// HexDigitValue - Return the value of the specified hex digit, or -1 if it's +/// not valid. +static int HexDigitValue(char C) { + if (C >= '0' && C <= '9') return C-'0'; + if (C >= 'a' && C <= 'f') return C-'a'+10; + if (C >= 'A' && C <= 'F') return C-'A'+10; + return -1; +} + +/// ParseStringLiteralExpression - This handles the various token types that +/// form string literals, and also handles string concatenation [C99 5.1.1.2, +/// translation phase #6]. +/// +/// primary-expression: [C99 6.5.1] +/// string-literal +Parser::ExprResult Parser::ParseStringLiteralExpression() { + assert(isTokenStringLiteral() && "Not a string literal!"); + + // String concat. Note that keywords like __func__ and __FUNCTION__ are not + // considered to be strings for concatenation purposes. + SmallVector<LexerToken, 4> StringToks; + + // While we're looking at all of the string portions, remember the max + // individual token length, computing a bound on the concatenated string + // length, and see whether any piece is a wide-string. If any of the string + // portions is a wide-string literal, the result is also a wide-string literal + // [C99 6.4.5p4]. + unsigned SizeBound = 0, MaxTokenLength = 0; + bool AnyWide = false; + do { + // The string could be shorter than this if it needs cleaning, but this is a + // reasonable bound, which is all we need. + SizeBound += Tok.getLength()-2; // -2 for "". + + // Find maximum string piece length. + if (Tok.getLength() > MaxTokenLength) + MaxTokenLength = Tok.getLength(); + + // Remember if we see any wide strings. + AnyWide |= Tok.getKind() == tok::wide_string_literal; + + // Remember the string token. + StringToks.push_back(Tok); + ConsumeStringToken(); + } while (isTokenStringLiteral()); + + // Include space for the null terminator. + ++SizeBound; + + // TODO: K&R warning: "traditional C rejects string constant concatenation" + + // FIXME: Size of wchar_t should not be hardcoded! + unsigned wchar_tByteWidth = 4; + + // The output buffer size needs to be large enough to hold wide characters. + // This is a worst-case assumption which basically corresponds to L"" "long". + if (AnyWide) + SizeBound *= wchar_tByteWidth; + + // Create a temporary buffer to hold the result string data. If it is "big", + // use malloc, otherwise use alloca. + char *ResultBuf; + if (SizeBound > 512) + ResultBuf = (char*)malloc(SizeBound); + else + ResultBuf = (char*)alloca(SizeBound); + + // Likewise, but for each string piece. + char *TokenBuf; + if (MaxTokenLength > 512) + TokenBuf = (char*)malloc(MaxTokenLength); + else + TokenBuf = (char*)alloca(MaxTokenLength); + + // Loop over all the strings, getting their spelling, and expanding them to + // wide strings as appropriate. + char *ResultPtr = ResultBuf; // Next byte to fill in. + + for (unsigned i = 0, e = StringToks.size(); i != e; ++i) { + const char *ThisTokBuf = TokenBuf; + // Get the spelling of the token, which eliminates trigraphs, etc. We know + // that ThisTokBuf points to a buffer that is big enough for the whole token + // and 'spelled' tokens can only shrink. + unsigned ThisTokLen = PP.getSpelling(StringToks[i], ThisTokBuf); + const char *ThisTokEnd = ThisTokBuf+ThisTokLen-1; // Skip end quote. + + // TODO: Input character set mapping support. + + // Skip L marker for wide strings. + if (ThisTokBuf[0] == 'L') ++ThisTokBuf; + + assert(ThisTokBuf[0] == '"' && "Expected quote, lexer broken?"); + ++ThisTokBuf; + + while (ThisTokBuf != ThisTokEnd) { + // Is this a span of non-escape characters? + if (ThisTokBuf[0] != '\\') { + const char *InStart = ThisTokBuf; + do { + ++ThisTokBuf; + } while (ThisTokBuf != ThisTokEnd && ThisTokBuf[0] != '\\'); + + // Copy the character span over. + unsigned Len = ThisTokBuf-InStart; + if (!AnyWide) { + memcpy(ResultPtr, InStart, Len); + ResultPtr += Len; + } else { + // Note: our internal rep of wide char tokens is always little-endian. + for (; Len; --Len, ++InStart) { + *ResultPtr++ = InStart[0]; + // Add zeros at the end. + for (unsigned i = 1, e = wchar_tByteWidth; i != e; ++i) + *ResultPtr++ = 0; + } + } + continue; + } + + // Otherwise, this is an escape character. Skip the '\' char. + ++ThisTokBuf; + + // We know that this character can't be off the end of the buffer, because + // that would have been \", which would not have been the end of string. + unsigned ResultChar = *ThisTokBuf++; + switch (ResultChar) { + // These map to themselves. + case '\\': case '\'': case '"': case '?': break; + + // These have fixed mappings. + case 'a': + // TODO: K&R: the meaning of '\\a' is different in traditional C + ResultChar = 7; + break; + case 'b': + ResultChar = 8; + break; + case 'e': + PP.Diag(StringToks[i], diag::ext_nonstandard_escape, "e"); + ResultChar = 27; + break; + case 'f': + ResultChar = 12; + break; + case 'n': + ResultChar = 10; + break; + case 'r': + ResultChar = 13; + break; + case 't': + ResultChar = 9; + break; + case 'v': + ResultChar = 11; + break; + + //case 'u': case 'U': // FIXME: UCNs. + case 'x': // Hex escape. + if (ThisTokBuf == ThisTokEnd || + (ResultChar = HexDigitValue(*ThisTokBuf)) == ~0U) { + PP.Diag(StringToks[i], diag::err_hex_escape_no_digits); + ResultChar = 0; + break; + } + ++ThisTokBuf; // Consumed one hex digit. + + assert(0 && "hex escape: unimp!"); + break; + case '0': case '1': case '2': case '3': + case '4': case '5': case '6': case '7': + // Octal escapes. + assert(0 && "octal escape: unimp!"); + break; + + // Otherwise, these are not valid escapes. + case '(': case '{': case '[': case '%': + // GCC accepts these as extensions. We warn about them as such though. + if (!PP.getLangOptions().NoExtensions) { + PP.Diag(StringToks[i], diag::ext_nonstandard_escape, + std::string()+(char)ResultChar); + break; + } + // FALL THROUGH. + default: + if (isgraph(ThisTokBuf[0])) { + PP.Diag(StringToks[i], diag::ext_unknown_escape, + std::string()+(char)ResultChar); + } else { + PP.Diag(StringToks[i], diag::ext_unknown_escape, + "x"+utohexstr(ResultChar)); + } + } + + // Note: our internal rep of wide char tokens is always little-endian. + for (unsigned i = 0, e = wchar_tByteWidth; i != e; ++i) + *ResultPtr++ = ResultChar >> i*8; + } + } + + // Add zero terminator. + *ResultPtr = 0; + if (AnyWide) { + for (unsigned i = 1, e = wchar_tByteWidth; i != e; ++i) + *ResultPtr++ = 0; + } + + // Hand this off to the Actions. + ExprResult Res = Actions.ParseStringExpr(ResultBuf, ResultPtr-ResultBuf, + AnyWide, + &StringToks[0], StringToks.size()); + + // If either buffer was heap allocated, release it now. + if (MaxTokenLength > 512) free(TokenBuf); + if (SizeBound > 512) free(ResultBuf); + + return Res; +} + diff --git a/clang/Parse/ParseStmt.cpp b/clang/Parse/ParseStmt.cpp index b676d46a437..063e86b9ad8 100644 --- a/clang/Parse/ParseStmt.cpp +++ b/clang/Parse/ParseStmt.cpp @@ -597,7 +597,7 @@ void Parser::ParseAsmStatement() { if (Tok.getKind() == tok::colon) { ConsumeToken(); - if (Tok.getKind() == tok::string_literal) { + if (isTokenStringLiteral()) { // Parse the asm-string list for clobbers. while (1) { ParseAsmStringLiteral(); @@ -629,7 +629,7 @@ void Parser::ParseAsmOperandsOpt() { ConsumeToken(); // 'asm-operands' isn't present? - if (Tok.getKind() != tok::string_literal && Tok.getKind() != tok::l_square) + if (!isTokenStringLiteral() && Tok.getKind() != tok::l_square) return; while (1) { diff --git a/clang/Parse/Parser.cpp b/clang/Parse/Parser.cpp index c74c634632c..1a210ce172b 100644 --- a/clang/Parse/Parser.cpp +++ b/clang/Parse/Parser.cpp @@ -152,6 +152,7 @@ bool Parser::SkipUntil(tok::TokenKind T, bool StopAtSemi, bool DontConsume) { break; case tok::string_literal: + case tok::wide_string_literal: ConsumeStringToken(); break; case tok::semi: @@ -405,7 +406,7 @@ void Parser::ParseFunctionDefinition(Declarator &D) { /// string-literal /// void Parser::ParseAsmStringLiteral() { - if (Tok.getKind() != tok::string_literal) { + if (!isTokenStringLiteral()) { Diag(Tok, diag::err_expected_string_literal); return; } diff --git a/clang/Sema/ASTStreamer.cpp b/clang/Sema/ASTStreamer.cpp index bd192040d7b..7db53f3ae6f 100644 --- a/clang/Sema/ASTStreamer.cpp +++ b/clang/Sema/ASTStreamer.cpp @@ -19,7 +19,7 @@ using namespace clang; /// Interface to the Builder.cpp file. /// -Action *CreateASTBuilderActions(bool FullLocInfo); +Action *CreateASTBuilderActions(Preprocessor &PP, bool FullLocInfo); namespace { @@ -27,7 +27,7 @@ namespace { Parser P; public: ASTStreamer(Preprocessor &PP, unsigned MainFileID, bool FullLocInfo) - : P(PP, *CreateASTBuilderActions(FullLocInfo)) { + : P(PP, *CreateASTBuilderActions(PP, FullLocInfo)) { PP.EnterSourceFile(MainFileID, 0, true); // Initialize the parser. diff --git a/clang/Sema/Sema.cpp b/clang/Sema/Sema.cpp index 06eed86d442..cae97fc2bd6 100644 --- a/clang/Sema/Sema.cpp +++ b/clang/Sema/Sema.cpp @@ -18,18 +18,23 @@ #include "clang/Parse/Scope.h" #include "clang/Lex/IdentifierTable.h" #include "clang/Lex/LexerToken.h" -#include "llvm/Support/Visibility.h" +#include "clang/Lex/Preprocessor.h" +#include "llvm/Support/Compiler.h" using namespace llvm; using namespace clang; /// ASTBuilder namespace { class VISIBILITY_HIDDEN ASTBuilder : public Action { + Preprocessor &PP; + /// FullLocInfo - If this is true, the ASTBuilder constructs AST Nodes that /// capture maximal location information for each source-language construct. bool FullLocInfo; public: - ASTBuilder(bool fullLocInfo) : FullLocInfo(fullLocInfo) {} + ASTBuilder(Preprocessor &pp, bool fullLocInfo) + : PP(pp), FullLocInfo(fullLocInfo) {} + //===--------------------------------------------------------------------===// // Symbol table tracking callbacks. // @@ -47,6 +52,9 @@ public: virtual ExprResult ParseFloatingConstant(const LexerToken &Tok); virtual ExprResult ParseParenExpr(SourceLocation L, SourceLocation R, ExprTy *Val); + virtual ExprResult ParseStringExpr(const char *StrData, unsigned StrLen, + bool isWide, + const LexerToken *Toks, unsigned NumToks); // Binary/Unary Operators. 'Tok' is the token for the operator. virtual ExprResult ParseUnaryOp(const LexerToken &Tok, ExprTy *Input); @@ -166,6 +174,26 @@ Action::ExprResult ASTBuilder::ParseParenExpr(SourceLocation L, return new ParenExpr(L, R, (Expr*)Val); } +/// ParseStringExpr - This accepts a string after semantic analysis. This string +/// may be the result of string concatenation ([C99 5.1.1.2, translation phase +/// #6]), so it may come from multiple tokens. +/// +Action::ExprResult ASTBuilder:: +ParseStringExpr(const char *StrData, unsigned StrLen, bool isWide, + const LexerToken *Toks, unsigned NumToks) { + assert(NumToks && "Must have at least one string!"); + + if (!FullLocInfo) + return new StringExpr(StrData, StrLen, isWide); + else { + SmallVector<SourceLocation, 4> Locs; + for (unsigned i = 0; i != NumToks; ++i) + Locs.push_back(Toks[i].getLocation()); + return new StringExprLOC(StrData, StrLen, isWide, &Locs[0], Locs.size()); + } +} + + // Unary Operators. 'Tok' is the token for the operator. Action::ExprResult ASTBuilder::ParseUnaryOp(const LexerToken &Tok, ExprTy *Input) { @@ -326,8 +354,8 @@ Action::ExprResult ASTBuilder::ParseConditionalOp(SourceLocation QuestionLoc, /// Interface to the Builder.cpp file. /// -Action *CreateASTBuilderActions(bool FullLocInfo) { - return new ASTBuilder(FullLocInfo); +Action *CreateASTBuilderActions(Preprocessor &PP, bool FullLocInfo) { + return new ASTBuilder(PP, FullLocInfo); } diff --git a/clang/clang.xcodeproj/project.pbxproj b/clang/clang.xcodeproj/project.pbxproj index 728f768c664..260cb9e1495 100644 --- a/clang/clang.xcodeproj/project.pbxproj +++ b/clang/clang.xcodeproj/project.pbxproj @@ -424,7 +424,7 @@ 1DEB923208733DC60010E9CD /* Debug */ = { isa = XCBuildConfiguration; buildSettings = { - ARCHS = ppc; + ARCHS = i386; COPY_PHASE_STRIP = NO; GCC_CW_ASM_SYNTAX = NO; GCC_DYNAMIC_NO_PIC = NO; @@ -460,7 +460,7 @@ 1DEB923308733DC60010E9CD /* Release */ = { isa = XCBuildConfiguration; buildSettings = { - ARCHS = ppc; + ARCHS = i386; GCC_CW_ASM_SYNTAX = NO; GCC_ENABLE_CPP_EXCEPTIONS = NO; GCC_ENABLE_CPP_RTTI = NO; diff --git a/clang/include/clang/AST/Expr.h b/clang/include/clang/AST/Expr.h index 472e889ca8e..a7cdd150e58 100644 --- a/clang/include/clang/AST/Expr.h +++ b/clang/include/clang/AST/Expr.h @@ -15,7 +15,7 @@ #define LLVM_CLANG_AST_EXPR_H #include "clang/Basic/SourceLocation.h" -#include <cassert> +#include "llvm/ADT/SmallVector.h" namespace llvm { namespace clang { @@ -64,6 +64,26 @@ public: virtual void dump_impl() const; }; +class StringExpr : public Expr { + const char *StrData; + unsigned ByteLength; + bool isWide; +public: + StringExpr(const char *strData, unsigned byteLength, bool Wide); + virtual ~StringExpr(); + virtual void dump_impl() const; +}; + +class StringExprLOC : public StringExpr { + // Locations for the string tokens before string concatenation. + SmallVector<SourceLocation, 4> Locs; +public: + StringExprLOC(const char *StrData, unsigned ByteLength, bool isWide, + SourceLocation *L, unsigned NumLocs) + : StringExpr(StrData, ByteLength, isWide), Locs(L, L+NumLocs) { + } +}; + /// ParenExpr - This represents a parethesized expression, e.g. "(1)". This /// AST node is only formed if full location information is requested. class ParenExpr : public Expr { diff --git a/clang/include/clang/Basic/DiagnosticKinds.def b/clang/include/clang/Basic/DiagnosticKinds.def index 25d842106de..2dcf0467d83 100644 --- a/clang/include/clang/Basic/DiagnosticKinds.def +++ b/clang/include/clang/Basic/DiagnosticKinds.def @@ -342,6 +342,18 @@ DIAG(err_expected_asm_operand, ERROR, DIAG(err_matching, ERROR, "to match this '%s'") +//===----------------------------------------------------------------------===// +// Semantic Analysis +//===----------------------------------------------------------------------===// + +DIAG(ext_nonstandard_escape, EXTENSION, + "use of non-standard escape character '\\%s'") +DIAG(ext_unknown_escape, EXTENSION, + "unknown escape sequence '\\%s'") + +DIAG(err_hex_escape_no_digits, ERROR, + "\\x used with no following hex digits") + DIAG(err_typename_requires_specqual, ERROR, "type name requires a specifier or qualifier") DIAG(err_typename_invalid_storageclass, ERROR, diff --git a/clang/include/clang/Basic/TokenKinds.def b/clang/include/clang/Basic/TokenKinds.def index f7f20609ff8..886ad8faae5 100644 --- a/clang/include/clang/Basic/TokenKinds.def +++ b/clang/include/clang/Basic/TokenKinds.def @@ -42,7 +42,8 @@ TOK(numeric_constant) // 0x123 TOK(char_constant) // 'a' L'b' // C99 6.4.5: String Literals. -TOK(string_literal) // "foo" L"foo" +TOK(string_literal) // "foo" +TOK(wide_string_literal) // L"foo" TOK(angle_string_literal)// <foo> // C99 6.4.6: Punctuators. diff --git a/clang/include/clang/Lex/Lexer.h b/clang/include/clang/Lex/Lexer.h index c507222fa85..7ef310e957a 100644 --- a/clang/include/clang/Lex/Lexer.h +++ b/clang/include/clang/Lex/Lexer.h @@ -329,7 +329,7 @@ private: // Helper functions to lex the remainder of a token of the specific type. void LexIdentifier (LexerToken &Result, const char *CurPtr); void LexNumericConstant (LexerToken &Result, const char *CurPtr); - void LexStringLiteral (LexerToken &Result, const char *CurPtr); + void LexStringLiteral (LexerToken &Result, const char *CurPtr,bool Wide); void LexAngledStringLiteral(LexerToken &Result, const char *CurPtr); void LexCharConstant (LexerToken &Result, const char *CurPtr); bool LexEndOfFile (LexerToken &Result, const char *CurPtr); diff --git a/clang/include/clang/Parse/Action.h b/clang/include/clang/Parse/Action.h index 6e40195605b..74f7d83367d 100644 --- a/clang/include/clang/Parse/Action.h +++ b/clang/include/clang/Parse/Action.h @@ -94,11 +94,19 @@ public: virtual ExprResult ParseSimplePrimaryExpr(const LexerToken &Tok) { return 0; } virtual ExprResult ParseIntegerConstant(const LexerToken &Tok) { return 0; } virtual ExprResult ParseFloatingConstant(const LexerToken &Tok) { return 0; } - virtual ExprResult ParseParenExpr(SourceLocation L, SourceLocation R, ExprTy *Val) { return Val; // Default impl returns operand. } + + /// ParseStringExpr - The (null terminated) string data is specified with + /// StrData+StrLen. isWide is true if this is a wide string. The Toks/NumToks + /// array exposes the input tokens to provide location information. + virtual ExprResult ParseStringExpr(const char *StrData, unsigned StrLen, + bool isWide, + const LexerToken *Toks, unsigned NumToks) { + return 0; + } // Postfix Expressions. virtual ExprResult ParsePostfixUnaryOp(const LexerToken &Tok, ExprTy *Input) { diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h index 5667d996e0c..edb980b424d 100644 --- a/clang/include/clang/Parse/Parser.h +++ b/clang/include/clang/Parse/Parser.h @@ -89,7 +89,8 @@ private: /// isTokenStringLiteral - True if this token is a string-literal. /// bool isTokenStringLiteral() const { - return Tok.getKind() == tok::string_literal; + return Tok.getKind() == tok::string_literal || + Tok.getKind() == tok::wide_string_literal; } /// ConsumeToken - Consume the current 'peek token' and lex the next one. |

