diff options
| author | Chris Lattner <sabre@nondot.org> | 2010-11-17 07:37:15 +0000 |
|---|---|---|
| committer | Chris Lattner <sabre@nondot.org> | 2010-11-17 07:37:15 +0000 |
| commit | e925d6178568a70fab68adc4d0e878520a90ec16 (patch) | |
| tree | 3647e4d5c44c719a4ff9647837fcbf25a80c0621 /clang/lib/AST/Expr.cpp | |
| parent | 35d023164c410181c78944a2d68c664689011823 (diff) | |
| download | bcm5719-llvm-e925d6178568a70fab68adc4d0e878520a90ec16.tar.gz bcm5719-llvm-e925d6178568a70fab68adc4d0e878520a90ec16.zip | |
a metric ton of refactoring later, Sema::getLocationOfStringLiteralByte
no longer depends on Preprocessor, so we can move it out of Sema into
a nice new StringLiteral::getLocationOfByte method that can be used by
any AST client.
llvm-svn: 119481
Diffstat (limited to 'clang/lib/AST/Expr.cpp')
| -rw-r--r-- | clang/lib/AST/Expr.cpp | 69 |
1 files changed, 69 insertions, 0 deletions
diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp index e36c02f3a99..b03594f8f98 100644 --- a/clang/lib/AST/Expr.cpp +++ b/clang/lib/AST/Expr.cpp @@ -20,7 +20,10 @@ #include "clang/AST/DeclTemplate.h" #include "clang/AST/RecordLayout.h" #include "clang/AST/StmtVisitor.h" +#include "clang/Lex/LiteralSupport.h" +#include "clang/Lex/Lexer.h" #include "clang/Basic/Builtins.h" +#include "clang/Basic/SourceManager.h" #include "clang/Basic/TargetInfo.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -467,6 +470,72 @@ void StringLiteral::setString(ASTContext &C, llvm::StringRef Str) { ByteLength = Str.size(); } +/// getLocationOfByte - Return a source location that points to the specified +/// byte of this string literal. +/// +/// Strings are amazingly complex. They can be formed from multiple tokens and +/// can have escape sequences in them in addition to the usual trigraph and +/// escaped newline business. This routine handles this complexity. +/// +SourceLocation StringLiteral:: +getLocationOfByte(unsigned ByteNo, const SourceManager &SM, + const LangOptions &Features, const TargetInfo &Target) const { + assert(!isWide() && "This doesn't work for wide strings yet"); + + // Loop over all of the tokens in this string until we find the one that + // contains the byte we're looking for. + unsigned TokNo = 0; + while (1) { + assert(TokNo < getNumConcatenated() && "Invalid byte number!"); + SourceLocation StrTokLoc = getStrTokenLoc(TokNo); + + // Get the spelling of the string so that we can get the data that makes up + // the string literal, not the identifier for the macro it is potentially + // expanded through. + SourceLocation StrTokSpellingLoc = SM.getSpellingLoc(StrTokLoc); + + // Re-lex the token to get its length and original spelling. + std::pair<FileID, unsigned> LocInfo =SM.getDecomposedLoc(StrTokSpellingLoc); + bool Invalid = false; + llvm::StringRef Buffer = SM.getBufferData(LocInfo.first, &Invalid); + if (Invalid) + return StrTokSpellingLoc; + + const char *StrData = Buffer.data()+LocInfo.second; + + // Create a langops struct and enable trigraphs. This is sufficient for + // relexing tokens. + LangOptions LangOpts; + LangOpts.Trigraphs = true; + + // Create a lexer starting at the beginning of this token. + Lexer TheLexer(StrTokSpellingLoc, Features, Buffer.begin(), StrData, + Buffer.end()); + Token TheTok; + TheLexer.LexFromRawLexer(TheTok); + + // Use the StringLiteralParser to compute the length of the string in bytes. + StringLiteralParser SLP(&TheTok, 1, SM, Features, Target); + unsigned TokNumBytes = SLP.GetStringLength(); + + // If the byte is in this token, return the location of the byte. + if (ByteNo < TokNumBytes || + (ByteNo == TokNumBytes && TokNo == getNumConcatenated())) { + unsigned Offset = SLP.getOffsetOfStringByte(TheTok, ByteNo); + + // Now that we know the offset of the token in the spelling, use the + // preprocessor to get the offset in the original source. + return Lexer::AdvanceToTokenCharacter(StrTokLoc, Offset, SM, Features); + } + + // Move to the next string token. + ++TokNo; + ByteNo -= TokNumBytes; + } +} + + + /// getOpcodeStr - Turn an Opcode enum value into the punctuation char it /// corresponds to, e.g. "sizeof" or "[pre]++". const char *UnaryOperator::getOpcodeStr(Opcode Op) { |

