Change Preprocessor::AdvanceToTokenCharacter to stop at the first real character of a token.

For example, advancing to byte 3 of foo\<newline>bar should stop at the b, not the \. llvm-svn: 69484
author: Chris Lattner <sabre@nondot.org> 2009-04-18 22:28:58 +0000
committer: Chris Lattner <sabre@nondot.org> 2009-04-18 22:28:58 +0000
commit: 93017cc12a15c742469a57a14f0665143fe440ff (patch)
tree: 4ca81c4d57d3b2c562f91478bbbb5a1c5edc414f /clang/lib/Lex
parent: 38b2cde4c4c877664ef25dd513d033b4dd5866c3 (diff)
download: bcm5719-llvm-93017cc12a15c742469a57a14f0665143fe440ff.tar.gz
bcm5719-llvm-93017cc12a15c742469a57a14f0665143fe440ff.zip
1 files changed, 21 insertions, 12 deletions
diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp
index 70294417d14..07fab811370 100644
--- a/clang/lib/Lex/Preprocessor.cpp
+++ b/clang/lib/Lex/Preprocessor.cpp
@@ -292,33 +292,42 @@ void Preprocessor::CreateString(const char *Buf, unsigned Len, Token &Tok,
 /// token, return a new location that specifies a character within the token.
 SourceLocation Preprocessor::AdvanceToTokenCharacter(SourceLocation TokStart, 
                                                      unsigned CharNo) {
-  // If they request the first char of the token, we're trivially done.
-  if (CharNo == 0) return TokStart;
-  
   // Figure out how many physical characters away the specified instantiation
   // character is.  This needs to take into consideration newlines and
   // trigraphs.
   const char *TokPtr = SourceMgr.getCharacterData(TokStart);
+  
+  // If they request the first char of the token, we're trivially done.
+  if (CharNo == 0 && Lexer::isObviouslySimpleCharacter(*TokPtr))
+    return TokStart;
+  
   unsigned PhysOffset = 0;
   
   // The usual case is that tokens don't contain anything interesting.  Skip
   // over the uninteresting characters.  If a token only consists of simple
   // chars, this method is extremely fast.
-  while (CharNo && Lexer::isObviouslySimpleCharacter(*TokPtr))
+  while (Lexer::isObviouslySimpleCharacter(*TokPtr)) {
+    if (CharNo == 0)
+      return TokStart.getFileLocWithOffset(PhysOffset);
     ++TokPtr, --CharNo, ++PhysOffset;
+  }
   
   // If we have a character that may be a trigraph or escaped newline, use a
   // lexer to parse it correctly.
-  if (CharNo != 0) {
-    // Skip over the remaining characters.
-    for (; CharNo; --CharNo) {
-      unsigned Size;
-      Lexer::getCharAndSizeNoWarn(TokPtr, Size, Features);
-      TokPtr += Size;
-      PhysOffset += Size;
-    }
+  for (; CharNo; --CharNo) {
+    unsigned Size;
+    Lexer::getCharAndSizeNoWarn(TokPtr, Size, Features);
+    TokPtr += Size;
+    PhysOffset += Size;
   }
   
+  // Final detail: if we end up on an escaped newline, we want to return the
+  // location of the actual byte of the token.  For example foo\<newline>bar
+  // advanced by 3 should return the location of b, not of \\.  One compounding
+  // detail of this is that the escape may be made by a trigraph.
+  if (!Lexer::isObviouslySimpleCharacter(*TokPtr))
+    PhysOffset = Lexer::SkipEscapedNewLines(TokPtr)-TokPtr;
+  
   return TokStart.getFileLocWithOffset(PhysOffset);
 }
author	Chris Lattner <sabre@nondot.org>	2009-04-18 22:28:58 +0000
committer	Chris Lattner <sabre@nondot.org>	2009-04-18 22:28:58 +0000
commit	93017cc12a15c742469a57a14f0665143fe440ff (patch)
tree	4ca81c4d57d3b2c562f91478bbbb5a1c5edc414f /clang/lib/Lex
parent	38b2cde4c4c877664ef25dd513d033b4dd5866c3 (diff)
download	bcm5719-llvm-93017cc12a15c742469a57a14f0665143fe440ff.tar.gz bcm5719-llvm-93017cc12a15c742469a57a14f0665143fe440ff.zip