summaryrefslogtreecommitdiffstats
path: root/clang/lib/Format/TokenAnnotator.cpp
diff options
context:
space:
mode:
authorAlexander Kornienko <alexfh@google.com>2013-06-05 14:09:10 +0000
committerAlexander Kornienko <alexfh@google.com>2013-06-05 14:09:10 +0000
commitffcc010767573c657ee0e6c0c9ea82ca124003ab (patch)
tree945819aeda9957c1232c7e2f7329e0c7a147b3e2 /clang/lib/Format/TokenAnnotator.cpp
parent218f6d8f59f55c848d335d89cbdd84706f7e096c (diff)
downloadbcm5719-llvm-ffcc010767573c657ee0e6c0c9ea82ca124003ab.tar.gz
bcm5719-llvm-ffcc010767573c657ee0e6c0c9ea82ca124003ab.zip
UTF-8 support for clang-format.
Summary: Detect if the file is valid UTF-8, and if this is the case, count code points instead of just using number of bytes in all (hopefully) places, where number of columns is needed. In particular, use the new FormatToken.CodePointCount instead of TokenLength where appropriate. Changed BreakableToken implementations to respect utf-8 character boundaries when in utf-8 mode. Reviewers: klimek, djasper Reviewed By: djasper CC: cfe-commits, rsmith, gribozavr Differential Revision: http://llvm-reviews.chandlerc.com/D918 llvm-svn: 183312
Diffstat (limited to 'clang/lib/Format/TokenAnnotator.cpp')
-rw-r--r--clang/lib/Format/TokenAnnotator.cpp39
1 files changed, 16 insertions, 23 deletions
diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp
index 83dea841b5e..62177b3efd7 100644
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -15,7 +15,6 @@
#include "TokenAnnotator.h"
#include "clang/Basic/SourceManager.h"
-#include "clang/Lex/Lexer.h"
#include "llvm/Support/Debug.h"
namespace clang {
@@ -28,10 +27,9 @@ namespace format {
/// into template parameter lists.
class AnnotatingParser {
public:
- AnnotatingParser(SourceManager &SourceMgr, Lexer &Lex, AnnotatedLine &Line,
- IdentifierInfo &Ident_in)
- : SourceMgr(SourceMgr), Lex(Lex), Line(Line), CurrentToken(Line.First),
- KeywordVirtualFound(false), NameFound(false), Ident_in(Ident_in) {
+ AnnotatingParser(AnnotatedLine &Line, IdentifierInfo &Ident_in)
+ : Line(Line), CurrentToken(Line.First), KeywordVirtualFound(false),
+ NameFound(false), Ident_in(Ident_in) {
Contexts.push_back(Context(tok::unknown, 1, /*IsExpression=*/ false));
}
@@ -295,9 +293,11 @@ private:
Line.First->Type == TT_ObjCMethodSpecifier) {
Tok->Type = TT_ObjCMethodExpr;
Tok->Previous->Type = TT_ObjCSelectorName;
- if (Tok->Previous->TokenLength >
- Contexts.back().LongestObjCSelectorName)
- Contexts.back().LongestObjCSelectorName = Tok->Previous->TokenLength;
+ if (Tok->Previous->CodePointCount >
+ Contexts.back().LongestObjCSelectorName) {
+ Contexts.back().LongestObjCSelectorName =
+ Tok->Previous->CodePointCount;
+ }
if (Contexts.back().FirstObjCSelectorName == NULL)
Contexts.back().FirstObjCSelectorName = Tok->Previous;
} else if (Contexts.back().ColonIsForRangeExpr) {
@@ -602,9 +602,7 @@ private:
} else if (Current.isBinaryOperator()) {
Current.Type = TT_BinaryOperator;
} else if (Current.is(tok::comment)) {
- std::string Data(
- Lexer::getSpelling(Current.Tok, SourceMgr, Lex.getLangOpts()));
- if (StringRef(Data).startswith("//"))
+ if (Current.TokenText.startswith("//"))
Current.Type = TT_LineComment;
else
Current.Type = TT_BlockComment;
@@ -748,23 +746,19 @@ private:
case tok::kw_wchar_t:
case tok::kw_bool:
case tok::kw___underlying_type:
- return true;
case tok::annot_typename:
case tok::kw_char16_t:
case tok::kw_char32_t:
case tok::kw_typeof:
case tok::kw_decltype:
- return Lex.getLangOpts().CPlusPlus;
+ return true;
default:
- break;
+ return false;
}
- return false;
}
SmallVector<Context, 8> Contexts;
- SourceManager &SourceMgr;
- Lexer &Lex;
AnnotatedLine &Line;
FormatToken *CurrentToken;
bool KeywordVirtualFound;
@@ -866,7 +860,7 @@ private:
};
void TokenAnnotator::annotate(AnnotatedLine &Line) {
- AnnotatingParser Parser(SourceMgr, Lex, Line, Ident_in);
+ AnnotatingParser Parser(Line, Ident_in);
Line.Type = Parser.parseLine();
if (Line.Type == LT_Invalid)
return;
@@ -886,7 +880,7 @@ void TokenAnnotator::annotate(AnnotatedLine &Line) {
}
void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) {
- Line.First->TotalLength = Line.First->TokenLength;
+ Line.First->TotalLength = Line.First->CodePointCount;
if (!Line.First->Next)
return;
FormatToken *Current = Line.First->Next;
@@ -920,7 +914,7 @@ void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) {
Current->TotalLength = Current->Previous->TotalLength + Style.ColumnLimit;
else
Current->TotalLength =
- Current->Previous->TotalLength + Current->TokenLength +
+ Current->Previous->TotalLength + Current->CodePointCount +
Current->SpacesRequiredBefore;
// FIXME: Only calculate this if CanBreakBefore is true once static
// initializers etc. are sorted out.
@@ -947,7 +941,7 @@ void TokenAnnotator::calculateUnbreakableTailLengths(AnnotatedLine &Line) {
UnbreakableTailLength = 0;
} else {
UnbreakableTailLength +=
- Current->TokenLength + Current->SpacesRequiredBefore;
+ Current->CodePointCount + Current->SpacesRequiredBefore;
}
Current = Current->Previous;
}
@@ -1015,8 +1009,7 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line,
if (Right.is(tok::lessless)) {
if (Left.is(tok::string_literal)) {
- StringRef Content =
- StringRef(Left.Tok.getLiteralData(), Left.TokenLength);
+ StringRef Content = Left.TokenText;
Content = Content.drop_back(1).drop_front(1).trim();
if (Content.size() > 1 &&
(Content.back() == ':' || Content.back() == '='))
OpenPOWER on IntegriCloud