diff options
author | Alexander Kornienko <alexfh@google.com> | 2013-06-05 14:09:10 +0000 |
---|---|---|
committer | Alexander Kornienko <alexfh@google.com> | 2013-06-05 14:09:10 +0000 |
commit | ffcc010767573c657ee0e6c0c9ea82ca124003ab (patch) | |
tree | 945819aeda9957c1232c7e2f7329e0c7a147b3e2 /clang/lib/Format/TokenAnnotator.cpp | |
parent | 218f6d8f59f55c848d335d89cbdd84706f7e096c (diff) | |
download | bcm5719-llvm-ffcc010767573c657ee0e6c0c9ea82ca124003ab.tar.gz bcm5719-llvm-ffcc010767573c657ee0e6c0c9ea82ca124003ab.zip |
UTF-8 support for clang-format.
Summary:
Detect whether the file is valid UTF-8 and, if it is, count code points
instead of just using the number of bytes in (hopefully) all places where the
number of columns is needed. In particular, use the new
FormatToken.CodePointCount instead of TokenLength where appropriate.
Changed the BreakableToken implementations to respect UTF-8 character
boundaries when in UTF-8 mode.
Reviewers: klimek, djasper
Reviewed By: djasper
CC: cfe-commits, rsmith, gribozavr
Differential Revision: http://llvm-reviews.chandlerc.com/D918
llvm-svn: 183312
Diffstat (limited to 'clang/lib/Format/TokenAnnotator.cpp')
-rw-r--r-- | clang/lib/Format/TokenAnnotator.cpp | 39 |
1 files changed, 16 insertions, 23 deletions
diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index 83dea841b5e..62177b3efd7 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -15,7 +15,6 @@ #include "TokenAnnotator.h" #include "clang/Basic/SourceManager.h" -#include "clang/Lex/Lexer.h" #include "llvm/Support/Debug.h" namespace clang { @@ -28,10 +27,9 @@ namespace format { /// into template parameter lists. class AnnotatingParser { public: - AnnotatingParser(SourceManager &SourceMgr, Lexer &Lex, AnnotatedLine &Line, - IdentifierInfo &Ident_in) - : SourceMgr(SourceMgr), Lex(Lex), Line(Line), CurrentToken(Line.First), - KeywordVirtualFound(false), NameFound(false), Ident_in(Ident_in) { + AnnotatingParser(AnnotatedLine &Line, IdentifierInfo &Ident_in) + : Line(Line), CurrentToken(Line.First), KeywordVirtualFound(false), + NameFound(false), Ident_in(Ident_in) { Contexts.push_back(Context(tok::unknown, 1, /*IsExpression=*/ false)); } @@ -295,9 +293,11 @@ private: Line.First->Type == TT_ObjCMethodSpecifier) { Tok->Type = TT_ObjCMethodExpr; Tok->Previous->Type = TT_ObjCSelectorName; - if (Tok->Previous->TokenLength > - Contexts.back().LongestObjCSelectorName) - Contexts.back().LongestObjCSelectorName = Tok->Previous->TokenLength; + if (Tok->Previous->CodePointCount > + Contexts.back().LongestObjCSelectorName) { + Contexts.back().LongestObjCSelectorName = + Tok->Previous->CodePointCount; + } if (Contexts.back().FirstObjCSelectorName == NULL) Contexts.back().FirstObjCSelectorName = Tok->Previous; } else if (Contexts.back().ColonIsForRangeExpr) { @@ -602,9 +602,7 @@ private: } else if (Current.isBinaryOperator()) { Current.Type = TT_BinaryOperator; } else if (Current.is(tok::comment)) { - std::string Data( - Lexer::getSpelling(Current.Tok, SourceMgr, Lex.getLangOpts())); - if (StringRef(Data).startswith("//")) + if (Current.TokenText.startswith("//")) Current.Type = TT_LineComment; else Current.Type = TT_BlockComment; @@ -748,23 +746,19 @@ 
private: case tok::kw_wchar_t: case tok::kw_bool: case tok::kw___underlying_type: - return true; case tok::annot_typename: case tok::kw_char16_t: case tok::kw_char32_t: case tok::kw_typeof: case tok::kw_decltype: - return Lex.getLangOpts().CPlusPlus; + return true; default: - break; + return false; } - return false; } SmallVector<Context, 8> Contexts; - SourceManager &SourceMgr; - Lexer &Lex; AnnotatedLine &Line; FormatToken *CurrentToken; bool KeywordVirtualFound; @@ -866,7 +860,7 @@ private: }; void TokenAnnotator::annotate(AnnotatedLine &Line) { - AnnotatingParser Parser(SourceMgr, Lex, Line, Ident_in); + AnnotatingParser Parser(Line, Ident_in); Line.Type = Parser.parseLine(); if (Line.Type == LT_Invalid) return; @@ -886,7 +880,7 @@ void TokenAnnotator::annotate(AnnotatedLine &Line) { } void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) { - Line.First->TotalLength = Line.First->TokenLength; + Line.First->TotalLength = Line.First->CodePointCount; if (!Line.First->Next) return; FormatToken *Current = Line.First->Next; @@ -920,7 +914,7 @@ void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) { Current->TotalLength = Current->Previous->TotalLength + Style.ColumnLimit; else Current->TotalLength = - Current->Previous->TotalLength + Current->TokenLength + + Current->Previous->TotalLength + Current->CodePointCount + Current->SpacesRequiredBefore; // FIXME: Only calculate this if CanBreakBefore is true once static // initializers etc. are sorted out. 
@@ -947,7 +941,7 @@ void TokenAnnotator::calculateUnbreakableTailLengths(AnnotatedLine &Line) { UnbreakableTailLength = 0; } else { UnbreakableTailLength += - Current->TokenLength + Current->SpacesRequiredBefore; + Current->CodePointCount + Current->SpacesRequiredBefore; } Current = Current->Previous; } @@ -1015,8 +1009,7 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line, if (Right.is(tok::lessless)) { if (Left.is(tok::string_literal)) { - StringRef Content = - StringRef(Left.Tok.getLiteralData(), Left.TokenLength); + StringRef Content = Left.TokenText; Content = Content.drop_back(1).drop_front(1).trim(); if (Content.size() > 1 && (Content.back() == ':' || Content.back() == '=')) |