diff options
author | Alexander Kornienko <alexfh@google.com> | 2013-06-05 14:09:10 +0000 |
---|---|---|
committer | Alexander Kornienko <alexfh@google.com> | 2013-06-05 14:09:10 +0000 |
commit | ffcc010767573c657ee0e6c0c9ea82ca124003ab (patch) | |
tree | 945819aeda9957c1232c7e2f7329e0c7a147b3e2 /clang/lib/Format/FormatToken.h | |
parent | 218f6d8f59f55c848d335d89cbdd84706f7e096c (diff) | |
download | bcm5719-llvm-ffcc010767573c657ee0e6c0c9ea82ca124003ab.tar.gz bcm5719-llvm-ffcc010767573c657ee0e6c0c9ea82ca124003ab.zip |
UTF-8 support for clang-format.
Summary:
Detect whether the file is valid UTF-8 and, if so, count code points
instead of bytes in all (hopefully) places where the number of columns
is needed. In particular, use the new FormatToken.CodePointCount instead
of TokenLength where appropriate.
Changed BreakableToken implementations to respect UTF-8 character boundaries
when in UTF-8 mode.
Reviewers: klimek, djasper
Reviewed By: djasper
CC: cfe-commits, rsmith, gribozavr
Differential Revision: http://llvm-reviews.chandlerc.com/D918
llvm-svn: 183312
Diffstat (limited to 'clang/lib/Format/FormatToken.h')
-rw-r--r-- | clang/lib/Format/FormatToken.h | 21 |
1 files changed, 13 insertions, 8 deletions
diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h index 4a5e20dd4c6..fd1bd7e1cf8 100644 --- a/clang/lib/Format/FormatToken.h +++ b/clang/lib/Format/FormatToken.h @@ -61,11 +61,12 @@ enum TokenType { struct FormatToken { FormatToken() : NewlinesBefore(0), HasUnescapedNewline(false), LastNewlineOffset(0), - TokenLength(0), IsFirst(false), MustBreakBefore(false), - Type(TT_Unknown), SpacesRequiredBefore(0), CanBreakBefore(false), - ClosesTemplateDeclaration(false), ParameterCount(0), TotalLength(0), - UnbreakableTailLength(0), BindingStrength(0), SplitPenalty(0), - LongestObjCSelectorName(0), FakeRParens(0), LastInChainOfCalls(false), + ByteCount(0), CodePointCount(0), IsFirst(false), + MustBreakBefore(false), Type(TT_Unknown), SpacesRequiredBefore(0), + CanBreakBefore(false), ClosesTemplateDeclaration(false), + ParameterCount(0), TotalLength(0), UnbreakableTailLength(0), + BindingStrength(0), SplitPenalty(0), LongestObjCSelectorName(0), + FakeRParens(0), LastInChainOfCalls(false), PartOfMultiVariableDeclStmt(false), MatchingParen(NULL), Previous(NULL), Next(NULL) {} @@ -89,10 +90,14 @@ struct FormatToken { /// whitespace (relative to \c WhiteSpaceStart). 0 if there is no '\n'. unsigned LastNewlineOffset; - /// \brief The length of the non-whitespace parts of the token. This is - /// necessary because we need to handle escaped newlines that are stored + /// \brief The number of bytes of the non-whitespace parts of the token. This + /// is necessary because we need to handle escaped newlines that are stored /// with the token. - unsigned TokenLength; + unsigned ByteCount; + + /// \brief The length of the non-whitespace parts of the token in CodePoints. + /// We need this to correctly measure number of columns a token spans. + unsigned CodePointCount; /// \brief Indicates that this is the first token. bool IsFirst; |