summary | refs | log | tree | commit | diff | stats
path: root/clang/lib/Format/FormatToken.h
diff options
context:
space:
mode:
author: Alexander Kornienko <alexfh@google.com> 2013-06-05 14:09:10 +0000
committer: Alexander Kornienko <alexfh@google.com> 2013-06-05 14:09:10 +0000
commit: ffcc010767573c657ee0e6c0c9ea82ca124003ab (patch)
tree: 945819aeda9957c1232c7e2f7329e0c7a147b3e2 /clang/lib/Format/FormatToken.h
parent: 218f6d8f59f55c848d335d89cbdd84706f7e096c (diff)
download: bcm5719-llvm-ffcc010767573c657ee0e6c0c9ea82ca124003ab.tar.gz
bcm5719-llvm-ffcc010767573c657ee0e6c0c9ea82ca124003ab.zip
UTF-8 support for clang-format.
Summary: Detect whether the file is valid UTF-8 and, if it is, count code points instead of just using the number of bytes in all (hopefully) places where the number of columns is needed. In particular, use the new FormatToken.CodePointCount instead of TokenLength where appropriate. Changed BreakableToken implementations to respect UTF-8 character boundaries when in UTF-8 mode. Reviewers: klimek, djasper Reviewed By: djasper CC: cfe-commits, rsmith, gribozavr Differential Revision: http://llvm-reviews.chandlerc.com/D918 llvm-svn: 183312
Diffstat (limited to 'clang/lib/Format/FormatToken.h')
-rw-r--r-- clang/lib/Format/FormatToken.h 21
1 files changed, 13 insertions, 8 deletions
diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h
index 4a5e20dd4c6..fd1bd7e1cf8 100644
--- a/clang/lib/Format/FormatToken.h
+++ b/clang/lib/Format/FormatToken.h
@@ -61,11 +61,12 @@ enum TokenType {
struct FormatToken {
FormatToken()
: NewlinesBefore(0), HasUnescapedNewline(false), LastNewlineOffset(0),
- TokenLength(0), IsFirst(false), MustBreakBefore(false),
- Type(TT_Unknown), SpacesRequiredBefore(0), CanBreakBefore(false),
- ClosesTemplateDeclaration(false), ParameterCount(0), TotalLength(0),
- UnbreakableTailLength(0), BindingStrength(0), SplitPenalty(0),
- LongestObjCSelectorName(0), FakeRParens(0), LastInChainOfCalls(false),
+ ByteCount(0), CodePointCount(0), IsFirst(false),
+ MustBreakBefore(false), Type(TT_Unknown), SpacesRequiredBefore(0),
+ CanBreakBefore(false), ClosesTemplateDeclaration(false),
+ ParameterCount(0), TotalLength(0), UnbreakableTailLength(0),
+ BindingStrength(0), SplitPenalty(0), LongestObjCSelectorName(0),
+ FakeRParens(0), LastInChainOfCalls(false),
PartOfMultiVariableDeclStmt(false), MatchingParen(NULL), Previous(NULL),
Next(NULL) {}
@@ -89,10 +90,14 @@ struct FormatToken {
/// whitespace (relative to \c WhiteSpaceStart). 0 if there is no '\n'.
unsigned LastNewlineOffset;
- /// \brief The length of the non-whitespace parts of the token. This is
- /// necessary because we need to handle escaped newlines that are stored
+ /// \brief The number of bytes of the non-whitespace parts of the token. This
+ /// is necessary because we need to handle escaped newlines that are stored
/// with the token.
- unsigned TokenLength;
+ unsigned ByteCount;
+
+ /// \brief The length of the non-whitespace parts of the token in CodePoints.
+ /// We need this to correctly measure number of columns a token spans.
+ unsigned CodePointCount;
/// \brief Indicates that this is the first token.
bool IsFirst;
OpenPOWER on IntegriCloud