summaryrefslogtreecommitdiffstats
path: root/clang/lib/Format/Format.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'clang/lib/Format/Format.cpp')
-rw-r--r--clang/lib/Format/Format.cpp93
1 files changed, 56 insertions, 37 deletions
diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp
index 63bf09317e3..9dd5e4a0f21 100644
--- a/clang/lib/Format/Format.cpp
+++ b/clang/lib/Format/Format.cpp
@@ -243,10 +243,11 @@ public:
UnwrappedLineFormatter(const FormatStyle &Style, SourceManager &SourceMgr,
const AnnotatedLine &Line, unsigned FirstIndent,
const FormatToken *RootToken,
- WhitespaceManager &Whitespaces)
+ WhitespaceManager &Whitespaces,
+ encoding::Encoding Encoding)
: Style(Style), SourceMgr(SourceMgr), Line(Line),
FirstIndent(FirstIndent), RootToken(RootToken),
- Whitespaces(Whitespaces), Count(0) {}
+ Whitespaces(Whitespaces), Count(0), Encoding(Encoding) {}
/// \brief Formats an \c UnwrappedLine.
void format(const AnnotatedLine *NextLine) {
@@ -484,7 +485,7 @@ private:
State.NextToken->WhitespaceRange.getEnd()) -
SourceMgr.getSpellingColumnNumber(
State.NextToken->WhitespaceRange.getBegin());
- State.Column += WhitespaceLength + State.NextToken->TokenLength;
+ State.Column += WhitespaceLength + State.NextToken->CodePointCount;
State.NextToken = State.NextToken->Next;
return 0;
}
@@ -520,11 +521,11 @@ private:
Line.StartsDefinition)) {
State.Column = State.Stack.back().Indent;
} else if (Current.Type == TT_ObjCSelectorName) {
- if (State.Stack.back().ColonPos > Current.TokenLength) {
- State.Column = State.Stack.back().ColonPos - Current.TokenLength;
+ if (State.Stack.back().ColonPos > Current.CodePointCount) {
+ State.Column = State.Stack.back().ColonPos - Current.CodePointCount;
} else {
State.Column = State.Stack.back().Indent;
- State.Stack.back().ColonPos = State.Column + Current.TokenLength;
+ State.Stack.back().ColonPos = State.Column + Current.CodePointCount;
}
} else if (Current.Type == TT_StartOfName ||
Previous.isOneOf(tok::coloncolon, tok::equal) ||
@@ -560,7 +561,7 @@ private:
State.Stack.back().LastSpace = State.Column;
if (Current.isOneOf(tok::arrow, tok::period) &&
Current.Type != TT_DesignatedInitializerPeriod)
- State.Stack.back().LastSpace += Current.TokenLength;
+ State.Stack.back().LastSpace += Current.CodePointCount;
State.StartOfLineLevel = State.ParenLevel;
State.LowestCallLevel = State.ParenLevel;
@@ -595,8 +596,8 @@ private:
State.Stack.back().VariablePos = State.Column;
// Move over * and & if they are bound to the variable name.
const FormatToken *Tok = &Previous;
- while (Tok && State.Stack.back().VariablePos >= Tok->TokenLength) {
- State.Stack.back().VariablePos -= Tok->TokenLength;
+ while (Tok && State.Stack.back().VariablePos >= Tok->CodePointCount) {
+ State.Stack.back().VariablePos -= Tok->CodePointCount;
if (Tok->SpacesRequiredBefore != 0)
break;
Tok = Tok->Previous;
@@ -614,12 +615,12 @@ private:
if (Current.Type == TT_ObjCSelectorName &&
State.Stack.back().ColonPos == 0) {
if (State.Stack.back().Indent + Current.LongestObjCSelectorName >
- State.Column + Spaces + Current.TokenLength)
+ State.Column + Spaces + Current.CodePointCount)
State.Stack.back().ColonPos =
State.Stack.back().Indent + Current.LongestObjCSelectorName;
else
State.Stack.back().ColonPos =
- State.Column + Spaces + Current.TokenLength;
+ State.Column + Spaces + Current.CodePointCount;
}
if (Previous.opensScope() && Previous.Type != TT_ObjCMethodExpr &&
@@ -671,7 +672,8 @@ private:
State.LowestCallLevel = std::min(State.LowestCallLevel, State.ParenLevel);
if (Line.Type == LT_BuilderTypeCall && State.ParenLevel == 0)
State.Stack.back().StartOfFunctionCall =
- Current.LastInChainOfCalls ? 0 : State.Column + Current.TokenLength;
+ Current.LastInChainOfCalls ? 0
+ : State.Column + Current.CodePointCount;
}
if (Current.Type == TT_CtorInitializerColon) {
// Indent 2 from the column, so:
@@ -779,7 +781,7 @@ private:
State.StartOfStringLiteral = 0;
}
- State.Column += Current.TokenLength;
+ State.Column += Current.CodePointCount;
State.NextToken = State.NextToken->Next;
@@ -798,7 +800,7 @@ private:
bool DryRun) {
unsigned UnbreakableTailLength = Current.UnbreakableTailLength;
llvm::OwningPtr<BreakableToken> Token;
- unsigned StartColumn = State.Column - Current.TokenLength;
+ unsigned StartColumn = State.Column - Current.CodePointCount;
unsigned OriginalStartColumn =
SourceMgr.getSpellingColumnNumber(Current.getStartOfNonWhitespace()) -
1;
@@ -811,15 +813,16 @@ private:
if (!LiteralData || *LiteralData != '"')
return 0;
- Token.reset(new BreakableStringLiteral(Current, StartColumn));
+ Token.reset(new BreakableStringLiteral(Current, StartColumn, Encoding));
} else if (Current.Type == TT_BlockComment) {
BreakableBlockComment *BBC = new BreakableBlockComment(
- Style, Current, StartColumn, OriginalStartColumn, !Current.Previous);
+ Style, Current, StartColumn, OriginalStartColumn, !Current.Previous,
+ Encoding);
Token.reset(BBC);
} else if (Current.Type == TT_LineComment &&
(Current.Previous == NULL ||
Current.Previous->Type != TT_ImplicitStringLiteral)) {
- Token.reset(new BreakableLineComment(Current, StartColumn));
+ Token.reset(new BreakableLineComment(Current, StartColumn, Encoding));
} else {
return 0;
}
@@ -837,27 +840,27 @@ private:
Whitespaces);
}
unsigned TailOffset = 0;
- unsigned RemainingTokenLength =
+ unsigned RemainingTokenColumns =
Token->getLineLengthAfterSplit(LineIndex, TailOffset);
- while (RemainingTokenLength > RemainingSpace) {
+ while (RemainingTokenColumns > RemainingSpace) {
BreakableToken::Split Split =
Token->getSplit(LineIndex, TailOffset, getColumnLimit());
if (Split.first == StringRef::npos)
break;
assert(Split.first != 0);
- unsigned NewRemainingTokenLength = Token->getLineLengthAfterSplit(
+ unsigned NewRemainingTokenColumns = Token->getLineLengthAfterSplit(
LineIndex, TailOffset + Split.first + Split.second);
- assert(NewRemainingTokenLength < RemainingTokenLength);
+ assert(NewRemainingTokenColumns < RemainingTokenColumns);
if (!DryRun) {
Token->insertBreak(LineIndex, TailOffset, Split, Line.InPPDirective,
Whitespaces);
}
TailOffset += Split.first + Split.second;
- RemainingTokenLength = NewRemainingTokenLength;
+ RemainingTokenColumns = NewRemainingTokenColumns;
Penalty += Style.PenaltyExcessCharacter;
BreakInserted = true;
}
- PositionAfterLastLineInToken = RemainingTokenLength;
+ PositionAfterLastLineInToken = RemainingTokenColumns;
}
if (BreakInserted) {
@@ -1080,13 +1083,16 @@ private:
// Increasing count of \c StateNode items we have created. This is used
// to create a deterministic order independent of the container.
unsigned Count;
+ encoding::Encoding Encoding;
};
class FormatTokenLexer {
public:
- FormatTokenLexer(Lexer &Lex, SourceManager &SourceMgr)
+ FormatTokenLexer(Lexer &Lex, SourceManager &SourceMgr,
+ encoding::Encoding Encoding)
: FormatTok(NULL), GreaterStashed(false), TrailingWhitespace(0), Lex(Lex),
- SourceMgr(SourceMgr), IdentTable(Lex.getLangOpts()) {
+ SourceMgr(SourceMgr), IdentTable(Lex.getLangOpts()),
+ Encoding(Encoding) {
Lex.SetKeepWhitespaceMode(true);
}
@@ -1111,7 +1117,8 @@ private:
FormatTok->Tok.getLocation().getLocWithOffset(1);
FormatTok->WhitespaceRange =
SourceRange(GreaterLocation, GreaterLocation);
- FormatTok->TokenLength = 1;
+ FormatTok->ByteCount = 1;
+ FormatTok->CodePointCount = 1;
GreaterStashed = false;
return FormatTok;
}
@@ -1146,12 +1153,12 @@ private:
}
// Now FormatTok is the next non-whitespace token.
- FormatTok->TokenLength = Text.size();
+ FormatTok->ByteCount = Text.size();
TrailingWhitespace = 0;
if (FormatTok->Tok.is(tok::comment)) {
TrailingWhitespace = Text.size() - Text.rtrim().size();
- FormatTok->TokenLength -= TrailingWhitespace;
+ FormatTok->ByteCount -= TrailingWhitespace;
}
// In case the token starts with escaped newlines, we want to
@@ -1164,7 +1171,7 @@ private:
while (i + 1 < Text.size() && Text[i] == '\\' && Text[i + 1] == '\n') {
// FIXME: ++FormatTok->NewlinesBefore is missing...
WhitespaceLength += 2;
- FormatTok->TokenLength -= 2;
+ FormatTok->ByteCount -= 2;
i += 2;
}
@@ -1176,15 +1183,19 @@ private:
if (FormatTok->Tok.is(tok::greatergreater)) {
FormatTok->Tok.setKind(tok::greater);
- FormatTok->TokenLength = 1;
+ FormatTok->ByteCount = 1;
GreaterStashed = true;
}
+ unsigned EncodingExtraBytes =
+ Text.size() - encoding::getCodePointCount(Text, Encoding);
+ FormatTok->CodePointCount = FormatTok->ByteCount - EncodingExtraBytes;
+
FormatTok->WhitespaceRange = SourceRange(
WhitespaceStart, WhitespaceStart.getLocWithOffset(WhitespaceLength));
FormatTok->TokenText = StringRef(
SourceMgr.getCharacterData(FormatTok->getStartOfNonWhitespace()),
- FormatTok->TokenLength);
+ FormatTok->ByteCount);
return FormatTok;
}
@@ -1194,6 +1205,7 @@ private:
Lexer &Lex;
SourceManager &SourceMgr;
IdentifierTable IdentTable;
+ encoding::Encoding Encoding;
llvm::SpecificBumpPtrAllocator<FormatToken> Allocator;
SmallVector<FormatToken *, 16> Tokens;
@@ -1209,17 +1221,22 @@ public:
Formatter(const FormatStyle &Style, Lexer &Lex, SourceManager &SourceMgr,
const std::vector<CharSourceRange> &Ranges)
: Style(Style), Lex(Lex), SourceMgr(SourceMgr),
- Whitespaces(SourceMgr, Style), Ranges(Ranges) {}
+ Whitespaces(SourceMgr, Style), Ranges(Ranges),
+ Encoding(encoding::detectEncoding(Lex.getBuffer())) {
+ DEBUG(llvm::dbgs()
+ << "File encoding: "
+ << (Encoding == encoding::Encoding_UTF8 ? "UTF8" : "unknown")
+ << "\n");
+ }
virtual ~Formatter() {}
tooling::Replacements format() {
- FormatTokenLexer Tokens(Lex, SourceMgr);
+ FormatTokenLexer Tokens(Lex, SourceMgr, Encoding);
UnwrappedLineParser Parser(Style, Tokens.lex(), *this);
bool StructuralError = Parser.parse();
- TokenAnnotator Annotator(Style, SourceMgr, Lex,
- Tokens.getIdentTable().get("in"));
+ TokenAnnotator Annotator(Style, Tokens.getIdentTable().get("in"));
for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
Annotator.annotate(AnnotatedLines[i]);
}
@@ -1290,7 +1307,7 @@ public:
1;
}
UnwrappedLineFormatter Formatter(Style, SourceMgr, TheLine, Indent,
- TheLine.First, Whitespaces);
+ TheLine.First, Whitespaces, Encoding);
Formatter.format(I + 1 != E ? &*(I + 1) : NULL);
IndentForLevel[TheLine.Level] = LevelIndent;
PreviousLineWasTouched = true;
@@ -1556,7 +1573,7 @@ private:
CharSourceRange LineRange = CharSourceRange::getCharRange(
First->WhitespaceRange.getBegin().getLocWithOffset(
First->LastNewlineOffset),
- Last->Tok.getLocation().getLocWithOffset(Last->TokenLength - 1));
+ Last->Tok.getLocation().getLocWithOffset(Last->ByteCount - 1));
return touchesRanges(LineRange);
}
@@ -1616,6 +1633,8 @@ private:
WhitespaceManager Whitespaces;
std::vector<CharSourceRange> Ranges;
std::vector<AnnotatedLine> AnnotatedLines;
+
+ encoding::Encoding Encoding;
};
tooling::Replacements reformat(const FormatStyle &Style, Lexer &Lex,
OpenPOWER on IntegriCloud