diff options
Diffstat (limited to 'clang/lib/Format/ContinuationIndenter.cpp')
-rw-r--r-- | clang/lib/Format/ContinuationIndenter.cpp | 159 |
1 files changed, 154 insertions, 5 deletions
diff --git a/clang/lib/Format/ContinuationIndenter.cpp b/clang/lib/Format/ContinuationIndenter.cpp index b57b8de2e70..4f624dd2fb4 100644 --- a/clang/lib/Format/ContinuationIndenter.cpp +++ b/clang/lib/Format/ContinuationIndenter.cpp @@ -14,6 +14,7 @@ #include "ContinuationIndenter.h" #include "BreakableToken.h" +#include "FormatInternal.h" #include "WhitespaceManager.h" #include "clang/Basic/OperatorPrecedence.h" #include "clang/Basic/SourceManager.h" @@ -76,6 +77,53 @@ static bool opensProtoMessageField(const FormatToken &LessTok, (LessTok.Previous && LessTok.Previous->is(tok::equal)))); } +// Returns the delimiter of a raw string literal, or None if TokenText is not +// the text of a raw string literal. The delimiter could be the empty string. +// For example, the delimiter of R"deli(cont)deli" is deli. +static llvm::Optional<StringRef> getRawStringDelimiter(StringRef TokenText) { + if (TokenText.size() < 5 // The smallest raw string possible is 'R"()"'. + || !TokenText.startswith("R\"") || !TokenText.endswith("\"")) + return None; + + // A raw string starts with 'R"<delimiter>(' and delimiter is ascii and has + // size at most 16 by the standard, so the first '(' must be among the first + // 19 bytes. + size_t LParenPos = TokenText.substr(0, 19).find_first_of('('); + if (LParenPos == StringRef::npos) + return None; + StringRef Delimiter = TokenText.substr(2, LParenPos - 2); + + // Check that the string ends in ')Delimiter"'. + size_t RParenPos = TokenText.size() - Delimiter.size() - 2; + if (TokenText[RParenPos] != ')') + return None; + if (!TokenText.substr(RParenPos + 1).startswith(Delimiter)) + return None; + return Delimiter; +} + +RawStringFormatStyleManager::RawStringFormatStyleManager( + const FormatStyle &CodeStyle) { + for (const auto &RawStringFormat : CodeStyle.RawStringFormats) { + FormatStyle Style; + if (!getPredefinedStyle(RawStringFormat.BasedOnStyle, + RawStringFormat.Language, &Style)) { + Style = getLLVMStyle(); + Style.Language = RawStringFormat.Language; + } + Style.ColumnLimit = CodeStyle.ColumnLimit; + DelimiterStyle.insert({RawStringFormat.Delimiter, Style}); + } +} + +llvm::Optional<FormatStyle> +RawStringFormatStyleManager::get(StringRef Delimiter) const { + auto It = DelimiterStyle.find(Delimiter); + if (It == DelimiterStyle.end()) + return None; + return It->second; +} + ContinuationIndenter::ContinuationIndenter(const FormatStyle &Style, const AdditionalKeywords &Keywords, const SourceManager &SourceMgr, @@ -85,14 +133,18 @@ ContinuationIndenter::ContinuationIndenter(const FormatStyle &Style, : Style(Style), Keywords(Keywords), SourceMgr(SourceMgr), Whitespaces(Whitespaces), Encoding(Encoding), BinPackInconclusiveFunctions(BinPackInconclusiveFunctions), - CommentPragmasRegex(Style.CommentPragmas) {} + CommentPragmasRegex(Style.CommentPragmas), RawStringFormats(Style) {} LineState ContinuationIndenter::getInitialState(unsigned FirstIndent, + unsigned FirstStartColumn, const AnnotatedLine *Line, bool DryRun) { LineState State; State.FirstIndent = FirstIndent; - State.Column = FirstIndent; + if (FirstStartColumn && Line->First->NewlinesBefore == 0) + State.Column = FirstStartColumn; + else + State.Column = FirstIndent; // With preprocessor directive indentation, the line starts on column 0 // since it's indented after the hash, but FirstIndent is set to the // preprocessor indent. @@ -1216,6 +1268,89 @@ void ContinuationIndenter::moveStateToNewBlock(LineState &State) { State.Stack.back().BreakBeforeParameter = true; } +static unsigned getLastLineEndColumn(StringRef Text, unsigned StartColumn, + unsigned TabWidth, + encoding::Encoding Encoding) { + size_t LastNewlinePos = Text.find_last_of("\n"); + if (LastNewlinePos == StringRef::npos) { + return StartColumn + + encoding::columnWidthWithTabs(Text, StartColumn, TabWidth, Encoding); + } else { + return encoding::columnWidthWithTabs(Text.substr(LastNewlinePos), + /*StartColumn=*/0, TabWidth, Encoding); + } +} + +unsigned ContinuationIndenter::reformatRawStringLiteral( + const FormatToken &Current, unsigned StartColumn, LineState &State, + StringRef Delimiter, const FormatStyle &RawStringStyle, bool DryRun) { + // The text of a raw string is between the leading 'R"delimiter(' and the + // trailing 'delimiter)"'. + unsigned PrefixSize = 3 + Delimiter.size(); + unsigned SuffixSize = 2 + Delimiter.size(); + + // The first start column is the column the raw text starts. + unsigned FirstStartColumn = StartColumn + PrefixSize; + + // The next start column is the intended indentation a line break inside + // the raw string at level 0. It is determined by the following rules: + // - if the content starts on newline, it is one level more than the current + // indent, and + // - if the content does not start on a newline, it is the first start + // column. + // These rules have the advantage that the formatted content both does not + // violate the rectangle rule and visually flows within the surrounding + // source. + bool ContentStartsOnNewline = Current.TokenText[PrefixSize] == '\n'; + unsigned NextStartColumn = ContentStartsOnNewline + ? State.Stack.back().Indent + Style.IndentWidth + : FirstStartColumn; + + // The last start column is the column the raw string suffix starts if it is + // put on a newline. + // The last start column is the intended indentation of the raw string postfix + // if it is put on a newline. It is determined by the following rules: + // - if the raw string prefix starts on a newline, it is the column where + // that raw string prefix starts, and + // - if the raw string prefix does not start on a newline, it is the current + // indent. + unsigned LastStartColumn = Current.NewlinesBefore + ? FirstStartColumn - PrefixSize + : State.Stack.back().Indent; + + std::string RawText = + Current.TokenText.substr(PrefixSize).drop_back(SuffixSize); + + std::pair<tooling::Replacements, unsigned> Fixes = internal::reformat( + RawStringStyle, RawText, {tooling::Range(0, RawText.size())}, + FirstStartColumn, NextStartColumn, LastStartColumn, "<stdin>", + /*FormattingAttemptStatus=*/nullptr); + + auto NewCode = applyAllReplacements(RawText, Fixes.first); + tooling::Replacements NoFixes; + if (!NewCode) { + State.Column += Current.ColumnWidth; + return 0; + } + if (!DryRun) { + SourceLocation OriginLoc = + Current.Tok.getLocation().getLocWithOffset(PrefixSize); + for (const tooling::Replacement &Fix : Fixes.first) { + auto Err = Whitespaces.addReplacement(tooling::Replacement( + SourceMgr, OriginLoc.getLocWithOffset(Fix.getOffset()), + Fix.getLength(), Fix.getReplacementText())); + if (Err) { + llvm::errs() << "Failed to reformat raw string: " + << llvm::toString(std::move(Err)) << "\n"; + } + } + } + unsigned RawLastLineEndColumn = getLastLineEndColumn( + *NewCode, FirstStartColumn, Style.TabWidth, Encoding); + State.Column = RawLastLineEndColumn + SuffixSize; + return Fixes.second; +} + unsigned ContinuationIndenter::addMultilineToken(const FormatToken &Current, LineState &State) { if (!Current.IsMultiline) @@ -1238,9 +1373,18 @@ unsigned ContinuationIndenter::addMultilineToken(const FormatToken &Current, unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current, LineState &State, bool DryRun) { - // Don't break multi-line tokens other than block comments. Instead, just - // update the state. - if (Current.isNot(TT_BlockComment) && Current.IsMultiline) + // Compute the raw string style to use in case this is a raw string literal + // that can be reformatted. + llvm::Optional<StringRef> Delimiter = None; + llvm::Optional<FormatStyle> RawStringStyle = None; + if (Current.isStringLiteral()) + Delimiter = getRawStringDelimiter(Current.TokenText); + if (Delimiter) + RawStringStyle = RawStringFormats.get(*Delimiter); + + // Don't break multi-line tokens other than block comments and raw string + // literals. Instead, just update the state. + if (Current.isNot(TT_BlockComment) && !RawStringStyle && Current.IsMultiline) return addMultilineToken(Current, State); // Don't break implicit string literals or import statements. @@ -1275,6 +1419,11 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current, if (Current.IsUnterminatedLiteral) return 0; + if (RawStringStyle) { + RawStringStyle->ColumnLimit = ColumnLimit; + return reformatRawStringLiteral(Current, StartColumn, State, *Delimiter, + *RawStringStyle, DryRun); + } StringRef Text = Current.TokenText; StringRef Prefix; StringRef Postfix; |