summaryrefslogtreecommitdiffstats
path: root/clang/lib/Format/ContinuationIndenter.cpp
diff options
context:
space:
mode:
authorKrasimir Georgiev <krasimir@google.com>2017-10-30 14:01:50 +0000
committerKrasimir Georgiev <krasimir@google.com>2017-10-30 14:01:50 +0000
commit9ad83fe7f64dbb7f2bebafa8e9b7637f98de8c8d (patch)
tree872a89f5225814bbfa0b4000f9f638f49c962f79 /clang/lib/Format/ContinuationIndenter.cpp
parent5cde1ccb2998fe06e3930c1d5a8f3c4fb93e845d (diff)
downloadbcm5719-llvm-9ad83fe7f64dbb7f2bebafa8e9b7637f98de8c8d.tar.gz
bcm5719-llvm-9ad83fe7f64dbb7f2bebafa8e9b7637f98de8c8d.zip
[clang-format] Format raw string literals
Summary: This patch adds raw string literal formatting. Reviewers: djasper, klimek Reviewed By: klimek Subscribers: klimek, mgorny Differential Revision: https://reviews.llvm.org/D35943 llvm-svn: 316903
Diffstat (limited to 'clang/lib/Format/ContinuationIndenter.cpp')
-rw-r--r--clang/lib/Format/ContinuationIndenter.cpp159
1 files changed, 154 insertions, 5 deletions
diff --git a/clang/lib/Format/ContinuationIndenter.cpp b/clang/lib/Format/ContinuationIndenter.cpp
index b57b8de2e70..4f624dd2fb4 100644
--- a/clang/lib/Format/ContinuationIndenter.cpp
+++ b/clang/lib/Format/ContinuationIndenter.cpp
@@ -14,6 +14,7 @@
#include "ContinuationIndenter.h"
#include "BreakableToken.h"
+#include "FormatInternal.h"
#include "WhitespaceManager.h"
#include "clang/Basic/OperatorPrecedence.h"
#include "clang/Basic/SourceManager.h"
@@ -76,6 +77,53 @@ static bool opensProtoMessageField(const FormatToken &LessTok,
(LessTok.Previous && LessTok.Previous->is(tok::equal))));
}
+// Extracts the delimiter from the text of a raw string literal token.
+// Yields None when TokenText does not have the shape
+// 'R"<delimiter>(...)<delimiter>"'. An empty delimiter is valid.
+// Example: for R"deli(cont)deli" the result is deli.
+static llvm::Optional<StringRef> getRawStringDelimiter(StringRef TokenText) {
+  // 'R"()"' is the shortest raw string literal there is, hence the minimum
+  // length of 5.
+  const bool HasRawStringShape = TokenText.size() >= 5 &&
+                                 TokenText.startswith("R\"") &&
+                                 TokenText.endswith("\"");
+  if (!HasRawStringShape)
+    return None;
+
+  // The literal opens with 'R"<delimiter>('. The standard limits the (ascii)
+  // delimiter to 16 characters, so the opening parenthesis has to show up
+  // within the first 19 bytes of the token.
+  const size_t OpenParenPos = TokenText.substr(0, 19).find_first_of('(');
+  if (OpenParenPos == StringRef::npos)
+    return None;
+  const StringRef Delimiter = TokenText.substr(2, OpenParenPos - 2);
+
+  // The literal has to close with ')<delimiter>"': first the parenthesis,
+  // then the very same delimiter.
+  const size_t CloseParenPos = TokenText.size() - Delimiter.size() - 2;
+  const bool ClosesWithDelimiter =
+      TokenText[CloseParenPos] == ')' &&
+      TokenText.substr(CloseParenPos + 1).startswith(Delimiter);
+  if (!ClosesWithDelimiter)
+    return None;
+  return Delimiter;
+}
+
+// Builds the delimiter -> style table from CodeStyle.RawStringFormats.
+// Each entry starts from the predefined style named by its BasedOnStyle; when
+// that lookup fails, the LLVM style with the entry's language is used instead.
+// In both cases the column limit is taken from CodeStyle.
+RawStringFormatStyleManager::RawStringFormatStyleManager(
+    const FormatStyle &CodeStyle) {
+  for (const auto &Format : CodeStyle.RawStringFormats) {
+    FormatStyle DelimStyle;
+    const bool IsPredefined = getPredefinedStyle(
+        Format.BasedOnStyle, Format.Language, &DelimStyle);
+    if (!IsPredefined) {
+      // Fall back to LLVM style for the requested language.
+      DelimStyle = getLLVMStyle();
+      DelimStyle.Language = Format.Language;
+    }
+    // The nested code has to respect the column limit of the enclosing code.
+    DelimStyle.ColumnLimit = CodeStyle.ColumnLimit;
+    DelimiterStyle.insert({Format.Delimiter, DelimStyle});
+  }
+}
+
+// Looks up the style registered for Delimiter. Yields None when no style has
+// been registered for that delimiter.
+llvm::Optional<FormatStyle>
+RawStringFormatStyleManager::get(StringRef Delimiter) const {
+  auto Found = DelimiterStyle.find(Delimiter);
+  if (Found != DelimiterStyle.end())
+    return Found->second;
+  return None;
+}
+
ContinuationIndenter::ContinuationIndenter(const FormatStyle &Style,
const AdditionalKeywords &Keywords,
const SourceManager &SourceMgr,
@@ -85,14 +133,18 @@ ContinuationIndenter::ContinuationIndenter(const FormatStyle &Style,
: Style(Style), Keywords(Keywords), SourceMgr(SourceMgr),
Whitespaces(Whitespaces), Encoding(Encoding),
BinPackInconclusiveFunctions(BinPackInconclusiveFunctions),
- CommentPragmasRegex(Style.CommentPragmas) {}
+ CommentPragmasRegex(Style.CommentPragmas), RawStringFormats(Style) {}
LineState ContinuationIndenter::getInitialState(unsigned FirstIndent,
+ unsigned FirstStartColumn,
const AnnotatedLine *Line,
bool DryRun) {
LineState State;
State.FirstIndent = FirstIndent;
- State.Column = FirstIndent;
+ if (FirstStartColumn && Line->First->NewlinesBefore == 0)
+ State.Column = FirstStartColumn;
+ else
+ State.Column = FirstIndent;
// With preprocessor directive indentation, the line starts on column 0
// since it's indented after the hash, but FirstIndent is set to the
// preprocessor indent.
@@ -1216,6 +1268,89 @@ void ContinuationIndenter::moveStateToNewBlock(LineState &State) {
State.Stack.back().BreakBeforeParameter = true;
}
+// Computes the column at which the last line of Text ends, given that Text
+// itself begins at StartColumn.
+// - Single-line text: StartColumn plus the width of the whole text.
+// - Multi-line text: the width of the tail of Text measured from column 0.
+//   Note that the measured tail begins at the last newline character itself.
+static unsigned getLastLineEndColumn(StringRef Text, unsigned StartColumn,
+                                     unsigned TabWidth,
+                                     encoding::Encoding Encoding) {
+  const size_t NewlinePos = Text.find_last_of("\n");
+  if (NewlinePos != StringRef::npos) {
+    const StringRef Tail = Text.substr(NewlinePos);
+    return encoding::columnWidthWithTabs(Tail, /*StartColumn=*/0, TabWidth,
+                                         Encoding);
+  }
+  const unsigned Width =
+      encoding::columnWidthWithTabs(Text, StartColumn, TabWidth, Encoding);
+  return StartColumn + Width;
+}
+
+// Reformats the content of the raw string literal token Current with
+// RawStringStyle and advances State.Column past the literal.
+//
+// Current        - the raw string literal token.
+// StartColumn    - the column at which the token starts.
+// State          - the line state; its Column is updated.
+// Delimiter      - the delimiter of the raw string literal.
+// RawStringStyle - the style used to format the content of the literal.
+// DryRun         - when true, no replacements are registered with the
+//                  whitespace manager; only State is updated.
+//
+// Returns the second component of the result of internal::reformat
+// (presumably the penalty of the nested formatting), or 0 when the computed
+// replacements cannot be applied to the raw text.
+unsigned ContinuationIndenter::reformatRawStringLiteral(
+    const FormatToken &Current, unsigned StartColumn, LineState &State,
+    StringRef Delimiter, const FormatStyle &RawStringStyle, bool DryRun) {
+  // The text of a raw string is between the leading 'R"delimiter(' and the
+  // trailing ')delimiter"'.
+  unsigned PrefixSize = 3 + Delimiter.size();
+  unsigned SuffixSize = 2 + Delimiter.size();
+
+  // The first start column is the column the raw text starts.
+  unsigned FirstStartColumn = StartColumn + PrefixSize;
+
+  // The next start column is the intended indentation a line break inside
+  // the raw string at level 0. It is determined by the following rules:
+  //   - if the content starts on newline, it is one level more than the current
+  //     indent, and
+  //   - if the content does not start on a newline, it is the first start
+  //     column.
+  // These rules have the advantage that the formatted content both does not
+  // violate the rectangle rule and visually flows within the surrounding
+  // source.
+  bool ContentStartsOnNewline = Current.TokenText[PrefixSize] == '\n';
+  unsigned NextStartColumn = ContentStartsOnNewline
+                                 ? State.Stack.back().Indent + Style.IndentWidth
+                                 : FirstStartColumn;
+
+  // The last start column is the intended indentation of the raw string
+  // suffix if it is put on a newline. It is determined by the following rules:
+  //   - if the raw string prefix starts on a newline, it is the column where
+  //     that raw string prefix starts, and
+  //   - if the raw string prefix does not start on a newline, it is the current
+  //     indent.
+  unsigned LastStartColumn = Current.NewlinesBefore
+                                 ? FirstStartColumn - PrefixSize
+                                 : State.Stack.back().Indent;
+
+  std::string RawText =
+      Current.TokenText.substr(PrefixSize).drop_back(SuffixSize);
+
+  std::pair<tooling::Replacements, unsigned> Fixes = internal::reformat(
+      RawStringStyle, RawText, {tooling::Range(0, RawText.size())},
+      FirstStartColumn, NextStartColumn, LastStartColumn, "<stdin>",
+      /*FormattingAttemptStatus=*/nullptr);
+
+  auto NewCode = applyAllReplacements(RawText, Fixes.first);
+  if (!NewCode) {
+    // An llvm::Expected that holds an error must have that error handled
+    // before it is destroyed; otherwise builds with unchecked-error detection
+    // abort. The failure is deliberately ignored here: the literal is left
+    // untouched and only the column is advanced.
+    llvm::consumeError(NewCode.takeError());
+    State.Column += Current.ColumnWidth;
+    return 0;
+  }
+  if (!DryRun) {
+    // Replacement offsets are relative to the raw text, which begins
+    // PrefixSize bytes after the start of the token.
+    SourceLocation OriginLoc =
+        Current.Tok.getLocation().getLocWithOffset(PrefixSize);
+    for (const tooling::Replacement &Fix : Fixes.first) {
+      auto Err = Whitespaces.addReplacement(tooling::Replacement(
+          SourceMgr, OriginLoc.getLocWithOffset(Fix.getOffset()),
+          Fix.getLength(), Fix.getReplacementText()));
+      if (Err) {
+        llvm::errs() << "Failed to reformat raw string: "
+                     << llvm::toString(std::move(Err)) << "\n";
+      }
+    }
+  }
+  // The new column is where the reformatted content ends plus the width of
+  // the ')delimiter"' suffix.
+  unsigned RawLastLineEndColumn = getLastLineEndColumn(
+      *NewCode, FirstStartColumn, Style.TabWidth, Encoding);
+  State.Column = RawLastLineEndColumn + SuffixSize;
+  return Fixes.second;
+}
+
unsigned ContinuationIndenter::addMultilineToken(const FormatToken &Current,
LineState &State) {
if (!Current.IsMultiline)
@@ -1238,9 +1373,18 @@ unsigned ContinuationIndenter::addMultilineToken(const FormatToken &Current,
unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,
LineState &State,
bool DryRun) {
- // Don't break multi-line tokens other than block comments. Instead, just
- // update the state.
- if (Current.isNot(TT_BlockComment) && Current.IsMultiline)
+ // Compute the raw string style to use in case this is a raw string literal
+ // that can be reformatted.
+ llvm::Optional<StringRef> Delimiter = None;
+ llvm::Optional<FormatStyle> RawStringStyle = None;
+ if (Current.isStringLiteral())
+ Delimiter = getRawStringDelimiter(Current.TokenText);
+ if (Delimiter)
+ RawStringStyle = RawStringFormats.get(*Delimiter);
+
+ // Don't break multi-line tokens other than block comments and raw string
+ // literals. Instead, just update the state.
+ if (Current.isNot(TT_BlockComment) && !RawStringStyle && Current.IsMultiline)
return addMultilineToken(Current, State);
// Don't break implicit string literals or import statements.
@@ -1275,6 +1419,11 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,
if (Current.IsUnterminatedLiteral)
return 0;
+ if (RawStringStyle) {
+ RawStringStyle->ColumnLimit = ColumnLimit;
+ return reformatRawStringLiteral(Current, StartColumn, State, *Delimiter,
+ *RawStringStyle, DryRun);
+ }
StringRef Text = Current.TokenText;
StringRef Prefix;
StringRef Postfix;
OpenPOWER on IntegriCloud