summaryrefslogtreecommitdiffstats
path: root/clang-tools-extra/clangd/Format.cpp
diff options
context:
space:
mode:
authorSam McCall <sam.mccall@gmail.com>2019-06-10 14:26:21 +0000
committerSam McCall <sam.mccall@gmail.com>2019-06-10 14:26:21 +0000
commit25c6257ba0280bc1722721d32adc47d0998fed27 (patch)
tree86610a1d3ce607d590ceacf2f22c6b979a9d6dd5 /clang-tools-extra/clangd/Format.cpp
parentdd4f253c4d8f6debb91c04d3b3965b133169f0e2 (diff)
downloadbcm5719-llvm-25c6257ba0280bc1722721d32adc47d0998fed27.tar.gz
bcm5719-llvm-25c6257ba0280bc1722721d32adc47d0998fed27.zip
[clangd] Revamp textDocument/onTypeFormatting.
Summary: The existing implementation (which triggers on }) is fairly simple and has flaws: - doesn't trigger frequently/regularly enough (particularly in editors that type the } for you) - often reformats too much code around the edit - has jarring cases that I don't have clear ideas for fixing This implementation is designed to trigger on newline, which feels to me more intuitive than } or ;. It does allow for reformatting after other characters - it has a basic behavior and a model for adding specialized behavior for particular characters. But at least initially I'd stick to advertising \n in the capabilities. This also handles comment splitting: when you insert a line break inside a line comment, it will make the new line into an aligned line comment. Working on tests, but want people to patch it in and try it - it's hard to see if "feel" is right purely by looking at a test. Reviewers: ilya-biryukov, hokein Subscribers: mgorny, ioeric, MaskRay, jkorous, arphaman, kadircet, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D60605 llvm-svn: 362939
Diffstat (limited to 'clang-tools-extra/clangd/Format.cpp')
-rw-r--r--clang-tools-extra/clangd/Format.cpp377
1 files changed, 377 insertions, 0 deletions
diff --git a/clang-tools-extra/clangd/Format.cpp b/clang-tools-extra/clangd/Format.cpp
new file mode 100644
index 00000000000..910bd76e6d3
--- /dev/null
+++ b/clang-tools-extra/clangd/Format.cpp
@@ -0,0 +1,377 @@
+//===--- Format.cpp -----------------------------------------*- C++-*------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#include "Format.h"
+#include "Logger.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Format/Format.h"
+#include "clang/Lex/Lexer.h"
+#include "clang/Tooling/Core/Replacement.h"
+#include "llvm/Support/Unicode.h"
+
+namespace clang {
+namespace clangd {
+namespace {
+
+/// Makes \p Code well-formed by appending the closing brackets )]} it lacks.
+/// Clang-format conservatively refuses to format files with unmatched brackets
+/// as it isn't sure where the errors are and so can't correct.
+/// While editing, we instead assume everything before the cursor is complete,
+/// so only closers can be missing.
+/// \p Style selects the language options used to lex \p Code.
+void closeBrackets(std::string &Code, const format::FormatStyle &Style) {
+  SourceManagerForFile FileSM("dummy.cpp", Code);
+  auto &SM = FileSM.get();
+  FileID FID = SM.getMainFileID();
+  Lexer Lex(FID, SM.getBuffer(FID), SM, format::getFormattingLangOpts(Style));
+
+  // Closers we still owe, innermost last.
+  std::string Pending;
+  // A closer cancels the innermost opener only when it matches it; stray or
+  // mismatched closers are ignored.
+  auto Close = [&Pending](char Closer) {
+    if (!Pending.empty() && Pending.back() == Closer)
+      Pending.pop_back();
+  };
+
+  Token Tok;
+  while (!Lex.LexFromRawLexer(Tok)) {
+    const tok::TokenKind Kind = Tok.getKind();
+    if (Kind == tok::l_paren)
+      Pending.push_back(')');
+    else if (Kind == tok::l_brace)
+      Pending.push_back('}');
+    else if (Kind == tok::l_square)
+      Pending.push_back(']');
+    else if (Kind == tok::r_paren)
+      Close(')');
+    else if (Kind == tok::r_brace)
+      Close('}');
+    else if (Kind == tok::r_square)
+      Close(']');
+  }
+  // Attempt to end any open comments first.
+  Code.append("\n// */\n");
+  // Emit the owed closers innermost-first.
+  Code.append(Pending.rbegin(), Pending.rend());
+}
+
+// Returns the last line-comment marker ("///" preferred over "//") found in
+// \p Line, as a substring of \p Line, or an empty string if there is none.
+static StringRef commentMarker(llvm::StringRef Line) {
+  const StringRef Candidates[] = {"///", "//"};
+  for (const StringRef &Candidate : Candidates) {
+    const auto Pos = Line.rfind(Candidate);
+    if (Pos == StringRef::npos)
+      continue;
+    return Line.substr(Pos, Candidate.size());
+  }
+  return "";
+}
+
+// Returns the prefix of \p Code up to (not including) the first newline, or
+// all of \p Code if it contains no newline.
+llvm::StringRef firstLine(llvm::StringRef Code) {
+  // find() yields npos when there is no newline, which substr() treats as
+  // "to the end".
+  return Code.substr(0, Code.find('\n'));
+}
+
+// Returns the suffix of \p Code following its last newline, or all of \p Code
+// if it contains no newline. The result always points into \p Code.
+llvm::StringRef lastLine(llvm::StringRef Code) {
+  const auto LastNewline = Code.rfind('\n');
+  if (LastNewline == llvm::StringRef::npos)
+    return Code;
+  return Code.drop_front(LastNewline + 1);
+}
+
+// Filename is needed for tooling::Replacement and some overloads of reformat().
+// Its value should not affect the outcome. We use the default from reformat().
+// It is a fixed label, so it is const to rule out accidental reassignment.
+const llvm::StringRef Filename = "<stdin>";
+
+// tooling::Replacement from overlapping StringRefs: From must be part of Code.
+// The replaced region is the span of \p From inside \p Code (its offset is
+// computed by pointer arithmetic, so \p From may be empty to express a pure
+// insertion); it is replaced with \p To.
+tooling::Replacement replacement(llvm::StringRef Code, llvm::StringRef From,
+                                 llvm::StringRef To) {
+  assert(From.begin() >= Code.begin() && From.end() <= Code.end());
+  // The filename is required but ignored.
+  return tooling::Replacement(Filename, From.data() - Code.data(),
+                              From.size(), To);
+}
+
+// High-level representation of incremental formatting changes.
+// The changes are made in two steps.
+// 1) a (possibly-empty) set of changes synthesized by clangd (e.g. adding
+// comment markers when splitting a line comment with a newline).
+// 2) a selective clang-format run:
+// - the "source code" passed to clang-format is the code up to the end of the
+// last range of interest, a placeholder for the cursor, and closing brackets
+// - the formatting is restricted to the cursor and (possibly) other ranges
+// (e.g. the old line when inserting a newline).
+// - changes within the retained code are applied, later ones are discarded.
+struct IncrementalChanges {
+ // Changes that should be applied before running clang-format.
+ tooling::Replacements Changes;
+ // Ranges of the original source code that should be clang-formatted.
+ // The CursorPlaceholder will also be formatted.
+ std::vector<tooling::Range> FormatRanges;
+ // The source code that should stand in for the cursor when clang-formatting.
+ // e.g. after inserting a newline, a line-comment at the cursor is used to
+ // ensure that the newline is preserved.
+ std::string CursorPlaceholder;
+};
+
+// Computes the pre-formatting edits, extra format ranges and cursor
+// placeholder to use when a newline was just typed. \p Cursor is the offset
+// of the cursor in \p Code, i.e. after the newline and any indentation the
+// editor added.
+// After a newline:
+// - we continue any line-comment that was split
+// - we format the old line in addition to the cursor
+// - we represent the cursor with a line comment to preserve the newline
+// Returns an empty result (no changes, ranges or placeholder) if the cursor
+// is still on the first line of \p Code.
+IncrementalChanges getIncrementalChangesAfterNewline(llvm::StringRef Code,
+                                                     unsigned Cursor) {
+  IncrementalChanges Result;
+  // Before newline, code looked like:
+  //    leading^trailing
+  // After newline, code looks like:
+  //    leading
+  //    indentation^trailing
+  // Where indentation was added by the editor.
+  StringRef Trailing = firstLine(Code.substr(Cursor));
+  StringRef Indentation = lastLine(Code.take_front(Cursor));
+  if (Indentation.data() == Code.data()) {
+    vlog("Typed a newline, but we're still on the first line!");
+    return Result;
+  }
+  // Indentation starts right after a newline; skip that newline to find the
+  // line before it.
+  StringRef Leading =
+      lastLine(Code.take_front(Indentation.data() - Code.data() - 1));
+  StringRef NextLine = firstLine(Code.substr(Cursor + Trailing.size() + 1));
+
+  // Strip leading whitespace on trailing line.
+  StringRef TrailingTrim = Trailing.ltrim();
+  if (unsigned TrailWS = Trailing.size() - TrailingTrim.size())
+    cantFail(Result.Changes.add(
+        replacement(Code, StringRef(Trailing.begin(), TrailWS), "")));
+
+  // If we split a comment, replace indentation with a comment marker.
+  // If the editor made the new line a comment, also respect that.
+  StringRef CommentMarker = commentMarker(Leading);
+  bool NewLineIsComment = !commentMarker(Indentation).empty();
+  if (!CommentMarker.empty() &&
+      (NewLineIsComment || !commentMarker(NextLine).empty() ||
+       (!TrailingTrim.empty() && !TrailingTrim.startswith("//")))) {
+    using llvm::sys::unicode::columnWidthUTF8;
+    // We indent the new comment to match the previous one.
+    StringRef PreComment =
+        Leading.take_front(CommentMarker.data() - Leading.data());
+    // columnWidthUTF8 reports errors (invalid UTF-8, non-printable characters
+    // such as a tab) as negative values. Passing those to std::string's
+    // count constructor would wrap around to a huge size, so fall back to the
+    // byte length of the prefix instead.
+    int PreCommentColumns = columnWidthUTF8(PreComment);
+    size_t IndentWidth = PreCommentColumns >= 0
+                             ? static_cast<size_t>(PreCommentColumns)
+                             : PreComment.size();
+    std::string IndentAndComment =
+        (std::string(IndentWidth, ' ') + CommentMarker + " ").str();
+    cantFail(
+        Result.Changes.add(replacement(Code, Indentation, IndentAndComment)));
+  } else {
+    // Remove any indentation and let clang-format re-add it.
+    // This prevents the cursor marker dragging e.g. an aligned comment with it.
+    cantFail(Result.Changes.add(replacement(Code, Indentation, "")));
+  }
+
+  // If we put the newline inside a {} pair, put } on its own line...
+  if (CommentMarker.empty() && Leading.endswith("{") &&
+      Trailing.startswith("}")) {
+    cantFail(
+        Result.Changes.add(replacement(Code, Trailing.take_front(1), "\n}")));
+    // ...and format it.
+    Result.FormatRanges.push_back(
+        tooling::Range(Trailing.data() - Code.data() + 1, 1));
+  }
+
+  // Format the whole leading line.
+  Result.FormatRanges.push_back(
+      tooling::Range(Leading.data() - Code.data(), Leading.size()));
+
+  // We use a comment to represent the cursor, to preserve the newline.
+  // A trailing identifier improves parsing of e.g. for without braces.
+  // Exception: if the previous line has a trailing comment, we can't use one
+  // as the cursor (they will be aligned). But in this case we don't need to.
+  Result.CursorPlaceholder = !CommentMarker.empty() ? "ident" : "//==\nident";
+
+  return Result;
+}
+
+// Chooses the incremental-formatting plan for the text that was just typed.
+// A newline gets dedicated handling; any other text gets no pre-formatting
+// edits or extra ranges, only an empty block comment as cursor placeholder.
+IncrementalChanges getIncrementalChanges(llvm::StringRef Code, unsigned Cursor,
+                                         llvm::StringRef InsertedText) {
+  if (InsertedText != "\n") {
+    IncrementalChanges Generic;
+    Generic.CursorPlaceholder = " /**/";
+    return Generic;
+  }
+  return getIncrementalChangesAfterNewline(Code, Cursor);
+}
+
+// Rewrites \p Replacements into an equivalent list in which no replacement
+// spans the cursor, so the mapping from \p OldCursor (position before the
+// edits) to \p NewCursor (position after them) is preserved.
+// A replacement whose range contains OldCursor is cut in two at the cursor.
+std::vector<tooling::Replacement>
+split(const tooling::Replacements &Replacements, unsigned OldCursor,
+      unsigned NewCursor) {
+  std::vector<tooling::Replacement> Pieces;
+  // Net growth of the text caused by replacements entirely before the cursor.
+  int Shift = 0;
+  for (const tooling::Replacement &R : Replacements) {
+    const unsigned Begin = R.getOffset();
+    const unsigned End = Begin + R.getLength();
+    const llvm::StringRef NewText = R.getReplacementText();
+
+    if (End <= OldCursor) {
+      // Entirely before the cursor: keep and account for the length change.
+      Pieces.push_back(R);
+      Shift += static_cast<int>(NewText.size()) -
+               static_cast<int>(R.getLength());
+      continue;
+    }
+    if (Begin >= OldCursor) {
+      // At or after the cursor: keep unchanged.
+      Pieces.push_back(R);
+      continue;
+    }
+
+    // Straddles the cursor: find where the cursor falls in the new text...
+    const int ReplacementSplit =
+        static_cast<int>(NewCursor) - Shift - static_cast<int>(Begin);
+    assert(ReplacementSplit >= 0 &&
+           ReplacementSplit <= int(R.getReplacementText().size()) &&
+           "NewCursor incompatible with OldCursor!");
+    // ...and cut both the replaced range and the new text at that point.
+    const unsigned LengthBeforeCursor = OldCursor - Begin;
+    Pieces.push_back(tooling::Replacement(R.getFilePath(), Begin,
+                                          LengthBeforeCursor,
+                                          NewText.take_front(ReplacementSplit)));
+    Pieces.push_back(tooling::Replacement(R.getFilePath(), OldCursor,
+                                          R.getLength() - LengthBeforeCursor,
+                                          NewText.drop_front(ReplacementSplit)));
+  }
+  return Pieces;
+}
+
+} // namespace
+
+// Incremental format after InsertedText was typed (cursor at OriginalCursor). We simulate these changes:
+// - apply the pre-formatting edits (see getIncrementalChanges)
+// - insert a placeholder for the cursor
+// - format some of the resulting code
+// - remove the cursor placeholder again
+// The replacements we return are produced by composing these, then split so that none spans the cursor.
+//
+// The text we actually pass to clang-format is slightly different from this,
+// e.g. we have to close brackets. We ensure these differences are *after*
+// all the regions we want to format, and discard changes in them.
+std::vector<tooling::Replacement>
+formatIncremental(llvm::StringRef OriginalCode, unsigned OriginalCursor,
+ llvm::StringRef InsertedText, format::FormatStyle Style) {
+ IncrementalChanges Incremental =
+ getIncrementalChanges(OriginalCode, OriginalCursor, InsertedText);
+ // Never *remove* lines in response to pressing enter! This annoys users.
+ if (InsertedText == "\n") {
+ Style.MaxEmptyLinesToKeep = 1000; // large enough to act as "no limit"
+ Style.KeepEmptyLinesAtTheStartOfBlocks = true;
+ }
+
+ // Compute the code we want to format:
+ // 1) Start with code after the pre-formatting edits.
+ std::string CodeToFormat = cantFail(
+ tooling::applyAllReplacements(OriginalCode, Incremental.Changes));
+ unsigned Cursor = Incremental.Changes.getShiftedCodePosition(OriginalCursor);
+ // 2) Truncate code after the last interesting range. NOTE(review): resize() pads with NULs if a range ends past the code; verify.
+ unsigned FormatLimit = Cursor;
+ for (tooling::Range &R : Incremental.FormatRanges)
+ FormatLimit = std::max(FormatLimit, R.getOffset() + R.getLength());
+ CodeToFormat.resize(FormatLimit);
+ // 3) Insert a placeholder for the cursor.
+ CodeToFormat.insert(Cursor, Incremental.CursorPlaceholder);
+ // 4) Append brackets after FormatLimit so the code is well-formed.
+ closeBrackets(CodeToFormat, Style);
+
+ // Determine the ranges to format:
+ std::vector<tooling::Range> RangesToFormat = Incremental.FormatRanges;
+ // Ranges after the cursor need to be adjusted for the placeholder.
+ for (auto &R : RangesToFormat) {
+ if (R.getOffset() > Cursor)
+ R = tooling::Range(R.getOffset() + Incremental.CursorPlaceholder.size(),
+ R.getLength());
+ }
+ // We also format the cursor.
+ RangesToFormat.push_back(
+ tooling::Range(Cursor, Incremental.CursorPlaceholder.size()));
+ // Also update FormatLimit for the placeholder, we'll use this later.
+ FormatLimit += Incremental.CursorPlaceholder.size();
+
+ // Run clang-format, and truncate changes at FormatLimit.
+ tooling::Replacements FormattingChanges;
+ format::FormattingAttemptStatus Status;
+ for (const tooling::Replacement &R : format::reformat(
+ Style, CodeToFormat, RangesToFormat, Filename, &Status)) {
+ if (R.getOffset() + R.getLength() <= FormatLimit) // Before limit.
+ cantFail(FormattingChanges.add(R));
+ else if(R.getOffset() < FormatLimit) { // Overlaps limit.
+ if (R.getReplacementText().empty()) // Deletions are easy to handle: keep only the part before the limit.
+ cantFail(FormattingChanges.add(tooling::Replacement(Filename,
+ R.getOffset(), FormatLimit - R.getOffset(), "")));
+ else
+ // Non-empty edit straddling the limit: dropped and logged. Hopefully won't happen in practice?
+ elog("Incremental clang-format edit overlapping cursor @ {0}!\n{1}",
+ Cursor, CodeToFormat);
+ }
+ }
+ if (!Status.FormatComplete)
+ vlog("Incremental format incomplete at line {0}", Status.Line);
+
+ // Now we are ready to compose the changes relative to OriginalCode.
+ // edits -> insert placeholder -> format -> remove placeholder.
+ // We must express insert/remove as Replacements.
+ tooling::Replacements InsertCursorPlaceholder(
+ tooling::Replacement(Filename, Cursor, 0, Incremental.CursorPlaceholder));
+ unsigned FormattedCursorStart =
+ FormattingChanges.getShiftedCodePosition(Cursor),
+ FormattedCursorEnd = FormattingChanges.getShiftedCodePosition(
+ Cursor + Incremental.CursorPlaceholder.size());
+ tooling::Replacements RemoveCursorPlaceholder(
+ tooling::Replacement(Filename, FormattedCursorStart,
+ FormattedCursorEnd - FormattedCursorStart, ""));
+
+ // We can't simply merge() and return: tooling::Replacements will combine
+ // adjacent edits left and right of the cursor. This gives the right source
+ // code, but loses information about where the cursor is!
+ // Fortunately, none of the individual passes lose information, so:
+ // - we use merge() to compute the final Replacements
+ // - we chain getShiftedCodePosition() to compute final cursor position
+ // - we split the final Replacements at the cursor position, so that
+ // each Replacement lies either before or after the cursor.
+ tooling::Replacements Final;
+ unsigned FinalCursor = OriginalCursor;
+#ifndef NDEBUG
+ std::string FinalCode = OriginalCode; // debug-only copy, used to log the code after each pass
+ dlog("Initial code: {0}", FinalCode);
+#endif
+ for (auto Pass :
+ std::vector<std::pair<const char *, const tooling::Replacements *>>{
+ {"Pre-formatting changes", &Incremental.Changes},
+ {"Insert placeholder", &InsertCursorPlaceholder},
+ {"clang-format", &FormattingChanges},
+ {"Remove placeholder", &RemoveCursorPlaceholder}}) {
+ Final = Final.merge(*Pass.second);
+ FinalCursor = Pass.second->getShiftedCodePosition(FinalCursor);
+#ifndef NDEBUG
+ FinalCode =
+ cantFail(tooling::applyAllReplacements(FinalCode, *Pass.second));
+ dlog("After {0}:\n{1}^{2}", Pass.first,
+ StringRef(FinalCode).take_front(FinalCursor),
+ StringRef(FinalCode).drop_front(FinalCursor));
+#endif
+ }
+ return split(Final, OriginalCursor, FinalCursor);
+}
+
+// Maps the cursor position \p Offset in the original text to its position
+// after \p Replacements (expected to be sorted by offset) have been applied.
+// - a replacement entirely before the cursor shifts it by its length change
+// - a replacement overlapping the cursor keeps the cursor's position within
+//   the replacement text, clamped to the end of that text
+// - replacements starting at or after the cursor have no effect
+unsigned
+transformCursorPosition(unsigned Offset,
+                        const std::vector<tooling::Replacement> &Replacements) {
+  const unsigned OriginalOffset = Offset;
+  for (const auto &R : Replacements) {
+    if (R.getOffset() + R.getLength() <= OriginalOffset) {
+      // Replacement is before cursor.
+      Offset += R.getReplacementText().size();
+      Offset -= R.getLength();
+    } else if (R.getOffset() < OriginalOffset) {
+      // Replacement overlaps cursor.
+      // Preserve position within replacement text, as far as possible.
+      // R.getOffset() is in original coordinates, so it must be compared
+      // against OriginalOffset: Offset has already been shifted by earlier
+      // replacements, and mixing the two would discard that shift.
+      unsigned PositionWithinReplacement = OriginalOffset - R.getOffset();
+      if (PositionWithinReplacement > R.getReplacementText().size()) {
+        // Cursor would land past the new text: clamp it to the text's end.
+        Offset += R.getReplacementText().size();
+        Offset -= PositionWithinReplacement;
+      }
+    } else {
+      // Replacement after cursor.
+      break; // Replacements are sorted, the rest are also after the cursor.
+    }
+  }
+  return Offset;
+}
+
+} // namespace clangd
+} // namespace clang
OpenPOWER on IntegriCloud