diff options
| author | Sam McCall <sam.mccall@gmail.com> | 2019-06-10 14:26:21 +0000 |
|---|---|---|
| committer | Sam McCall <sam.mccall@gmail.com> | 2019-06-10 14:26:21 +0000 |
| commit | 25c6257ba0280bc1722721d32adc47d0998fed27 (patch) | |
| tree | 86610a1d3ce607d590ceacf2f22c6b979a9d6dd5 /clang-tools-extra/clangd/Format.cpp | |
| parent | dd4f253c4d8f6debb91c04d3b3965b133169f0e2 (diff) | |
| download | bcm5719-llvm-25c6257ba0280bc1722721d32adc47d0998fed27.tar.gz bcm5719-llvm-25c6257ba0280bc1722721d32adc47d0998fed27.zip | |
[clangd] Revamp textDocument/onTypeFormatting.
Summary:
The existing implementation (which triggers on }) is fairly simple and
has flaws:
- doesn't trigger frequently/regularly enough (particularly in editors that type the }
for you)
- often reformats too much code around the edit
- has jarring cases that I don't have clear ideas for fixing
This implementation is designed to trigger on newline, which feels to me more
intuitive than } or ;.
It does allow for reformatting after other characters - it has a
basic behavior and a model for adding specialized behavior for
particular characters. But at least initially I'd stick to advertising
\n in the capabilities.
This also handles comment splitting: when you insert a line break inside
a line comment, it will make the new line into an aligned line comment.
Working on tests, but want people to patch it in and try it - it's hard to
see if "feel" is right purely by looking at a test.
Reviewers: ilya-biryukov, hokein
Subscribers: mgorny, ioeric, MaskRay, jkorous, arphaman, kadircet, cfe-commits
Tags: #clang
Differential Revision: https://reviews.llvm.org/D60605
llvm-svn: 362939
Diffstat (limited to 'clang-tools-extra/clangd/Format.cpp')
| -rw-r--r-- | clang-tools-extra/clangd/Format.cpp | 377 |
1 files changed, 377 insertions, 0 deletions
diff --git a/clang-tools-extra/clangd/Format.cpp b/clang-tools-extra/clangd/Format.cpp new file mode 100644 index 00000000000..910bd76e6d3 --- /dev/null +++ b/clang-tools-extra/clangd/Format.cpp @@ -0,0 +1,377 @@ +//===--- Format.cpp -----------------------------------------*- C++-*------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#include "Format.h" +#include "Logger.h" +#include "clang/Basic/SourceManager.h" +#include "clang/Format/Format.h" +#include "clang/Lex/Lexer.h" +#include "clang/Tooling/Core/Replacement.h" +#include "llvm/Support/Unicode.h" + +namespace clang { +namespace clangd { +namespace { + +/// Append closing brackets )]} to \p Code to make it well-formed. +/// Clang-format conservatively refuses to format files with unmatched brackets +/// as it isn't sure where the errors are and so can't correct. +/// When editing, it's reasonable to assume code before the cursor is complete. 
+void closeBrackets(std::string &Code, const format::FormatStyle &Style) { + SourceManagerForFile FileSM("dummy.cpp", Code); + auto &SM = FileSM.get(); + FileID FID = SM.getMainFileID(); + Lexer Lex(FID, SM.getBuffer(FID), SM, format::getFormattingLangOpts(Style)); + Token Tok; + std::vector<char> Brackets; + while (!Lex.LexFromRawLexer(Tok)) { + switch(Tok.getKind()) { + case tok::l_paren: + Brackets.push_back(')'); + break; + case tok::l_brace: + Brackets.push_back('}'); + break; + case tok::l_square: + Brackets.push_back(']'); + break; + case tok::r_paren: + if (!Brackets.empty() && Brackets.back() == ')') + Brackets.pop_back(); + break; + case tok::r_brace: + if (!Brackets.empty() && Brackets.back() == '}') + Brackets.pop_back(); + break; + case tok::r_square: + if (!Brackets.empty() && Brackets.back() == ']') + Brackets.pop_back(); + break; + default: + continue; + } + } + // Attempt to end any open comments first. + Code.append("\n// */\n"); + Code.append(Brackets.rbegin(), Brackets.rend()); +} + +static StringRef commentMarker(llvm::StringRef Line) { + for (StringRef Marker : {"///", "//"}){ + auto I = Line.rfind(Marker); + if (I != StringRef::npos) + return Line.substr(I, Marker.size()); + } + return ""; +} + +llvm::StringRef firstLine(llvm::StringRef Code) { + return Code.take_until([](char C) { return C == '\n'; }); +} + +llvm::StringRef lastLine(llvm::StringRef Code) { + llvm::StringRef Rest = Code; + while (!Rest.empty() && Rest.back() != '\n') + Rest = Rest.drop_back(); + return Code.substr(Rest.size()); +} + +// Filename is needed for tooling::Replacement and some overloads of reformat(). +// Its value should not affect the outcome. We use the default from reformat(). +llvm::StringRef Filename = "<stdin>"; + +// tooling::Replacement from overlapping StringRefs: From must be part of Code. 
+tooling::Replacement replacement(llvm::StringRef Code, llvm::StringRef From, + llvm::StringRef To) { + assert(From.begin() >= Code.begin() && From.end() <= Code.end()); + // The filename is required but ignored. + return tooling::Replacement(Filename, From.data() - Code.data(), + From.size(), To); +}; + +// High-level representation of incremental formatting changes. +// The changes are made in two steps. +// 1) a (possibly-empty) set of changes synthesized by clangd (e.g. adding +// comment markers when splitting a line comment with a newline). +// 2) a selective clang-format run: +// - the "source code" passed to clang format is the code up to the cursor, +// a placeholder for the cursor, and some closing brackets +// - the formatting is restricted to the cursor and (possibly) other ranges +// (e.g. the old line when inserting a newline). +// - changes before the cursor are applied, those after are discarded. +struct IncrementalChanges { + // Changes that should be applied before running clang-format. + tooling::Replacements Changes; + // Ranges of the original source code that should be clang-formatted. + // The CursorProxyText will also be formatted. + std::vector<tooling::Range> FormatRanges; + // The source code that should stand in for the cursor when clang-formatting. + // e.g. after inserting a newline, a line-comment at the cursor is used to + // ensure that the newline is preserved. + std::string CursorPlaceholder; +}; + +// After a newline: +// - we continue any line-comment that was split +// - we format the old line in addition to the cursor +// - we represent the cursor with a line comment to preserve the newline +IncrementalChanges getIncrementalChangesAfterNewline(llvm::StringRef Code, + unsigned Cursor) { + IncrementalChanges Result; + // Before newline, code looked like: + // leading^trailing + // After newline, code looks like: + // leading + // indentation^trailing + // Where indentation was added by the editor. 
+ StringRef Trailing = firstLine(Code.substr(Cursor)); + StringRef Indentation = lastLine(Code.take_front(Cursor)); + if (Indentation.data() == Code.data()) { + vlog("Typed a newline, but we're still on the first line!"); + return Result; + } + StringRef Leading = + lastLine(Code.take_front(Indentation.data() - Code.data() - 1)); + StringRef NextLine = firstLine(Code.substr(Cursor + Trailing.size() + 1)); + + // Strip leading whitespace on trailing line. + StringRef TrailingTrim = Trailing.ltrim(); + if (unsigned TrailWS = Trailing.size() - TrailingTrim.size()) + cantFail(Result.Changes.add( + replacement(Code, StringRef(Trailing.begin(), TrailWS), ""))); + + // If we split a comment, replace indentation with a comment marker. + // If the editor made the new line a comment, also respect that. + StringRef CommentMarker = commentMarker(Leading); + bool NewLineIsComment = !commentMarker(Indentation).empty(); + if (!CommentMarker.empty() && + (NewLineIsComment || !commentMarker(NextLine).empty() || + (!TrailingTrim.empty() && !TrailingTrim.startswith("//")))) { + using llvm::sys::unicode::columnWidthUTF8; + // We indent the new comment to match the previous one. + StringRef PreComment = + Leading.take_front(CommentMarker.data() - Leading.data()); + std::string IndentAndComment = + (std::string(columnWidthUTF8(PreComment), ' ') + CommentMarker + " ") + .str(); + cantFail( + Result.Changes.add(replacement(Code, Indentation, IndentAndComment))); + } else { + // Remove any indentation and let clang-format re-add it. + // This prevents the cursor marker dragging e.g. an aligned comment with it. + cantFail(Result.Changes.add(replacement(Code, Indentation, ""))); + } + + // If we put a the newline inside a {} pair, put } on its own line... + if (CommentMarker.empty() && Leading.endswith("{") && + Trailing.startswith("}")) { + cantFail( + Result.Changes.add(replacement(Code, Trailing.take_front(1), "\n}"))); + // ...and format it. 
+ Result.FormatRanges.push_back( + tooling::Range(Trailing.data() - Code.data() + 1, 1)); + } + + // Format the whole leading line. + Result.FormatRanges.push_back( + tooling::Range(Leading.data() - Code.data(), Leading.size())); + + // We use a comment to represent the cursor, to preserve the newline. + // A trailing identifier improves parsing of e.g. for without braces. + // Exception: if the previous line has a trailing comment, we can't use one + // as the cursor (they will be aligned). But in this case we don't need to. + Result.CursorPlaceholder = !CommentMarker.empty() ? "ident" : "//==\nident"; + + return Result; +} + +IncrementalChanges getIncrementalChanges(llvm::StringRef Code, unsigned Cursor, + llvm::StringRef InsertedText) { + IncrementalChanges Result; + if (InsertedText == "\n") + return getIncrementalChangesAfterNewline(Code, Cursor); + + Result.CursorPlaceholder = " /**/"; + return Result; +} + +// Returns equivalent replacements that preserve the correspondence between +// OldCursor and NewCursor. If OldCursor lies in a replaced region, that +// replacement will be split. 
+std::vector<tooling::Replacement> +split(const tooling::Replacements &Replacements, unsigned OldCursor, + unsigned NewCursor) { + std::vector<tooling::Replacement> Result; + int LengthChange = 0; + for (const tooling::Replacement &R : Replacements) { + if (R.getOffset() + R.getLength() <= OldCursor) { // before cursor + Result.push_back(R); + LengthChange += R.getReplacementText().size() - R.getLength(); + } else if (R.getOffset() < OldCursor) { // overlaps cursor + int ReplacementSplit = NewCursor - LengthChange - R.getOffset(); + assert(ReplacementSplit >= 0 && + ReplacementSplit <= int(R.getReplacementText().size()) && + "NewCursor incompatible with OldCursor!"); + Result.push_back(tooling::Replacement( + R.getFilePath(), R.getOffset(), OldCursor - R.getOffset(), + R.getReplacementText().take_front(ReplacementSplit))); + Result.push_back(tooling::Replacement( + R.getFilePath(), OldCursor, + R.getLength() - (OldCursor - R.getOffset()), + R.getReplacementText().drop_front(ReplacementSplit))); + } else if (R.getOffset() >= OldCursor) { // after cursor + Result.push_back(R); + } + } + return Result; +} + +} // namespace + +// We're simulating the following sequence of changes: +// - apply the pre-formatting edits (see getIncrementalChanges) +// - insert a placeholder for the cursor +// - format some of the resulting code +// - remove the cursor placeholder again +// The replacements we return are produced by composing these. +// +// The text we actually pass to clang-format is slightly different from this, +// e.g. we have to close brackets. We ensure these differences are *after* +// all the regions we want to format, and discard changes in them. 
+std::vector<tooling::Replacement> +formatIncremental(llvm::StringRef OriginalCode, unsigned OriginalCursor, + llvm::StringRef InsertedText, format::FormatStyle Style) { + IncrementalChanges Incremental = + getIncrementalChanges(OriginalCode, OriginalCursor, InsertedText); + // Never *remove* lines in response to pressing enter! This annoys users. + if (InsertedText == "\n") { + Style.MaxEmptyLinesToKeep = 1000; + Style.KeepEmptyLinesAtTheStartOfBlocks = true; + } + + // Compute the code we want to format: + // 1) Start with code after the pre-formatting edits. + std::string CodeToFormat = cantFail( + tooling::applyAllReplacements(OriginalCode, Incremental.Changes)); + unsigned Cursor = Incremental.Changes.getShiftedCodePosition(OriginalCursor); + // 2) Truncate code after the last interesting range. + unsigned FormatLimit = Cursor; + for (tooling::Range &R : Incremental.FormatRanges) + FormatLimit = std::max(FormatLimit, R.getOffset() + R.getLength()); + CodeToFormat.resize(FormatLimit); + // 3) Insert a placeholder for the cursor. + CodeToFormat.insert(Cursor, Incremental.CursorPlaceholder); + // 4) Append brackets after FormatLimit so the code is well-formed. + closeBrackets(CodeToFormat, Style); + + // Determine the ranges to format: + std::vector<tooling::Range> RangesToFormat = Incremental.FormatRanges; + // Ranges after the cursor need to be adjusted for the placeholder. + for (auto &R : RangesToFormat) { + if (R.getOffset() > Cursor) + R = tooling::Range(R.getOffset() + Incremental.CursorPlaceholder.size(), + R.getLength()); + } + // We also format the cursor. + RangesToFormat.push_back( + tooling::Range(Cursor, Incremental.CursorPlaceholder.size())); + // Also update FormatLimit for the placeholder, we'll use this later. + FormatLimit += Incremental.CursorPlaceholder.size(); + + // Run clang-format, and truncate changes at FormatLimit. 
+ tooling::Replacements FormattingChanges; + format::FormattingAttemptStatus Status; + for (const tooling::Replacement &R : format::reformat( + Style, CodeToFormat, RangesToFormat, Filename, &Status)) { + if (R.getOffset() + R.getLength() <= FormatLimit) // Before limit. + cantFail(FormattingChanges.add(R)); + else if(R.getOffset() < FormatLimit) { // Overlaps limit. + if (R.getReplacementText().empty()) // Deletions are easy to handle. + cantFail(FormattingChanges.add(tooling::Replacement(Filename, + R.getOffset(), FormatLimit - R.getOffset(), ""))); + else + // Hopefully won't happen in practice? + elog("Incremental clang-format edit overlapping cursor @ {0}!\n{1}", + Cursor, CodeToFormat); + } + } + if (!Status.FormatComplete) + vlog("Incremental format incomplete at line {0}", Status.Line); + + // Now we are ready to compose the changes relative to OriginalCode. + // edits -> insert placeholder -> format -> remove placeholder. + // We must express insert/remove as Replacements. + tooling::Replacements InsertCursorPlaceholder( + tooling::Replacement(Filename, Cursor, 0, Incremental.CursorPlaceholder)); + unsigned FormattedCursorStart = + FormattingChanges.getShiftedCodePosition(Cursor), + FormattedCursorEnd = FormattingChanges.getShiftedCodePosition( + Cursor + Incremental.CursorPlaceholder.size()); + tooling::Replacements RemoveCursorPlaceholder( + tooling::Replacement(Filename, FormattedCursorStart, + FormattedCursorEnd - FormattedCursorStart, "")); + + // We can't simply merge() and return: tooling::Replacements will combine + // adjacent edits left and right of the cursor. This gives the right source + // code, but loses information about where the cursor is! 
+ // Fortunately, none of the individual passes lose information, so: + // - we use merge() to compute the final Replacements + // - we chain getShiftedCodePosition() to compute final cursor position + // - we split the final Replacements at the cursor position, so that + // each Replacement lies either before or after the cursor. + tooling::Replacements Final; + unsigned FinalCursor = OriginalCursor; +#ifndef NDEBUG + std::string FinalCode = OriginalCode; + dlog("Initial code: {0}", FinalCode); +#endif + for (auto Pass : + std::vector<std::pair<const char *, const tooling::Replacements *>>{ + {"Pre-formatting changes", &Incremental.Changes}, + {"Insert placeholder", &InsertCursorPlaceholder}, + {"clang-format", &FormattingChanges}, + {"Remove placeholder", &RemoveCursorPlaceholder}}) { + Final = Final.merge(*Pass.second); + FinalCursor = Pass.second->getShiftedCodePosition(FinalCursor); +#ifndef NDEBUG + FinalCode = + cantFail(tooling::applyAllReplacements(FinalCode, *Pass.second)); + dlog("After {0}:\n{1}^{2}", Pass.first, + StringRef(FinalCode).take_front(FinalCursor), + StringRef(FinalCode).drop_front(FinalCursor)); +#endif + } + return split(Final, OriginalCursor, FinalCursor); +} + +unsigned +transformCursorPosition(unsigned Offset, + const std::vector<tooling::Replacement> &Replacements) { + unsigned OriginalOffset = Offset; + for (const auto &R : Replacements) { + if (R.getOffset() + R.getLength() <= OriginalOffset) { + // Replacement is before cursor. + Offset += R.getReplacementText().size(); + Offset -= R.getLength(); + } else if (R.getOffset() < OriginalOffset) { + // Replacement overlaps cursor. + // Preserve position within replacement text, as far as possible. + unsigned PositionWithinReplacement = Offset - R.getOffset(); + if (PositionWithinReplacement > R.getReplacementText().size()) { + Offset += R.getReplacementText().size(); + Offset -= PositionWithinReplacement; + } + } else { + // Replacement after cursor. 
+ break; // Replacements are sorted, the rest are also after the cursor. + } + } + return Offset; +} + +} // namespace clangd +} // namespace clang |

