diff options
Diffstat (limited to 'clang-tools-extra/clangd')
-rw-r--r-- | clang-tools-extra/clangd/ClangdLSPServer.cpp | 48 | ||||
-rw-r--r-- | clang-tools-extra/clangd/ClangdLSPServer.h | 2 | ||||
-rw-r--r-- | clang-tools-extra/clangd/Protocol.cpp | 27 | ||||
-rw-r--r-- | clang-tools-extra/clangd/Protocol.h | 15 | ||||
-rw-r--r-- | clang-tools-extra/clangd/SourceCode.cpp | 42 | ||||
-rw-r--r-- | clang-tools-extra/clangd/SourceCode.h | 7 | ||||
-rw-r--r-- | clang-tools-extra/clangd/index/IndexAction.cpp | 1 | ||||
-rw-r--r-- | clang-tools-extra/clangd/index/SymbolLocation.h | 10 | ||||
-rw-r--r-- | clang-tools-extra/clangd/tool/ClangdMain.cpp | 18 |
9 files changed, 149 insertions, 21 deletions
diff --git a/clang-tools-extra/clangd/ClangdLSPServer.cpp b/clang-tools-extra/clangd/ClangdLSPServer.cpp index a72e8c7cd9b..78466f4f3ad 100644 --- a/clang-tools-extra/clangd/ClangdLSPServer.cpp +++ b/clang-tools-extra/clangd/ClangdLSPServer.cpp @@ -13,6 +13,7 @@ #include "Trace.h" #include "URI.h" #include "clang/Tooling/Core/Replacement.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/ScopeExit.h" #include "llvm/Support/Errc.h" #include "llvm/Support/Error.h" @@ -93,6 +94,7 @@ public: MessageHandler(ClangdLSPServer &Server) : Server(Server) {} bool onNotify(llvm::StringRef Method, llvm::json::Value Params) override { + WithContext HandlerContext(handlerContext()); log("<-- {0}", Method); if (Method == "exit") return false; @@ -109,6 +111,7 @@ public: bool onCall(llvm::StringRef Method, llvm::json::Value Params, llvm::json::Value ID) override { + WithContext HandlerContext(handlerContext()); // Calls can be canceled by the client. Add cancellation context. WithContext WithCancel(cancelableRequestContext(ID)); trace::Span Tracer(Method); @@ -129,6 +132,7 @@ public: bool onReply(llvm::json::Value ID, llvm::Expected<llvm::json::Value> Result) override { + WithContext HandlerContext(handlerContext()); // We ignore replies, just log them. if (Result) log("<-- reply({0})", ID); @@ -259,6 +263,13 @@ private: if (It != RequestCancelers.end()) It->second.first(); // Invoke the canceler. 
} + + Context handlerContext() const { + return Context::current().derive( + kCurrentOffsetEncoding, + Server.NegotiatedOffsetEncoding.getValueOr(OffsetEncoding::UTF16)); + } + // We run cancelable requests in a context that does two things: // - allows cancellation using RequestCancelers[ID] // - cleans up the entry in RequestCancelers when it's no longer needed @@ -302,6 +313,20 @@ void ClangdLSPServer::notify(llvm::StringRef Method, llvm::json::Value Params) { void ClangdLSPServer::onInitialize(const InitializeParams &Params, Callback<llvm::json::Value> Reply) { + // Determine character encoding first as it affects constructed ClangdServer. + if (Params.capabilities.offsetEncoding && !NegotiatedOffsetEncoding) { + NegotiatedOffsetEncoding = OffsetEncoding::UTF16; // fallback + for (OffsetEncoding Supported : *Params.capabilities.offsetEncoding) + if (Supported != OffsetEncoding::UnsupportedEncoding) { + NegotiatedOffsetEncoding = Supported; + break; + } + } + llvm::Optional<WithContextValue> WithOffsetEncoding; + if (NegotiatedOffsetEncoding) + WithOffsetEncoding.emplace(kCurrentOffsetEncoding, + *NegotiatedOffsetEncoding); + if (Params.rootUri && *Params.rootUri) ClangdServerOpts.WorkspaceRoot = Params.rootUri->file(); else if (Params.rootPath && !Params.rootPath->empty()) @@ -331,7 +356,7 @@ void ClangdLSPServer::onInitialize(const InitializeParams &Params, SupportsHierarchicalDocumentSymbol = Params.capabilities.HierarchicalDocumentSymbol; SupportFileStatus = Params.initializationOptions.FileStatus; - Reply(llvm::json::Object{ + llvm::json::Object Result{ {{"capabilities", llvm::json::Object{ {"textDocumentSync", (int)TextDocumentSyncKind::Incremental}, @@ -369,7 +394,10 @@ void ClangdLSPServer::onInitialize(const InitializeParams &Params, ExecuteCommandParams::CLANGD_APPLY_TWEAK}}, }}, {"typeHierarchyProvider", true}, - }}}}); + }}}}; + if (NegotiatedOffsetEncoding) + Result["offsetEncoding"] = *NegotiatedOffsetEncoding; + Reply(std::move(Result)); } void 
ClangdLSPServer::onShutdown(const ShutdownParams &Params, @@ -875,19 +903,19 @@ void ClangdLSPServer::onSymbolInfo(const TextDocumentPositionParams &Params, std::move(Reply)); } -ClangdLSPServer::ClangdLSPServer(class Transport &Transp, - const FileSystemProvider &FSProvider, - const clangd::CodeCompleteOptions &CCOpts, - llvm::Optional<Path> CompileCommandsDir, - bool UseDirBasedCDB, - const ClangdServer::Options &Opts) +ClangdLSPServer::ClangdLSPServer( + class Transport &Transp, const FileSystemProvider &FSProvider, + const clangd::CodeCompleteOptions &CCOpts, + llvm::Optional<Path> CompileCommandsDir, bool UseDirBasedCDB, + llvm::Optional<OffsetEncoding> ForcedOffsetEncoding, + const ClangdServer::Options &Opts) : Transp(Transp), MsgHandler(new MessageHandler(*this)), FSProvider(FSProvider), CCOpts(CCOpts), SupportedSymbolKinds(defaultSymbolKinds()), SupportedCompletionItemKinds(defaultCompletionItemKinds()), UseDirBasedCDB(UseDirBasedCDB), - CompileCommandsDir(std::move(CompileCommandsDir)), - ClangdServerOpts(Opts) { + CompileCommandsDir(std::move(CompileCommandsDir)), ClangdServerOpts(Opts), + NegotiatedOffsetEncoding(ForcedOffsetEncoding) { // clang-format off MsgHandler->bind("initialize", &ClangdLSPServer::onInitialize); MsgHandler->bind("shutdown", &ClangdLSPServer::onShutdown); diff --git a/clang-tools-extra/clangd/ClangdLSPServer.h b/clang-tools-extra/clangd/ClangdLSPServer.h index c785f47dbf0..d30c47e6a47 100644 --- a/clang-tools-extra/clangd/ClangdLSPServer.h +++ b/clang-tools-extra/clangd/ClangdLSPServer.h @@ -40,6 +40,7 @@ public: ClangdLSPServer(Transport &Transp, const FileSystemProvider &FSProvider, const clangd::CodeCompleteOptions &CCOpts, llvm::Optional<Path> CompileCommandsDir, bool UseDirBasedCDB, + llvm::Optional<OffsetEncoding> ForcedOffsetEncoding, const ClangdServer::Options &Opts); ~ClangdLSPServer(); @@ -165,6 +166,7 @@ private: // It is destroyed before run() returns, to ensure worker threads exit. 
ClangdServer::Options ClangdServerOpts; llvm::Optional<ClangdServer> Server; + llvm::Optional<OffsetEncoding> NegotiatedOffsetEncoding; }; } // namespace clangd } // namespace clang diff --git a/clang-tools-extra/clangd/Protocol.cpp b/clang-tools-extra/clangd/Protocol.cpp index dfd130a77a5..e11c621aaa3 100644 --- a/clang-tools-extra/clangd/Protocol.cpp +++ b/clang-tools-extra/clangd/Protocol.cpp @@ -16,6 +16,7 @@ #include "clang/Basic/LLVM.h" #include "llvm/ADT/Hashing.h" #include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/Support/Format.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/JSON.h" @@ -311,6 +312,11 @@ bool fromJSON(const llvm::json::Value &Params, ClientCapabilities &R) { } } } + if (auto *OffsetEncoding = O->get("offsetEncoding")) { + R.offsetEncoding.emplace(); + if (!fromJSON(*OffsetEncoding, *R.offsetEncoding)) + return false; + } return true; } @@ -932,5 +938,26 @@ bool fromJSON(const llvm::json::Value &Params, ReferenceParams &R) { return fromJSON(Params, Base); } +llvm::json::Value toJSON(const OffsetEncoding &OE) { + switch (OE) { + case OffsetEncoding::UTF8: + return "utf-8"; + case OffsetEncoding::UTF16: + return "utf-16"; + case OffsetEncoding::UnsupportedEncoding: + return "unknown"; + } +} +bool fromJSON(const llvm::json::Value &V, OffsetEncoding &OE) { + auto Str = V.getAsString(); + if (!Str) + return false; + OE = llvm::StringSwitch<OffsetEncoding>(*Str) + .Case("utf-8", OffsetEncoding::UTF8) + .Case("utf-16", OffsetEncoding::UTF16) + .Default(OffsetEncoding::UnsupportedEncoding); + return true; +} + } // namespace clangd } // namespace clang diff --git a/clang-tools-extra/clangd/Protocol.h b/clang-tools-extra/clangd/Protocol.h index 468ead8df17..7cf414123f1 100644 --- a/clang-tools-extra/clangd/Protocol.h +++ b/clang-tools-extra/clangd/Protocol.h @@ -338,6 +338,18 @@ SymbolKind adjustKindToCapability(SymbolKind Kind, // https://github.com/Microsoft/language-server-protocol/issues/344 
SymbolKind indexSymbolKindToSymbolKind(index::SymbolKind Kind); +// Determines the encoding used to measure offsets and lengths of source in LSP. +enum class OffsetEncoding { + // Any string is legal on the wire. Unrecognized encodings parse as this. + UnsupportedEncoding, + // Length counts code units of UTF-16 encoded text. (Standard LSP behavior). + UTF16, + // Length counts bytes of UTF-8 encoded text. (Clangd extension). + UTF8, +}; +llvm::json::Value toJSON(const OffsetEncoding &); +bool fromJSON(const llvm::json::Value &, OffsetEncoding &); + // This struct doesn't mirror LSP! // The protocol defines deeply nested structures for client capabilities. // Instead of mapping them all, this just parses out the bits we care about. @@ -369,6 +381,9 @@ struct ClientCapabilities { /// Client supports CodeAction return value for textDocument/codeAction. /// textDocument.codeAction.codeActionLiteralSupport. bool CodeActionStructure = false; + + /// Supported encodings for LSP character offsets. (clangd extension). + llvm::Optional<std::vector<OffsetEncoding>> offsetEncoding; }; bool fromJSON(const llvm::json::Value &, ClientCapabilities &); diff --git a/clang-tools-extra/clangd/SourceCode.cpp b/clang-tools-extra/clangd/SourceCode.cpp index 4366f36072e..cdafaf9636c 100644 --- a/clang-tools-extra/clangd/SourceCode.cpp +++ b/clang-tools-extra/clangd/SourceCode.cpp @@ -7,7 +7,9 @@ //===----------------------------------------------------------------------===// #include "SourceCode.h" +#include "Context.h" #include "Logger.h" +#include "Protocol.h" #include "clang/AST/ASTContext.h" #include "clang/Basic/SourceManager.h" #include "clang/Lex/Lexer.h" @@ -67,8 +69,23 @@ static size_t measureUTF16(llvm::StringRef U8, int U16Units, bool &Valid) { return std::min(Result, U8.size()); } +Key<OffsetEncoding> kCurrentOffsetEncoding; +static bool useUTF16ForLSP() { + auto *Enc = Context::current().get(kCurrentOffsetEncoding); + switch (Enc ? 
*Enc : OffsetEncoding::UTF16) { + case OffsetEncoding::UTF16: + return true; + case OffsetEncoding::UTF8: + return false; + case OffsetEncoding::UnsupportedEncoding: + llvm_unreachable("cannot use an unsupported encoding"); + } +} + // Like most strings in clangd, the input is UTF-8 encoded. size_t lspLength(llvm::StringRef Code) { + if (!useUTF16ForLSP()) + return Code.size(); // A codepoint takes two UTF-16 code unit if it's astral (outside BMP). // Astral codepoints are encoded as 4 bytes in UTF-8, starting with 11110xxx. size_t Count = 0; @@ -98,14 +115,25 @@ llvm::Expected<size_t> positionToOffset(llvm::StringRef Code, Position P, llvm::errc::invalid_argument); StartOfLine = NextNL + 1; } - - size_t NextNL = Code.find('\n', StartOfLine); - if (NextNL == llvm::StringRef::npos) - NextNL = Code.size(); - + StringRef Line = + Code.substr(StartOfLine).take_until([](char C) { return C == '\n'; }); + + if (!useUTF16ForLSP()) { + // Bounds-checking only. + if (P.character > int(Line.size())) { + if (AllowColumnsBeyondLineLength) + return StartOfLine + Line.size(); + else + return llvm::make_error<llvm::StringError>( + llvm::formatv("UTF-8 offset {0} overruns line {1}", P.character, + P.line), + llvm::errc::invalid_argument); + } + return StartOfLine + P.character; + } + // P.character is in UTF-16 code units, so we have to transcode. 
bool Valid; - size_t ByteOffsetInLine = measureUTF16( - Code.substr(StartOfLine, NextNL - StartOfLine), P.character, Valid); + size_t ByteOffsetInLine = measureUTF16(Line, P.character, Valid); if (!Valid && !AllowColumnsBeyondLineLength) return llvm::make_error<llvm::StringError>( llvm::formatv("UTF-16 offset {0} is invalid for line {1}", P.character, diff --git a/clang-tools-extra/clangd/SourceCode.h b/clang-tools-extra/clangd/SourceCode.h index e6ce8c3be5d..55289e00719 100644 --- a/clang-tools-extra/clangd/SourceCode.h +++ b/clang-tools-extra/clangd/SourceCode.h @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_SOURCECODE_H #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_SOURCECODE_H +#include "Context.h" #include "Protocol.h" #include "clang/Basic/Diagnostic.h" #include "clang/Basic/LangOptions.h" @@ -34,8 +35,14 @@ using FileDigest = decltype(llvm::SHA1::hash({})); FileDigest digest(StringRef Content); Optional<FileDigest> digestFile(const SourceManager &SM, FileID FID); +// This context variable controls the behavior of functions in this file +// that convert between LSP offsets and native clang byte offsets. +// If not set, defaults to UTF-16 for backwards-compatibility. +extern Key<OffsetEncoding> kCurrentOffsetEncoding; + // Counts the number of UTF-16 code units needed to represent a string (LSP // specifies string lengths in UTF-16 code units). +// Use of UTF-16 may be overridden by kCurrentOffsetEncoding. size_t lspLength(StringRef Code); /// Turn a [line, column] pair into an offset in Code. 
diff --git a/clang-tools-extra/clangd/index/IndexAction.cpp b/clang-tools-extra/clangd/index/IndexAction.cpp index 61db161bd5b..bda2cf44a58 100644 --- a/clang-tools-extra/clangd/index/IndexAction.cpp +++ b/clang-tools-extra/clangd/index/IndexAction.cpp @@ -9,7 +9,6 @@ #include "IndexAction.h" #include "index/SymbolOrigin.h" #include "clang/Frontend/CompilerInstance.h" -#include "clang/Index/IndexDataConsumer.h" #include "clang/Index/IndexingAction.h" #include "clang/Tooling/Tooling.h" diff --git a/clang-tools-extra/clangd/index/SymbolLocation.h b/clang-tools-extra/clangd/index/SymbolLocation.h index f1b7ffd5e0b..9b503546a9f 100644 --- a/clang-tools-extra/clangd/index/SymbolLocation.h +++ b/clang-tools-extra/clangd/index/SymbolLocation.h @@ -20,6 +20,13 @@ struct SymbolLocation { // Specify a position (Line, Column) of symbol. Using Line/Column allows us to // build LSP responses without reading the file content. // + // clangd uses the following definitions, which differ slightly from LSP: + // - Line is the number of newline characters (\n) before the point. + // - Column is (by default) the number of UTF-16 code units between the last \n + // (or start of file) and the point. + // If the `offsetEncoding` protocol extension is used to negotiate UTF-8, + // then it is instead the number of *bytes* since the last \n. + // // Position is encoded into 32 bits to save space. // If Line/Column overflow, the value will be their maximum value. struct Position { @@ -37,8 +44,7 @@ struct SymbolLocation { static constexpr uint32_t MaxColumn = (1 << 12) - 1; private: - uint32_t Line : 20; // 0-based - // Using UTF-16 code units. 
+ uint32_t Line : 20; // 0-based uint32_t Column : 12; // 0-based }; diff --git a/clang-tools-extra/clangd/tool/ClangdMain.cpp b/clang-tools-extra/clangd/tool/ClangdMain.cpp index 53f7264e565..979c2d078a3 100644 --- a/clang-tools-extra/clangd/tool/ClangdMain.cpp +++ b/clang-tools-extra/clangd/tool/ClangdMain.cpp @@ -9,10 +9,12 @@ #include "Features.inc" #include "ClangdLSPServer.h" #include "Path.h" +#include "Protocol.h" #include "Trace.h" #include "Transport.h" #include "index/Serialization.h" #include "clang/Basic/Version.h" +#include "llvm/ADT/Optional.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Path.h" @@ -219,6 +221,16 @@ static llvm::cl::opt<bool> SuggestMissingIncludes( "includes using index."), llvm::cl::init(true)); +static llvm::cl::opt<OffsetEncoding> ForceOffsetEncoding( + "offset-encoding", + llvm::cl::desc("Force the offsetEncoding used for character positions. " + "This bypasses negotiation via client capabilities."), + llvm::cl::values(clEnumValN(OffsetEncoding::UTF8, "utf-8", + "Offsets are in UTF-8 bytes"), + clEnumValN(OffsetEncoding::UTF16, "utf-16", + "Offsets are in UTF-16 code units")), + llvm::cl::init(OffsetEncoding::UnsupportedEncoding)); + namespace { /// \brief Supports a test URI scheme with relaxed constraints for lit tests. @@ -458,9 +470,13 @@ int main(int argc, char *argv[]) { } Opts.ClangTidyOptProvider = ClangTidyOptProvider.get(); Opts.SuggestMissingIncludes = SuggestMissingIncludes; + llvm::Optional<OffsetEncoding> OffsetEncodingFromFlag; + if (ForceOffsetEncoding != OffsetEncoding::UnsupportedEncoding) + OffsetEncodingFromFlag = ForceOffsetEncoding; ClangdLSPServer LSPServer( *TransportLayer, FSProvider, CCOpts, CompileCommandsDirPath, - /*UseDirBasedCDB=*/CompileArgsFrom == FilesystemCompileArgs, Opts); + /*UseDirBasedCDB=*/CompileArgsFrom == FilesystemCompileArgs, + OffsetEncodingFromFlag, Opts); llvm::set_thread_name("clangd.main"); return LSPServer.run() ? 
0 : static_cast<int>(ErrorResultCode::NoShutdownRequest); |