//===--- FormattedString.cpp --------------------------------*- C++-*------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "FormattedString.h" #include "clang/Basic/CharInfo.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/raw_ostream.h" #include #include #include #include namespace clang { namespace clangd { namespace markup { namespace { /// Escape a markdown text block. Ensures the punctuation will not introduce /// any of the markdown constructs. std::string renderText(llvm::StringRef Input) { // Escaping ASCII punctiation ensures we can't start a markdown construct. constexpr llvm::StringLiteral Punctuation = R"txt(!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~)txt"; std::string R; for (size_t From = 0; From < Input.size();) { size_t Next = Input.find_first_of(Punctuation, From); R += Input.substr(From, Next - From); if (Next == llvm::StringRef::npos) break; R += "\\"; R += Input[Next]; From = Next + 1; } return R; } /// Renders \p Input as an inline block of code in markdown. The returned value /// is surrounded by backticks and the inner contents are properly escaped. std::string renderInlineBlock(llvm::StringRef Input) { std::string R; // Double all backticks to make sure we don't close the inline block early. for (size_t From = 0; From < Input.size();) { size_t Next = Input.find("`", From); R += Input.substr(From, Next - From); if (Next == llvm::StringRef::npos) break; R += "``"; // double the found backtick. From = Next + 1; } // If results starts with a backtick, add spaces on both sides. The spaces // are ignored by markdown renderers. if (llvm::StringRef(R).startswith("`") || llvm::StringRef(R).endswith("`")) return "` " + std::move(R) + " `"; // Markdown render should ignore first and last space if both are there. We // add an extra pair of spaces in that case to make sure we render what the // user intended. if (llvm::StringRef(R).startswith(" ") && llvm::StringRef(R).endswith(" ")) return "` " + std::move(R) + " `"; return "`" + std::move(R) + "`"; } /// Get marker required for \p Input to represent a markdown codeblock. It /// consists of at least 3 backticks(`). Although markdown also allows to use /// tilde(~) for code blocks, they are never used. std::string getMarkerForCodeBlock(llvm::StringRef Input) { // Count the maximum number of consecutive backticks in \p Input. We need to // start and end the code block with more. unsigned MaxBackticks = 0; unsigned Backticks = 0; for (char C : Input) { if (C == '`') { ++Backticks; continue; } MaxBackticks = std::max(MaxBackticks, Backticks); Backticks = 0; } MaxBackticks = std::max(Backticks, MaxBackticks); // Use the corresponding number of backticks to start and end a code block. return std::string(/*Repeat=*/std::max(3u, MaxBackticks + 1), '`'); } // Trims the input and concatanates whitespace blocks into a single ` `. std::string canonicalizeSpaces(std::string Input) { // Goes over the string and preserves only a single ` ` for any whitespace // chunks, the rest is moved to the end of the string and dropped in the end. auto WritePtr = Input.begin(); llvm::SmallVector Words; llvm::SplitString(Input, Words); if (Words.empty()) return ""; // Go over each word and and add it to the string. for (llvm::StringRef Word : Words) { llvm::for_each(Word, [&WritePtr](const char C) { *WritePtr++ = C; }); // Separate from next block. *WritePtr++ = ' '; } // Get rid of extra spaces, -1 is for the trailing space introduced with last // word. Input.resize(WritePtr - Input.begin() - 1); return Input; } std::string renderBlocks(llvm::ArrayRef> Children, void (Block::*RenderFunc)(llvm::raw_ostream &) const) { std::string R; llvm::raw_string_ostream OS(R); for (auto &C : Children) ((*C).*RenderFunc)(OS); return llvm::StringRef(OS.str()).trim().str(); } // Puts a vertical space between blocks inside a document. class Spacer : public Block { public: void renderMarkdown(llvm::raw_ostream &OS) const override { OS << '\n'; } void renderPlainText(llvm::raw_ostream &OS) const override { OS << '\n'; } }; class CodeBlock : public Block { public: void renderMarkdown(llvm::raw_ostream &OS) const override { std::string Marker = getMarkerForCodeBlock(Contents); // No need to pad from previous blocks, as they should end with a new line. OS << Marker << Language << '\n' << Contents << '\n' << Marker << '\n'; } void renderPlainText(llvm::raw_ostream &OS) const override { // In plaintext we want one empty line before and after codeblocks. OS << '\n' << Contents << "\n\n"; } CodeBlock(std::string Contents, std::string Language) : Contents(std::move(Contents)), Language(std::move(Language)) {} private: std::string Contents; std::string Language; }; } // namespace std::string Block::asMarkdown() const { std::string R; llvm::raw_string_ostream OS(R); renderMarkdown(OS); return llvm::StringRef(OS.str()).trim().str(); } std::string Block::asPlainText() const { std::string R; llvm::raw_string_ostream OS(R); renderPlainText(OS); return llvm::StringRef(OS.str()).trim().str(); } void Paragraph::renderMarkdown(llvm::raw_ostream &OS) const { llvm::StringRef Sep = ""; for (auto &C : Chunks) { OS << Sep; switch (C.Kind) { case Chunk::PlainText: OS << renderText(C.Contents); break; case Chunk::InlineCode: OS << renderInlineBlock(C.Contents); break; } Sep = " "; } // Paragraphs are translated into markdown lines, not markdown paragraphs. // Therefore it only has a single linebreak afterwards. OS << '\n'; } void Paragraph::renderPlainText(llvm::raw_ostream &OS) const { llvm::StringRef Sep = ""; for (auto &C : Chunks) { OS << Sep << C.Contents; Sep = " "; } OS << '\n'; } Paragraph &Paragraph::appendText(std::string Text) { Text = canonicalizeSpaces(std::move(Text)); if (Text.empty()) return *this; Chunks.emplace_back(); Chunk &C = Chunks.back(); C.Contents = std::move(Text); C.Kind = Chunk::PlainText; return *this; } Paragraph &Paragraph::appendCode(std::string Code) { Code = canonicalizeSpaces(std::move(Code)); if (Code.empty()) return *this; Chunks.emplace_back(); Chunk &C = Chunks.back(); C.Contents = std::move(Code); C.Kind = Chunk::InlineCode; return *this; } Paragraph &Document::addParagraph() { Children.push_back(std::make_unique()); return *static_cast(Children.back().get()); } void Document::addSpacer() { Children.push_back(std::make_unique()); } void Document::addCodeBlock(std::string Code, std::string Language) { Children.emplace_back( std::make_unique(std::move(Code), std::move(Language))); } std::string Document::asMarkdown() const { return renderBlocks(Children, &Block::renderMarkdown); } std::string Document::asPlainText() const { return renderBlocks(Children, &Block::renderPlainText); } } // namespace markup } // namespace clangd } // namespace clang