diff options
author | Eric Liu <ioeric@google.com> | 2018-05-14 20:17:53 +0000 |
---|---|---|
committer | Eric Liu <ioeric@google.com> | 2018-05-14 20:17:53 +0000 |
commit | 7129e63bcce96529291b3ffbe596f7f8588bdcc4 (patch) | |
tree | 30553e4cf06fa6ddb8060b15f7d5d83f5163f5d2 /clang/lib/Format/Format.cpp | |
parent | d699da427a4c49bc01c9ee3d031c7885383568f9 (diff) | |
download | bcm5719-llvm-7129e63bcce96529291b3ffbe596f7f8588bdcc4.tar.gz bcm5719-llvm-7129e63bcce96529291b3ffbe596f7f8588bdcc4.zip |
[Tooling] Pull #include manipulation code from clangFormat into libToolingCore.
Summary: Also pull #include related style out of FormatStyle as tooling::IncludeStyle.
Reviewers: ilya-biryukov
Reviewed By: ilya-biryukov
Subscribers: klimek, mgorny, cfe-commits, djasper
Differential Revision: https://reviews.llvm.org/D46496
llvm-svn: 332287
Diffstat (limited to 'clang/lib/Format/Format.cpp')
-rw-r--r-- | clang/lib/Format/Format.cpp | 398 |
1 files changed, 5 insertions, 393 deletions
diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp index b2e05ad2464..53db932a5a0 100644 --- a/clang/lib/Format/Format.cpp +++ b/clang/lib/Format/Format.cpp @@ -31,6 +31,7 @@ #include "clang/Basic/SourceManager.h" #include "clang/Basic/VirtualFileSystem.h" #include "clang/Lex/Lexer.h" +#include "clang/Tooling/Core/HeaderIncludes.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Allocator.h" @@ -1676,62 +1677,6 @@ static void sortCppIncludes(const FormatStyle &Style, namespace { -// This class manages priorities of #include categories and calculates -// priorities for headers. -class IncludeCategoryManager { -public: - IncludeCategoryManager(const FormatStyle &Style, StringRef FileName) - : Style(Style), FileName(FileName) { - FileStem = llvm::sys::path::stem(FileName); - for (const auto &Category : Style.IncludeStyle.IncludeCategories) - CategoryRegexs.emplace_back(Category.Regex, llvm::Regex::IgnoreCase); - IsMainFile = FileName.endswith(".c") || FileName.endswith(".cc") || - FileName.endswith(".cpp") || FileName.endswith(".c++") || - FileName.endswith(".cxx") || FileName.endswith(".m") || - FileName.endswith(".mm"); - } - - // Returns the priority of the category which \p IncludeName belongs to. - // If \p CheckMainHeader is true and \p IncludeName is a main header, returns - // 0. Otherwise, returns the priority of the matching category or INT_MAX. - // NOTE: this API is not thread-safe! - int getIncludePriority(StringRef IncludeName, bool CheckMainHeader) const { - int Ret = INT_MAX; - for (unsigned i = 0, e = CategoryRegexs.size(); i != e; ++i) - if (CategoryRegexs[i].match(IncludeName)) { - Ret = Style.IncludeStyle.IncludeCategories[i].Priority; - break; - } - if (CheckMainHeader && IsMainFile && Ret > 0 && isMainHeader(IncludeName)) - Ret = 0; - return Ret; - } - -private: - bool isMainHeader(StringRef IncludeName) const { - if (!IncludeName.startswith("\"")) - return false; - StringRef HeaderStem = - llvm::sys::path::stem(IncludeName.drop_front(1).drop_back(1)); - if (FileStem.startswith(HeaderStem) || - FileStem.startswith_lower(HeaderStem)) { - llvm::Regex MainIncludeRegex( - (HeaderStem + Style.IncludeStyle.IncludeIsMainRegex).str(), - llvm::Regex::IgnoreCase); - if (MainIncludeRegex.match(FileStem)) - return true; - } - return false; - } - - const FormatStyle &Style; - bool IsMainFile; - StringRef FileName; - StringRef FileStem; - // Regex is not thread-safe. - mutable SmallVector<llvm::Regex, 4> CategoryRegexs; -}; - const char IncludeRegexPattern[] = R"(^[\t\ ]*#[\t\ ]*(import|include)[^"<]*(["<][^">]*[">]))"; @@ -1755,7 +1700,7 @@ tooling::Replacements sortCppIncludes(const FormatStyle &Style, StringRef Code, // // FIXME: Do some sanity checking, e.g. edit distance of the base name, to fix // cases where the first #include is unlikely to be the main header. - IncludeCategoryManager Categories(Style, FileName); + tooling::IncludeCategoryManager Categories(Style.IncludeStyle, FileName); bool FirstIncludeBlock = true; bool MainIncludeFound = false; bool FormattingOff = false; @@ -1885,339 +1830,6 @@ inline bool isHeaderDeletion(const tooling::Replacement &Replace) { return Replace.getOffset() == UINT_MAX && Replace.getLength() == 1; } -// Returns the offset after skipping a sequence of tokens, matched by \p -// GetOffsetAfterSequence, from the start of the code. -// \p GetOffsetAfterSequence should be a function that matches a sequence of -// tokens and returns an offset after the sequence. -unsigned getOffsetAfterTokenSequence( - StringRef FileName, StringRef Code, const FormatStyle &Style, - llvm::function_ref<unsigned(const SourceManager &, Lexer &, Token &)> - GetOffsetAfterSequence) { - Environment Env(Code, FileName, /*Ranges=*/{}); - const SourceManager &SourceMgr = Env.getSourceManager(); - Lexer Lex(Env.getFileID(), SourceMgr.getBuffer(Env.getFileID()), SourceMgr, - getFormattingLangOpts(Style)); - Token Tok; - // Get the first token. - Lex.LexFromRawLexer(Tok); - return GetOffsetAfterSequence(SourceMgr, Lex, Tok); -} - -// Check if a sequence of tokens is like "#<Name> <raw_identifier>". If it is, -// \p Tok will be the token after this directive; otherwise, it can be any token -// after the given \p Tok (including \p Tok). -bool checkAndConsumeDirectiveWithName(Lexer &Lex, StringRef Name, Token &Tok) { - bool Matched = Tok.is(tok::hash) && !Lex.LexFromRawLexer(Tok) && - Tok.is(tok::raw_identifier) && - Tok.getRawIdentifier() == Name && !Lex.LexFromRawLexer(Tok) && - Tok.is(tok::raw_identifier); - if (Matched) - Lex.LexFromRawLexer(Tok); - return Matched; -} - -void skipComments(Lexer &Lex, Token &Tok) { - while (Tok.is(tok::comment)) - if (Lex.LexFromRawLexer(Tok)) - return; -} - -// Returns the offset after header guard directives and any comments -// before/after header guards. If no header guard presents in the code, this -// will returns the offset after skipping all comments from the start of the -// code. -unsigned getOffsetAfterHeaderGuardsAndComments(StringRef FileName, - StringRef Code, - const FormatStyle &Style) { - return getOffsetAfterTokenSequence( - FileName, Code, Style, - [](const SourceManager &SM, Lexer &Lex, Token Tok) { - skipComments(Lex, Tok); - unsigned InitialOffset = SM.getFileOffset(Tok.getLocation()); - if (checkAndConsumeDirectiveWithName(Lex, "ifndef", Tok)) { - skipComments(Lex, Tok); - if (checkAndConsumeDirectiveWithName(Lex, "define", Tok)) - return SM.getFileOffset(Tok.getLocation()); - } - return InitialOffset; - }); -} - -// Check if a sequence of tokens is like -// "#include ("header.h" | <header.h>)". -// If it is, \p Tok will be the token after this directive; otherwise, it can be -// any token after the given \p Tok (including \p Tok). -bool checkAndConsumeInclusiveDirective(Lexer &Lex, Token &Tok) { - auto Matched = [&]() { - Lex.LexFromRawLexer(Tok); - return true; - }; - if (Tok.is(tok::hash) && !Lex.LexFromRawLexer(Tok) && - Tok.is(tok::raw_identifier) && Tok.getRawIdentifier() == "include") { - if (Lex.LexFromRawLexer(Tok)) - return false; - if (Tok.is(tok::string_literal)) - return Matched(); - if (Tok.is(tok::less)) { - while (!Lex.LexFromRawLexer(Tok) && Tok.isNot(tok::greater)) { - } - if (Tok.is(tok::greater)) - return Matched(); - } - } - return false; -} - -// Returns the offset of the last #include directive after which a new -// #include can be inserted. This ignores #include's after the #include block(s) -// in the beginning of a file to avoid inserting headers into code sections -// where new #include's should not be added by default. -// These code sections include: -// - raw string literals (containing #include). -// - #if blocks. -// - Special #include's among declarations (e.g. functions). -// -// If no #include after which a new #include can be inserted, this returns the -// offset after skipping all comments from the start of the code. -// Inserting after an #include is not allowed if it comes after code that is not -// #include (e.g. pre-processing directive that is not #include, declarations). -unsigned getMaxHeaderInsertionOffset(StringRef FileName, StringRef Code, - const FormatStyle &Style) { - return getOffsetAfterTokenSequence( - FileName, Code, Style, - [](const SourceManager &SM, Lexer &Lex, Token Tok) { - skipComments(Lex, Tok); - unsigned MaxOffset = SM.getFileOffset(Tok.getLocation()); - while (checkAndConsumeInclusiveDirective(Lex, Tok)) - MaxOffset = SM.getFileOffset(Tok.getLocation()); - return MaxOffset; - }); -} - -/// Generates replacements for inserting or deleting #include directives in a -/// file. -class HeaderIncludes { -public: - HeaderIncludes(llvm::StringRef FileName, llvm::StringRef Code, - const FormatStyle &Style); - - /// Inserts an #include directive of \p Header into the code. If \p IsAngled - /// is true, \p Header will be quoted with <> in the directive; otherwise, it - /// will be quoted with "". - /// - /// When searching for points to insert new header, this ignores #include's - /// after the #include block(s) in the beginning of a file to avoid inserting - /// headers into code sections where new #include's should not be added by - /// default. These code sections include: - /// - raw string literals (containing #include). - /// - #if blocks. - /// - Special #include's among declarations (e.g. functions). - /// - /// Returns a replacement that inserts the new header into a suitable #include - /// block of the same category. This respects the order of the existing - /// #includes in the block; if the existing #includes are not already sorted, - /// this will simply insert the #include in front of the first #include of the - /// same category in the code that should be sorted after \p IncludeName. If - /// \p IncludeName already exists (with exactly the same spelling), this - /// returns None. - llvm::Optional<tooling::Replacement> insert(llvm::StringRef Header, - bool IsAngled) const; - - /// Removes all existing #includes of \p Header quoted with <> if \p IsAngled - /// is true or "" if \p IsAngled is false. - /// This doesn't resolve the header file path; it only deletes #includes with - /// exactly the same spelling. - tooling::Replacements remove(llvm::StringRef Header, bool IsAngled) const; - -private: - struct Include { - Include(StringRef Name, tooling::Range R) : Name(Name), R(R) {} - - // An include header quoted with either <> or "". - std::string Name; - // The range of the whole line of include directive including any eading - // whitespaces and trailing comment. - tooling::Range R; - }; - - void addExistingInclude(Include IncludeToAdd, unsigned NextLineOffset); - - std::string FileName; - std::string Code; - - // Map from include name (quotation trimmed) to a list of existing includes - // (in case there are more than one) with the name in the current file. <x> - // and "x" will be treated as the same header when deleting #includes. - llvm::StringMap<llvm::SmallVector<Include, 1>> ExistingIncludes; - - /// Map from priorities of #include categories to all #includes in the same - /// category. This is used to find #includes of the same category when - /// inserting new #includes. #includes in the same categories are sorted in - /// in the order they appear in the source file. - /// See comment for "FormatStyle::IncludeCategories" for details about include - /// priorities. - std::unordered_map<int, llvm::SmallVector<const Include *, 8>> - IncludesByPriority; - - int FirstIncludeOffset; - // All new headers should be inserted after this offset (e.g. after header - // guards, file comment). - unsigned MinInsertOffset; - // Max insertion offset in the original code. For example, we want to avoid - // inserting new #includes into the actual code section (e.g. after a - // declaration). - unsigned MaxInsertOffset; - IncludeCategoryManager Categories; - // Record the offset of the end of the last include in each category. - std::unordered_map<int, int> CategoryEndOffsets; - - // All possible priorities. - std::set<int> Priorities; - - // Matches a whole #include directive. - llvm::Regex IncludeRegex; -}; - -HeaderIncludes::HeaderIncludes(StringRef FileName, StringRef Code, - const FormatStyle &Style) - : FileName(FileName), Code(Code), FirstIncludeOffset(-1), - MinInsertOffset( - getOffsetAfterHeaderGuardsAndComments(FileName, Code, Style)), - MaxInsertOffset(MinInsertOffset + - getMaxHeaderInsertionOffset( - FileName, Code.drop_front(MinInsertOffset), Style)), - Categories(Style, FileName), - IncludeRegex(llvm::Regex(IncludeRegexPattern)) { - // Add 0 for main header and INT_MAX for headers that are not in any - // category. - Priorities = {0, INT_MAX}; - for (const auto &Category : Style.IncludeStyle.IncludeCategories) - Priorities.insert(Category.Priority); - SmallVector<StringRef, 32> Lines; - Code.drop_front(MinInsertOffset).split(Lines, "\n"); - - unsigned Offset = MinInsertOffset; - unsigned NextLineOffset; - SmallVector<StringRef, 4> Matches; - for (auto Line : Lines) { - NextLineOffset = std::min(Code.size(), Offset + Line.size() + 1); - if (IncludeRegex.match(Line, &Matches)) { - // If this is the last line without trailing newline, we need to make - // sure we don't delete across the file boundary. - addExistingInclude( - Include(Matches[2], - tooling::Range( - Offset, std::min(Line.size() + 1, Code.size() - Offset))), - NextLineOffset); - } - Offset = NextLineOffset; - } - - // Populate CategoryEndOfssets: - // - Ensure that CategoryEndOffset[Highest] is always populated. - // - If CategoryEndOffset[Priority] isn't set, use the next higher value - // that is set, up to CategoryEndOffset[Highest]. - auto Highest = Priorities.begin(); - if (CategoryEndOffsets.find(*Highest) == CategoryEndOffsets.end()) { - if (FirstIncludeOffset >= 0) - CategoryEndOffsets[*Highest] = FirstIncludeOffset; - else - CategoryEndOffsets[*Highest] = MinInsertOffset; - } - // By this point, CategoryEndOffset[Highest] is always set appropriately: - // - to an appropriate location before/after existing #includes, or - // - to right after the header guard, or - // - to the beginning of the file. - for (auto I = ++Priorities.begin(), E = Priorities.end(); I != E; ++I) - if (CategoryEndOffsets.find(*I) == CategoryEndOffsets.end()) - CategoryEndOffsets[*I] = CategoryEndOffsets[*std::prev(I)]; -} - -inline StringRef trimInclude(StringRef IncludeName) { - return IncludeName.trim("\"<>"); -} - -// \p Offset: the start of the line following this include directive. -void HeaderIncludes::addExistingInclude(Include IncludeToAdd, - unsigned NextLineOffset) { - auto Iter = - ExistingIncludes.try_emplace(trimInclude(IncludeToAdd.Name)).first; - Iter->second.push_back(std::move(IncludeToAdd)); - auto &CurInclude = Iter->second.back(); - // The header name with quotes or angle brackets. - // Only record the offset of current #include if we can insert after it. - if (CurInclude.R.getOffset() <= MaxInsertOffset) { - int Priority = Categories.getIncludePriority( - CurInclude.Name, /*CheckMainHeader=*/FirstIncludeOffset < 0); - CategoryEndOffsets[Priority] = NextLineOffset; - IncludesByPriority[Priority].push_back(&CurInclude); - if (FirstIncludeOffset < 0) - FirstIncludeOffset = CurInclude.R.getOffset(); - } -} - -llvm::Optional<tooling::Replacement> -HeaderIncludes::insert(llvm::StringRef IncludeName, bool IsAngled) const { - assert(IncludeName == trimInclude(IncludeName)); - // If a <header> ("header") already exists in code, "header" (<header>) with - // different quotation will still be inserted. - // FIXME: figure out if this is the best behavior. - auto It = ExistingIncludes.find(IncludeName); - if (It != ExistingIncludes.end()) - for (const auto &Inc : It->second) - if ((IsAngled && StringRef(Inc.Name).startswith("<")) || - (!IsAngled && StringRef(Inc.Name).startswith("\""))) - return llvm::None; - std::string Quoted = IsAngled ? ("<" + IncludeName + ">").str() - : ("\"" + IncludeName + "\"").str(); - StringRef QuotedName = Quoted; - int Priority = Categories.getIncludePriority( - QuotedName, /*CheckMainHeader=*/FirstIncludeOffset < 0); - auto CatOffset = CategoryEndOffsets.find(Priority); - assert(CatOffset != CategoryEndOffsets.end()); - unsigned InsertOffset = CatOffset->second; // Fall back offset - auto Iter = IncludesByPriority.find(Priority); - if (Iter != IncludesByPriority.end()) { - for (const auto *Inc : Iter->second) { - if (QuotedName < Inc->Name) { - InsertOffset = Inc->R.getOffset(); - break; - } - } - } - assert(InsertOffset <= Code.size()); - std::string NewInclude = ("#include " + QuotedName + "\n").str(); - // When inserting headers at end of the code, also append '\n' to the code - // if it does not end with '\n'. - // FIXME: when inserting multiple #includes at the end of code, only one - // newline should be added. - if (InsertOffset == Code.size() && (!Code.empty() && Code.back() != '\n')) - NewInclude = "\n" + NewInclude; - return tooling::Replacement(FileName, InsertOffset, 0, NewInclude); -} - -tooling::Replacements HeaderIncludes::remove(llvm::StringRef IncludeName, - bool IsAngled) const { - assert(IncludeName == trimInclude(IncludeName)); - tooling::Replacements Result; - auto Iter = ExistingIncludes.find(IncludeName); - if (Iter == ExistingIncludes.end()) - return Result; - for (const auto &Inc : Iter->second) { - if ((IsAngled && StringRef(Inc.Name).startswith("\"")) || - (!IsAngled && StringRef(Inc.Name).startswith("<"))) - continue; - llvm::Error Err = Result.add(tooling::Replacement( - FileName, Inc.R.getOffset(), Inc.R.getLength(), "")); - if (Err) { - auto ErrMsg = "Unexpected conflicts in #include deletions: " + - llvm::toString(std::move(Err)); - llvm_unreachable(ErrMsg.c_str()); - } - } - return Result; -} - // FIXME: insert empty lines between newly created blocks. tooling::Replacements fixCppIncludeInsertions(StringRef Code, const tooling::Replacements &Replaces, @@ -2248,11 +1860,11 @@ fixCppIncludeInsertions(StringRef Code, const tooling::Replacements &Replaces, StringRef FileName = Replaces.begin()->getFilePath(); - HeaderIncludes Includes(FileName, Code, Style); + tooling::HeaderIncludes Includes(FileName, Code, Style.IncludeStyle); for (const auto &Header : HeadersToDelete) { tooling::Replacements Replaces = - Includes.remove(trimInclude(Header), Header.startswith("<")); + Includes.remove(Header.trim("\"<>"), Header.startswith("<")); for (const auto &R : Replaces) { auto Err = Result.add(R); if (Err) { @@ -2274,7 +1886,7 @@ fixCppIncludeInsertions(StringRef Code, const tooling::Replacements &Replaces, (void)Matched; auto IncludeName = Matches[2]; auto Replace = - Includes.insert(trimInclude(IncludeName), IncludeName.startswith("<")); + Includes.insert(IncludeName.trim("\"<>"), IncludeName.startswith("<")); if (Replace) { auto Err = Result.add(*Replace); if (Err) { |