diff options
Diffstat (limited to 'clang-tools-extra/cpp11-migrate')
3 files changed, 616 insertions, 0 deletions
diff --git a/clang-tools-extra/cpp11-migrate/Core/CMakeLists.txt b/clang-tools-extra/cpp11-migrate/Core/CMakeLists.txt index d89d86cc545..0ac52230e6a 100644 --- a/clang-tools-extra/cpp11-migrate/Core/CMakeLists.txt +++ b/clang-tools-extra/cpp11-migrate/Core/CMakeLists.txt @@ -8,6 +8,7 @@ add_clang_library(migrateCore IncludeExcludeInfo.cpp PerfSupport.cpp Reformatting.cpp + IncludeDirectives.cpp ) target_link_libraries(migrateCore clangFormat diff --git a/clang-tools-extra/cpp11-migrate/Core/IncludeDirectives.cpp b/clang-tools-extra/cpp11-migrate/Core/IncludeDirectives.cpp new file mode 100644 index 00000000000..f240950790b --- /dev/null +++ b/clang-tools-extra/cpp11-migrate/Core/IncludeDirectives.cpp @@ -0,0 +1,474 @@ +//===-- Core/IncludeDirectives.cpp - Include directives handling ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file defines the IncludeDirectives class that helps with +/// detecting and modifying \#include directives. +/// +//===----------------------------------------------------------------------===// + +#include "IncludeDirectives.h" +#include "clang/Basic/CharInfo.h" +#include "clang/Frontend/CompilerInstance.h" +#include "clang/Lex/HeaderSearch.h" +#include "clang/Lex/Preprocessor.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringSwitch.h" +#include <stack> + +using namespace clang; +using namespace clang::tooling; +using llvm::StringRef; + +/// \brief PPCallbacks that fills-in the include information in the given +/// \c IncludeDirectives. +class IncludeDirectivesPPCallback : public clang::PPCallbacks { + // Struct helping the detection of header guards in the various callbacks + struct GuardDetection { + GuardDetection(FileID FID) + : FID(FID), Count(0), TheMacro(0), CountAtEndif(0) {} + + FileID FID; + // count for relevant preprocessor directives + unsigned Count; + // the macro that is tested in the top most ifndef for the header guard + // (e.g: GUARD_H) + const IdentifierInfo *TheMacro; + // the hash locations of #ifndef, #define, #endif + SourceLocation IfndefLoc, DefineLoc, EndifLoc; + // the value of Count once the #endif is reached + unsigned CountAtEndif; + + /// \brief Check that with all the information gathered if this is a + /// potential header guard. + /// + /// Meaning a top-most \#ifndef has been found, followed by a define and the + /// last preprocessor directive was the terminating \#endif. + /// + /// FIXME: accept the \#if !defined identifier form too. + bool isPotentialHeaderGuard() const { + return Count == CountAtEndif && DefineLoc.isValid(); + } + }; + +public: + IncludeDirectivesPPCallback(IncludeDirectives *Self) : Self(Self), Guard(0) {} + +private: + virtual ~IncludeDirectivesPPCallback() {} + void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok, + StringRef FileName, bool IsAngled, + CharSourceRange FilenameRange, const FileEntry *File, + StringRef SearchPath, StringRef RelativePath, + const Module *Imported) LLVM_OVERRIDE { + SourceManager &SM = Self->Sources; + const FileEntry *FE = SM.getFileEntryForID(SM.getFileID(HashLoc)); + assert(FE && "Valid file expected."); + + IncludeDirectives::Entry E(HashLoc, File, IsAngled); + Self->FileToEntries[FE].push_back(E); + Self->IncludeAsWrittenToLocationsMap[FileName].push_back(HashLoc); + } + + // Keep track of the current file in the stack + virtual void FileChanged(SourceLocation Loc, FileChangeReason Reason, + SrcMgr::CharacteristicKind FileType, + FileID PrevFID) { + SourceManager &SM = Self->Sources; + switch (Reason) { + case EnterFile: + Files.push(GuardDetection(SM.getFileID(Loc))); + Guard = &Files.top(); + break; + + case ExitFile: + if (Guard->isPotentialHeaderGuard()) + handlePotentialHeaderGuard(*Guard); + Files.pop(); + Guard = &Files.top(); + break; + + default: + break; + } + } + + /// \brief Mark this header as guarded in the IncludeDirectives if it's a + /// proper header guard. + void handlePotentialHeaderGuard(const GuardDetection &Guard) { + SourceManager &SM = Self->Sources; + const FileEntry *File = SM.getFileEntryForID(Guard.FID); + const LangOptions &LangOpts = Self->CI.getLangOpts(); + + // Null file can happen for the <built-in> buffer for example. They + // shouldn't have header guards though... + if (!File) + return; + + // The #ifndef should be the next thing after the preamble. We aren't + // checking for equality because it can also be part of the preamble if the + // preamble is the whole file. + unsigned Preamble = + Lexer::ComputePreamble(SM.getBuffer(Guard.FID), LangOpts).first; + unsigned IfndefOffset = SM.getFileOffset(Guard.IfndefLoc); + if (IfndefOffset > (Preamble + 1)) + return; + + // No code is allowed in the code remaining after the #endif. + const llvm::MemoryBuffer *Buffer = SM.getBuffer(Guard.FID); + Lexer Lex(SM.getLocForStartOfFile(Guard.FID), LangOpts, + Buffer->getBufferStart(), + Buffer->getBufferStart() + SM.getFileOffset(Guard.EndifLoc), + Buffer->getBufferEnd()); + + // Find the first newline not part of a multi-line comment. + Token Tok; + Lex.LexFromRawLexer(Tok); // skip endif + Lex.LexFromRawLexer(Tok); + + // Not a proper header guard, the remainder of the file contains something + // else than comments or whitespaces. + if (Tok.isNot(tok::eof)) + return; + + // Add to the location of the define to the IncludeDirectives for this file. + Self->HeaderToGuard[File] = Guard.DefineLoc; + } + + virtual void Ifndef(SourceLocation Loc, const Token &MacroNameTok, + const MacroDirective *MD) { + Guard->Count++; + + // If this #ifndef is the top-most directive and the symbol isn't defined + // store those information in the guard detection, the next step will be to + // check for the define. + if (Guard->Count == 1 && MD == 0) { + IdentifierInfo *MII = MacroNameTok.getIdentifierInfo(); + + if (MII->hasMacroDefinition()) + return; + Guard->IfndefLoc = Loc; + Guard->TheMacro = MII; + } + } + + virtual void MacroDefined(const Token &MacroNameTok, + const MacroDirective *MD) { + Guard->Count++; + + // If this #define is the second directive of the file and the symbol + // defined is the same as the one checked in the #ifndef then store the + // information about this define. + if (Guard->Count == 2 && Guard->TheMacro != 0) { + IdentifierInfo *MII = MacroNameTok.getIdentifierInfo(); + + // macro unrelated to the ifndef, doesn't look like a proper header guard + if (MII->getName() != Guard->TheMacro->getName()) + return; + + Guard->DefineLoc = MacroNameTok.getLocation(); + } + } + + virtual void Endif(SourceLocation Loc, SourceLocation IfLoc) { + Guard->Count++; + + // If it's the #endif corresponding to the top-most #ifndef + if (Self->Sources.getDecomposedLoc(Guard->IfndefLoc) != + Self->Sources.getDecomposedLoc(IfLoc)) + return; + + // And that the top-most #ifndef was followed by the right #define + if (Guard->DefineLoc.isInvalid()) + return; + + // Then save the information about this #endif. Once the file is exited we + // will check if it was the final preprocessor directive. + Guard->CountAtEndif = Guard->Count; + Guard->EndifLoc = Loc; + } + + virtual void MacroExpands(const Token &, const MacroDirective *, SourceRange, + const MacroArgs *) { + Guard->Count++; + } + virtual void MacroUndefined(const Token &, const MacroDirective *) { + Guard->Count++; + } + virtual void Defined(const Token &, const MacroDirective *, SourceRange) { + Guard->Count++; + } + virtual void If(SourceLocation, SourceRange, bool) { Guard->Count++; } + virtual void Elif(SourceLocation, SourceRange, bool, SourceLocation) { + Guard->Count++; + } + virtual void Ifdef(SourceLocation, const Token &, const MacroDirective *) { + Guard->Count++; + } + virtual void Else(SourceLocation, SourceLocation) { Guard->Count++; } + + IncludeDirectives *Self; + // keep track of the guard info through the include stack + std::stack<GuardDetection> Files; + // convenience field pointing to Files.top().second + GuardDetection *Guard; +}; + +// Flags that describes where to insert newlines. +enum NewLineFlags { + // Prepend a newline at the beginning of the insertion. + NL_Prepend = 0x1, + + // Prepend another newline at the end of the insertion. + NL_PrependAnother = 0x2, + + // Add two newlines at the end of the insertion. + NL_AppendTwice = 0x4, + + // Convenience value to enable both \c NL_Prepend and \c NL_PrependAnother. + NL_PrependTwice = NL_Prepend | NL_PrependAnother +}; + +/// \brief Guess the end-of-line sequence used in the given FileID. If the +/// sequence can't be guessed return an Unix-style newline. +static StringRef guessEOL(SourceManager &SM, FileID ID) { + StringRef Content = SM.getBufferData(ID); + StringRef Buffer = Content.substr(Content.find_first_of("\r\n")); + + return llvm::StringSwitch<StringRef>(Buffer) + .StartsWith("\r\n", "\r\n") + .StartsWith("\n\r", "\n\r") + .StartsWith("\r", "\r") + .Default("\n"); +} + +/// \brief Find the end of the end of the directive, either the beginning of a +/// newline or the end of file. +// +// \return The offset into the file where the directive ends along with a +// boolean value indicating whether the directive ends because the end of file +// was reached or not. +static std::pair<unsigned, bool> findDirectiveEnd(SourceLocation HashLoc, + SourceManager &SM, + const LangOptions &LangOpts) { + FileID FID = SM.getFileID(HashLoc); + unsigned Offset = SM.getFileOffset(HashLoc); + StringRef Content = SM.getBufferData(FID); + Lexer Lex(SM.getLocForStartOfFile(FID), LangOpts, Content.begin(), + Content.begin() + Offset, Content.end()); + Lex.SetCommentRetentionState(true); + Token Tok; + + // This loop look for the newline after our directive but avoids the ones part + // of a multi-line comments: + // + // #include <foo> /* long \n comment */\n + // ~~ no ~~ yes + for (;;) { + // find the beginning of the end-of-line sequence + StringRef::size_type EOLOffset = Content.find_first_of("\r\n", Offset); + // ends because EOF was reached + if (EOLOffset == StringRef::npos) + return std::make_pair(Content.size(), true); + + // find the token that contains our end-of-line + unsigned TokEnd = 0; + do { + Lex.LexFromRawLexer(Tok); + TokEnd = SM.getFileOffset(Tok.getLocation()) + Tok.getLength(); + + // happens when the whitespaces are eaten after a multiline comment + if (Tok.is(tok::eof)) + return std::make_pair(EOLOffset, false); + } while (TokEnd < EOLOffset); + + // the end-of-line is not part of a multi-line comment, return its location + if (Tok.isNot(tok::comment)) + return std::make_pair(EOLOffset, false); + + // for the next search to start after the end of this token + Offset = TokEnd; + } +} + +IncludeDirectives::IncludeDirectives(clang::CompilerInstance &CI) + : CI(CI), Sources(CI.getSourceManager()) { + // addPPCallbacks takes ownership of the callback + CI.getPreprocessor().addPPCallbacks(new IncludeDirectivesPPCallback(this)); +} + +bool IncludeDirectives::lookForInclude(const FileEntry *File, + const LocationVec &IncludeLocs, + SeenFilesSet &Seen) const { + // mark this file as visited + Seen.insert(File); + + // First check if included directly in this file + for (LocationVec::const_iterator I = IncludeLocs.begin(), + E = IncludeLocs.end(); + I != E; ++I) + if (Sources.getFileEntryForID(Sources.getFileID(*I)) == File) + return true; + + // Otherwise look recursively all the included files + FileToEntriesMap::const_iterator EntriesIt = FileToEntries.find(File); + if (EntriesIt == FileToEntries.end()) + return false; + for (EntryVec::const_iterator I = EntriesIt->second.begin(), + E = EntriesIt->second.end(); + I != E; ++I) { + // skip if this header has already been checked before + if (Seen.count(I->getIncludedFile())) + continue; + if (lookForInclude(I->getIncludedFile(), IncludeLocs, Seen)) + return true; + } + return false; +} + +bool IncludeDirectives::hasInclude(const FileEntry *File, + StringRef Include) const { + llvm::StringMap<LocationVec>::const_iterator It = + IncludeAsWrittenToLocationsMap.find(Include); + + // Include isn't included in any file + if (It == IncludeAsWrittenToLocationsMap.end()) + return false; + + SeenFilesSet Seen; + return lookForInclude(File, It->getValue(), Seen); +} + +Replacement IncludeDirectives::addAngledInclude(const clang::FileEntry *File, + llvm::StringRef Include) { + FileID FID = Sources.translateFile(File); + assert(!FID.isInvalid() && "Invalid file entry given!"); + + if (hasInclude(File, Include)) + return Replacement(); + + unsigned Offset, NLFlags; + llvm::tie(Offset, NLFlags) = angledIncludeInsertionOffset(FID); + + StringRef EOL = guessEOL(Sources, FID); + llvm::SmallString<32> InsertionText; + if (NLFlags & NL_Prepend) + InsertionText += EOL; + if (NLFlags & NL_PrependAnother) + InsertionText += EOL; + InsertionText += "#include <"; + InsertionText += Include; + InsertionText += ">"; + if (NLFlags & NL_AppendTwice) { + InsertionText += EOL; + InsertionText += EOL; + } + return Replacement(File->getName(), Offset, 0, InsertionText); +} + +Replacement IncludeDirectives::addAngledInclude(llvm::StringRef File, + llvm::StringRef Include) { + const FileEntry *Entry = Sources.getFileManager().getFile(File); + assert(Entry && "Invalid file given!"); + return addAngledInclude(Entry, Include); +} + +std::pair<unsigned, unsigned> +IncludeDirectives::findFileHeaderEndOffset(FileID FID) const { + unsigned NLFlags = NL_Prepend; + StringRef Content = Sources.getBufferData(FID); + Lexer Lex(Sources.getLocForStartOfFile(FID), CI.getLangOpts(), + Content.begin(), Content.begin(), Content.end()); + Lex.SetCommentRetentionState(true); + Lex.SetKeepWhitespaceMode(true); + + // find the first newline not part of a multi-line comment + Token Tok; + do { + Lex.LexFromRawLexer(Tok); + unsigned Offset = Sources.getFileOffset(Tok.getLocation()); + // allow one newline between the comments + if (Tok.is(tok::unknown) && isWhitespace(Content[Offset])) { + StringRef Whitespaces(Content.substr(Offset, Tok.getLength())); + if (Whitespaces.count('\n') == 1 || Whitespaces.count('\r') == 1) + Lex.LexFromRawLexer(Tok); + else { + // add an empty line to separate the file header and the inclusion + NLFlags = NL_PrependTwice; + } + } + } while (Tok.is(tok::comment)); + + // apparently there is no header, insertion point is the beginning of the file + if (Tok.isNot(tok::unknown)) + return std::make_pair(0, NL_AppendTwice); + return std::make_pair(Sources.getFileOffset(Tok.getLocation()), NLFlags); +} + +SourceLocation +IncludeDirectives::angledIncludeHintLoc(FileID FID) const { + FileToEntriesMap::const_iterator EntriesIt = + FileToEntries.find(Sources.getFileEntryForID(FID)); + + if (EntriesIt == FileToEntries.end()) + return SourceLocation(); + + HeaderSearch &HeaderInfo = CI.getPreprocessor().getHeaderSearchInfo(); + const EntryVec &Entries = EntriesIt->second; + EntryVec::const_reverse_iterator QuotedCandidate = Entries.rend(); + for (EntryVec::const_reverse_iterator I = Entries.rbegin(), + E = Entries.rend(); + I != E; ++I) { + // Headers meant for multiple inclusion can potentially appears in the + // middle of the code thus making them a poor choice for an insertion point. + if (!HeaderInfo.isFileMultipleIncludeGuarded(I->getIncludedFile())) + continue; + + // return preferably the last angled include + if (I->isAngled()) + return I->getHashLocation(); + + // keep track of the last quoted include that is guarded + if (QuotedCandidate == Entries.rend()) + QuotedCandidate = I; + } + + if (QuotedCandidate == Entries.rend()) + return SourceLocation(); + + // return the last quoted-include if we couldn't find an angled one + return QuotedCandidate->getHashLocation(); +} + +std::pair<unsigned, unsigned> +IncludeDirectives::angledIncludeInsertionOffset(FileID FID) const { + SourceLocation Hint = angledIncludeHintLoc(FID); + unsigned NL_Flags = NL_Prepend; + + // If we can't find a similar include and we are in a header check if it's a + // guarded header. If so the hint will be the location of the #define from the + // guard. + if (Hint.isInvalid()) { + const FileEntry *File = Sources.getFileEntryForID(FID); + HeaderToGuardMap::const_iterator GuardIt = HeaderToGuard.find(File); + if (GuardIt != HeaderToGuard.end()) { + // get the hash location from the #define + Hint = GuardIt->second; + // we want a blank line between the #define and the #include + NL_Flags = NL_PrependTwice; + } + } + + // no hints, insertion is done after the file header + if (Hint.isInvalid()) + return findFileHeaderEndOffset(FID); + + unsigned Offset = findDirectiveEnd(Hint, Sources, CI.getLangOpts()).first; + return std::make_pair(Offset, NL_Flags); +} diff --git a/clang-tools-extra/cpp11-migrate/Core/IncludeDirectives.h b/clang-tools-extra/cpp11-migrate/Core/IncludeDirectives.h new file mode 100644 index 00000000000..c1c5a7acf77 --- /dev/null +++ b/clang-tools-extra/cpp11-migrate/Core/IncludeDirectives.h @@ -0,0 +1,141 @@ +//===-- Core/IncludeDirectives.h - Include directives handling --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file declares the IncludeDirectives class that helps with +/// detecting and modifying \#include directives. +/// +//===----------------------------------------------------------------------===// + +#ifndef CPP11_MIGRATE_INCLUDE_DIRECTIVES_H +#define CPP11_MIGRATE_INCLUDE_DIRECTIVES_H + +#include "clang/Basic/SourceLocation.h" +#include "clang/Tooling/Refactoring.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/SmallPtrSet.h" +#include <vector> + +namespace clang { +class Preprocessor; +} // namespace clang + +/// \brief Support for include directives handling. +/// +/// This class should be created with a \c clang::CompilerInstance before the +/// file is preprocessed in order to collect the inclusion information. It can +/// be queried as long as the compiler instance is valid. +class IncludeDirectives { +public: + IncludeDirectives(clang::CompilerInstance &CI); + + /// \brief Add an angled include to a the given file. + /// + /// \param File A file accessible by a SourceManager + /// \param Include The include file as it should be written in the code. + /// + /// \returns + /// \li A null Replacement (check using \c Replacement::isApplicable()), if + /// the \c Include is already visible from \c File. + /// \li Otherwise, a non-null Replacement that, when applied, inserts an + /// \c \#include into \c File. + clang::tooling::Replacement addAngledInclude(llvm::StringRef File, + llvm::StringRef Include); + clang::tooling::Replacement addAngledInclude(const clang::FileEntry *File, + llvm::StringRef Include); + + /// \brief Check if \p Include is included by \p File or any of the files + /// \p File includes. + bool hasInclude(const clang::FileEntry *File, llvm::StringRef Include) const; + +private: + friend class IncludeDirectivesPPCallback; + + /// \brief Contains information about an inclusion. + class Entry { + public: + Entry(clang::SourceLocation HashLoc, const clang::FileEntry *IncludedFile, + bool Angled) + : HashLoc(HashLoc), IncludedFile(IncludedFile), Angled(Angled) {} + + /// \brief The location of the '#'. + clang::SourceLocation getHashLocation() const { return HashLoc; } + + /// \brief The file included by this include directive. + const clang::FileEntry *getIncludedFile() const { return IncludedFile; } + + /// \brief \c true if the include use angle brackets, \c false otherwise + /// when using of quotes. + bool isAngled() const { return Angled; } + + private: + clang::SourceLocation HashLoc; + const clang::FileEntry *IncludedFile; + bool Angled; + }; + + // A list of entries. + typedef std::vector<Entry> EntryVec; + + // A list of source locations. + typedef std::vector<clang::SourceLocation> LocationVec; + + // Associates files to their includes. + typedef llvm::DenseMap<const clang::FileEntry *, EntryVec> FileToEntriesMap; + + // Associates headers to their include guards if any. The location is the + // location of the hash from the #define. + typedef llvm::DenseMap<const clang::FileEntry *, clang::SourceLocation> + HeaderToGuardMap; + + /// \brief Type used by \c lookForInclude() to keep track of the files that + /// have already been processed. + typedef llvm::SmallPtrSet<const clang::FileEntry *, 32> SeenFilesSet; + + /// \brief Recursively look if an include is included by \p File or any of the + /// headers \p File includes. + /// + /// \param File The file where to start the search. + /// \param IncludeLocs These are the hash locations of the \#include + /// directives we are looking for. + /// \param Seen Used to avoid visiting a same file more than once during the + /// recursion. + bool lookForInclude(const clang::FileEntry *File, + const LocationVec &IncludeLocs, SeenFilesSet &Seen) const; + + /// \brief Find the end of a file header and returns a pair (FileOffset, + /// NewLineFlags). + /// + /// Source files often contain a file header (copyright, license, explanation + /// of the file content). An \#include should preferrably be put after this. + std::pair<unsigned, unsigned> + findFileHeaderEndOffset(clang::FileID FID) const; + + /// \brief Finds the offset where an angled include should be added and + /// returns a pair (FileOffset, NewLineFlags). + std::pair<unsigned, unsigned> + angledIncludeInsertionOffset(clang::FileID FID) const; + + /// \brief Find the location of an include directive that can be used to + /// insert an inclusion after. + /// + /// If no such include exists returns a null SourceLocation. + clang::SourceLocation angledIncludeHintLoc(clang::FileID FID) const; + + clang::CompilerInstance &CI; + clang::SourceManager &Sources; + FileToEntriesMap FileToEntries; + // maps include filename as written in the source code to the source locations + // where it appears + llvm::StringMap<LocationVec> IncludeAsWrittenToLocationsMap; + HeaderToGuardMap HeaderToGuard; +}; + +#endif // CPP11_MIGRATE_INCLUDE_DIRECTIVES_H |