diff options
Diffstat (limited to 'clang-tools-extra/clang-modernize/LoopConvert')
10 files changed, 2248 insertions, 0 deletions
diff --git a/clang-tools-extra/clang-modernize/LoopConvert/LoopActions.cpp b/clang-tools-extra/clang-modernize/LoopConvert/LoopActions.cpp new file mode 100644 index 00000000000..4dab6aba75f --- /dev/null +++ b/clang-tools-extra/clang-modernize/LoopConvert/LoopActions.cpp @@ -0,0 +1,1135 @@ +//===-- LoopConvert/LoopActions.cpp - C++11 For loop migration ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file defines matchers and callbacks for use in migrating C++ +/// for loops. +/// +//===----------------------------------------------------------------------===// + +#include "LoopActions.h" +#include "LoopMatchers.h" +#include "VariableNaming.h" +#include "clang/Lex/Lexer.h" + +using namespace clang::ast_matchers; +using namespace clang::tooling; +using namespace clang; + +/// \brief The information needed to describe a valid convertible usage +/// of an array index or iterator. +struct Usage { + const Expr *E; + bool IsArrow; + SourceRange Range; + + explicit Usage(const Expr *E) + : E(E), IsArrow(false), Range(E->getSourceRange()) { } + Usage(const Expr *E, bool IsArrow, SourceRange Range) + : E(E), IsArrow(IsArrow), Range(Range) { } +}; + +/// \brief A class to encapsulate lowering of the tool's confidence level. +/// +/// Confidence is a quantity opposite in meaning to Risk. Since clang-modernize +/// uses risk, this class reverses the meaning for the legacy loop convert +/// code. +class Confidence { +public: + /// \brief Initialize confidence level. + explicit Confidence(RiskLevel Level) : + CurrentLevel(Level) {} + + /// \brief Lower the internal confidence level to Level, but do not raise it. 
+ void lowerTo(RiskLevel Level) { + CurrentLevel = std::max(Level, CurrentLevel); + } + + /// \brief Return the internal confidence level. + RiskLevel getRiskLevel() const { return CurrentLevel; } + +private: + RiskLevel CurrentLevel; +}; + +/// \brief Discover usages of expressions consisting of index or iterator +/// access. +/// +/// Given an index variable, recursively crawls a for loop to discover if the +/// index variable is used in a way consistent with range-based for loop access. +class ForLoopIndexUseVisitor + : public RecursiveASTVisitor<ForLoopIndexUseVisitor> { + public: + ForLoopIndexUseVisitor(ASTContext *Context, const VarDecl *IndexVar, + const VarDecl *EndVar, const Expr *ContainerExpr, + const Expr *ArrayBoundExpr, + bool ContainerNeedsDereference) : + Context(Context), IndexVar(IndexVar), EndVar(EndVar), + ContainerExpr(ContainerExpr), ArrayBoundExpr(ArrayBoundExpr), + ContainerNeedsDereference(ContainerNeedsDereference), + OnlyUsedAsIndex(true), AliasDecl(NULL), ConfidenceLevel(RL_Safe), + NextStmtParent(NULL), CurrStmtParent(NULL), ReplaceWithAliasUse(false), + AliasFromForInit(false) { + if (ContainerExpr) { + addComponent(ContainerExpr); + llvm::FoldingSetNodeID ID; + const Expr *E = ContainerExpr->IgnoreParenImpCasts(); + E->Profile(ID, *Context, true); + } + } + + /// \brief Finds all uses of IndexVar in Body, placing all usages in Usages, + /// and returns true if IndexVar was only used in a way consistent with a + /// range-based for loop. + /// + /// The general strategy is to reject any DeclRefExprs referencing IndexVar, + /// with the exception of certain acceptable patterns. + /// For arrays, the DeclRefExpr for IndexVar must appear as the index of an + /// ArraySubscriptExpression. Iterator-based loops may dereference + /// IndexVar or call methods through operator-> (builtin or overloaded). + /// Array-like containers may use IndexVar as a parameter to the at() member + /// function and in overloaded operator[]. 
+ bool findAndVerifyUsages(const Stmt *Body) { + TraverseStmt(const_cast<Stmt *>(Body)); + return OnlyUsedAsIndex && ContainerExpr; + } + + /// \brief Add a set of components that we should consider relevant to the + /// container. + void addComponents(const ComponentVector &Components) { + // FIXME: add sort(on ID)+unique to avoid extra work. + for (ComponentVector::const_iterator I = Components.begin(), + E = Components.end(); I != E; ++I) + addComponent(*I); + } + + /// \brief Accessor for Usages. + const UsageResult &getUsages() const { return Usages; } + + /// \brief Get the container indexed by IndexVar, if any. + const Expr *getContainerIndexed() const { + return ContainerExpr; + } + + /// \brief Returns the statement declaring the variable created as an alias + /// for the loop element, if any. + const DeclStmt *getAliasDecl() const { return AliasDecl; } + + /// \brief Accessor for ConfidenceLevel. + RiskLevel getRiskLevel() const { + return ConfidenceLevel.getRiskLevel(); + } + + /// \brief Indicates if the alias declaration was in a place where it cannot + /// simply be removed but rather replaced with a use of the alias variable. + /// For example, variables declared in the condition of an if, switch, or for + /// stmt. + bool aliasUseRequired() const { return ReplaceWithAliasUse; } + + /// \brief Indicates if the alias declaration came from the init clause of a + /// nested for loop. SourceRanges provided by Clang for DeclStmts in this + /// case need to be adjusted. + bool aliasFromForInit() const { return AliasFromForInit; } + + private: + /// Typedef used in CRTP functions. + typedef RecursiveASTVisitor<ForLoopIndexUseVisitor> VisitorBase; + friend class RecursiveASTVisitor<ForLoopIndexUseVisitor>; + + /// Overriden methods for RecursiveASTVisitor's traversal. 
+ bool TraverseArraySubscriptExpr(ArraySubscriptExpr *E); + bool TraverseCXXMemberCallExpr(CXXMemberCallExpr *MemberCall); + bool TraverseCXXOperatorCallExpr(CXXOperatorCallExpr *OpCall); + bool TraverseMemberExpr(MemberExpr *Member); + bool TraverseUnaryDeref(UnaryOperator *Uop); + bool VisitDeclRefExpr(DeclRefExpr *E); + bool VisitDeclStmt(DeclStmt *S); + bool TraverseStmt(Stmt *S); + + /// \brief Add an expression to the list of expressions on which the container + /// expression depends. + void addComponent(const Expr *E) { + llvm::FoldingSetNodeID ID; + const Expr *Node = E->IgnoreParenImpCasts(); + Node->Profile(ID, *Context, true); + DependentExprs.push_back(std::make_pair(Node, ID)); + } + + // Input member variables: + ASTContext *Context; + /// The index variable's VarDecl. + const VarDecl *IndexVar; + /// The loop's 'end' variable, which cannot be mentioned at all. + const VarDecl *EndVar; + /// The Expr which refers to the container. + const Expr *ContainerExpr; + /// The Expr which refers to the terminating condition for array-based loops. + const Expr *ArrayBoundExpr; + bool ContainerNeedsDereference; + + // Output member variables: + /// A container which holds all usages of IndexVar as the index of + /// ArraySubscriptExpressions. + UsageResult Usages; + bool OnlyUsedAsIndex; + /// The DeclStmt for an alias to the container element. + const DeclStmt *AliasDecl; + Confidence ConfidenceLevel; + /// \brief A list of expressions on which ContainerExpr depends. + /// + /// If any of these expressions are encountered outside of an acceptable usage + /// of the loop element, lower our confidence level. + llvm::SmallVector< + std::pair<const Expr *, llvm::FoldingSetNodeID>, 16> DependentExprs; + + /// The parent-in-waiting. Will become the real parent once we traverse down + /// one level in the AST. + const Stmt *NextStmtParent; + /// The actual parent of a node when Visit*() calls are made. 
Only the + /// parentage of DeclStmt's to possible iteration/selection statements is of + /// importance. + const Stmt *CurrStmtParent; + + /// \see aliasUseRequired(). + bool ReplaceWithAliasUse; + /// \see aliasFromForInit(). + bool AliasFromForInit; +}; + +/// \brief Obtain the original source code text from a SourceRange. +static StringRef getStringFromRange(SourceManager &SourceMgr, + const LangOptions &LangOpts, + SourceRange Range) { + if (SourceMgr.getFileID(Range.getBegin()) != + SourceMgr.getFileID(Range.getEnd())) + return NULL; + + CharSourceRange SourceChars(Range, true); + return Lexer::getSourceText(SourceChars, SourceMgr, LangOpts); +} + +/// \brief Returns the DeclRefExpr represented by E, or NULL if there isn't one. +static const DeclRefExpr *getDeclRef(const Expr *E) { + return dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts()); +} + +/// \brief If the given expression is actually a DeclRefExpr, find and return +/// the underlying VarDecl; otherwise, return NULL. +static const VarDecl *getReferencedVariable(const Expr *E) { + if (const DeclRefExpr *DRE = getDeclRef(E)) + return dyn_cast<VarDecl>(DRE->getDecl()); + return NULL; +} + +/// \brief Returns true when the given expression is a member expression +/// whose base is `this` (implicitly or not). +static bool isDirectMemberExpr(const Expr *E) { + if (const MemberExpr *Member = dyn_cast<MemberExpr>(E->IgnoreParenImpCasts())) + return isa<CXXThisExpr>(Member->getBase()->IgnoreParenImpCasts()); + return false; +} + +/// \brief Returns true when two ValueDecls are the same variable. +static bool areSameVariable(const ValueDecl *First, const ValueDecl *Second) { + return First && Second && + First->getCanonicalDecl() == Second->getCanonicalDecl(); +} + +/// \brief Determines if an expression is a declaration reference to a +/// particular variable. 
+static bool exprReferencesVariable(const ValueDecl *Target, const Expr *E) { + if (!Target || !E) + return false; + const DeclRefExpr *Decl = getDeclRef(E); + return Decl && areSameVariable(Target, Decl->getDecl()); +} + +/// \brief Returns true when two Exprs are equivalent. +static bool areSameExpr(ASTContext *Context, const Expr *First, + const Expr *Second) { + if (!First || !Second) + return false; + + llvm::FoldingSetNodeID FirstID, SecondID; + First->Profile(FirstID, *Context, true); + Second->Profile(SecondID, *Context, true); + return FirstID == SecondID; +} + +/// \brief Look through conversion/copy constructors to find the explicit +/// initialization expression, returning it is found. +/// +/// The main idea is that given +/// vector<int> v; +/// we consider either of these initializations +/// vector<int>::iterator it = v.begin(); +/// vector<int>::iterator it(v.begin()); +/// and retrieve `v.begin()` as the expression used to initialize `it` but do +/// not include +/// vector<int>::iterator it; +/// vector<int>::iterator it(v.begin(), 0); // if this constructor existed +/// as being initialized from `v.begin()` +static const Expr *digThroughConstructors(const Expr *E) { + if (!E) + return NULL; + E = E->IgnoreParenImpCasts(); + if (const CXXConstructExpr *ConstructExpr = dyn_cast<CXXConstructExpr>(E)) { + // The initial constructor must take exactly one parameter, but base class + // and deferred constructors can take more. + if (ConstructExpr->getNumArgs() != 1 || + ConstructExpr->getConstructionKind() != CXXConstructExpr::CK_Complete) + return NULL; + E = ConstructExpr->getArg(0); + if (const MaterializeTemporaryExpr *Temp = + dyn_cast<MaterializeTemporaryExpr>(E)) + E = Temp->GetTemporaryExpr(); + return digThroughConstructors(E); + } + return E; +} + +/// \brief If the expression is a dereference or call to operator*(), return the +/// operand. Otherwise, return NULL. 
+static const Expr *getDereferenceOperand(const Expr *E) { + if (const UnaryOperator *Uop = dyn_cast<UnaryOperator>(E)) + return Uop->getOpcode() == UO_Deref ? Uop->getSubExpr() : NULL; + + if (const CXXOperatorCallExpr *OpCall = dyn_cast<CXXOperatorCallExpr>(E)) + return OpCall->getOperator() == OO_Star && OpCall->getNumArgs() == 1 ? + OpCall->getArg(0) : NULL; + + return NULL; +} + +/// \brief Returns true when the Container contains an Expr equivalent to E. +template<typename ContainerT> +static bool containsExpr(ASTContext *Context, const ContainerT *Container, + const Expr *E) { + llvm::FoldingSetNodeID ID; + E->Profile(ID, *Context, true); + for (typename ContainerT::const_iterator I = Container->begin(), + End = Container->end(); I != End; ++I) + if (ID == I->second) + return true; + return false; +} + +/// \brief Returns true when the index expression is a declaration reference to +/// IndexVar. +/// +/// If the index variable is `index`, this function returns true on +/// arrayExpression[index]; +/// containerExpression[index]; +/// but not +/// containerExpression[notIndex]; +static bool isIndexInSubscriptExpr(const Expr *IndexExpr, + const VarDecl *IndexVar) { + const DeclRefExpr *Idx = getDeclRef(IndexExpr); + return Idx && Idx->getType()->isIntegerType() + && areSameVariable(IndexVar, Idx->getDecl()); +} + +/// \brief Returns true when the index expression is a declaration reference to +/// IndexVar, Obj is the same expression as SourceExpr after all parens and +/// implicit casts are stripped off. +/// +/// If PermitDeref is true, IndexExpression may +/// be a dereference (overloaded or builtin operator*). +/// +/// This function is intended for array-like containers, as it makes sure that +/// both the container and the index match. 
+/// If the loop has index variable `index` and iterates over `container`, then +/// isIndexInSubscriptExpr returns true for +/// \code +/// container[index] +/// container.at(index) +/// container->at(index) +/// \endcode +/// but not for +/// \code +/// container[notIndex] +/// notContainer[index] +/// \endcode +/// If PermitDeref is true, then isIndexInSubscriptExpr additionally returns +/// true on these expressions: +/// \code +/// (*container)[index] +/// (*container).at(index) +/// \endcode +static bool isIndexInSubscriptExpr(ASTContext *Context, const Expr *IndexExpr, + const VarDecl *IndexVar, const Expr *Obj, + const Expr *SourceExpr, bool PermitDeref) { + if (!SourceExpr || !Obj || !isIndexInSubscriptExpr(IndexExpr, IndexVar)) + return false; + + if (areSameExpr(Context, SourceExpr->IgnoreParenImpCasts(), + Obj->IgnoreParenImpCasts())) + return true; + + if (const Expr *InnerObj = getDereferenceOperand(Obj->IgnoreParenImpCasts())) + if (PermitDeref && areSameExpr(Context, SourceExpr->IgnoreParenImpCasts(), + InnerObj->IgnoreParenImpCasts())) + return true; + + return false; +} + +/// \brief Returns true when Opcall is a call a one-parameter dereference of +/// IndexVar. +/// +/// For example, if the index variable is `index`, returns true for +/// *index +/// but not +/// index +/// *notIndex +static bool isDereferenceOfOpCall(const CXXOperatorCallExpr *OpCall, + const VarDecl *IndexVar) { + return OpCall->getOperator() == OO_Star && OpCall->getNumArgs() == 1 && + exprReferencesVariable(IndexVar, OpCall->getArg(0)); +} + +/// \brief Returns true when Uop is a dereference of IndexVar. 
+/// +/// For example, if the index variable is `index`, returns true for +/// *index +/// but not +/// index +/// *notIndex +static bool isDereferenceOfUop(const UnaryOperator *Uop, + const VarDecl *IndexVar) { + return Uop->getOpcode() == UO_Deref && + exprReferencesVariable(IndexVar, Uop->getSubExpr()); +} + +/// \brief Determines whether the given Decl defines a variable initialized to +/// the loop object. +/// +/// This is intended to find cases such as +/// \code +/// for (int i = 0; i < arraySize(arr); ++i) { +/// T t = arr[i]; +/// // use t, do not use i +/// } +/// \endcode +/// and +/// \code +/// for (iterator i = container.begin(), e = container.end(); i != e; ++i) { +/// T t = *i; +/// // use t, do not use i +/// } +/// \endcode +static bool isAliasDecl(const Decl *TheDecl, const VarDecl *IndexVar) { + const VarDecl *VDecl = dyn_cast<VarDecl>(TheDecl); + if (!VDecl) + return false; + if (!VDecl->hasInit()) + return false; + const Expr *Init = + digThroughConstructors(VDecl->getInit()->IgnoreParenImpCasts()); + if (!Init) + return false; + + switch (Init->getStmtClass()) { + case Stmt::ArraySubscriptExprClass: { + const ArraySubscriptExpr *E = cast<ArraySubscriptExpr>(Init); + // We don't really care which array is used here. We check to make sure + // it was the correct one later, since the AST will traverse it next. + return isIndexInSubscriptExpr(E->getIdx(), IndexVar); + } + + case Stmt::UnaryOperatorClass: + return isDereferenceOfUop(cast<UnaryOperator>(Init), IndexVar); + + case Stmt::CXXOperatorCallExprClass: { + const CXXOperatorCallExpr *OpCall = cast<CXXOperatorCallExpr>(Init); + if (OpCall->getOperator() == OO_Star) + return isDereferenceOfOpCall(OpCall, IndexVar); + break; + } + + default: + break; + } + return false; +} + +/// \brief Determines whether the bound of a for loop condition expression is +/// the same as the statically computable size of ArrayType. 
+/// +/// Given +/// \code +/// const int N = 5; +/// int arr[N]; +/// \endcode +/// This is intended to permit +/// \code +/// for (int i = 0; i < N; ++i) { /* use arr[i] */ } +/// for (int i = 0; i < arraysize(arr); ++i) { /* use arr[i] */ } +/// \endcode +static bool arrayMatchesBoundExpr(ASTContext *Context, + const QualType &ArrayType, + const Expr *ConditionExpr) { + if (!ConditionExpr || ConditionExpr->isValueDependent()) + return false; + const ConstantArrayType *ConstType = + Context->getAsConstantArrayType(ArrayType); + if (!ConstType) + return false; + llvm::APSInt ConditionSize; + if (!ConditionExpr->isIntegerConstantExpr(ConditionSize, *Context)) + return false; + llvm::APSInt ArraySize(ConstType->getSize()); + return llvm::APSInt::isSameValue(ConditionSize, ArraySize); +} + +/// \brief If the unary operator is a dereference of IndexVar, include it +/// as a valid usage and prune the traversal. +/// +/// For example, if container.begin() and container.end() both return pointers +/// to int, this makes sure that the initialization for `k` is not counted as an +/// unconvertible use of the iterator `i`. +/// \code +/// for (int *i = container.begin(), *e = container.end(); i != e; ++i) { +/// int k = *i + 2; +/// } +/// \endcode +bool ForLoopIndexUseVisitor::TraverseUnaryDeref(UnaryOperator *Uop) { + // If we dereference an iterator that's actually a pointer, count the + // occurrence. + if (isDereferenceOfUop(Uop, IndexVar)) { + Usages.push_back(Usage(Uop)); + return true; + } + + return VisitorBase::TraverseUnaryOperator(Uop); +} + +/// \brief If the member expression is operator-> (overloaded or not) on +/// IndexVar, include it as a valid usage and prune the traversal. 
+/// +/// For example, given +/// \code +/// struct Foo { int bar(); int x; }; +/// vector<Foo> v; +/// \endcode +/// the following uses will be considered convertible: +/// \code +/// for (vector<Foo>::iterator i = v.begin(), e = v.end(); i != e; ++i) { +/// int b = i->bar(); +/// int k = i->x + 1; +/// } +/// \endcode +/// though +/// \code +/// for (vector<Foo>::iterator i = v.begin(), e = v.end(); i != e; ++i) { +/// int k = i.insert(1); +/// } +/// for (vector<Foo>::iterator i = v.begin(), e = v.end(); i != e; ++i) { +/// int b = e->bar(); +/// } +/// \endcode +/// will not. +bool ForLoopIndexUseVisitor::TraverseMemberExpr(MemberExpr *Member) { + const Expr *Base = Member->getBase(); + const DeclRefExpr *Obj = getDeclRef(Base); + const Expr *ResultExpr = Member; + QualType ExprType; + if (const CXXOperatorCallExpr *Call = + dyn_cast<CXXOperatorCallExpr>(Base->IgnoreParenImpCasts())) { + // If operator->() is a MemberExpr containing a CXXOperatorCallExpr, then + // the MemberExpr does not have the expression we want. We therefore catch + // that instance here. + // For example, if vector<Foo>::iterator defines operator->(), then the + // example `i->bar()` at the top of this function is a CXXMemberCallExpr + // referring to `i->` as the member function called. We want just `i`, so + // we take the argument to operator->() as the base object. + if(Call->getOperator() == OO_Arrow) { + assert(Call->getNumArgs() == 1 && + "Operator-> takes more than one argument"); + Obj = getDeclRef(Call->getArg(0)); + ResultExpr = Obj; + ExprType = Call->getCallReturnType(); + } + } + + if (Member->isArrow() && Obj && exprReferencesVariable(IndexVar, Obj)) { + if (ExprType.isNull()) + ExprType = Obj->getType(); + + assert(ExprType->isPointerType() && "Operator-> returned non-pointer type"); + // FIXME: This works around not having the location of the arrow operator. + // Consider adding OperatorLoc to MemberExpr? 
+ SourceLocation ArrowLoc = + Lexer::getLocForEndOfToken(Base->getExprLoc(), 0, + Context->getSourceManager(), + Context->getLangOpts()); + // If something complicated is happening (i.e. the next token isn't an + // arrow), give up on making this work. + if (!ArrowLoc.isInvalid()) { + Usages.push_back(Usage(ResultExpr, /*IsArrow=*/true, + SourceRange(Base->getExprLoc(), ArrowLoc))); + return true; + } + } + return TraverseStmt(Member->getBase()); +} + +/// \brief If a member function call is the at() accessor on the container with +/// IndexVar as the single argument, include it as a valid usage and prune +/// the traversal. +/// +/// Member calls on other objects will not be permitted. +/// Calls on the iterator object are not permitted, unless done through +/// operator->(). The one exception is allowing vector::at() for pseudoarrays. +bool ForLoopIndexUseVisitor::TraverseCXXMemberCallExpr( + CXXMemberCallExpr *MemberCall) { + MemberExpr *Member = + dyn_cast<MemberExpr>(MemberCall->getCallee()->IgnoreParenImpCasts()); + if (!Member) + return VisitorBase::TraverseCXXMemberCallExpr(MemberCall); + // We specifically allow an accessor named "at" to let STL in, though + // this is restricted to pseudo-arrays by requiring a single, integer + // argument. + const IdentifierInfo *Ident = Member->getMemberDecl()->getIdentifier(); + if (Ident && Ident->isStr("at") && MemberCall->getNumArgs() == 1) { + if (isIndexInSubscriptExpr(Context, MemberCall->getArg(0), IndexVar, + Member->getBase(), ContainerExpr, + ContainerNeedsDereference)) { + Usages.push_back(Usage(MemberCall)); + return true; + } + } + + if (containsExpr(Context, &DependentExprs, Member->getBase())) + ConfidenceLevel.lowerTo(RL_Risky); + + return VisitorBase::TraverseCXXMemberCallExpr(MemberCall); +} + +/// \brief If an overloaded operator call is a dereference of IndexVar or +/// a subscript of a the container with IndexVar as the single argument, +/// include it as a valid usage and prune the traversal. 
+/// +/// For example, given +/// \code +/// struct Foo { int bar(); int x; }; +/// vector<Foo> v; +/// void f(Foo); +/// \endcode +/// the following uses will be considered convertible: +/// \code +/// for (vector<Foo>::iterator i = v.begin(), e = v.end(); i != e; ++i) { +/// f(*i); +/// } +/// for (int i = 0; i < v.size(); ++i) { +/// int i = v[i] + 1; +/// } +/// \endcode +bool ForLoopIndexUseVisitor::TraverseCXXOperatorCallExpr( + CXXOperatorCallExpr *OpCall) { + switch (OpCall->getOperator()) { + case OO_Star: + if (isDereferenceOfOpCall(OpCall, IndexVar)) { + Usages.push_back(Usage(OpCall)); + return true; + } + break; + + case OO_Subscript: + if (OpCall->getNumArgs() != 2) + break; + if (isIndexInSubscriptExpr(Context, OpCall->getArg(1), IndexVar, + OpCall->getArg(0), ContainerExpr, + ContainerNeedsDereference)) { + Usages.push_back(Usage(OpCall)); + return true; + } + break; + + default: + break; + } + return VisitorBase::TraverseCXXOperatorCallExpr(OpCall); +} + +/// \brief If we encounter an array with IndexVar as the index of an +/// ArraySubsriptExpression, note it as a consistent usage and prune the +/// AST traversal. +/// +/// For example, given +/// \code +/// const int N = 5; +/// int arr[N]; +/// \endcode +/// This is intended to permit +/// \code +/// for (int i = 0; i < N; ++i) { /* use arr[i] */ } +/// \endcode +/// but not +/// \code +/// for (int i = 0; i < N; ++i) { /* use notArr[i] */ } +/// \endcode +/// and further checking needs to be done later to ensure that exactly one array +/// is referenced. 
+bool ForLoopIndexUseVisitor::TraverseArraySubscriptExpr( + ArraySubscriptExpr *E) { + Expr *Arr = E->getBase(); + if (!isIndexInSubscriptExpr(E->getIdx(), IndexVar)) + return VisitorBase::TraverseArraySubscriptExpr(E); + + if ((ContainerExpr && !areSameExpr(Context, Arr->IgnoreParenImpCasts(), + ContainerExpr->IgnoreParenImpCasts())) + || !arrayMatchesBoundExpr(Context, Arr->IgnoreImpCasts()->getType(), + ArrayBoundExpr)) { + // If we have already discovered the array being indexed and this isn't it + // or this array doesn't match, mark this loop as unconvertible. + OnlyUsedAsIndex = false; + return VisitorBase::TraverseArraySubscriptExpr(E); + } + + if (!ContainerExpr) + ContainerExpr = Arr; + + Usages.push_back(Usage(E)); + return true; +} + +/// \brief If we encounter a reference to IndexVar in an unpruned branch of the +/// traversal, mark this loop as unconvertible. +/// +/// This implements the whitelist for convertible loops: any usages of IndexVar +/// not explicitly considered convertible by this traversal will be caught by +/// this function. +/// +/// Additionally, if the container expression is more complex than just a +/// DeclRefExpr, and some part of it is appears elsewhere in the loop, lower +/// our confidence in the transformation. 
+/// +/// For example, these are not permitted: +/// \code +/// for (int i = 0; i < N; ++i) { printf("arr[%d] = %d", i, arr[i]); } +/// for (vector<int>::iterator i = container.begin(), e = container.end(); +/// i != e; ++i) +/// i.insert(0); +/// for (vector<int>::iterator i = container.begin(), e = container.end(); +/// i != e; ++i) +/// i.insert(0); +/// for (vector<int>::iterator i = container.begin(), e = container.end(); +/// i != e; ++i) +/// if (i + 1 != e) +/// printf("%d", *i); +/// \endcode +/// +/// And these will raise the risk level: +/// \code +/// int arr[10][20]; +/// int l = 5; +/// for (int j = 0; j < 20; ++j) +/// int k = arr[l][j] + l; // using l outside arr[l] is considered risky +/// for (int i = 0; i < obj.getVector().size(); ++i) +/// obj.foo(10); // using `obj` is considered risky +/// \endcode +bool ForLoopIndexUseVisitor::VisitDeclRefExpr(DeclRefExpr *E) { + const ValueDecl *TheDecl = E->getDecl(); + if (areSameVariable(IndexVar, TheDecl) || areSameVariable(EndVar, TheDecl)) + OnlyUsedAsIndex = false; + if (containsExpr(Context, &DependentExprs, E)) + ConfidenceLevel.lowerTo(RL_Risky); + return true; +} + +/// \brief If we find that another variable is created just to refer to the loop +/// element, note it for reuse as the loop variable. +/// +/// See the comments for isAliasDecl. +bool ForLoopIndexUseVisitor::VisitDeclStmt(DeclStmt *S) { + if (!AliasDecl && S->isSingleDecl() && + isAliasDecl(S->getSingleDecl(), IndexVar)) { + AliasDecl = S; + if (CurrStmtParent) { + if (isa<IfStmt>(CurrStmtParent) || + isa<WhileStmt>(CurrStmtParent) || + isa<SwitchStmt>(CurrStmtParent)) + ReplaceWithAliasUse = true; + else if (isa<ForStmt>(CurrStmtParent)) { + if (cast<ForStmt>(CurrStmtParent)->getConditionVariableDeclStmt() == S) + ReplaceWithAliasUse = true; + else + // It's assumed S came the for loop's init clause. 
+ AliasFromForInit = true; + } + } + } + + return true; +} + +bool ForLoopIndexUseVisitor::TraverseStmt(Stmt *S) { + // All this pointer swapping is a mechanism for tracking immediate parentage + // of Stmts. + const Stmt *OldNextParent = NextStmtParent; + CurrStmtParent = NextStmtParent; + NextStmtParent = S; + bool Result = VisitorBase::TraverseStmt(S); + NextStmtParent = OldNextParent; + return Result; +} + +//// \brief Apply the source transformations necessary to migrate the loop! +void LoopFixer::doConversion(ASTContext *Context, + const VarDecl *IndexVar, + const VarDecl *MaybeContainer, + StringRef ContainerString, + const UsageResult &Usages, + const DeclStmt *AliasDecl, + bool AliasUseRequired, + bool AliasFromForInit, + const ForStmt *TheLoop, + bool ContainerNeedsDereference, + bool DerefByValue, + bool DerefByConstRef) { + std::string VarName; + bool VarNameFromAlias = Usages.size() == 1 && AliasDecl; + bool AliasVarIsRef = false; + + if (VarNameFromAlias) { + const VarDecl *AliasVar = cast<VarDecl>(AliasDecl->getSingleDecl()); + VarName = AliasVar->getName().str(); + AliasVarIsRef = AliasVar->getType()->isReferenceType(); + + // We keep along the entire DeclStmt to keep the correct range here. + const SourceRange &ReplaceRange = AliasDecl->getSourceRange(); + + std::string ReplacementText; + if (AliasUseRequired) + ReplacementText = VarName; + else if (AliasFromForInit) + // FIXME: Clang includes the location of the ';' but only for DeclStmt's + // in a for loop's init clause. Need to put this ';' back while removing + // the declaration of the alias variable. This is probably a bug. + ReplacementText = ";"; + + Owner.addReplacementForCurrentTU(Replacement( + Context->getSourceManager(), + CharSourceRange::getTokenRange(ReplaceRange), ReplacementText)); + // No further replacements are made to the loop, since the iterator or index + // was used exactly once - in the initialization of AliasVar. 
+ } else { + VariableNamer Namer(GeneratedDecls, &ParentFinder->getStmtToParentStmtMap(), + TheLoop, IndexVar, MaybeContainer, Context); + VarName = Namer.createIndexName(); + // First, replace all usages of the array subscript expression with our new + // variable. + for (UsageResult::const_iterator I = Usages.begin(), E = Usages.end(); + I != E; ++I) { + std::string ReplaceText = I->IsArrow ? VarName + "." : VarName; + ReplacedVarRanges->insert(std::make_pair(TheLoop, IndexVar)); + Owner.addReplacementForCurrentTU( + Replacement(Context->getSourceManager(), + CharSourceRange::getTokenRange(I->Range), ReplaceText)); + } + } + + // Now, we need to construct the new range expresion. + SourceRange ParenRange(TheLoop->getLParenLoc(), TheLoop->getRParenLoc()); + + QualType AutoRefType = Context->getAutoDeductType(); + + // If the new variable name is from the aliased variable, then the reference + // type for the new variable should only be used if the aliased variable was + // declared as a reference. + if (!VarNameFromAlias || AliasVarIsRef) { + // If an iterator's operator*() returns a 'T&' we can bind that to 'auto&'. + // If operator*() returns 'T' we can bind that to 'auto&&' which will deduce + // to 'T&&'. + if (DerefByValue) + AutoRefType = Context->getRValueReferenceType(AutoRefType); + else { + if (DerefByConstRef) + AutoRefType = Context->getConstType(AutoRefType); + AutoRefType = Context->getLValueReferenceType(AutoRefType); + } + } + + std::string MaybeDereference = ContainerNeedsDereference ? "*" : ""; + std::string TypeString = AutoRefType.getAsString(); + std::string Range = ("(" + TypeString + " " + VarName + " : " + + MaybeDereference + ContainerString + ")").str(); + Owner.addReplacementForCurrentTU( + Replacement(Context->getSourceManager(), + CharSourceRange::getTokenRange(ParenRange), Range)); + GeneratedDecls->insert(make_pair(TheLoop, VarName)); +} + +/// \brief Determine whether Init appears to be an initializing an iterator. 
+/// +/// If it is, returns the object whose begin() or end() method is called, and +/// the output parameter isArrow is set to indicate whether the initialization +/// is called via . or ->. +static const Expr *getContainerFromBeginEndCall(const Expr *Init, bool IsBegin, + bool *IsArrow) { + // FIXME: Maybe allow declaration/initialization outside of the for loop? + const CXXMemberCallExpr *TheCall = + dyn_cast_or_null<CXXMemberCallExpr>(digThroughConstructors(Init)); + if (!TheCall || TheCall->getNumArgs() != 0) + return NULL; + + const MemberExpr *Member = dyn_cast<MemberExpr>(TheCall->getCallee()); + if (!Member) + return NULL; + const std::string Name = Member->getMemberDecl()->getName(); + const std::string TargetName = IsBegin ? "begin" : "end"; + if (Name != TargetName) + return NULL; + + const Expr *SourceExpr = Member->getBase(); + if (!SourceExpr) + return NULL; + + *IsArrow = Member->isArrow(); + return SourceExpr; +} + +/// \brief Determines the container whose begin() and end() functions are called +/// for an iterator-based loop. +/// +/// BeginExpr must be a member call to a function named "begin()", and EndExpr +/// must be a member . +static const Expr *findContainer(ASTContext *Context, const Expr *BeginExpr, + const Expr *EndExpr, + bool *ContainerNeedsDereference) { + // Now that we know the loop variable and test expression, make sure they are + // valid. 
+ bool BeginIsArrow = false; + bool EndIsArrow = false; + const Expr *BeginContainerExpr = + getContainerFromBeginEndCall(BeginExpr, /*IsBegin=*/true, &BeginIsArrow); + if (!BeginContainerExpr) + return NULL; + + const Expr *EndContainerExpr = + getContainerFromBeginEndCall(EndExpr, /*IsBegin=*/false, &EndIsArrow); + // Disallow loops that try evil things like this (note the dot and arrow): + // for (IteratorType It = Obj.begin(), E = Obj->end(); It != E; ++It) { } + if (!EndContainerExpr || BeginIsArrow != EndIsArrow || + !areSameExpr(Context, EndContainerExpr, BeginContainerExpr)) + return NULL; + + *ContainerNeedsDereference = BeginIsArrow; + return BeginContainerExpr; +} + +StringRef LoopFixer::checkDeferralsAndRejections(ASTContext *Context, + const Expr *ContainerExpr, + Confidence ConfidenceLevel, + const ForStmt *TheLoop) { + // If we already modified the range of this for loop, don't do any further + // updates on this iteration. + // FIXME: Once Replacements can detect conflicting edits, replace this + // implementation and rely on conflicting edit detection instead. + if (ReplacedVarRanges->count(TheLoop)) { + ++*DeferredChanges; + return ""; + } + + ParentFinder->gatherAncestors(Context->getTranslationUnitDecl()); + // Ensure that we do not try to move an expression dependent on a local + // variable declared inside the loop outside of it! + DependencyFinderASTVisitor + DependencyFinder(&ParentFinder->getStmtToParentStmtMap(), + &ParentFinder->getDeclToParentStmtMap(), + ReplacedVarRanges, TheLoop); + + // Not all of these are actually deferred changes. + // FIXME: Determine when the external dependency isn't an expression converted + // by another loop. 
+ if (DependencyFinder.dependsOnInsideVariable(ContainerExpr)) { + ++*DeferredChanges; + return ""; + } + if (ConfidenceLevel.getRiskLevel() > MaxRisk) { + ++*RejectedChanges; + return ""; + } + + StringRef ContainerString; + if (isa<CXXThisExpr>(ContainerExpr->IgnoreParenImpCasts())) { + ContainerString = "this"; + } else { + ContainerString = getStringFromRange(Context->getSourceManager(), + Context->getLangOpts(), + ContainerExpr->getSourceRange()); + } + + // In case someone is using an evil macro, reject this change. + if (ContainerString.empty()) + ++*RejectedChanges; + return ContainerString; +} + +/// \brief Given that we have verified that the loop's header appears to be +/// convertible, run the complete analysis on the loop to determine if the +/// loop's body is convertible. +void LoopFixer::findAndVerifyUsages(ASTContext *Context, + const VarDecl *LoopVar, + const VarDecl *EndVar, + const Expr *ContainerExpr, + const Expr *BoundExpr, + bool ContainerNeedsDereference, + bool DerefByValue, + bool DerefByConstRef, + const ForStmt *TheLoop, + Confidence ConfidenceLevel) { + ForLoopIndexUseVisitor Finder(Context, LoopVar, EndVar, ContainerExpr, + BoundExpr, ContainerNeedsDereference); + if (ContainerExpr) { + ComponentFinderASTVisitor ComponentFinder; + ComponentFinder.findExprComponents(ContainerExpr->IgnoreParenImpCasts()); + Finder.addComponents(ComponentFinder.getComponents()); + } + + if (!Finder.findAndVerifyUsages(TheLoop->getBody())) + return; + + ConfidenceLevel.lowerTo(Finder.getRiskLevel()); + if (FixerKind == LFK_Array) { + // The array being indexed by IndexVar was discovered during traversal. + ContainerExpr = Finder.getContainerIndexed()->IgnoreParenImpCasts(); + // Very few loops are over expressions that generate arrays rather than + // array variables. Consider loops over arrays that aren't just represented + // by a variable to be risky conversions. 
+ if (!getReferencedVariable(ContainerExpr) && + !isDirectMemberExpr(ContainerExpr)) + ConfidenceLevel.lowerTo(RL_Risky); + } + + std::string ContainerString = + checkDeferralsAndRejections(Context, ContainerExpr, + ConfidenceLevel, TheLoop); + if (ContainerString.empty()) + return; + + doConversion(Context, LoopVar, getReferencedVariable(ContainerExpr), + ContainerString, Finder.getUsages(), Finder.getAliasDecl(), + Finder.aliasUseRequired(), Finder.aliasFromForInit(), TheLoop, + ContainerNeedsDereference, DerefByValue, DerefByConstRef); + ++*AcceptedChanges; +} + +/// \brief The LoopFixer callback, which determines if loops discovered by the +/// matchers are convertible, printing information about the loops if so. +void LoopFixer::run(const MatchFinder::MatchResult &Result) { + const BoundNodes &Nodes = Result.Nodes; + Confidence ConfidenceLevel(RL_Safe); + ASTContext *Context = Result.Context; + const ForStmt *TheLoop = Nodes.getStmtAs<ForStmt>(LoopName); + + if (!Owner.isFileModifiable(Context->getSourceManager(),TheLoop->getForLoc())) + return; + + // Check that we have exactly one index variable and at most one end variable. + const VarDecl *LoopVar = Nodes.getDeclAs<VarDecl>(IncrementVarName); + const VarDecl *CondVar = Nodes.getDeclAs<VarDecl>(ConditionVarName); + const VarDecl *InitVar = Nodes.getDeclAs<VarDecl>(InitVarName); + if (!areSameVariable(LoopVar, CondVar) || !areSameVariable(LoopVar, InitVar)) + return; + const VarDecl *EndVar = Nodes.getDeclAs<VarDecl>(EndVarName); + const VarDecl *ConditionEndVar = + Nodes.getDeclAs<VarDecl>(ConditionEndVarName); + if (EndVar && !areSameVariable(EndVar, ConditionEndVar)) + return; + + // If the end comparison isn't a variable, we can try to work with the + // expression the loop variable is being tested against instead. 
+ const CXXMemberCallExpr *EndCall = + Nodes.getStmtAs<CXXMemberCallExpr>(EndCallName); + const Expr *BoundExpr = Nodes.getStmtAs<Expr>(ConditionBoundName); + // If the loop calls end()/size() after each iteration, lower our confidence + // level. + if (FixerKind != LFK_Array && !EndVar) + ConfidenceLevel.lowerTo(RL_Reasonable); + + const Expr *ContainerExpr = NULL; + bool DerefByValue = false; + bool DerefByConstRef = false; + bool ContainerNeedsDereference = false; + // FIXME: Try to put most of this logic inside a matcher. Currently, matchers + // don't allow the right-recursive checks in digThroughConstructors. + if (FixerKind == LFK_Iterator) { + ContainerExpr = findContainer(Context, LoopVar->getInit(), + EndVar ? EndVar->getInit() : EndCall, + &ContainerNeedsDereference); + + QualType InitVarType = InitVar->getType(); + QualType CanonicalInitVarType = InitVarType.getCanonicalType(); + + const CXXMemberCallExpr *BeginCall = + Nodes.getNodeAs<CXXMemberCallExpr>(BeginCallName); + assert(BeginCall != 0 && "Bad Callback. No begin call expression."); + QualType CanonicalBeginType = + BeginCall->getMethodDecl()->getResultType().getCanonicalType(); + + if (CanonicalBeginType->isPointerType() && + CanonicalInitVarType->isPointerType()) { + QualType BeginPointeeType = CanonicalBeginType->getPointeeType(); + QualType InitPointeeType = CanonicalInitVarType->getPointeeType(); + // If the initializer and the variable are both pointers check if the + // un-qualified pointee types match otherwise we don't use auto. + if (!Context->hasSameUnqualifiedType(InitPointeeType, BeginPointeeType)) + return; + } else { + // Check for qualified types to avoid conversions from non-const to const + // iterator types. 
+ if (!Context->hasSameType(CanonicalInitVarType, CanonicalBeginType)) + return; + } + + DerefByValue = Nodes.getNodeAs<QualType>(DerefByValueResultName) != 0; + if (!DerefByValue) { + if (const QualType *DerefType = + Nodes.getNodeAs<QualType>(DerefByRefResultName)) { + // A node will only be bound with DerefByRefResultName if we're dealing + // with a user-defined iterator type. Test the const qualification of + // the reference type. + DerefByConstRef = (*DerefType)->getAs<ReferenceType>()->getPointeeType() + .isConstQualified(); + } else { + // By nature of the matcher this case is triggered only for built-in + // iterator types (i.e. pointers). + assert(isa<PointerType>(CanonicalInitVarType) && + "Non-class iterator type is not a pointer type"); + QualType InitPointeeType = CanonicalInitVarType->getPointeeType(); + QualType BeginPointeeType = CanonicalBeginType->getPointeeType(); + // If the initializer and variable have both the same type just use auto + // otherwise we test for const qualification of the pointed-at type. + if (!Context->hasSameType(InitPointeeType, BeginPointeeType)) + DerefByConstRef = InitPointeeType.isConstQualified(); + } + } else { + // If the de-referece operator return by value then test for the canonical + // const qualification of the init variable type. + DerefByConstRef = CanonicalInitVarType.isConstQualified(); + } + } else if (FixerKind == LFK_PseudoArray) { + if (!EndCall) + return; + ContainerExpr = EndCall->getImplicitObjectArgument(); + const MemberExpr *Member = dyn_cast<MemberExpr>(EndCall->getCallee()); + if (!Member) + return; + ContainerNeedsDereference = Member->isArrow(); + } + // We must know the container or an array length bound. 
+ if (!ContainerExpr && !BoundExpr) + return; + + findAndVerifyUsages(Context, LoopVar, EndVar, ContainerExpr, BoundExpr, + ContainerNeedsDereference, DerefByValue, DerefByConstRef, + TheLoop, ConfidenceLevel); +} diff --git a/clang-tools-extra/clang-modernize/LoopConvert/LoopActions.h b/clang-tools-extra/clang-modernize/LoopConvert/LoopActions.h new file mode 100644 index 00000000000..b72576bde82 --- /dev/null +++ b/clang-tools-extra/clang-modernize/LoopConvert/LoopActions.h @@ -0,0 +1,105 @@ +//===-- LoopConvert/LoopActions.h - C++11 For loop migration ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file declares matchers and callbacks for use in migrating C++ +/// for loops. +/// +//===----------------------------------------------------------------------===// + +#ifndef CPP11_MIGRATE_LOOP_ACTIONS_H +#define CPP11_MIGRATE_LOOP_ACTIONS_H + +#include "StmtAncestor.h" +#include "Core/Transform.h" +#include "clang/AST/ASTContext.h" +#include "clang/AST/RecursiveASTVisitor.h" +#include "clang/ASTMatchers/ASTMatchFinder.h" +#include "clang/Tooling/Refactoring.h" + +struct Usage; +class Confidence; +// The main computational result of ForLoopIndexUseVisitor. +typedef llvm::SmallVector<Usage, 8> UsageResult; + +enum LoopFixerKind { + LFK_Array, + LFK_Iterator, + LFK_PseudoArray +}; + +/// \brief The callback to be used for loop migration matchers. +/// +/// The callback does extra checking not possible in matchers, and attempts to +/// convert the for loop, if possible. 
+class LoopFixer : public clang::ast_matchers::MatchFinder::MatchCallback { + public: + LoopFixer(StmtAncestorASTVisitor *ParentFinder, + StmtGeneratedVarNameMap *GeneratedDecls, + ReplacedVarsMap *ReplacedVarRanges, unsigned *AcceptedChanges, + unsigned *DeferredChanges, unsigned *RejectedChanges, + RiskLevel MaxRisk, LoopFixerKind FixerKind, Transform &Owner) + : ParentFinder(ParentFinder), + GeneratedDecls(GeneratedDecls), ReplacedVarRanges(ReplacedVarRanges), + AcceptedChanges(AcceptedChanges), DeferredChanges(DeferredChanges), + RejectedChanges(RejectedChanges), MaxRisk(MaxRisk), + FixerKind(FixerKind), Owner(Owner) {} + + virtual void + run(const clang::ast_matchers::MatchFinder::MatchResult &Result); + + private: + StmtAncestorASTVisitor *ParentFinder; + StmtGeneratedVarNameMap *GeneratedDecls; + ReplacedVarsMap *ReplacedVarRanges; + unsigned *AcceptedChanges; + unsigned *DeferredChanges; + unsigned *RejectedChanges; + RiskLevel MaxRisk; + LoopFixerKind FixerKind; + Transform &Owner; + + /// \brief Computes the changes needed to convert a given for loop, and + /// applies it. + void doConversion(clang::ASTContext *Context, + const clang::VarDecl *IndexVar, + const clang::VarDecl *MaybeContainer, + llvm::StringRef ContainerString, + const UsageResult &Usages, + const clang::DeclStmt *AliasDecl, + bool AliasUseRequired, + bool AliasFromForInit, + const clang::ForStmt *TheLoop, + bool ContainerNeedsDereference, + bool DerefByValue, + bool DerefByConstRef); + + /// \brief Given a loop header that would be convertible, discover all usages + /// of the index variable and convert the loop if possible. 
+ void findAndVerifyUsages(clang::ASTContext *Context, + const clang::VarDecl *LoopVar, + const clang::VarDecl *EndVar, + const clang::Expr *ContainerExpr, + const clang::Expr *BoundExpr, + bool ContainerNeedsDereference, + bool DerefByValue, + bool DerefByConstRef, + const clang::ForStmt *TheLoop, + Confidence ConfidenceLevel); + + /// \brief Determine if the change should be deferred or rejected, returning + /// text which refers to the container iterated over if the change should + /// proceed. + llvm::StringRef checkDeferralsAndRejections(clang::ASTContext *Context, + const clang::Expr *ContainerExpr, + Confidence ConfidenceLevel, + const clang::ForStmt *TheLoop); +}; + +#endif // CPP11_MIGRATE_LOOP_ACTIONS_H diff --git a/clang-tools-extra/clang-modernize/LoopConvert/LoopConvert.cpp b/clang-tools-extra/clang-modernize/LoopConvert/LoopConvert.cpp new file mode 100644 index 00000000000..2dfa4562929 --- /dev/null +++ b/clang-tools-extra/clang-modernize/LoopConvert/LoopConvert.cpp @@ -0,0 +1,89 @@ +//===-- LoopConvert/LoopConvert.cpp - C++11 for-loop migration ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file provides the implementation of the LoopConvertTransform +/// class. 
+/// +//===----------------------------------------------------------------------===// + +#include "LoopConvert.h" +#include "LoopActions.h" +#include "LoopMatchers.h" +#include "clang/Frontend/FrontendActions.h" +#include "clang/Tooling/Refactoring.h" +#include "clang/Tooling/Tooling.h" + +using clang::ast_matchers::MatchFinder; +using namespace clang::tooling; +using namespace clang; + +int LoopConvertTransform::apply(const FileOverrides &InputStates, + const CompilationDatabase &Database, + const std::vector<std::string> &SourcePaths) { + ClangTool LoopTool(Database, SourcePaths); + + StmtAncestorASTVisitor ParentFinder; + StmtGeneratedVarNameMap GeneratedDecls; + ReplacedVarsMap ReplacedVars; + unsigned AcceptedChanges = 0; + unsigned DeferredChanges = 0; + unsigned RejectedChanges = 0; + + MatchFinder Finder; + LoopFixer ArrayLoopFixer(&ParentFinder, &GeneratedDecls, &ReplacedVars, + &AcceptedChanges, &DeferredChanges, &RejectedChanges, + Options().MaxRiskLevel, LFK_Array, + /*Owner=*/ *this); + Finder.addMatcher(makeArrayLoopMatcher(), &ArrayLoopFixer); + LoopFixer IteratorLoopFixer(&ParentFinder, &GeneratedDecls, &ReplacedVars, + &AcceptedChanges, &DeferredChanges, + &RejectedChanges, Options().MaxRiskLevel, + LFK_Iterator, /*Owner=*/ *this); + Finder.addMatcher(makeIteratorLoopMatcher(), &IteratorLoopFixer); + LoopFixer PseudoarrrayLoopFixer(&ParentFinder, &GeneratedDecls, &ReplacedVars, + &AcceptedChanges, &DeferredChanges, + &RejectedChanges, Options().MaxRiskLevel, + LFK_PseudoArray, /*Owner=*/ *this); + Finder.addMatcher(makePseudoArrayLoopMatcher(), &PseudoarrrayLoopFixer); + + setOverrides(InputStates); + + if (int result = LoopTool.run(createActionFactory(Finder))) { + llvm::errs() << "Error encountered during translation.\n"; + return result; + } + + setAcceptedChanges(AcceptedChanges); + setRejectedChanges(RejectedChanges); + setDeferredChanges(DeferredChanges); + + return 0; +} + +struct LoopConvertFactory : TransformFactory { + 
LoopConvertFactory() { + Since.Clang = Version(3, 0); + Since.Gcc = Version(4, 6); + Since.Icc = Version(13); + Since.Msvc = Version(11); + } + + Transform *createTransform(const TransformOptions &Opts) LLVM_OVERRIDE { + return new LoopConvertTransform(Opts); + } +}; + +// Register the factory using this statically initialized variable. +static TransformFactoryRegistry::Add<LoopConvertFactory> +X("loop-convert", "Make use of range-based for loops where possible"); + +// This anchor is used to force the linker to link in the generated object file +// and thus register the factory. +volatile int LoopConvertTransformAnchorSource = 0; diff --git a/clang-tools-extra/clang-modernize/LoopConvert/LoopConvert.h b/clang-tools-extra/clang-modernize/LoopConvert/LoopConvert.h new file mode 100644 index 00000000000..b45d9555b3d --- /dev/null +++ b/clang-tools-extra/clang-modernize/LoopConvert/LoopConvert.h @@ -0,0 +1,36 @@ +//===-- LoopConvert/LoopConvert.h - C++11 for-loop migration ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file provides the definition of the LoopConvertTransform +/// class which is the main interface to the loop-convert transform that tries +/// to make use of range-based for loops where possible. +/// +//===----------------------------------------------------------------------===// + +#ifndef CPP11_MIGRATE_LOOP_CONVERT_H +#define CPP11_MIGRATE_LOOP_CONVERT_H + +#include "Core/Transform.h" +#include "llvm/Support/Compiler.h" // For LLVM_OVERRIDE + +/// \brief Subclass of Transform that transforms for-loops into range-based +/// for-loops where possible. 
+class LoopConvertTransform : public Transform { +public: + LoopConvertTransform(const TransformOptions &Options) + : Transform("LoopConvert", Options) {} + + /// \see Transform::run(). + virtual int apply(const FileOverrides &InputStates, + const clang::tooling::CompilationDatabase &Database, + const std::vector<std::string> &SourcePaths) LLVM_OVERRIDE; +}; + +#endif // CPP11_MIGRATE_LOOP_CONVERT_H diff --git a/clang-tools-extra/clang-modernize/LoopConvert/LoopMatchers.cpp b/clang-tools-extra/clang-modernize/LoopConvert/LoopMatchers.cpp new file mode 100644 index 00000000000..719c2069fbe --- /dev/null +++ b/clang-tools-extra/clang-modernize/LoopConvert/LoopMatchers.cpp @@ -0,0 +1,346 @@ +//===-- LoopConvert/LoopMatchers.cpp - Matchers for for loops -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file contains definitions of the matchers for use in migrating +/// C++ for loops. 
+/// +//===----------------------------------------------------------------------===// + +#include "LoopMatchers.h" + +using namespace clang::ast_matchers; +using namespace clang; + +const char LoopName[] = "forLoop"; +const char ConditionBoundName[] = "conditionBound"; +const char ConditionVarName[] = "conditionVar"; +const char IncrementVarName[] = "incrementVar"; +const char InitVarName[] = "initVar"; +const char BeginCallName[] = "beginCall"; +const char EndCallName[] = "endCall"; +const char ConditionEndVarName[] = "conditionEndVar"; +const char EndVarName[] = "endVar"; +const char DerefByValueResultName[] = "derefByValueResult"; +const char DerefByRefResultName[] = "derefByRefResult"; + +// shared matchers +static const TypeMatcher AnyType = anything(); + +static const StatementMatcher IntegerComparisonMatcher = + expr(ignoringParenImpCasts(declRefExpr(to( + varDecl(hasType(isInteger())).bind(ConditionVarName))))); + +static const DeclarationMatcher InitToZeroMatcher = + varDecl(hasInitializer(ignoringParenImpCasts( + integerLiteral(equals(0))))).bind(InitVarName); + +static const StatementMatcher IncrementVarMatcher = + declRefExpr(to( + varDecl(hasType(isInteger())).bind(IncrementVarName))); + +// FIXME: How best to document complicated matcher expressions? They're fairly +// self-documenting...but there may be some unintuitive parts. + +/// \brief The matcher for loops over arrays. +/// +/// In this general example, assuming 'j' and 'k' are of integral type: +/// \code +/// for (int i = 0; j < 3 + 2; ++k) { ... } +/// \endcode +/// The following string identifers are bound to the parts of the AST: +/// ConditionVarName: 'j' (as a VarDecl) +/// ConditionBoundName: '3 + 2' (as an Expr) +/// InitVarName: 'i' (as a VarDecl) +/// IncrementVarName: 'k' (as a VarDecl) +/// LoopName: The entire for loop (as a ForStmt) +/// +/// Client code will need to make sure that: +/// - The three index variables identified by the matcher are the same +/// VarDecl. 
+/// - The index variable is only used as an array index. +/// - All arrays indexed by the loop are the same. +StatementMatcher makeArrayLoopMatcher() { + StatementMatcher ArrayBoundMatcher = + expr(hasType(isInteger())).bind(ConditionBoundName); + + return forStmt( + hasLoopInit(declStmt(hasSingleDecl(InitToZeroMatcher))), + hasCondition(anyOf(binaryOperator(hasOperatorName("<"), + hasLHS(IntegerComparisonMatcher), + hasRHS(ArrayBoundMatcher)), + binaryOperator(hasOperatorName(">"), + hasLHS(ArrayBoundMatcher), + hasRHS(IntegerComparisonMatcher)))), + hasIncrement(unaryOperator(hasOperatorName("++"), + hasUnaryOperand(IncrementVarMatcher)))) + .bind(LoopName); +} + +/// \brief The matcher used for iterator-based for loops. +/// +/// This matcher is more flexible than array-based loops. It will match +/// catch loops of the following textual forms (regardless of whether the +/// iterator type is actually a pointer type or a class type): +/// +/// Assuming f, g, and h are of type containerType::iterator, +/// \code +/// for (containerType::iterator it = container.begin(), +/// e = createIterator(); f != g; ++h) { ... } +/// for (containerType::iterator it = container.begin(); +/// f != anotherContainer.end(); ++h) { ... 
} +/// \endcode +/// The following string identifiers are bound to the parts of the AST: +/// InitVarName: 'it' (as a VarDecl) +/// ConditionVarName: 'f' (as a VarDecl) +/// LoopName: The entire for loop (as a ForStmt) +/// In the first example only: +/// EndVarName: 'e' (as a VarDecl) +/// ConditionEndVarName: 'g' (as a VarDecl) +/// In the second example only: +/// EndCallName: 'container.end()' (as a CXXMemberCallExpr) +/// +/// Client code will need to make sure that: +/// - The iterator variables 'it', 'f', and 'h' are the same +/// - The two containers on which 'begin' and 'end' are called are the same +/// - If the end iterator variable 'g' is defined, it is the same as 'f' +StatementMatcher makeIteratorLoopMatcher() { + StatementMatcher BeginCallMatcher = + memberCallExpr( + argumentCountIs(0), + callee( + methodDecl(hasName("begin")) + ) + ).bind(BeginCallName); + + DeclarationMatcher InitDeclMatcher = + varDecl( + hasInitializer( + anyOf( + ignoringParenImpCasts(BeginCallMatcher), + materializeTemporaryExpr(ignoringParenImpCasts(BeginCallMatcher)), + hasDescendant(BeginCallMatcher) + ) + ) + ).bind(InitVarName); + + DeclarationMatcher EndDeclMatcher = + varDecl(hasInitializer(anything())).bind(EndVarName); + + StatementMatcher EndCallMatcher = + memberCallExpr(argumentCountIs(0), callee(methodDecl(hasName("end")))); + + StatementMatcher IteratorBoundMatcher = + expr(anyOf(ignoringParenImpCasts(declRefExpr(to( + varDecl().bind(ConditionEndVarName)))), + ignoringParenImpCasts( + expr(EndCallMatcher).bind(EndCallName)), + materializeTemporaryExpr(ignoringParenImpCasts( + expr(EndCallMatcher).bind(EndCallName))))); + + StatementMatcher IteratorComparisonMatcher = + expr(ignoringParenImpCasts(declRefExpr(to( + varDecl().bind(ConditionVarName))))); + + StatementMatcher OverloadedNEQMatcher = operatorCallExpr( + hasOverloadedOperatorName("!="), + argumentCountIs(2), + hasArgument(0, IteratorComparisonMatcher), + hasArgument(1, IteratorBoundMatcher)); + + // This 
matcher tests that a declaration is a CXXRecordDecl that has an + // overloaded operator*(). If the operator*() returns by value instead of by + // reference then the return type is tagged with DerefByValueResultName. + internal::Matcher<VarDecl> TestDerefReturnsByValue = + hasType( + recordDecl( + hasMethod( + allOf( + hasOverloadedOperatorName("*"), + anyOf( + // Tag the return type if it's by value. + returns( + qualType( + unless(hasCanonicalType(referenceType())) + ).bind(DerefByValueResultName) + ), + returns( + // Skip loops where the iterator's operator* returns an + // rvalue reference. This is just weird. + qualType( + unless( + hasCanonicalType(rValueReferenceType()) + ) + ).bind(DerefByRefResultName) + ) + ) + ) + ) + ) + ); + + + return + forStmt( + hasLoopInit(anyOf( + declStmt( + declCountIs(2), + containsDeclaration(0, InitDeclMatcher), + containsDeclaration(1, EndDeclMatcher) + ), + declStmt(hasSingleDecl(InitDeclMatcher)) + )), + hasCondition(anyOf( + binaryOperator( + hasOperatorName("!="), + hasLHS(IteratorComparisonMatcher), + hasRHS(IteratorBoundMatcher) + ), + binaryOperator( + hasOperatorName("!="), + hasLHS(IteratorBoundMatcher), + hasRHS(IteratorComparisonMatcher) + ), + OverloadedNEQMatcher + )), + hasIncrement(anyOf( + unaryOperator( + hasOperatorName("++"), + hasUnaryOperand( + declRefExpr(to( + varDecl(hasType(pointsTo(AnyType))).bind(IncrementVarName) + )) + ) + ), + operatorCallExpr( + hasOverloadedOperatorName("++"), + hasArgument(0, + declRefExpr(to( + varDecl(TestDerefReturnsByValue).bind(IncrementVarName) + )) + ) + ) + )) + ).bind(LoopName); +} + +/// \brief The matcher used for array-like containers (pseudoarrays). +/// +/// This matcher is more flexible than array-based loops. 
It will match +/// loops of the following textual forms (regardless of whether the +/// iterator type is actually a pointer type or a class type): +/// +/// Assuming f, g, and h are of type containerType::iterator, +/// \code +/// for (int i = 0, j = container.size(); f < g; ++h) { ... } +/// for (int i = 0; f < container.size(); ++h) { ... } +/// \endcode +/// The following string identifiers are bound to the parts of the AST: +/// InitVarName: 'i' (as a VarDecl) +/// ConditionVarName: 'f' (as a VarDecl) +/// LoopName: The entire for loop (as a ForStmt) +/// In the first example only: +/// EndVarName: 'j' (as a VarDecl) +/// ConditionEndVarName: 'g' (as a VarDecl) +/// In the second example only: +/// EndCallName: 'container.size()' (as a CXXMemberCallExpr) +/// +/// Client code will need to make sure that: +/// - The index variables 'i', 'f', and 'h' are the same +/// - The containers on which 'size()' is called is the container indexed +/// - The index variable is only used in overloaded operator[] or +/// container.at() +/// - If the end iterator variable 'g' is defined, it is the same as 'j' +/// - The container's iterators would not be invalidated during the loop +StatementMatcher makePseudoArrayLoopMatcher() { + // Test that the incoming type has a record declaration that has methods + // called 'begin' and 'end'. If the incoming type is const, then make sure + // these methods are also marked const. + // + // FIXME: To be completely thorough this matcher should also ensure the + // return type of begin/end is an iterator that dereferences to the same as + // what operator[] or at() returns. Such a test isn't likely to fail except + // for pathological cases. + // + // FIXME: Also, a record doesn't necessarily need begin() and end(). Free + // functions called begin() and end() taking the container as an argument + // are also allowed. 
+ TypeMatcher RecordWithBeginEnd = + qualType(anyOf( + qualType( + isConstQualified(), + hasDeclaration( + recordDecl( + hasMethod( + methodDecl( + hasName("begin"), + isConst() + ) + ), + hasMethod( + methodDecl( + hasName("end"), + isConst() + ) + ) + ) + ) // hasDeclaration + ), // qualType + qualType( + unless(isConstQualified()), + hasDeclaration( + recordDecl( + hasMethod(hasName("begin")), + hasMethod(hasName("end")) + ) + ) + ) // qualType + ) + ); + + StatementMatcher SizeCallMatcher = + memberCallExpr(argumentCountIs(0), + callee(methodDecl(anyOf(hasName("size"), + hasName("length")))), + on(anyOf(hasType(pointsTo(RecordWithBeginEnd)), + hasType(RecordWithBeginEnd)))); + + StatementMatcher EndInitMatcher = + expr(anyOf( + ignoringParenImpCasts(expr(SizeCallMatcher).bind(EndCallName)), + explicitCastExpr(hasSourceExpression(ignoringParenImpCasts( + expr(SizeCallMatcher).bind(EndCallName)))))); + + DeclarationMatcher EndDeclMatcher = + varDecl(hasInitializer(EndInitMatcher)).bind(EndVarName); + + StatementMatcher IndexBoundMatcher = + expr(anyOf( + ignoringParenImpCasts(declRefExpr(to( + varDecl(hasType(isInteger())).bind(ConditionEndVarName)))), + EndInitMatcher)); + + return forStmt( + hasLoopInit(anyOf( + declStmt(declCountIs(2), + containsDeclaration(0, InitToZeroMatcher), + containsDeclaration(1, EndDeclMatcher)), + declStmt(hasSingleDecl(InitToZeroMatcher)))), + hasCondition(anyOf( + binaryOperator(hasOperatorName("<"), + hasLHS(IntegerComparisonMatcher), + hasRHS(IndexBoundMatcher)), + binaryOperator(hasOperatorName(">"), + hasLHS(IndexBoundMatcher), + hasRHS(IntegerComparisonMatcher)))), + hasIncrement(unaryOperator( + hasOperatorName("++"), + hasUnaryOperand(IncrementVarMatcher)))) + .bind(LoopName); +} diff --git a/clang-tools-extra/clang-modernize/LoopConvert/LoopMatchers.h b/clang-tools-extra/clang-modernize/LoopConvert/LoopMatchers.h new file mode 100644 index 00000000000..b0cd8a5a7cc --- /dev/null +++ 
b/clang-tools-extra/clang-modernize/LoopConvert/LoopMatchers.h
@@ -0,0 +1,42 @@
//===-- LoopConvert/LoopMatchers.h - Matchers for for loops -----*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief This file contains declarations of the matchers for use in migrating
/// C++ for loops. The matchers are responsible for checking the general shape
/// of the for loop, namely the init, condition, and increment portions.
/// Further analysis will be needed to confirm that the loop is in fact
/// convertible in the matcher callback.
///
//===----------------------------------------------------------------------===//

#ifndef CPP11_MIGRATE_LOOP_MATCHERS_H
#define CPP11_MIGRATE_LOOP_MATCHERS_H

#include "clang/ASTMatchers/ASTMatchers.h"

// Constants used for matcher name bindings. The matchers bind AST nodes under
// these names and the match callback looks the nodes up by the same names.
extern const char LoopName[];
extern const char ConditionBoundName[];
extern const char ConditionVarName[];
extern const char ConditionEndVarName[];
extern const char IncrementVarName[];
extern const char InitVarName[];
extern const char BeginCallName[];
// NOTE(review): no definition of EndExprName appears among the binding names
// defined in LoopMatchers.cpp - confirm it is defined somewhere, or drop it.
extern const char EndExprName[];
extern const char EndCallName[];
extern const char EndVarName[];
extern const char DerefByValueResultName[];
extern const char DerefByRefResultName[];

// Factories for the three loop-shape matchers; each returns a matcher for a
// whole for statement.
clang::ast_matchers::StatementMatcher makeArrayLoopMatcher();
clang::ast_matchers::StatementMatcher makeIteratorLoopMatcher();
clang::ast_matchers::StatementMatcher makePseudoArrayLoopMatcher();

#endif // CPP11_MIGRATE_LOOP_MATCHERS_H
diff --git a/clang-tools-extra/clang-modernize/LoopConvert/StmtAncestor.cpp b/clang-tools-extra/clang-modernize/LoopConvert/StmtAncestor.cpp
new file mode 100644
index 00000000000..33f576bd814
--- /dev/null
+++
b/clang-tools-extra/clang-modernize/LoopConvert/StmtAncestor.cpp @@ -0,0 +1,140 @@ +//===-- LoopConvert/StmtAncestor.cpp - AST property visitors --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file contains the definitions of several RecursiveASTVisitors +/// used to build and check data structures used in loop migration. +/// +//===----------------------------------------------------------------------===// + +#include "StmtAncestor.h" + +using namespace clang; + +/// \brief Tracks a stack of parent statements during traversal. +/// +/// All this really does is inject push_back() before running +/// RecursiveASTVisitor::TraverseStmt() and pop_back() afterwards. The Stmt atop +/// the stack is the parent of the current statement (NULL for the topmost +/// statement). +bool StmtAncestorASTVisitor::TraverseStmt(Stmt *Statement) { + StmtAncestors.insert(std::make_pair(Statement, StmtStack.back())); + StmtStack.push_back(Statement); + RecursiveASTVisitor<StmtAncestorASTVisitor>::TraverseStmt(Statement); + StmtStack.pop_back(); + return true; +} + +/// \brief Keep track of the DeclStmt associated with each VarDecl. +/// +/// Combined with StmtAncestors, this provides roughly the same information as +/// Scope, as we can map a VarDecl to its DeclStmt, then walk up the parent tree +/// using StmtAncestors. +bool StmtAncestorASTVisitor::VisitDeclStmt(DeclStmt *Decls) { + for (DeclStmt::const_decl_iterator I = Decls->decl_begin(), + E = Decls->decl_end(); I != E; ++I) + if (const VarDecl *V = dyn_cast<VarDecl>(*I)) + DeclParents.insert(std::make_pair(V, Decls)); + return true; +} + +/// \brief record the DeclRefExpr as part of the parent expression. 
+bool ComponentFinderASTVisitor::VisitDeclRefExpr(DeclRefExpr *E) { + Components.push_back(E); + return true; +} + +/// \brief record the MemberExpr as part of the parent expression. +bool ComponentFinderASTVisitor::VisitMemberExpr(MemberExpr *Member) { + Components.push_back(Member); + return true; +} + +/// \brief Forward any DeclRefExprs to a check on the referenced variable +/// declaration. +bool DependencyFinderASTVisitor::VisitDeclRefExpr(DeclRefExpr *DeclRef) { + if (VarDecl *V = dyn_cast_or_null<VarDecl>(DeclRef->getDecl())) + return VisitVarDecl(V); + return true; +} + +/// \brief Determine if any this variable is declared inside the ContainingStmt. +bool DependencyFinderASTVisitor::VisitVarDecl(VarDecl *V) { + const Stmt *Curr = DeclParents->lookup(V); + // First, see if the variable was declared within an inner scope of the loop. + while (Curr != NULL) { + if (Curr == ContainingStmt) { + DependsOnInsideVariable = true; + return false; + } + Curr = StmtParents->lookup(Curr); + } + + // Next, check if the variable was removed from existence by an earlier + // iteration. + for (ReplacedVarsMap::const_iterator I = ReplacedVars->begin(), + E = ReplacedVars->end(); I != E; ++I) + if ((*I).second == V) { + DependsOnInsideVariable = true; + return false; + } + return true; +} + +/// \brief If we already created a variable for TheLoop, check to make sure +/// that the name was not already taken. +bool DeclFinderASTVisitor::VisitForStmt(ForStmt *TheLoop) { + StmtGeneratedVarNameMap::const_iterator I = GeneratedDecls->find(TheLoop); + if (I != GeneratedDecls->end() && I->second == Name) { + Found = true; + return false; + } + return true; +} + +/// \brief If any named declaration within the AST subtree has the same name, +/// then consider Name already taken. 
+bool DeclFinderASTVisitor::VisitNamedDecl(NamedDecl *D) { + const IdentifierInfo *Ident = D->getIdentifier(); + if (Ident && Ident->getName() == Name) { + Found = true; + return false; + } + return true; +} + +/// \brief Forward any declaration references to the actual check on the +/// referenced declaration. +bool DeclFinderASTVisitor::VisitDeclRefExpr(DeclRefExpr *DeclRef) { + if (NamedDecl *D = dyn_cast<NamedDecl>(DeclRef->getDecl())) + return VisitNamedDecl(D); + return true; +} + +/// \brief If the new variable name conflicts with any type used in the loop, +/// then we mark that variable name as taken. +bool DeclFinderASTVisitor::VisitTypeLoc(TypeLoc TL) { + QualType QType = TL.getType(); + + // Check if our name conflicts with a type, to handle for typedefs. + if (QType.getAsString() == Name) { + Found = true; + return false; + } + // Check for base type conflicts. For example, when a struct is being + // referenced in the body of the loop, the above getAsString() will return the + // whole type (ex. "struct s"), but will be caught here. + if (const IdentifierInfo *Ident = QType.getBaseTypeIdentifier()) { + if (Ident->getName() == Name) { + Found = true; + return false; + } + } + return true; +} diff --git a/clang-tools-extra/clang-modernize/LoopConvert/StmtAncestor.h b/clang-tools-extra/clang-modernize/LoopConvert/StmtAncestor.h new file mode 100644 index 00000000000..24079097652 --- /dev/null +++ b/clang-tools-extra/clang-modernize/LoopConvert/StmtAncestor.h @@ -0,0 +1,201 @@ +//===-- LoopConvert/StmtAncestor.h - AST property visitors ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file contains the declarations of several RecursiveASTVisitors +/// used to build and check data structures used in loop migration. +/// +//===----------------------------------------------------------------------===// + +#ifndef CPP11_MIGRATE_STMT_ANCESTOR_H +#define CPP11_MIGRATE_STMT_ANCESTOR_H + +#include "clang/AST/RecursiveASTVisitor.h" + +/// A map used to walk the AST in reverse: maps child Stmt to parent Stmt. +typedef llvm::DenseMap<const clang::Stmt*, const clang::Stmt*> StmtParentMap; + +/// A map used to walk the AST in reverse: +/// maps VarDecl to the to parent DeclStmt. +typedef +llvm::DenseMap<const clang::VarDecl*, const clang::DeclStmt*> DeclParentMap; + +/// A map used to track which variables have been removed by a refactoring pass. +/// It maps the parent ForStmt to the removed index variable's VarDecl. +typedef +llvm::DenseMap<const clang::ForStmt*, const clang::VarDecl*> ReplacedVarsMap; + +/// A map used to remember the variable names generated in a Stmt +typedef llvm::DenseMap<const clang::Stmt*, std::string> StmtGeneratedVarNameMap; + +/// A vector used to store the AST subtrees of an Expr. +typedef llvm::SmallVector<const clang::Expr*, 16> ComponentVector; + +/// \brief Class used build the reverse AST properties needed to detect +/// name conflicts and free variables. +class StmtAncestorASTVisitor : + public clang::RecursiveASTVisitor<StmtAncestorASTVisitor> { +public: + StmtAncestorASTVisitor() { + StmtStack.push_back(NULL); + } + + /// \brief Run the analysis on the TranslationUnitDecl. + /// + /// In case we're running this analysis multiple times, don't repeat the work. + void gatherAncestors(const clang::TranslationUnitDecl *T) { + if (StmtAncestors.empty()) + TraverseDecl(const_cast<clang::TranslationUnitDecl*>(T)); + } + + /// Accessor for StmtAncestors. 
+ const StmtParentMap &getStmtToParentStmtMap() { + return StmtAncestors; + } + + /// Accessor for DeclParents. + const DeclParentMap &getDeclToParentStmtMap() { + return DeclParents; + } + + friend class clang::RecursiveASTVisitor<StmtAncestorASTVisitor>; + +private: + StmtParentMap StmtAncestors; + DeclParentMap DeclParents; + llvm::SmallVector<const clang::Stmt*, 16> StmtStack; + + bool TraverseStmt(clang::Stmt *Statement); + bool VisitDeclStmt(clang::DeclStmt *Statement); +}; + +/// Class used to find the variables and member expressions on which an +/// arbitrary expression depends. +class ComponentFinderASTVisitor : + public clang::RecursiveASTVisitor<ComponentFinderASTVisitor> { +public: + ComponentFinderASTVisitor() { } + + /// Find the components of an expression and place them in a ComponentVector. + void findExprComponents(const clang::Expr *SourceExpr) { + clang::Expr *E = const_cast<clang::Expr *>(SourceExpr); + TraverseStmt(E); + } + + /// Accessor for Components. + const ComponentVector &getComponents() { + return Components; + } + + friend class clang::RecursiveASTVisitor<ComponentFinderASTVisitor>; + +private: + ComponentVector Components; + + bool VisitDeclRefExpr(clang::DeclRefExpr *E); + bool VisitMemberExpr(clang::MemberExpr *Member); +}; + +/// Class used to determine if an expression is dependent on a variable declared +/// inside of the loop where it would be used. +class DependencyFinderASTVisitor : + public clang::RecursiveASTVisitor<DependencyFinderASTVisitor> { +public: + DependencyFinderASTVisitor(const StmtParentMap *StmtParents, + const DeclParentMap *DeclParents, + const ReplacedVarsMap *ReplacedVars, + const clang::Stmt *ContainingStmt) : + StmtParents(StmtParents), DeclParents(DeclParents), + ContainingStmt(ContainingStmt), ReplacedVars(ReplacedVars) { } + + /// \brief Run the analysis on Body, and return true iff the expression + /// depends on some variable declared within ContainingStmt. 
+ /// + /// This is intended to protect against hoisting the container expression + /// outside of an inner context if part of that expression is declared in that + /// inner context. + /// + /// For example, + /// \code + /// const int N = 10, M = 20; + /// int arr[N][M]; + /// int getRow(); + /// + /// for (int i = 0; i < M; ++i) { + /// int k = getRow(); + /// printf("%d:", arr[k][i]); + /// } + /// \endcode + /// At first glance, this loop looks like it could be changed to + /// \code + /// for (int elem : arr[k]) { + /// int k = getIndex(); + /// printf("%d:", elem); + /// } + /// \endcode + /// But this is malformed, since `k` is used before it is defined! + /// + /// In order to avoid this, this class looks at the container expression + /// `arr[k]` and decides whether or not it contains a sub-expression declared + /// within the the loop body. + bool dependsOnInsideVariable(const clang::Stmt *Body) { + DependsOnInsideVariable = false; + TraverseStmt(const_cast<clang::Stmt *>(Body)); + return DependsOnInsideVariable; + } + + friend class clang::RecursiveASTVisitor<DependencyFinderASTVisitor>; + +private: + const StmtParentMap *StmtParents; + const DeclParentMap *DeclParents; + const clang::Stmt *ContainingStmt; + const ReplacedVarsMap *ReplacedVars; + bool DependsOnInsideVariable; + + bool VisitVarDecl(clang::VarDecl *V); + bool VisitDeclRefExpr(clang::DeclRefExpr *D); +}; + +/// Class used to determine if any declarations used in a Stmt would conflict +/// with a particular identifier. This search includes the names that don't +/// actually appear in the AST (i.e. created by a refactoring tool) by including +/// a map from Stmts to generated names associated with those stmts. 
+class DeclFinderASTVisitor : + public clang::RecursiveASTVisitor<DeclFinderASTVisitor> { +public: + DeclFinderASTVisitor(const std::string &Name, + const StmtGeneratedVarNameMap *GeneratedDecls) : + Name(Name), GeneratedDecls(GeneratedDecls), Found(false) { } + + /// Attempts to find any usages of variables name Name in Body, returning + /// true when it is used in Body. This includes the generated loop variables + /// of ForStmts which have already been transformed. + bool findUsages(const clang::Stmt *Body) { + Found = false; + TraverseStmt(const_cast<clang::Stmt *>(Body)); + return Found; + } + + friend class clang::RecursiveASTVisitor<DeclFinderASTVisitor>; + +private: + std::string Name; + /// GeneratedDecls keeps track of ForStmts which have been tranformed, mapping + /// each modified ForStmt to the variable generated in the loop. + const StmtGeneratedVarNameMap *GeneratedDecls; + bool Found; + + bool VisitForStmt(clang::ForStmt *F); + bool VisitNamedDecl(clang::NamedDecl *D); + bool VisitDeclRefExpr(clang::DeclRefExpr *D); + bool VisitTypeLoc(clang::TypeLoc TL); +}; + +#endif // CPP11_MIGRATE_STMT_ANCESTOR_H diff --git a/clang-tools-extra/clang-modernize/LoopConvert/VariableNaming.cpp b/clang-tools-extra/clang-modernize/LoopConvert/VariableNaming.cpp new file mode 100644 index 00000000000..853e4830ca6 --- /dev/null +++ b/clang-tools-extra/clang-modernize/LoopConvert/VariableNaming.cpp @@ -0,0 +1,95 @@ +//===-- LoopConvert/VariableNaming.cpp - Gererate variable names ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file contains the definitino of the VariableNamer class, which +/// is responsible for generating new variable names and ensuring that they do +/// not conflict with existing ones. 
+/// +//===----------------------------------------------------------------------===// + +#include "VariableNaming.h" + +using namespace llvm; +using namespace clang; + +std::string VariableNamer::createIndexName() { + // FIXME: Add in naming conventions to handle: + // - Uppercase/lowercase indices + // - How to handle conflicts + // - An interactive process for naming + std::string IteratorName; + std::string ContainerName; + if (TheContainer) + ContainerName = TheContainer->getName().str(); + + size_t Len = ContainerName.length(); + if (Len > 1 && ContainerName[Len - 1] == 's') + IteratorName = ContainerName.substr(0, Len - 1); + else + IteratorName = "elem"; + + if (!declarationExists(IteratorName)) + return IteratorName; + + IteratorName = ContainerName + "_" + OldIndex->getName().str(); + if (!declarationExists(IteratorName)) + return IteratorName; + + IteratorName = ContainerName + "_elem"; + if (!declarationExists(IteratorName)) + return IteratorName; + + IteratorName += "_elem"; + if (!declarationExists(IteratorName)) + return IteratorName; + + IteratorName = "_elem_"; + + // Someone defeated my naming scheme... + while (declarationExists(IteratorName)) + IteratorName += "i"; + return IteratorName; +} + +/// \brief Determines whether or not the the name \a Symbol conflicts with +/// language keywords or defined macros. Also checks if the name exists in +/// LoopContext, any of its parent contexts, or any of its child statements. +/// +/// We also check to see if the same identifier was generated by this loop +/// converter in a loop nested within SourceStmt. +bool VariableNamer::declarationExists(StringRef Symbol) { + assert(Context != 0 && "Expected an ASTContext"); + IdentifierInfo &Ident = Context->Idents.get(Symbol); + + // Check if the symbol is not an identifier (ie. is a keyword or alias). + if (!isAnyIdentifier(Ident.getTokenID())) + return true; + + // Check for conflicting macro definitions. 
+ if (Ident.hasMacroDefinition()) + return true; + + // Determine if the symbol was generated in a parent context. + for (const Stmt *S = SourceStmt; S != NULL; S = ReverseAST->lookup(S)) { + StmtGeneratedVarNameMap::const_iterator I = GeneratedDecls->find(S); + if (I != GeneratedDecls->end() && I->second == Symbol) + return true; + } + + // FIXME: Rather than detecting conflicts at their usages, we should check the + // parent context. + // For some reason, lookup() always returns the pair (NULL, NULL) because its + // StoredDeclsMap is not initialized (i.e. LookupPtr.getInt() is false inside + // of DeclContext::lookup()). Why is this? + + // Finally, determine if the symbol was used in the loop or a child context. + DeclFinderASTVisitor DeclFinder(Symbol, GeneratedDecls); + return DeclFinder.findUsages(SourceStmt); +} diff --git a/clang-tools-extra/clang-modernize/LoopConvert/VariableNaming.h b/clang-tools-extra/clang-modernize/LoopConvert/VariableNaming.h new file mode 100644 index 00000000000..066ed1e5af4 --- /dev/null +++ b/clang-tools-extra/clang-modernize/LoopConvert/VariableNaming.h @@ -0,0 +1,59 @@ +//===-- LoopConvert/VariableNaming.h - Gererate variable names --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file contains the declaration of the VariableNamer class, which +/// is responsible for generating new variable names and ensuring that they do +/// not conflict with existing ones. +// +//===----------------------------------------------------------------------===// + +#ifndef CPP11_MIGRATE_VARIABLE_NAMING_H +#define CPP11_MIGRATE_VARIABLE_NAMING_H + +#include "StmtAncestor.h" +#include "clang/AST/ASTContext.h" + +/// \brief Create names for generated variables within a particular statement. 
+/// +/// VariableNamer uses a DeclContext as a reference point, checking for any +/// conflicting declarations higher up in the context or within SourceStmt. +/// It creates a variable name using hints from a source container and the old +/// index, if they exist. +class VariableNamer { + public: + VariableNamer( + StmtGeneratedVarNameMap *GeneratedDecls, const StmtParentMap *ReverseAST, + const clang::Stmt *SourceStmt, const clang::VarDecl *OldIndex, + const clang::VarDecl *TheContainer, const clang::ASTContext *Context) + : GeneratedDecls(GeneratedDecls), ReverseAST(ReverseAST), + SourceStmt(SourceStmt), OldIndex(OldIndex), TheContainer(TheContainer), + Context(Context) {} + + /// \brief Generate a new index name. + /// + /// Generates the name to be used for an inserted iterator. It relies on + /// declarationExists() to determine that there are no naming conflicts, and + /// tries to use some hints from the container name and the old index name. + std::string createIndexName(); + + private: + StmtGeneratedVarNameMap *GeneratedDecls; + const StmtParentMap *ReverseAST; + const clang::Stmt *SourceStmt; + const clang::VarDecl *OldIndex; + const clang::VarDecl *TheContainer; + const clang::ASTContext *Context; + + // Determine whether or not a declaration that would conflict with Symbol + // exists in an outer context or in any statement contained in SourceStmt. + bool declarationExists(llvm::StringRef Symbol); +}; + +#endif // CPP11_MIGRATE_VARIABLE_NAMING_H |