diff options
-rw-r--r-- | polly/include/polly/ScopInfo.h | 7 | ||||
-rw-r--r-- | polly/lib/Analysis/ScopInfo.cpp | 19 | ||||
-rw-r--r-- | polly/lib/Support/SCEVAffinator.cpp | 31 | ||||
-rw-r--r-- | polly/test/ScopInfo/complex-branch-structure.ll | 2 | ||||
-rw-r--r-- | polly/test/ScopInfo/complex-expression.ll | 142 | ||||
-rw-r--r-- | polly/test/ScopInfo/complex-successor-structure.ll | 2 |
6 files changed, 196 insertions, 7 deletions
diff --git a/polly/include/polly/ScopInfo.h b/polly/include/polly/ScopInfo.h index ff55a05173f..ea106b00cfc 100644 --- a/polly/include/polly/ScopInfo.h +++ b/polly/include/polly/ScopInfo.h @@ -76,10 +76,10 @@ enum AssumptionKind { INBOUNDS, WRAPPING, ERRORBLOCK, + COMPLEXITY, INFINITELOOP, INVARIANTLOAD, DELINEARIZATION, - ERROR_DOMAINCONJUNCTS, }; /// @brief Enum to distinguish between assumptions and restrictions. @@ -1981,6 +1981,11 @@ public: /// @param BB An (optional) basic block in which the isl_pw_aff is computed. /// SCEVs known to not reference any loops in the SCoP can be /// passed without a @p BB. + /// + /// Note that this function will always return a valid isl_pw_aff. However, if + /// the translation of @p E was deemed to complex the SCoP is invalidated and + /// a dummy value of appropriate dimension is returned. This allows to bail + /// for complex cases without "error handling code" needed on the users side. __isl_give isl_pw_aff *getPwAff(const SCEV *E, BasicBlock *BB = nullptr); /// @brief Return the non-loop carried conditions on the domain of @p Stmt. diff --git a/polly/lib/Analysis/ScopInfo.cpp b/polly/lib/Analysis/ScopInfo.cpp index 471a6669f7a..d9b2e062d0f 100644 --- a/polly/lib/Analysis/ScopInfo.cpp +++ b/polly/lib/Analysis/ScopInfo.cpp @@ -2293,7 +2293,7 @@ void Scop::buildDomainsWithBranchConstraints(Region *R, ScopDetection &SD, isl_set_free(SuccDomain); SuccDomain = Empty; HasComplexCFG = true; - invalidate(ERROR_DOMAINCONJUNCTS, DebugLoc()); + invalidate(COMPLEXITY, DebugLoc()); } } } @@ -3213,6 +3213,8 @@ static std::string toString(AssumptionKind Kind) { return "Inbounds"; case WRAPPING: return "No-overflows"; + case COMPLEXITY: + return "Low complexity"; case ERRORBLOCK: return "No-error"; case INFINITELOOP: @@ -3221,8 +3223,6 @@ static std::string toString(AssumptionKind Kind) { return "Invariant load"; case DELINEARIZATION: return "Delinearization"; - case ERROR_DOMAINCONJUNCTS: - return "Low number of domain conjuncts"; } llvm_unreachable("Unknown AssumptionKind!"); } @@ -3381,7 +3381,18 @@ void Scop::dump() const { print(dbgs()); } isl_ctx *Scop::getIslCtx() const { return IslCtx.get(); } __isl_give isl_pw_aff *Scop::getPwAff(const SCEV *E, BasicBlock *BB) { - return Affinator.getPwAff(E, BB); + // First try to use the SCEVAffinator to generate a piecewise defined + // affine function from @p E in the context of @p BB. If that tasks becomes to + // complex the affinator might return a nullptr. In such a case we invalidate + // the SCoP and return a dummy value. This way we do not need to add error + // handling cdoe to all users of this function. + auto *PWA = Affinator.getPwAff(E, BB); + if (PWA) + return PWA; + + auto DL = BB ? BB->getTerminator()->getDebugLoc() : DebugLoc(); + invalidate(COMPLEXITY, DL); + return Affinator.getPwAff(SE->getZero(E->getType()), BB); } __isl_give isl_union_set *Scop::getDomains() const { diff --git a/polly/lib/Support/SCEVAffinator.cpp b/polly/lib/Support/SCEVAffinator.cpp index 084fcf0de00..a9e542c25f0 100644 --- a/polly/lib/Support/SCEVAffinator.cpp +++ b/polly/lib/Support/SCEVAffinator.cpp @@ -24,6 +24,33 @@ using namespace llvm; using namespace polly; +// The maximal number of basic sets we allow during the construction of a +// piecewise affine function. More complex ones will result in very high +// compile time. +static int const MaxConjunctsInPwAff = 100; + +/// @brief Add the number of basic sets in @p Domain to @p User +static isl_stat addNumBasicSets(isl_set *Domain, isl_aff *Aff, void *User) { + auto *NumBasicSets = static_cast<unsigned *>(User); + *NumBasicSets += isl_set_n_basic_set(Domain); + isl_set_free(Domain); + isl_aff_free(Aff); + return isl_stat_ok; +} + +/// @brief Determine if @p PWA is to complex to continue +/// +/// Note that @p PWA will be "free" (deallocated) if this function returns true, +/// but not if this function returns false. +static bool isToComplex(isl_pw_aff *PWA) { + unsigned NumBasicSets = 0; + isl_pw_aff_foreach_piece(PWA, addNumBasicSets, &NumBasicSets); + if (NumBasicSets <= MaxConjunctsInPwAff) + return false; + isl_pw_aff_free(PWA); + return true; +} + SCEVAffinator::SCEVAffinator(Scop *S, LoopInfo &LI) : S(S), Ctx(S->getIslCtx()), R(S->getRegion()), SE(*S->getSE()), LI(LI), TD(R.getEntry()->getParent()->getParent()->getDataLayout()) {} @@ -233,6 +260,8 @@ __isl_give isl_pw_aff *SCEVAffinator::visitAddExpr(const SCEVAddExpr *Expr) { for (int i = 1, e = Expr->getNumOperands(); i < e; ++i) { isl_pw_aff *NextSummand = visit(Expr->getOperand(i)); Sum = isl_pw_aff_add(Sum, NextSummand); + if (isToComplex(Sum)) + return nullptr; } return Sum; @@ -292,6 +321,8 @@ __isl_give isl_pw_aff *SCEVAffinator::visitSMaxExpr(const SCEVSMaxExpr *Expr) { for (int i = 1, e = Expr->getNumOperands(); i < e; ++i) { isl_pw_aff *NextOperand = visit(Expr->getOperand(i)); Max = isl_pw_aff_max(Max, NextOperand); + if (isToComplex(Max)) + return nullptr; } return Max; diff --git a/polly/test/ScopInfo/complex-branch-structure.ll b/polly/test/ScopInfo/complex-branch-structure.ll index 201a4103d43..1e08789b283 100644 --- a/polly/test/ScopInfo/complex-branch-structure.ll +++ b/polly/test/ScopInfo/complex-branch-structure.ll @@ -20,7 +20,7 @@ ; \ / / ; loop backedge -; CHECK: Low number of domain conjuncts assumption: { : 1 = 0 } +; CHECK: Low complexity assumption: { : 1 = 0 } define void @foo(float* %A, float* %B, float* %C, float* %D, float* %E, i64 %A1.p, i64 %A2.p, i64 %A3.p, diff --git a/polly/test/ScopInfo/complex-expression.ll b/polly/test/ScopInfo/complex-expression.ll new file mode 100644 index 00000000000..2a979bbfd8e --- /dev/null +++ b/polly/test/ScopInfo/complex-expression.ll @@ -0,0 +1,142 @@ +; RUN: opt %loadPolly -pass-remarks-analysis="polly-scops" -polly-scops \ +; RUN: < %s 2>&1 | FileCheck %s +; +; This test case has an SCEVSMax expression with a very high arity. The +; piecewise affine function we would create for it would have a huge amount of +; conjuncts, thus it would take a lot of time creating and handling it. +; +; This ensures we bail out for really complex expressions: +; +; CHECK: Low complexity assumption: { : 1 = 0 } +; +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" + +; Function Attrs: norecurse nounwind +define i32 @foo(i32* nocapture readonly %src1, i32* nocapture readonly %src2, i32* nocapture %score, i32* nocapture %max, i32 %n) #0 { +entry: + %cmp33 = icmp sgt i32 %n, 0 + br i1 %cmp33, label %for.body.preheader, label %for.body7.preheader + +for.body.preheader: ; preds = %entry + br label %for.body + +for.body7.preheader.loopexit: ; preds = %for.body + br label %for.body7.preheader + +for.body7.preheader: ; preds = %for.body7.preheader.loopexit, %entry + %0 = load i32, i32* %score, align 4, !tbaa !3 + %cmp9 = icmp sgt i32 %0, -1 + %.scoreMax.0 = select i1 %cmp9, i32 %0, i32 -1 + %arrayidx8.1 = getelementptr inbounds i32, i32* %score, i32 1 + %1 = load i32, i32* %arrayidx8.1, align 4, !tbaa !3 + %cmp9.1 = icmp sgt i32 %1, %.scoreMax.0 + %.scoreMax.0.1 = select i1 %cmp9.1, i32 %1, i32 %.scoreMax.0 + %arrayidx8.2 = getelementptr inbounds i32, i32* %score, i32 2 + %2 = load i32, i32* %arrayidx8.2, align 4, !tbaa !3 + %cmp9.2 = icmp sgt i32 %2, %.scoreMax.0.1 + %.scoreMax.0.2 = select i1 %cmp9.2, i32 %2, i32 %.scoreMax.0.1 + %arrayidx8.3 = getelementptr inbounds i32, i32* %score, i32 3 + %3 = load i32, i32* %arrayidx8.3, align 4, !tbaa !3 + %cmp9.3 = icmp sgt i32 %3, %.scoreMax.0.2 + %.scoreMax.0.3 = select i1 %cmp9.3, i32 %3, i32 %.scoreMax.0.2 + %arrayidx8.4 = getelementptr inbounds i32, i32* %score, i32 4 + %4 = load i32, i32* %arrayidx8.4, align 4, !tbaa !3 + %cmp9.4 = icmp sgt i32 %4, %.scoreMax.0.3 + %.scoreMax.0.4 = select i1 %cmp9.4, i32 %4, i32 %.scoreMax.0.3 + %arrayidx8.5 = getelementptr inbounds i32, i32* %score, i32 5 + %5 = load i32, i32* %arrayidx8.5, align 4, !tbaa !3 + %cmp9.5 = icmp sgt i32 %5, %.scoreMax.0.4 + %.scoreMax.0.5 = select i1 %cmp9.5, i32 %5, i32 %.scoreMax.0.4 + %arrayidx8.6 = getelementptr inbounds i32, i32* %score, i32 6 + %6 = load i32, i32* %arrayidx8.6, align 4, !tbaa !3 + %cmp9.6 = icmp sgt i32 %6, %.scoreMax.0.5 + %.scoreMax.0.6 = select i1 %cmp9.6, i32 %6, i32 %.scoreMax.0.5 + %arrayidx8.7 = getelementptr inbounds i32, i32* %score, i32 7 + %7 = load i32, i32* %arrayidx8.7, align 4, !tbaa !3 + %cmp9.7 = icmp sgt i32 %7, %.scoreMax.0.6 + %.scoreMax.0.7 = select i1 %cmp9.7, i32 %7, i32 %.scoreMax.0.6 + %arrayidx8.8 = getelementptr inbounds i32, i32* %score, i32 8 + %8 = load i32, i32* %arrayidx8.8, align 4, !tbaa !3 + %cmp9.8 = icmp sgt i32 %8, %.scoreMax.0.7 + %.scoreMax.0.8 = select i1 %cmp9.8, i32 %8, i32 %.scoreMax.0.7 + %arrayidx8.9 = getelementptr inbounds i32, i32* %score, i32 9 + %9 = load i32, i32* %arrayidx8.9, align 4, !tbaa !3 + %cmp9.9 = icmp sgt i32 %9, %.scoreMax.0.8 + %.scoreMax.0.9 = select i1 %cmp9.9, i32 %9, i32 %.scoreMax.0.8 + %arrayidx8.10 = getelementptr inbounds i32, i32* %score, i32 10 + %10 = load i32, i32* %arrayidx8.10, align 4, !tbaa !3 + %cmp9.10 = icmp sgt i32 %10, %.scoreMax.0.9 + %.scoreMax.0.10 = select i1 %cmp9.10, i32 %10, i32 %.scoreMax.0.9 + %arrayidx8.11 = getelementptr inbounds i32, i32* %score, i32 11 + %11 = load i32, i32* %arrayidx8.11, align 4, !tbaa !3 + %cmp9.11 = icmp sgt i32 %11, %.scoreMax.0.10 + %.scoreMax.0.11 = select i1 %cmp9.11, i32 %11, i32 %.scoreMax.0.10 + %arrayidx8.12 = getelementptr inbounds i32, i32* %score, i32 12 + %12 = load i32, i32* %arrayidx8.12, align 4, !tbaa !3 + %cmp9.12 = icmp sgt i32 %12, %.scoreMax.0.11 + %.scoreMax.0.12 = select i1 %cmp9.12, i32 %12, i32 %.scoreMax.0.11 + %arrayidx8.13 = getelementptr inbounds i32, i32* %score, i32 13 + %13 = load i32, i32* %arrayidx8.13, align 4, !tbaa !3 + %cmp9.13 = icmp sgt i32 %13, %.scoreMax.0.12 + %.scoreMax.0.13 = select i1 %cmp9.13, i32 %13, i32 %.scoreMax.0.12 + %arrayidx8.14 = getelementptr inbounds i32, i32* %score, i32 14 + %14 = load i32, i32* %arrayidx8.14, align 4, !tbaa !3 + %cmp9.14 = icmp sgt i32 %14, %.scoreMax.0.13 + %.scoreMax.0.14 = select i1 %cmp9.14, i32 %14, i32 %.scoreMax.0.13 + %arrayidx8.15 = getelementptr inbounds i32, i32* %score, i32 15 + %15 = load i32, i32* %arrayidx8.15, align 4, !tbaa !3 + %cmp9.15 = icmp sgt i32 %15, %.scoreMax.0.14 + %.scoreMax.0.15 = select i1 %cmp9.15, i32 %15, i32 %.scoreMax.0.14 + %arrayidx8.16 = getelementptr inbounds i32, i32* %score, i32 16 + %16 = load i32, i32* %arrayidx8.16, align 4, !tbaa !3 + %cmp9.16 = icmp sgt i32 %16, %.scoreMax.0.15 + %.scoreMax.0.16 = select i1 %cmp9.16, i32 %16, i32 %.scoreMax.0.15 + %arrayidx8.17 = getelementptr inbounds i32, i32* %score, i32 17 + %17 = load i32, i32* %arrayidx8.17, align 4, !tbaa !3 + %cmp9.17 = icmp sgt i32 %17, %.scoreMax.0.16 + %.scoreMax.0.17 = select i1 %cmp9.17, i32 %17, i32 %.scoreMax.0.16 + %arrayidx8.18 = getelementptr inbounds i32, i32* %score, i32 18 + %18 = load i32, i32* %arrayidx8.18, align 4, !tbaa !3 + %cmp9.18 = icmp sgt i32 %18, %.scoreMax.0.17 + %.scoreMax.0.18 = select i1 %cmp9.18, i32 %18, i32 %.scoreMax.0.17 + %arrayidx8.19 = getelementptr inbounds i32, i32* %score, i32 19 + %19 = load i32, i32* %arrayidx8.19, align 4, !tbaa !3 + %cmp9.19 = icmp sgt i32 %19, %.scoreMax.0.18 + %.scoreMax.0.19 = select i1 %cmp9.19, i32 %19, i32 %.scoreMax.0.18 + %cmp14 = icmp eq i32 %.scoreMax.0.19, -1 + br i1 %cmp14, label %cleanup, label %if.end16 + +for.body: ; preds = %for.body.preheader, %for.body + %i.034 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ] + %arrayidx = getelementptr inbounds i32, i32* %src1, i32 %i.034 + %20 = load i32, i32* %arrayidx, align 4, !tbaa !3 + %arrayidx1 = getelementptr inbounds i32, i32* %src2, i32 %i.034 + %21 = load i32, i32* %arrayidx1, align 4, !tbaa !3 + %add = add nsw i32 %21, %20 + %arrayidx2 = getelementptr inbounds i32, i32* %score, i32 %i.034 + store i32 %add, i32* %arrayidx2, align 4, !tbaa !3 + %inc = add nuw nsw i32 %i.034, 1 + %exitcond = icmp eq i32 %inc, %n + br i1 %exitcond, label %for.body7.preheader.loopexit, label %for.body + +if.end16: ; preds = %for.body7.preheader + store i32 %.scoreMax.0.19, i32* %max, align 4, !tbaa !3 + br label %cleanup + +cleanup: ; preds = %for.body7.preheader, %if.end16 + %retval.0 = phi i32 [ 1, %if.end16 ], [ 0, %for.body7.preheader ] + ret i32 %retval.0 +} + +attributes #0 = { norecurse nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="arm7tdmi" "target-features"="+strict-align" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!2} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 1, !"min_enum_size", i32 4} +!2 = !{!"clang version 3.9.0 (http://llvm.org/git/clang.git 24028ac5d033be4c1e87ce65affb4ea61deb1897) (http://llvm.org/git/llvm.git b88337289681d6ede5bba5da77007c83509e54ae)"} +!3 = !{!4, !4, i64 0} +!4 = !{!"int", !5, i64 0} +!5 = !{!"omnipotent char", !6, i64 0} +!6 = !{!"Simple C/C++ TBAA"} diff --git a/polly/test/ScopInfo/complex-successor-structure.ll b/polly/test/ScopInfo/complex-successor-structure.ll index 37e0edb3025..83871cf72cd 100644 --- a/polly/test/ScopInfo/complex-successor-structure.ll +++ b/polly/test/ScopInfo/complex-successor-structure.ll @@ -5,7 +5,7 @@ ; of following form and check that the domain construction does not take a huge ; amount of time. ; -; CHECK: Low number of domain conjuncts assumption: { : 1 = 0 } +; CHECK: Low complexity assumption: { : 1 = 0 } ; | ; for.body <--+ |