summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--polly/include/polly/ScopInfo.h7
-rw-r--r--polly/lib/Analysis/ScopInfo.cpp19
-rw-r--r--polly/lib/Support/SCEVAffinator.cpp31
-rw-r--r--polly/test/ScopInfo/complex-branch-structure.ll2
-rw-r--r--polly/test/ScopInfo/complex-expression.ll142
-rw-r--r--polly/test/ScopInfo/complex-successor-structure.ll2
6 files changed, 196 insertions, 7 deletions
diff --git a/polly/include/polly/ScopInfo.h b/polly/include/polly/ScopInfo.h
index ff55a05173f..ea106b00cfc 100644
--- a/polly/include/polly/ScopInfo.h
+++ b/polly/include/polly/ScopInfo.h
@@ -76,10 +76,10 @@ enum AssumptionKind {
INBOUNDS,
WRAPPING,
ERRORBLOCK,
+ COMPLEXITY,
INFINITELOOP,
INVARIANTLOAD,
DELINEARIZATION,
- ERROR_DOMAINCONJUNCTS,
};
/// @brief Enum to distinguish between assumptions and restrictions.
@@ -1981,6 +1981,11 @@ public:
/// @param BB An (optional) basic block in which the isl_pw_aff is computed.
/// SCEVs known to not reference any loops in the SCoP can be
/// passed without a @p BB.
+ ///
+ /// Note that this function will always return a valid isl_pw_aff. However, if
+ /// the translation of @p E was deemed to complex the SCoP is invalidated and
+ /// a dummy value of appropriate dimension is returned. This allows to bail
+ /// for complex cases without "error handling code" needed on the users side.
__isl_give isl_pw_aff *getPwAff(const SCEV *E, BasicBlock *BB = nullptr);
/// @brief Return the non-loop carried conditions on the domain of @p Stmt.
diff --git a/polly/lib/Analysis/ScopInfo.cpp b/polly/lib/Analysis/ScopInfo.cpp
index 471a6669f7a..d9b2e062d0f 100644
--- a/polly/lib/Analysis/ScopInfo.cpp
+++ b/polly/lib/Analysis/ScopInfo.cpp
@@ -2293,7 +2293,7 @@ void Scop::buildDomainsWithBranchConstraints(Region *R, ScopDetection &SD,
isl_set_free(SuccDomain);
SuccDomain = Empty;
HasComplexCFG = true;
- invalidate(ERROR_DOMAINCONJUNCTS, DebugLoc());
+ invalidate(COMPLEXITY, DebugLoc());
}
}
}
@@ -3213,6 +3213,8 @@ static std::string toString(AssumptionKind Kind) {
return "Inbounds";
case WRAPPING:
return "No-overflows";
+ case COMPLEXITY:
+ return "Low complexity";
case ERRORBLOCK:
return "No-error";
case INFINITELOOP:
@@ -3221,8 +3223,6 @@ static std::string toString(AssumptionKind Kind) {
return "Invariant load";
case DELINEARIZATION:
return "Delinearization";
- case ERROR_DOMAINCONJUNCTS:
- return "Low number of domain conjuncts";
}
llvm_unreachable("Unknown AssumptionKind!");
}
@@ -3381,7 +3381,18 @@ void Scop::dump() const { print(dbgs()); }
isl_ctx *Scop::getIslCtx() const { return IslCtx.get(); }
__isl_give isl_pw_aff *Scop::getPwAff(const SCEV *E, BasicBlock *BB) {
- return Affinator.getPwAff(E, BB);
+ // First try to use the SCEVAffinator to generate a piecewise defined
+ // affine function from @p E in the context of @p BB. If that tasks becomes to
+ // complex the affinator might return a nullptr. In such a case we invalidate
+ // the SCoP and return a dummy value. This way we do not need to add error
+ // handling cdoe to all users of this function.
+ auto *PWA = Affinator.getPwAff(E, BB);
+ if (PWA)
+ return PWA;
+
+ auto DL = BB ? BB->getTerminator()->getDebugLoc() : DebugLoc();
+ invalidate(COMPLEXITY, DL);
+ return Affinator.getPwAff(SE->getZero(E->getType()), BB);
}
__isl_give isl_union_set *Scop::getDomains() const {
diff --git a/polly/lib/Support/SCEVAffinator.cpp b/polly/lib/Support/SCEVAffinator.cpp
index 084fcf0de00..a9e542c25f0 100644
--- a/polly/lib/Support/SCEVAffinator.cpp
+++ b/polly/lib/Support/SCEVAffinator.cpp
@@ -24,6 +24,33 @@
using namespace llvm;
using namespace polly;
+// The maximal number of basic sets we allow during the construction of a
+// piecewise affine function. More complex ones will result in very high
+// compile time.
+static int const MaxConjunctsInPwAff = 100;
+
+/// @brief Add the number of basic sets in @p Domain to @p User
+static isl_stat addNumBasicSets(isl_set *Domain, isl_aff *Aff, void *User) {
+ auto *NumBasicSets = static_cast<unsigned *>(User);
+ *NumBasicSets += isl_set_n_basic_set(Domain);
+ isl_set_free(Domain);
+ isl_aff_free(Aff);
+ return isl_stat_ok;
+}
+
+/// @brief Determine if @p PWA is to complex to continue
+///
+/// Note that @p PWA will be "free" (deallocated) if this function returns true,
+/// but not if this function returns false.
+static bool isToComplex(isl_pw_aff *PWA) {
+ unsigned NumBasicSets = 0;
+ isl_pw_aff_foreach_piece(PWA, addNumBasicSets, &NumBasicSets);
+ if (NumBasicSets <= MaxConjunctsInPwAff)
+ return false;
+ isl_pw_aff_free(PWA);
+ return true;
+}
+
SCEVAffinator::SCEVAffinator(Scop *S, LoopInfo &LI)
: S(S), Ctx(S->getIslCtx()), R(S->getRegion()), SE(*S->getSE()), LI(LI),
TD(R.getEntry()->getParent()->getParent()->getDataLayout()) {}
@@ -233,6 +260,8 @@ __isl_give isl_pw_aff *SCEVAffinator::visitAddExpr(const SCEVAddExpr *Expr) {
for (int i = 1, e = Expr->getNumOperands(); i < e; ++i) {
isl_pw_aff *NextSummand = visit(Expr->getOperand(i));
Sum = isl_pw_aff_add(Sum, NextSummand);
+ if (isToComplex(Sum))
+ return nullptr;
}
return Sum;
@@ -292,6 +321,8 @@ __isl_give isl_pw_aff *SCEVAffinator::visitSMaxExpr(const SCEVSMaxExpr *Expr) {
for (int i = 1, e = Expr->getNumOperands(); i < e; ++i) {
isl_pw_aff *NextOperand = visit(Expr->getOperand(i));
Max = isl_pw_aff_max(Max, NextOperand);
+ if (isToComplex(Max))
+ return nullptr;
}
return Max;
diff --git a/polly/test/ScopInfo/complex-branch-structure.ll b/polly/test/ScopInfo/complex-branch-structure.ll
index 201a4103d43..1e08789b283 100644
--- a/polly/test/ScopInfo/complex-branch-structure.ll
+++ b/polly/test/ScopInfo/complex-branch-structure.ll
@@ -20,7 +20,7 @@
; \ / /
; loop backedge
-; CHECK: Low number of domain conjuncts assumption: { : 1 = 0 }
+; CHECK: Low complexity assumption: { : 1 = 0 }
define void @foo(float* %A, float* %B, float* %C, float* %D, float* %E,
i64 %A1.p, i64 %A2.p, i64 %A3.p,
diff --git a/polly/test/ScopInfo/complex-expression.ll b/polly/test/ScopInfo/complex-expression.ll
new file mode 100644
index 00000000000..2a979bbfd8e
--- /dev/null
+++ b/polly/test/ScopInfo/complex-expression.ll
@@ -0,0 +1,142 @@
+; RUN: opt %loadPolly -pass-remarks-analysis="polly-scops" -polly-scops \
+; RUN: < %s 2>&1 | FileCheck %s
+;
+; This test case has an SCEVSMax expression with a very high arity. The
+; piecewise affine function we would create for it would have a huge amount of
+; conjuncts, thus it would take a lot of time creating and handling it.
+;
+; This ensures we bail out for really complex expressions:
+;
+; CHECK: Low complexity assumption: { : 1 = 0 }
+;
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+
+; Function Attrs: norecurse nounwind
+define i32 @foo(i32* nocapture readonly %src1, i32* nocapture readonly %src2, i32* nocapture %score, i32* nocapture %max, i32 %n) #0 {
+entry:
+ %cmp33 = icmp sgt i32 %n, 0
+ br i1 %cmp33, label %for.body.preheader, label %for.body7.preheader
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body7.preheader.loopexit: ; preds = %for.body
+ br label %for.body7.preheader
+
+for.body7.preheader: ; preds = %for.body7.preheader.loopexit, %entry
+ %0 = load i32, i32* %score, align 4, !tbaa !3
+ %cmp9 = icmp sgt i32 %0, -1
+ %.scoreMax.0 = select i1 %cmp9, i32 %0, i32 -1
+ %arrayidx8.1 = getelementptr inbounds i32, i32* %score, i32 1
+ %1 = load i32, i32* %arrayidx8.1, align 4, !tbaa !3
+ %cmp9.1 = icmp sgt i32 %1, %.scoreMax.0
+ %.scoreMax.0.1 = select i1 %cmp9.1, i32 %1, i32 %.scoreMax.0
+ %arrayidx8.2 = getelementptr inbounds i32, i32* %score, i32 2
+ %2 = load i32, i32* %arrayidx8.2, align 4, !tbaa !3
+ %cmp9.2 = icmp sgt i32 %2, %.scoreMax.0.1
+ %.scoreMax.0.2 = select i1 %cmp9.2, i32 %2, i32 %.scoreMax.0.1
+ %arrayidx8.3 = getelementptr inbounds i32, i32* %score, i32 3
+ %3 = load i32, i32* %arrayidx8.3, align 4, !tbaa !3
+ %cmp9.3 = icmp sgt i32 %3, %.scoreMax.0.2
+ %.scoreMax.0.3 = select i1 %cmp9.3, i32 %3, i32 %.scoreMax.0.2
+ %arrayidx8.4 = getelementptr inbounds i32, i32* %score, i32 4
+ %4 = load i32, i32* %arrayidx8.4, align 4, !tbaa !3
+ %cmp9.4 = icmp sgt i32 %4, %.scoreMax.0.3
+ %.scoreMax.0.4 = select i1 %cmp9.4, i32 %4, i32 %.scoreMax.0.3
+ %arrayidx8.5 = getelementptr inbounds i32, i32* %score, i32 5
+ %5 = load i32, i32* %arrayidx8.5, align 4, !tbaa !3
+ %cmp9.5 = icmp sgt i32 %5, %.scoreMax.0.4
+ %.scoreMax.0.5 = select i1 %cmp9.5, i32 %5, i32 %.scoreMax.0.4
+ %arrayidx8.6 = getelementptr inbounds i32, i32* %score, i32 6
+ %6 = load i32, i32* %arrayidx8.6, align 4, !tbaa !3
+ %cmp9.6 = icmp sgt i32 %6, %.scoreMax.0.5
+ %.scoreMax.0.6 = select i1 %cmp9.6, i32 %6, i32 %.scoreMax.0.5
+ %arrayidx8.7 = getelementptr inbounds i32, i32* %score, i32 7
+ %7 = load i32, i32* %arrayidx8.7, align 4, !tbaa !3
+ %cmp9.7 = icmp sgt i32 %7, %.scoreMax.0.6
+ %.scoreMax.0.7 = select i1 %cmp9.7, i32 %7, i32 %.scoreMax.0.6
+ %arrayidx8.8 = getelementptr inbounds i32, i32* %score, i32 8
+ %8 = load i32, i32* %arrayidx8.8, align 4, !tbaa !3
+ %cmp9.8 = icmp sgt i32 %8, %.scoreMax.0.7
+ %.scoreMax.0.8 = select i1 %cmp9.8, i32 %8, i32 %.scoreMax.0.7
+ %arrayidx8.9 = getelementptr inbounds i32, i32* %score, i32 9
+ %9 = load i32, i32* %arrayidx8.9, align 4, !tbaa !3
+ %cmp9.9 = icmp sgt i32 %9, %.scoreMax.0.8
+ %.scoreMax.0.9 = select i1 %cmp9.9, i32 %9, i32 %.scoreMax.0.8
+ %arrayidx8.10 = getelementptr inbounds i32, i32* %score, i32 10
+ %10 = load i32, i32* %arrayidx8.10, align 4, !tbaa !3
+ %cmp9.10 = icmp sgt i32 %10, %.scoreMax.0.9
+ %.scoreMax.0.10 = select i1 %cmp9.10, i32 %10, i32 %.scoreMax.0.9
+ %arrayidx8.11 = getelementptr inbounds i32, i32* %score, i32 11
+ %11 = load i32, i32* %arrayidx8.11, align 4, !tbaa !3
+ %cmp9.11 = icmp sgt i32 %11, %.scoreMax.0.10
+ %.scoreMax.0.11 = select i1 %cmp9.11, i32 %11, i32 %.scoreMax.0.10
+ %arrayidx8.12 = getelementptr inbounds i32, i32* %score, i32 12
+ %12 = load i32, i32* %arrayidx8.12, align 4, !tbaa !3
+ %cmp9.12 = icmp sgt i32 %12, %.scoreMax.0.11
+ %.scoreMax.0.12 = select i1 %cmp9.12, i32 %12, i32 %.scoreMax.0.11
+ %arrayidx8.13 = getelementptr inbounds i32, i32* %score, i32 13
+ %13 = load i32, i32* %arrayidx8.13, align 4, !tbaa !3
+ %cmp9.13 = icmp sgt i32 %13, %.scoreMax.0.12
+ %.scoreMax.0.13 = select i1 %cmp9.13, i32 %13, i32 %.scoreMax.0.12
+ %arrayidx8.14 = getelementptr inbounds i32, i32* %score, i32 14
+ %14 = load i32, i32* %arrayidx8.14, align 4, !tbaa !3
+ %cmp9.14 = icmp sgt i32 %14, %.scoreMax.0.13
+ %.scoreMax.0.14 = select i1 %cmp9.14, i32 %14, i32 %.scoreMax.0.13
+ %arrayidx8.15 = getelementptr inbounds i32, i32* %score, i32 15
+ %15 = load i32, i32* %arrayidx8.15, align 4, !tbaa !3
+ %cmp9.15 = icmp sgt i32 %15, %.scoreMax.0.14
+ %.scoreMax.0.15 = select i1 %cmp9.15, i32 %15, i32 %.scoreMax.0.14
+ %arrayidx8.16 = getelementptr inbounds i32, i32* %score, i32 16
+ %16 = load i32, i32* %arrayidx8.16, align 4, !tbaa !3
+ %cmp9.16 = icmp sgt i32 %16, %.scoreMax.0.15
+ %.scoreMax.0.16 = select i1 %cmp9.16, i32 %16, i32 %.scoreMax.0.15
+ %arrayidx8.17 = getelementptr inbounds i32, i32* %score, i32 17
+ %17 = load i32, i32* %arrayidx8.17, align 4, !tbaa !3
+ %cmp9.17 = icmp sgt i32 %17, %.scoreMax.0.16
+ %.scoreMax.0.17 = select i1 %cmp9.17, i32 %17, i32 %.scoreMax.0.16
+ %arrayidx8.18 = getelementptr inbounds i32, i32* %score, i32 18
+ %18 = load i32, i32* %arrayidx8.18, align 4, !tbaa !3
+ %cmp9.18 = icmp sgt i32 %18, %.scoreMax.0.17
+ %.scoreMax.0.18 = select i1 %cmp9.18, i32 %18, i32 %.scoreMax.0.17
+ %arrayidx8.19 = getelementptr inbounds i32, i32* %score, i32 19
+ %19 = load i32, i32* %arrayidx8.19, align 4, !tbaa !3
+ %cmp9.19 = icmp sgt i32 %19, %.scoreMax.0.18
+ %.scoreMax.0.19 = select i1 %cmp9.19, i32 %19, i32 %.scoreMax.0.18
+ %cmp14 = icmp eq i32 %.scoreMax.0.19, -1
+ br i1 %cmp14, label %cleanup, label %if.end16
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %i.034 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+ %arrayidx = getelementptr inbounds i32, i32* %src1, i32 %i.034
+ %20 = load i32, i32* %arrayidx, align 4, !tbaa !3
+ %arrayidx1 = getelementptr inbounds i32, i32* %src2, i32 %i.034
+ %21 = load i32, i32* %arrayidx1, align 4, !tbaa !3
+ %add = add nsw i32 %21, %20
+ %arrayidx2 = getelementptr inbounds i32, i32* %score, i32 %i.034
+ store i32 %add, i32* %arrayidx2, align 4, !tbaa !3
+ %inc = add nuw nsw i32 %i.034, 1
+ %exitcond = icmp eq i32 %inc, %n
+ br i1 %exitcond, label %for.body7.preheader.loopexit, label %for.body
+
+if.end16: ; preds = %for.body7.preheader
+ store i32 %.scoreMax.0.19, i32* %max, align 4, !tbaa !3
+ br label %cleanup
+
+cleanup: ; preds = %for.body7.preheader, %if.end16
+ %retval.0 = phi i32 [ 1, %if.end16 ], [ 0, %for.body7.preheader ]
+ ret i32 %retval.0
+}
+
+attributes #0 = { norecurse nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="arm7tdmi" "target-features"="+strict-align" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.module.flags = !{!0, !1}
+!llvm.ident = !{!2}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 1, !"min_enum_size", i32 4}
+!2 = !{!"clang version 3.9.0 (http://llvm.org/git/clang.git 24028ac5d033be4c1e87ce65affb4ea61deb1897) (http://llvm.org/git/llvm.git b88337289681d6ede5bba5da77007c83509e54ae)"}
+!3 = !{!4, !4, i64 0}
+!4 = !{!"int", !5, i64 0}
+!5 = !{!"omnipotent char", !6, i64 0}
+!6 = !{!"Simple C/C++ TBAA"}
diff --git a/polly/test/ScopInfo/complex-successor-structure.ll b/polly/test/ScopInfo/complex-successor-structure.ll
index 37e0edb3025..83871cf72cd 100644
--- a/polly/test/ScopInfo/complex-successor-structure.ll
+++ b/polly/test/ScopInfo/complex-successor-structure.ll
@@ -5,7 +5,7 @@
; of following form and check that the domain construction does not take a huge
; amount of time.
;
-; CHECK: Low number of domain conjuncts assumption: { : 1 = 0 }
+; CHECK: Low complexity assumption: { : 1 = 0 }
; |
; for.body <--+
OpenPOWER on IntegriCloud