summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Tobias Grosser <tobias@grosser.es> 2015-06-29 14:44:22 +0000
committer Tobias Grosser <tobias@grosser.es> 2015-06-29 14:44:22 +0000
commit 1b13ddea50d4ef62382c35d979b3b002c1ad4be9 (patch)
tree b4df13603f961702054eab9b0a20e9010820a593
parent 23bceb2eecdc0eec9b7e7548b6155d1833095dc4 (diff)
download bcm5719-llvm-1b13ddea50d4ef62382c35d979b3b002c1ad4be9.tar.gz
bcm5719-llvm-1b13ddea50d4ef62382c35d979b3b002c1ad4be9.zip
Add first support to delinearize A[t%2][i][j]
This is very preliminary support, but it seems to work for the most common case. When observing more/different test cases, we can work on generalizing this.

llvm-svn: 240955
-rw-r--r-- polly/lib/Analysis/ScopDetection.cpp 42
-rw-r--r-- polly/test/ScopInfo/multidim_srem.ll 92
2 files changed, 130 insertions, 4 deletions
diff --git a/polly/lib/Analysis/ScopDetection.cpp b/polly/lib/Analysis/ScopDetection.cpp
index e5c60efcf89..36792e50069 100644
--- a/polly/lib/Analysis/ScopDetection.cpp
+++ b/polly/lib/Analysis/ScopDetection.cpp
@@ -470,10 +470,44 @@ bool ScopDetection::hasAffineMemoryAccesses(DetectionContext &Context) const {
// First step: collect parametric terms in all array references.
SmallVector<const SCEV *, 4> Terms;
for (const auto &Pair : Context.Accesses[BasePointer]) {
- const SCEVAddRecExpr *AF = dyn_cast<SCEVAddRecExpr>(Pair.second);
-
- if (AF)
+ if (auto *AF = dyn_cast<SCEVAddRecExpr>(Pair.second))
SE->collectParametricTerms(AF, Terms);
+
+ // In case the outermost expression is a plain add, we check if any of its
+ // terms has the form 4 * %inst * %param * %param ..., aka a term that
+ // contains a product between a parameter and an instruction that is
+ // inside the scop. Such instructions, if allowed at all, are instructions
+ // SCEV can not represent, but Polly is still looking through. As a
+ // result, these instructions can depend on induction variables and are
+ // most likely no array sizes. However, terms that are multiplied with
+ // them are likely candidates for array sizes.
+ if (auto *AF = dyn_cast<SCEVAddExpr>(Pair.second)) {
+ for (auto Op : AF->operands()) {
+ if (auto *AF2 = dyn_cast<SCEVAddRecExpr>(Op))
+ SE->collectParametricTerms(AF2, Terms);
+ if (auto *AF2 = dyn_cast<SCEVMulExpr>(Op)) {
+ SmallVector<const SCEV *, 0> Operands;
+ bool TermsHasInRegionInst = false;
+
+ for (auto *MulOp : AF2->operands()) {
+ if (auto *Const = dyn_cast<SCEVConstant>(MulOp))
+ Operands.push_back(Const);
+ if (auto *Unknown = dyn_cast<SCEVUnknown>(MulOp)) {
+ if (auto *Inst = dyn_cast<Instruction>(Unknown->getValue())) {
+ if (!Context.CurRegion.contains(Inst))
+ Operands.push_back(MulOp);
+ else
+ TermsHasInRegionInst = true;
+
+ } else {
+ Operands.push_back(MulOp);
+ }
+ }
+ }
+ Terms.push_back(SE->getMulExpr(Operands));
+ }
+ }
+ }
}
// Second step: find array shape.
@@ -517,7 +551,7 @@ bool ScopDetection::hasAffineMemoryAccesses(DetectionContext &Context) const {
MapInsnToMemAcc TempMemoryAccesses;
for (const auto &Pair : Context.Accesses[BasePointer]) {
const Instruction *Insn = Pair.first;
- const SCEVAddRecExpr *AF = dyn_cast<SCEVAddRecExpr>(Pair.second);
+ auto *AF = Pair.second;
bool IsNonAffine = false;
TempMemoryAccesses.insert(std::make_pair(Insn, MemAcc(Insn, Shape)));
MemAcc *Acc = &TempMemoryAccesses.find(Insn)->second;
diff --git a/polly/test/ScopInfo/multidim_srem.ll b/polly/test/ScopInfo/multidim_srem.ll
new file mode 100644
index 00000000000..7f181a07c57
--- /dev/null
+++ b/polly/test/ScopInfo/multidim_srem.ll
@@ -0,0 +1,92 @@
+; RUN: opt %loadPolly -analyze -polly-scops -S < %s | FileCheck %s
+;
+; void foo(long n, float A[][n][n]) {
+; for (long i = 0; i < 200; i++)
+; for (long j = 0; j < n; j++)
+; for (long k = 0; k < n; k++)
+; A[i % 2][j][k] += 10;
+; }
+;
+; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK: [n] -> { Stmt_for_body_8[i0, i1, i2] -> MemRef_A[o0, i1, i2] : exists (e0 = floor((-i0 + o0)/2): 2e0 = -i0 + o0 and o0 >= 0 and o0 <= 1) };
+; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK: [n] -> { Stmt_for_body_8[i0, i1, i2] -> MemRef_A[o0, i1, i2] : exists (e0 = floor((-i0 + o0)/2): 2e0 = -i0 + o0 and o0 >= 0 and o0 <= 1) };
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+
+define void @foo(i64 %n, float* %A) #0 {
+entry:
+ br label %entry.split
+
+entry.split: ; preds = %entry
+ br label %for.cond.1.preheader
+
+for.cond.1.preheader: ; preds = %entry.split, %for.inc.14
+ %i.06 = phi i64 [ 0, %entry.split ], [ %inc15, %for.inc.14 ]
+ %cmp2.3 = icmp sgt i64 %n, 0
+ br i1 %cmp2.3, label %for.cond.5.preheader.lr.ph, label %for.inc.14
+
+for.cond.5.preheader.lr.ph: ; preds = %for.cond.1.preheader
+ br label %for.cond.5.preheader
+
+for.cond.5.preheader: ; preds = %for.cond.5.preheader.lr.ph, %for.inc.11
+ %j.04 = phi i64 [ 0, %for.cond.5.preheader.lr.ph ], [ %inc12, %for.inc.11 ]
+ %cmp6.1 = icmp sgt i64 %n, 0
+ br i1 %cmp6.1, label %for.body.8.lr.ph, label %for.inc.11
+
+for.body.8.lr.ph: ; preds = %for.cond.5.preheader
+ br label %for.body.8
+
+for.body.8: ; preds = %for.body.8.lr.ph, %for.body.8
+ %k.02 = phi i64 [ 0, %for.body.8.lr.ph ], [ %inc, %for.body.8 ]
+ %rem = srem i64 %i.06, 2
+ %0 = mul nuw i64 %n, %n
+ %1 = mul nsw i64 %0, %rem
+ %arrayidx = getelementptr inbounds float, float* %A, i64 %1
+ %2 = mul nsw i64 %j.04, %n
+ %arrayidx9 = getelementptr inbounds float, float* %arrayidx, i64 %2
+ %arrayidx10 = getelementptr inbounds float, float* %arrayidx9, i64 %k.02
+ %3 = load float, float* %arrayidx10, align 4, !tbaa !1
+ %add = fadd float %3, 1.000000e+01
+ store float %add, float* %arrayidx10, align 4, !tbaa !1
+ %inc = add nuw nsw i64 %k.02, 1
+ %exitcond = icmp ne i64 %inc, %n
+ br i1 %exitcond, label %for.body.8, label %for.cond.5.for.inc.11_crit_edge
+
+for.cond.5.for.inc.11_crit_edge: ; preds = %for.body.8
+ br label %for.inc.11
+
+for.inc.11: ; preds = %for.cond.5.for.inc.11_crit_edge, %for.cond.5.preheader
+ %inc12 = add nuw nsw i64 %j.04, 1
+ %exitcond7 = icmp ne i64 %inc12, %n
+ br i1 %exitcond7, label %for.cond.5.preheader, label %for.cond.1.for.inc.14_crit_edge
+
+for.cond.1.for.inc.14_crit_edge: ; preds = %for.inc.11
+ br label %for.inc.14
+
+for.inc.14: ; preds = %for.cond.1.for.inc.14_crit_edge, %for.cond.1.preheader
+ %inc15 = add nuw nsw i64 %i.06, 1
+ %exitcond8 = icmp ne i64 %inc15, 200
+ br i1 %exitcond8, label %for.cond.1.preheader, label %for.end.16
+
+for.end.16: ; preds = %for.inc.14
+ ret void
+}
+
+; Function Attrs: nounwind
+declare void @llvm.lifetime.start(i64, i8* nocapture) #1
+
+; Function Attrs: nounwind
+declare void @llvm.lifetime.end(i64, i8* nocapture) #1
+
+attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind }
+
+!llvm.ident = !{!0}
+
+!0 = !{!"clang version 3.7.0 (trunk 240923) (llvm/trunk 240924)"}
+!1 = !{!2, !2, i64 0}
+!2 = !{!"float", !3, i64 0}
+!3 = !{!"omnipotent char", !4, i64 0}
+!4 = !{!"Simple C/C++ TBAA"}
OpenPOWER on IntegriCloud