diff options
| author | Tobias Grosser <tobias@grosser.es> | 2015-06-29 14:44:22 +0000 |
|---|---|---|
| committer | Tobias Grosser <tobias@grosser.es> | 2015-06-29 14:44:22 +0000 |
| commit | 1b13ddea50d4ef62382c35d979b3b002c1ad4be9 (patch) | |
| tree | b4df13603f961702054eab9b0a20e9010820a593 | |
| parent | 23bceb2eecdc0eec9b7e7548b6155d1833095dc4 (diff) | |
| download | bcm5719-llvm-1b13ddea50d4ef62382c35d979b3b002c1ad4be9.tar.gz bcm5719-llvm-1b13ddea50d4ef62382c35d979b3b002c1ad4be9.zip | |
Add initial support for delinearizing A[t%2][i][j]
This is very preliminary support, but it seems to work for the most common case.
Once we observe more or different test cases, we can work on generalizing this.
llvm-svn: 240955
| -rw-r--r-- | polly/lib/Analysis/ScopDetection.cpp | 42 | ||||
| -rw-r--r-- | polly/test/ScopInfo/multidim_srem.ll | 92 |
2 files changed, 130 insertions, 4 deletions
diff --git a/polly/lib/Analysis/ScopDetection.cpp b/polly/lib/Analysis/ScopDetection.cpp index e5c60efcf89..36792e50069 100644 --- a/polly/lib/Analysis/ScopDetection.cpp +++ b/polly/lib/Analysis/ScopDetection.cpp @@ -470,10 +470,44 @@ bool ScopDetection::hasAffineMemoryAccesses(DetectionContext &Context) const { // First step: collect parametric terms in all array references. SmallVector<const SCEV *, 4> Terms; for (const auto &Pair : Context.Accesses[BasePointer]) { - const SCEVAddRecExpr *AF = dyn_cast<SCEVAddRecExpr>(Pair.second); - - if (AF) + if (auto *AF = dyn_cast<SCEVAddRecExpr>(Pair.second)) SE->collectParametricTerms(AF, Terms); + + // In case the outermost expression is a plain add, we check if any of its + // terms has the form 4 * %inst * %param * %param ..., aka a term that + // contains a product between a parameter and an instruction that is + // inside the scop. Such instructions, if allowed at all, are instructions + // SCEV can not represent, but Polly is still looking through. As a + // result, these instructions can depend on induction variables and are + // most likely no array sizes. However, terms that are multiplied with + // them are likely candidates for array sizes. 
+ if (auto *AF = dyn_cast<SCEVAddExpr>(Pair.second)) { + for (auto Op : AF->operands()) { + if (auto *AF2 = dyn_cast<SCEVAddRecExpr>(Op)) + SE->collectParametricTerms(AF2, Terms); + if (auto *AF2 = dyn_cast<SCEVMulExpr>(Op)) { + SmallVector<const SCEV *, 0> Operands; + bool TermsHasInRegionInst = false; + + for (auto *MulOp : AF2->operands()) { + if (auto *Const = dyn_cast<SCEVConstant>(MulOp)) + Operands.push_back(Const); + if (auto *Unknown = dyn_cast<SCEVUnknown>(MulOp)) { + if (auto *Inst = dyn_cast<Instruction>(Unknown->getValue())) { + if (!Context.CurRegion.contains(Inst)) + Operands.push_back(MulOp); + else + TermsHasInRegionInst = true; + + } else { + Operands.push_back(MulOp); + } + } + } + Terms.push_back(SE->getMulExpr(Operands)); + } + } + } } // Second step: find array shape. @@ -517,7 +551,7 @@ bool ScopDetection::hasAffineMemoryAccesses(DetectionContext &Context) const { MapInsnToMemAcc TempMemoryAccesses; for (const auto &Pair : Context.Accesses[BasePointer]) { const Instruction *Insn = Pair.first; - const SCEVAddRecExpr *AF = dyn_cast<SCEVAddRecExpr>(Pair.second); + auto *AF = Pair.second; bool IsNonAffine = false; TempMemoryAccesses.insert(std::make_pair(Insn, MemAcc(Insn, Shape))); MemAcc *Acc = &TempMemoryAccesses.find(Insn)->second; diff --git a/polly/test/ScopInfo/multidim_srem.ll b/polly/test/ScopInfo/multidim_srem.ll new file mode 100644 index 00000000000..7f181a07c57 --- /dev/null +++ b/polly/test/ScopInfo/multidim_srem.ll @@ -0,0 +1,92 @@ +; RUN: opt %loadPolly -analyze -polly-scops -S < %s | FileCheck %s +; +; void foo(long n, float A[][n][n]) { +; for (long i = 0; i < 200; i++) +; for (long j = 0; j < n; j++) +; for (long k = 0; k < n; k++) +; A[i % 2][j][k] += 10; +; } +; +; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK: [n] -> { Stmt_for_body_8[i0, i1, i2] -> MemRef_A[o0, i1, i2] : exists (e0 = floor((-i0 + o0)/2): 2e0 = -i0 + o0 and o0 >= 0 and o0 <= 1) }; +; CHECK: MustWriteAccess := [Reduction Type: NONE] 
[Scalar: 0] +; CHECK: [n] -> { Stmt_for_body_8[i0, i1, i2] -> MemRef_A[o0, i1, i2] : exists (e0 = floor((-i0 + o0)/2): 2e0 = -i0 + o0 and o0 >= 0 and o0 <= 1) }; + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + + +define void @foo(i64 %n, float* %A) #0 { +entry: + br label %entry.split + +entry.split: ; preds = %entry + br label %for.cond.1.preheader + +for.cond.1.preheader: ; preds = %entry.split, %for.inc.14 + %i.06 = phi i64 [ 0, %entry.split ], [ %inc15, %for.inc.14 ] + %cmp2.3 = icmp sgt i64 %n, 0 + br i1 %cmp2.3, label %for.cond.5.preheader.lr.ph, label %for.inc.14 + +for.cond.5.preheader.lr.ph: ; preds = %for.cond.1.preheader + br label %for.cond.5.preheader + +for.cond.5.preheader: ; preds = %for.cond.5.preheader.lr.ph, %for.inc.11 + %j.04 = phi i64 [ 0, %for.cond.5.preheader.lr.ph ], [ %inc12, %for.inc.11 ] + %cmp6.1 = icmp sgt i64 %n, 0 + br i1 %cmp6.1, label %for.body.8.lr.ph, label %for.inc.11 + +for.body.8.lr.ph: ; preds = %for.cond.5.preheader + br label %for.body.8 + +for.body.8: ; preds = %for.body.8.lr.ph, %for.body.8 + %k.02 = phi i64 [ 0, %for.body.8.lr.ph ], [ %inc, %for.body.8 ] + %rem = srem i64 %i.06, 2 + %0 = mul nuw i64 %n, %n + %1 = mul nsw i64 %0, %rem + %arrayidx = getelementptr inbounds float, float* %A, i64 %1 + %2 = mul nsw i64 %j.04, %n + %arrayidx9 = getelementptr inbounds float, float* %arrayidx, i64 %2 + %arrayidx10 = getelementptr inbounds float, float* %arrayidx9, i64 %k.02 + %3 = load float, float* %arrayidx10, align 4, !tbaa !1 + %add = fadd float %3, 1.000000e+01 + store float %add, float* %arrayidx10, align 4, !tbaa !1 + %inc = add nuw nsw i64 %k.02, 1 + %exitcond = icmp ne i64 %inc, %n + br i1 %exitcond, label %for.body.8, label %for.cond.5.for.inc.11_crit_edge + +for.cond.5.for.inc.11_crit_edge: ; preds = %for.body.8 + br label %for.inc.11 + +for.inc.11: ; preds = %for.cond.5.for.inc.11_crit_edge, %for.cond.5.preheader + %inc12 = add nuw nsw i64 %j.04, 1 + %exitcond7 = icmp ne i64 %inc12, %n + br i1 
%exitcond7, label %for.cond.5.preheader, label %for.cond.1.for.inc.14_crit_edge + +for.cond.1.for.inc.14_crit_edge: ; preds = %for.inc.11 + br label %for.inc.14 + +for.inc.14: ; preds = %for.cond.1.for.inc.14_crit_edge, %for.cond.1.preheader + %inc15 = add nuw nsw i64 %i.06, 1 + %exitcond8 = icmp ne i64 %inc15, 200 + br i1 %exitcond8, label %for.cond.1.preheader, label %for.end.16 + +for.end.16: ; preds = %for.inc.14 + ret void +} + +; Function Attrs: nounwind +declare void @llvm.lifetime.start(i64, i8* nocapture) #1 + +; Function Attrs: nounwind +declare void @llvm.lifetime.end(i64, i8* nocapture) #1 + +attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind } + +!llvm.ident = !{!0} + +!0 = !{!"clang version 3.7.0 (trunk 240923) (llvm/trunk 240924)"} +!1 = !{!2, !2, i64 0} +!2 = !{!"float", !3, i64 0} +!3 = !{!"omnipotent char", !4, i64 0} +!4 = !{!"Simple C/C++ TBAA"} |

