diff options
| author | Tobias Grosser <tobias@grosser.es> | 2015-06-29 14:44:22 +0000 | 
|---|---|---|
| committer | Tobias Grosser <tobias@grosser.es> | 2015-06-29 14:44:22 +0000 | 
| commit | 1b13ddea50d4ef62382c35d979b3b002c1ad4be9 (patch) | |
| tree | b4df13603f961702054eab9b0a20e9010820a593 | |
| parent | 23bceb2eecdc0eec9b7e7548b6155d1833095dc4 (diff) | |
| download | bcm5719-llvm-1b13ddea50d4ef62382c35d979b3b002c1ad4be9.tar.gz bcm5719-llvm-1b13ddea50d4ef62382c35d979b3b002c1ad4be9.zip | |
Add initial support for delinearizing A[t%2][i][j]
This is very preliminary support, but it seems to work for the most common case.
As we encounter more or different test cases, we can work on generalizing this.
llvm-svn: 240955
| -rw-r--r-- | polly/lib/Analysis/ScopDetection.cpp | 42 | ||||
| -rw-r--r-- | polly/test/ScopInfo/multidim_srem.ll | 92 | 
2 files changed, 130 insertions, 4 deletions
| diff --git a/polly/lib/Analysis/ScopDetection.cpp b/polly/lib/Analysis/ScopDetection.cpp index e5c60efcf89..36792e50069 100644 --- a/polly/lib/Analysis/ScopDetection.cpp +++ b/polly/lib/Analysis/ScopDetection.cpp @@ -470,10 +470,44 @@ bool ScopDetection::hasAffineMemoryAccesses(DetectionContext &Context) const {      // First step: collect parametric terms in all array references.      SmallVector<const SCEV *, 4> Terms;      for (const auto &Pair : Context.Accesses[BasePointer]) { -      const SCEVAddRecExpr *AF = dyn_cast<SCEVAddRecExpr>(Pair.second); - -      if (AF) +      if (auto *AF = dyn_cast<SCEVAddRecExpr>(Pair.second))          SE->collectParametricTerms(AF, Terms); + +      // In case the outermost expression is a plain add, we check if any of its +      // terms has the form 4 * %inst * %param * %param ..., aka a term that +      // contains a product between a parameter and an instruction that is +      // inside the scop. Such instructions, if allowed at all, are instructions +      // SCEV can not represent, but Polly is still looking through. As a +      // result, these instructions can depend on induction variables and are +      // most likely no array sizes. However, terms that are multiplied with +      // them are likely candidates for array sizes. 
+      if (auto *AF = dyn_cast<SCEVAddExpr>(Pair.second)) { +        for (auto Op : AF->operands()) { +          if (auto *AF2 = dyn_cast<SCEVAddRecExpr>(Op)) +            SE->collectParametricTerms(AF2, Terms); +          if (auto *AF2 = dyn_cast<SCEVMulExpr>(Op)) { +            SmallVector<const SCEV *, 0> Operands; +            bool TermsHasInRegionInst = false; + +            for (auto *MulOp : AF2->operands()) { +              if (auto *Const = dyn_cast<SCEVConstant>(MulOp)) +                Operands.push_back(Const); +              if (auto *Unknown = dyn_cast<SCEVUnknown>(MulOp)) { +                if (auto *Inst = dyn_cast<Instruction>(Unknown->getValue())) { +                  if (!Context.CurRegion.contains(Inst)) +                    Operands.push_back(MulOp); +                  else +                    TermsHasInRegionInst = true; + +                } else { +                  Operands.push_back(MulOp); +                } +              } +            } +            Terms.push_back(SE->getMulExpr(Operands)); +          } +        } +      }      }      // Second step: find array shape. 
@@ -517,7 +551,7 @@ bool ScopDetection::hasAffineMemoryAccesses(DetectionContext &Context) const {      MapInsnToMemAcc TempMemoryAccesses;      for (const auto &Pair : Context.Accesses[BasePointer]) {        const Instruction *Insn = Pair.first; -      const SCEVAddRecExpr *AF = dyn_cast<SCEVAddRecExpr>(Pair.second); +      auto *AF = Pair.second;        bool IsNonAffine = false;        TempMemoryAccesses.insert(std::make_pair(Insn, MemAcc(Insn, Shape)));        MemAcc *Acc = &TempMemoryAccesses.find(Insn)->second; diff --git a/polly/test/ScopInfo/multidim_srem.ll b/polly/test/ScopInfo/multidim_srem.ll new file mode 100644 index 00000000000..7f181a07c57 --- /dev/null +++ b/polly/test/ScopInfo/multidim_srem.ll @@ -0,0 +1,92 @@ +; RUN: opt %loadPolly -analyze -polly-scops -S < %s | FileCheck %s +; +;    void foo(long n, float A[][n][n]) { +;      for (long i = 0; i < 200; i++) +;        for (long j = 0; j < n; j++) +;          for (long k = 0; k < n; k++) +;            A[i % 2][j][k] += 10; +;    } +; +; CHECK: ReadAccess :=	[Reduction Type: NONE] [Scalar: 0] +; CHECK:     [n] -> { Stmt_for_body_8[i0, i1, i2] -> MemRef_A[o0, i1, i2] : exists (e0 = floor((-i0 + o0)/2): 2e0 = -i0 + o0 and o0 >= 0 and o0 <= 1) }; +; CHECK: MustWriteAccess :=	[Reduction Type: NONE] [Scalar: 0] +; CHECK:     [n] -> { Stmt_for_body_8[i0, i1, i2] -> MemRef_A[o0, i1, i2] : exists (e0 = floor((-i0 + o0)/2): 2e0 = -i0 + o0 and o0 >= 0 and o0 <= 1) }; + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + + +define void @foo(i64 %n, float* %A) #0 { +entry: +  br label %entry.split + +entry.split:                                      ; preds = %entry +  br label %for.cond.1.preheader + +for.cond.1.preheader:                             ; preds = %entry.split, %for.inc.14 +  %i.06 = phi i64 [ 0, %entry.split ], [ %inc15, %for.inc.14 ] +  %cmp2.3 = icmp sgt i64 %n, 0 +  br i1 %cmp2.3, label %for.cond.5.preheader.lr.ph, label %for.inc.14 + +for.cond.5.preheader.lr.ph:                     
  ; preds = %for.cond.1.preheader +  br label %for.cond.5.preheader + +for.cond.5.preheader:                             ; preds = %for.cond.5.preheader.lr.ph, %for.inc.11 +  %j.04 = phi i64 [ 0, %for.cond.5.preheader.lr.ph ], [ %inc12, %for.inc.11 ] +  %cmp6.1 = icmp sgt i64 %n, 0 +  br i1 %cmp6.1, label %for.body.8.lr.ph, label %for.inc.11 + +for.body.8.lr.ph:                                 ; preds = %for.cond.5.preheader +  br label %for.body.8 + +for.body.8:                                       ; preds = %for.body.8.lr.ph, %for.body.8 +  %k.02 = phi i64 [ 0, %for.body.8.lr.ph ], [ %inc, %for.body.8 ] +  %rem = srem i64 %i.06, 2 +  %0 = mul nuw i64 %n, %n +  %1 = mul nsw i64 %0, %rem +  %arrayidx = getelementptr inbounds float, float* %A, i64 %1 +  %2 = mul nsw i64 %j.04, %n +  %arrayidx9 = getelementptr inbounds float, float* %arrayidx, i64 %2 +  %arrayidx10 = getelementptr inbounds float, float* %arrayidx9, i64 %k.02 +  %3 = load float, float* %arrayidx10, align 4, !tbaa !1 +  %add = fadd float %3, 1.000000e+01 +  store float %add, float* %arrayidx10, align 4, !tbaa !1 +  %inc = add nuw nsw i64 %k.02, 1 +  %exitcond = icmp ne i64 %inc, %n +  br i1 %exitcond, label %for.body.8, label %for.cond.5.for.inc.11_crit_edge + +for.cond.5.for.inc.11_crit_edge:                  ; preds = %for.body.8 +  br label %for.inc.11 + +for.inc.11:                                       ; preds = %for.cond.5.for.inc.11_crit_edge, %for.cond.5.preheader +  %inc12 = add nuw nsw i64 %j.04, 1 +  %exitcond7 = icmp ne i64 %inc12, %n +  br i1 %exitcond7, label %for.cond.5.preheader, label %for.cond.1.for.inc.14_crit_edge + +for.cond.1.for.inc.14_crit_edge:                  ; preds = %for.inc.11 +  br label %for.inc.14 + +for.inc.14:                                       ; preds = %for.cond.1.for.inc.14_crit_edge, %for.cond.1.preheader +  %inc15 = add nuw nsw i64 %i.06, 1 +  %exitcond8 = icmp ne i64 %inc15, 200 +  br i1 %exitcond8, label %for.cond.1.preheader, label %for.end.16 + 
+for.end.16:                                       ; preds = %for.inc.14 +  ret void +} + +; Function Attrs: nounwind +declare void @llvm.lifetime.start(i64, i8* nocapture) #1 + +; Function Attrs: nounwind +declare void @llvm.lifetime.end(i64, i8* nocapture) #1 + +attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind } + +!llvm.ident = !{!0} + +!0 = !{!"clang version 3.7.0 (trunk 240923) (llvm/trunk 240924)"} +!1 = !{!2, !2, i64 0} +!2 = !{!"float", !3, i64 0} +!3 = !{!"omnipotent char", !4, i64 0} +!4 = !{!"Simple C/C++ TBAA"} | 

