diff options
author | Johannes Doerfert <doerfert@cs.uni-saarland.de> | 2016-04-08 18:16:58 +0000 |
---|---|---|
committer | Johannes Doerfert <doerfert@cs.uni-saarland.de> | 2016-04-08 18:16:58 +0000 |
commit | 5155edc6583aee9264a94abf2c43dca936669d0c (patch) | |
tree | f11b254b60c1a6a984564ef369ec71c7a10ef8de | |
parent | a9dc5294420749b12def8f5bb6dda675598f1376 (diff) | |
download | bcm5719-llvm-5155edc6583aee9264a94abf2c43dca936669d0c.tar.gz bcm5719-llvm-5155edc6583aee9264a94abf2c43dca936669d0c.zip |
[FIX] Teach the ScopExpander about parallel subfunctions
llvm-svn: 265824
-rw-r--r-- | polly/lib/Support/ScopHelper.cpp | 19 |
-rw-r--r-- | polly/test/Isl/CodeGen/OpenMP/recomputed-srem.ll | 44 |
2 files changed, 58 insertions, 5 deletions
diff --git a/polly/lib/Support/ScopHelper.cpp b/polly/lib/Support/ScopHelper.cpp index e73946f5042..469c00d52d3 100644 --- a/polly/lib/Support/ScopHelper.cpp +++ b/polly/lib/Support/ScopHelper.cpp @@ -225,9 +225,9 @@ void polly::splitEntryBlockForAlloca(BasicBlock *EntryBlock, Pass *P) { struct ScopExpander : SCEVVisitor<ScopExpander, const SCEV *> { friend struct SCEVVisitor<ScopExpander, const SCEV *>; - explicit ScopExpander(const Region &R, ScalarEvolution &SE, + explicit ScopExpander(const Region &R, Function &F, ScalarEvolution &SE, const DataLayout &DL, const char *Name, ValueMapT *VMap) - : Expander(SCEVExpander(SE, DL, Name)), SE(SE), Name(Name), R(R), + : Expander(SCEVExpander(SE, DL, Name)), F(F), SE(SE), Name(Name), R(R), VMap(VMap) {} Value *expandCodeFor(const SCEV *E, Type *Ty, Instruction *I) { @@ -241,6 +241,10 @@ struct ScopExpander : SCEVVisitor<ScopExpander, const SCEV *> { private: SCEVExpander Expander; + + /// @brief The function in which the code is placed. + Function &F; + ScalarEvolution &SE; const char *Name; const Region &R; @@ -264,10 +268,15 @@ private: Inst->getOpcode() != Instruction::SDiv)) return E; - if (!R.contains(Inst)) + // If the instruction is outside the SCoP we can just use it without the + // need to recompute it. However, if it is in another function we need to + // recompute it as the definition does not dominate the use. + bool SameFunction = (&F == R.getEntry()->getParent()); + if (!R.contains(Inst) && SameFunction) return E; - Instruction *StartIP = R.getEnteringBlock()->getTerminator(); + Instruction *StartIP = SameFunction ? 
R.getEnteringBlock()->getTerminator() + : F.getEntryBlock().getTerminator(); const SCEV *LHSScev = visit(SE.getSCEV(Inst->getOperand(0))); const SCEV *RHSScev = visit(SE.getSCEV(Inst->getOperand(1))); @@ -333,7 +342,7 @@ private: Value *polly::expandCodeFor(Scop &S, ScalarEvolution &SE, const DataLayout &DL, const char *Name, const SCEV *E, Type *Ty, Instruction *IP, ValueMapT *VMap) { - ScopExpander Expander(S.getRegion(), SE, DL, Name, VMap); + ScopExpander Expander(S.getRegion(), *IP->getFunction(), SE, DL, Name, VMap); return Expander.expandCodeFor(E, Ty, IP); } diff --git a/polly/test/Isl/CodeGen/OpenMP/recomputed-srem.ll b/polly/test/Isl/CodeGen/OpenMP/recomputed-srem.ll new file mode 100644 index 00000000000..bfbfce176f8 --- /dev/null +++ b/polly/test/Isl/CodeGen/OpenMP/recomputed-srem.ll @@ -0,0 +1,44 @@ +; RUN: opt %loadPolly -polly-codegen -polly-parallel \ +; RUN: -polly-parallel-force -S < %s | FileCheck %s +; +; Test to verify that we recompute %rem96 in the parallel subfunction. 
+; +; CHECK: %rem96polly = srem i32 %polly.subfunc.arg.n2, 16 +; CHECK-NEXT: br label %polly.par.checkNext +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +; Function Attrs: nounwind uwtable +define void @dmxpy(i32 %n1, float* %y, i32 %n2, float* %x) #0 { +entry: + %rem96 = srem i32 %n2, 16 + %0 = sext i32 %rem96 to i64 + %1 = add i64 %0, 15 + br label %for.cond195.preheader + +for.cond195.preheader: ; preds = %for.inc363, %entry + %indvars.iv262 = phi i64 [ %1, %entry ], [ %indvars.iv.next263, %for.inc363 ] + %j.0236 = phi i32 [ 0, %entry ], [ %add364, %for.inc363 ] + br label %for.body197 + +for.body197: ; preds = %for.body197, %for.cond195.preheader + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body197 ], [ 0, %for.cond195.preheader ] + %arrayidx199 = getelementptr inbounds float, float* %y, i64 %indvars.iv + %2 = add nsw i64 %indvars.iv262, -6 + %arrayidx292 = getelementptr inbounds float, float* %x, i64 %2 + %3 = load float, float* %arrayidx292, align 4 + store float undef, float* %arrayidx199, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp ne i32 %lftr.wideiv, %n1 + br i1 %exitcond, label %for.body197, label %for.inc363 + +for.inc363: ; preds = %for.body197 + %add364 = add nsw i32 %j.0236, 16 + %cmp193 = icmp slt i32 %add364, %n2 + %indvars.iv.next263 = add i64 %indvars.iv262, 16 + br i1 %cmp193, label %for.cond195.preheader, label %for.end365 + +for.end365: ; preds = %for.inc363 + ret void +} |