diff options
| author | Hal Finkel <hfinkel@anl.gov> | 2015-04-11 00:33:08 +0000 |
|---|---|---|
| committer | Hal Finkel <hfinkel@anl.gov> | 2015-04-11 00:33:08 +0000 |
| commit | ffb460fdf08a5e4c85c82d34e683e85ab0102784 (patch) | |
| tree | 2e688c0af53f69fbd6675a09dddbb7abf685b528 | |
| parent | eb9bdb5d1686cc1e55d45fee419f1d07124d915e (diff) | |
| download | bcm5719-llvm-ffb460fdf08a5e4c85c82d34e683e85ab0102784.tar.gz bcm5719-llvm-ffb460fdf08a5e4c85c82d34e683e85ab0102784.zip | |
[PowerPC] Fix PPCLoopPreIncPrep for depth > 1 loops
This pass had the same problem as the data-prefetching pass: it was only
checking for depth == 1 loops in practice. Fix that, add some debugging
statements, and make sure that, when we grab an AddRec, it is for the loop we
expect.
llvm-svn: 234670
| -rw-r--r-- | llvm/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp | 37 | ||||
| -rw-r--r-- | llvm/test/CodeGen/PowerPC/pip-inner.ll | 52 |
2 files changed, 79 insertions, 10 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp b/llvm/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp index 092a4efacb1..41cc15ab419 100644 --- a/llvm/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp +++ b/llvm/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp @@ -22,6 +22,7 @@ #define DEBUG_TYPE "ppc-loop-preinc-prep" #include "PPC.h" #include "PPCTargetMachine.h" +#include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" @@ -143,8 +144,10 @@ bool PPCLoopPreIncPrep::runOnFunction(Function &F) { bool MadeChange = false; - for (LoopInfo::iterator I = LI->begin(), E = LI->end(); - I != E; ++I) { + if (LI->empty()) + return MadeChange; + + for (auto I = df_begin(*LI->begin()), E = df_end(*LI->begin()); I != E; ++I) { Loop *L = *I; MadeChange |= runOnLoop(L); } @@ -159,16 +162,15 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) { if (!L->empty()) return MadeChange; + DEBUG(dbgs() << "PIP: Examining: " << *L << "\n"); + BasicBlock *Header = L->getHeader(); const PPCSubtarget *ST = TM ? TM->getSubtargetImpl(*Header->getParent()) : nullptr; - unsigned HeaderLoopPredCount = 0; - for (pred_iterator PI = pred_begin(Header), PE = pred_end(Header); - PI != PE; ++PI) { - ++HeaderLoopPredCount; - } + unsigned HeaderLoopPredCount = + std::distance(pred_begin(Header), pred_end(Header)); // Collect buckets of comparable addresses used by loads and stores. typedef std::multimap<const SCEV *, Instruction *, SCEVLess> Bucket; @@ -205,9 +207,13 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) { if (L->isLoopInvariant(PtrValue)) continue; - const SCEV *LSCEV = SE->getSCEV(PtrValue); - if (!isa<SCEVAddRecExpr>(LSCEV)) + const SCEV *LSCEV = SE->getSCEVAtScope(PtrValue, L); + if (const SCEVAddRecExpr *LARSCEV = dyn_cast<SCEVAddRecExpr>(LSCEV)) { + if (LARSCEV->getLoop() != L) + continue; + } else { continue; + } bool FoundBucket = false; for (unsigned i = 0, e = Buckets.size(); i != e; ++i) @@ -236,11 +242,16 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) { // returns a value (which might contribute to determining the loop's // iteration space), insert a new preheader for the loop. if (!LoopPredecessor || - !LoopPredecessor->getTerminator()->getType()->isVoidTy()) + !LoopPredecessor->getTerminator()->getType()->isVoidTy()) { LoopPredecessor = InsertPreheaderForLoop(L, this); + if (LoopPredecessor) + MadeChange = true; + } if (!LoopPredecessor) return MadeChange; + DEBUG(dbgs() << "PIP: Found " << Buckets.size() << " buckets\n"); + SmallSet<BasicBlock *, 16> BBChanged; for (unsigned i = 0, e = Buckets.size(); i != e; ++i) { // The base address of each bucket is transformed into a phi and the others @@ -251,6 +262,10 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) { if (!BasePtrSCEV->isAffine()) continue; + DEBUG(dbgs() << "PIP: Transforming: " << *BasePtrSCEV << "\n"); + assert(BasePtrSCEV->getLoop() == L && + "AddRec for the wrong loop?"); + Instruction *MemI = Buckets[i].begin()->second; Value *BasePtr = GetPointerOperand(MemI); assert(BasePtr && "No pointer operand"); @@ -271,6 +286,8 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) { if (!isSafeToExpand(BasePtrStartSCEV, *SE)) continue; + DEBUG(dbgs() << "PIP: New start is: " << *BasePtrStartSCEV << "\n"); + PHINode *NewPHI = PHINode::Create(I8PtrTy, HeaderLoopPredCount, MemI->hasName() ? MemI->getName() + ".phi" : "", Header->getFirstNonPHI()); diff --git a/llvm/test/CodeGen/PowerPC/pip-inner.ll b/llvm/test/CodeGen/PowerPC/pip-inner.ll new file mode 100644 index 00000000000..930f0d37147 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/pip-inner.ll @@ -0,0 +1,52 @@ +; RUN: llc < %s | FileCheck %s +target datalayout = "E-m:e-i64:64-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +; Function Attrs: nounwind +define void @foo(double* %x, double* nocapture readonly %y) #0 { +entry: + br label %for.cond1.preheader + +for.cond1.preheader: ; preds = %for.end, %entry + %i.015 = phi i32 [ 0, %entry ], [ %inc7, %for.end ] + br label %for.body3 + +for.body3: ; preds = %for.body3, %for.cond1.preheader + %indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next, %for.body3 ] + %arrayidx = getelementptr inbounds double, double* %y, i64 %indvars.iv + %0 = load double, double* %arrayidx, align 8 + %add = fadd double %0, 1.000000e+00 + %arrayidx5 = getelementptr inbounds double, double* %x, i64 %indvars.iv + store double %add, double* %arrayidx5, align 8 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 16000 + br i1 %exitcond, label %for.end, label %for.body3 + +for.end: ; preds = %for.body3 + tail call void @bar(double* %x) #2 + %inc7 = add nuw nsw i32 %i.015, 1 + %exitcond16 = icmp eq i32 %inc7, 1000 + br i1 %exitcond16, label %for.end8, label %for.cond1.preheader + +for.end8: ; preds = %for.end + ret void + +; CHECK-LABEL: @foo + +; CHECK: lfdu [[REG1:[0-9]+]], 8({{[0-9]+}}) +; CHECK: fadd [[REG2:[0-9]+]], [[REG1]], {{[0-9]+}} +; CHECK: stfdu [[REG2]], 8({{[0-9]+}}) +; CHECK: bdnz + +; CHECK: bl bar +; CHECK-NEXT: nop + +; CHECK: blr +} + +declare void @bar(double*) #1 + +attributes #0 = { nounwind "target-cpu"="a2" } +attributes #1 = { "target-cpu"="a2" } +attributes #2 = { nounwind } + |

