diff options
| author | Sam Parker <sam.parker@arm.com> | 2019-09-23 09:48:25 +0000 |
|---|---|---|
| committer | Sam Parker <sam.parker@arm.com> | 2019-09-23 09:48:25 +0000 |
| commit | 9feb429a337ff49fe119a64bff3724fb820c4501 (patch) | |
| tree | 2ab99115a6b989822c1243c41a07497fe1632313 /llvm/lib/Target | |
| parent | 14f6465c157b36c50ffe431463a9c94efda42b99 (diff) | |
| download | bcm5719-llvm-9feb429a337ff49fe119a64bff3724fb820c4501.tar.gz bcm5719-llvm-9feb429a337ff49fe119a64bff3724fb820c4501.zip | |
[ARM][MVE] Remove old tail predicates
Remove any predicate that we replace with a vctp intrinsic, and try
to remove its operands too. Also look into the exit block to see if
there are any duplicates of the predicates that we've replaced and
clone the vctp to be used there instead.
Differential Revision: https://reviews.llvm.org/D67709
llvm-svn: 372567
Diffstat (limited to 'llvm/lib/Target')
| -rw-r--r-- | llvm/lib/Target/ARM/ARMInstrMVE.td | 1 | ||||
| -rw-r--r-- | llvm/lib/Target/ARM/MVETailPredication.cpp | 68 |
2 files changed, 60 insertions, 9 deletions
diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td index 04a4ee73d8b..74de3a4417f 100644 --- a/llvm/lib/Target/ARM/ARMInstrMVE.td +++ b/llvm/lib/Target/ARM/ARMInstrMVE.td @@ -3986,6 +3986,7 @@ def MVE_VDWDUPu8 : MVE_VxWDUP<"vdwdup", "u8", 0b00, 0b1>; def MVE_VDWDUPu16 : MVE_VxWDUP<"vdwdup", "u16", 0b01, 0b1>; def MVE_VDWDUPu32 : MVE_VxWDUP<"vdwdup", "u32", 0b10, 0b1>; +let hasSideEffects = 1 in class MVE_VCTP<string suffix, bits<2> size, list<dag> pattern=[]> : MVE_p<(outs VCCR:$P0), (ins rGPR:$Rn), NoItinerary, "vctp", suffix, "$Rn", vpred_n, "", pattern> { diff --git a/llvm/lib/Target/ARM/MVETailPredication.cpp b/llvm/lib/Target/ARM/MVETailPredication.cpp index 00791841566..844eafbcb38 100644 --- a/llvm/lib/Target/ARM/MVETailPredication.cpp +++ b/llvm/lib/Target/ARM/MVETailPredication.cpp @@ -84,7 +84,7 @@ private: /// Is the icmp that generates an i1 vector, based upon a loop counter /// and a limit that is defined outside the loop. - bool isTailPredicate(Value *Predicate, Value *NumElements); + bool isTailPredicate(Instruction *Predicate, Value *NumElements); }; } // end namespace @@ -178,7 +178,7 @@ bool MVETailPredication::runOnLoop(Loop *L, LPPassManager&) { return Changed; } -bool MVETailPredication::isTailPredicate(Value *V, Value *NumElements) { +bool MVETailPredication::isTailPredicate(Instruction *I, Value *NumElements) { // Look for the following: // %trip.count.minus.1 = add i32 %N, -1 @@ -206,7 +206,7 @@ bool MVETailPredication::isTailPredicate(Value *V, Value *NumElements) { Instruction *Induction = nullptr; // The vector icmp - if (!match(V, m_ICmp(Pred, m_Instruction(Induction), + if (!match(I, m_ICmp(Pred, m_Instruction(Induction), m_Instruction(Shuffle))) || Pred != ICmpInst::ICMP_ULE || !L->isLoopInvariant(Shuffle)) return false; @@ -390,6 +390,55 @@ Value* MVETailPredication::ComputeElements(Value *TripCount, return Expander.expandCodeFor(Elems, Elems->getType(), InsertPt); } +// Look through the exit block to 
see whether there's a duplicate predicate +// instruction. This can happen when we need to perform a select on values +// from the last and previous iteration. Instead of doing a straight +// replacement of that predicate with the vctp, clone the vctp and place it +// in the block. This means that the VPR doesn't have to be live into the +// exit block which should make it easier to convert this loop into a proper +// tail predicated loop. +static void Cleanup(DenseMap<Instruction*, Instruction*> &NewPredicates, + SetVector<Instruction*> &MaybeDead, Loop *L) { + if (BasicBlock *Exit = L->getUniqueExitBlock()) { + for (auto &Pair : NewPredicates) { + Instruction *OldPred = Pair.first; + Instruction *NewPred = Pair.second; + + for (auto &I : *Exit) { + if (I.isSameOperationAs(OldPred)) { + Instruction *PredClone = NewPred->clone(); + PredClone->insertBefore(&I); + I.replaceAllUsesWith(PredClone); + MaybeDead.insert(&I); + break; + } + } + } + } + + // Drop references and add operands to check for dead. + SmallPtrSet<Instruction*, 4> Dead; + while (!MaybeDead.empty()) { + auto *I = MaybeDead.front(); + MaybeDead.remove(I); + if (I->hasNUsesOrMore(1)) + continue; + + for (auto &U : I->operands()) { + if (auto *OpI = dyn_cast<Instruction>(U)) + MaybeDead.insert(OpI); + } + I->dropAllReferences(); + Dead.insert(I); + } + + for (auto *I : Dead) + I->eraseFromParent(); + + for (auto I : L->blocks()) + DeleteDeadPHIs(I); +} + bool MVETailPredication::TryConvert(Value *TripCount) { if (!IsPredicatedVectorLoop()) return false; @@ -400,13 +449,14 @@ bool MVETailPredication::TryConvert(Value *TripCount) { // operand is generated from an induction variable. 
Module *M = L->getHeader()->getModule(); Type *Ty = IntegerType::get(M->getContext(), 32); - SmallPtrSet<Value*, 4> Predicates; + SetVector<Instruction*> Predicates; + DenseMap<Instruction*, Instruction*> NewPredicates; for (auto *I : MaskedInsts) { Intrinsic::ID ID = I->getIntrinsicID(); unsigned PredOp = ID == Intrinsic::masked_load ? 2 : 3; - Value *Predicate = I->getArgOperand(PredOp); - if (Predicates.count(Predicate)) + auto *Predicate = dyn_cast<Instruction>(I->getArgOperand(PredOp)); + if (!Predicate || Predicates.count(Predicate)) continue; VectorType *VecTy = getVectorType(I); @@ -445,6 +495,7 @@ bool MVETailPredication::TryConvert(Value *TripCount) { Value *Remaining = Builder.CreateSub(Processed, Factor); Value *TailPredicate = Builder.CreateCall(VCTP, Remaining); Predicate->replaceAllUsesWith(TailPredicate); + NewPredicates[Predicate] = cast<Instruction>(TailPredicate); // Add the incoming value to the new phi. Processed->addIncoming(Remaining, L->getLoopLatch()); @@ -453,9 +504,8 @@ bool MVETailPredication::TryConvert(Value *TripCount) { << "TP: Inserted VCTP: " << *TailPredicate << "\n"); } - for (auto I : L->blocks()) - DeleteDeadPHIs(I); - + // Now clean up. + Cleanup(NewPredicates, Predicates, L); return true; } |

