summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/CodeGen/CodeGenPrepare.cpp141
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp10
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.h4
3 files changed, 155 insertions, 0 deletions
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index aba0d580916..55a46377529 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -3978,6 +3978,119 @@ void VectorPromoteHelper::promoteImpl(Instruction *ToBePromoted) {
Transition->setOperand(getTransitionOriginalValueIdx(), ToBePromoted);
}
+// See if we can speculate calls to intrinsic cttz/ctlz.
+//
+// Example:
+// entry:
+// ...
+// %cmp = icmp eq i64 %val, 0
+// br i1 %cmp, label %end.bb, label %then.bb
+//
+// then.bb:
+// %c = tail call i64 @llvm.cttz.i64(i64 %val, i1 true)
+// br label %EndBB
+//
+// end.bb:
+// %cond = phi i64 [ %c, %then.bb ], [ 64, %entry ]
+//
+// ==>
+//
+// entry:
+// ...
+// %c = tail call i64 @llvm.cttz.i64(i64 %val, i1 false)
+//
+static bool OptimizeBranchInst(BranchInst *BrInst, const TargetLowering &TLI) {
+ assert(BrInst->isConditional() && "Expected a conditional branch!");
+ BasicBlock *ThenBB = BrInst->getSuccessor(1);
+ BasicBlock *EndBB = BrInst->getSuccessor(0);
+
+ // See if ThenBB contains only one instruction (excluding the
+ // terminator and DbgInfoIntrinsic calls).
+ IntrinsicInst *II = nullptr;
+ for (BasicBlock::iterator I = ThenBB->begin(),
+ E = std::prev(ThenBB->end()); I != E; ++I) {
+ // Skip debug info.
+ if (isa<DbgInfoIntrinsic>(I))
+ continue;
+
+ if (II)
+ // Avoid speculating more than one instruction.
+ return false;
+
+ // See if this is a call to intrinsic cttz/ctlz.
+ if (match(cast<Instruction>(I), m_Intrinsic<Intrinsic::cttz>())) {
+ // Avoid speculating expensive intrinsic calls.
+ if (!TLI.isCheapToSpeculateCttz())
+ return false;
+ }
+ else if (match(cast<Instruction>(I), m_Intrinsic<Intrinsic::ctlz>())) {
+ // Avoid speculating expensive intrinsic calls.
+ if (!TLI.isCheapToSpeculateCtlz())
+ return false;
+ } else
+ return false;
+
+ II = cast<IntrinsicInst>(I);
+ }
+
+ // Look for PHI nodes with 'II' as the incoming value from 'ThenBB'.
+ BasicBlock *EntryBB = BrInst->getParent();
+ for (BasicBlock::iterator I = EndBB->begin();
+ PHINode *PN = dyn_cast<PHINode>(I); ++I) {
+ Value *ThenV = PN->getIncomingValueForBlock(ThenBB);
+ Value *OrigV = PN->getIncomingValueForBlock(EntryBB);
+
+ if (!OrigV || ThenV != II)
+ return false;
+
+ if (ConstantInt *CInt = dyn_cast<ConstantInt>(OrigV)) {
+ unsigned BitWidth = ThenV->getType()->getIntegerBitWidth();
+
+ // Don't try to simplify this phi node if 'ThenV' is a cttz/ctlz
+ // intrinsic call, but 'OrigV' is not equal to the 'size-of' in bits
+ // of the value in input to the cttz/ctlz.
+ if (CInt->getValue() != BitWidth)
+ return false;
+
+ // Hoist the call to cttz/ctlz from ThenBB into EntryBB.
+ EntryBB->getInstList().splice(BrInst, ThenBB->getInstList(),
+ ThenBB->begin(), std::prev(ThenBB->end()));
+
+ // Update PN setting ThenV as the incoming value from both 'EntryBB'
+ // and 'ThenBB'. Eventually, method 'OptimizeInst' will fold this
+ // phi node if all the incoming values are the same.
+ PN->setIncomingValue(PN->getBasicBlockIndex(EntryBB), ThenV);
+ PN->setIncomingValue(PN->getBasicBlockIndex(ThenBB), ThenV);
+
+ // Clear the 'undef on zero' flag of the cttz/ctlz intrinsic call.
+ if (cast<ConstantInt>(II->getArgOperand(1))->isOne()) {
+ Type *Ty = II->getArgOperand(0)->getType();
+ Value *Args[] = { II->getArgOperand(0),
+ ConstantInt::getFalse(II->getContext()) };
+ Module *M = EntryBB->getParent()->getParent();
+ Value *IF = Intrinsic::getDeclaration(M, II->getIntrinsicID(), Ty);
+ IRBuilder<> Builder(BrInst);
+ Instruction *NewI = Builder.CreateCall(IF, Args);
+
+ // Replace the old call to cttz/ctlz.
+ II->replaceAllUsesWith(NewI);
+ II->eraseFromParent();
+ }
+
+ // Update BrInst condition so that the branch to EndBB is always taken.
+ // Later on, method 'ConstantFoldTerminator' will simplify this branch
+ // replacing it with a direct branch to 'EndBB'.
+ // As a side effect, CodeGenPrepare will attempt to simplify the control
+ // flow graph by deleting basic block 'ThenBB' and merging 'EntryBB' into
+ // 'EndBB' (calling method 'EliminateFallThrough').
+ BrInst->setCondition(ConstantInt::getTrue(BrInst->getContext()));
+ return true;
+ }
+ }
+
+ return false;
+}
+
/// Some targets can do store(extractelement) with one instruction.
/// Try to push the extractelement towards the stores when the target
/// has this feature and this is profitable.
@@ -4130,6 +4243,34 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I, bool& ModifiedDT) {
if (isa<ExtractElementInst>(I))
return OptimizeExtractElementInst(I);
+ if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
+ if (TLI && BI->isConditional() && BI->getCondition()->hasOneUse()) {
+ // Check if the branch condition compares a value agaist zero.
+ if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) {
+ if (ICI->getPredicate() == ICmpInst::ICMP_EQ &&
+ match(ICI->getOperand(1), m_Zero())) {
+ BasicBlock *ThenBB = BI->getSuccessor(1);
+ BasicBlock *EndBB = BI->getSuccessor(0);
+
+ // Check if ThenBB is only reachable from this basic block; also,
+ // check if EndBB has more than one predecessor.
+ if (ThenBB->getSinglePredecessor() &&
+ !EndBB->getSinglePredecessor()) {
+ TerminatorInst *TI = ThenBB->getTerminator();
+
+ if (TI->getNumSuccessors() == 1 && TI->getSuccessor(0) == EndBB &&
+ // Try to speculate calls to intrinsic cttz/ctlz from 'ThenBB'.
+ OptimizeBranchInst(BI, *TLI)) {
+ ModifiedDT = true;
+ return true;
+ }
+ }
+ }
+ }
+ }
+ return false;
+ }
+
return false;
}
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 77a1d6e69fe..c966642ffff 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -3882,6 +3882,16 @@ bool X86TargetLowering::isExtractSubvectorCheap(EVT ResVT,
return (Index == 0 || Index == ResVT.getVectorNumElements());
}
+bool X86TargetLowering::isCheapToSpeculateCttz() const {
+ // Don't try to speculate cttz if we can't directly use TZCNT.
+ return Subtarget->hasBMI();
+}
+
+bool X86TargetLowering::isCheapToSpeculateCtlz() const {
+ // Don't try to speculate ctlz if we can't directly use LZCNT.
+ return Subtarget->hasLZCNT();
+}
+
/// isUndefOrInRange - Return true if Val is undef or if its value falls within
/// the specified range (L, H].
static bool isUndefOrInRange(int Val, int Low, int Hi) {
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index c5e3e14c545..0b01bae9e67 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -633,6 +633,10 @@ namespace llvm {
/// This method returns the name of a target specific DAG node.
const char *getTargetNodeName(unsigned Opcode) const override;
+ bool isCheapToSpeculateCttz() const override;
+
+ bool isCheapToSpeculateCtlz() const override;
+
/// Return the value type to use for ISD::SETCC.
EVT getSetCCResultType(LLVMContext &Context, EVT VT) const override;
OpenPOWER on IntegriCloud