diff options
Diffstat (limited to 'llvm/lib/CodeGen')
-rw-r--r-- | llvm/lib/CodeGen/CodeGenPrepare.cpp | 141 |
1 files changed, 141 insertions, 0 deletions
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp index aba0d580916..55a46377529 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -3978,6 +3978,119 @@ void VectorPromoteHelper::promoteImpl(Instruction *ToBePromoted) { Transition->setOperand(getTransitionOriginalValueIdx(), ToBePromoted); } +// See if we can speculate calls to intrinsic cttz/ctlz. +// +// Example: +// entry: +// ... +// %cmp = icmp eq i64 %val, 0 +// br i1 %cmp, label %end.bb, label %then.bb +// +// then.bb: +// %c = tail call i64 @llvm.cttz.i64(i64 %val, i1 true) +// br label %EndBB +// +// end.bb: +// %cond = phi i64 [ %c, %then.bb ], [ 64, %entry ] +// +// ==> +// +// entry: +// ... +// %c = tail call i64 @llvm.cttz.i64(i64 %val, i1 false) +// +static bool OptimizeBranchInst(BranchInst *BrInst, const TargetLowering &TLI) { + assert(BrInst->isConditional() && "Expected a conditional branch!"); + BasicBlock *ThenBB = BrInst->getSuccessor(1); + BasicBlock *EndBB = BrInst->getSuccessor(0); + + // See if ThenBB contains only one instruction (excluding the + // terminator and DbgInfoIntrinsic calls). + IntrinsicInst *II = nullptr; + for (BasicBlock::iterator I = ThenBB->begin(), + E = std::prev(ThenBB->end()); I != E; ++I) { + // Skip debug info. + if (isa<DbgInfoIntrinsic>(I)) + continue; + + if (II) + // Avoid speculating more than one instruction. + return false; + + // See if this is a call to intrinsic cttz/ctlz. + if (match(cast<Instruction>(I), m_Intrinsic<Intrinsic::cttz>())) { + // Avoid speculating expensive intrinsic calls. + if (!TLI.isCheapToSpeculateCttz()) + return false; + } + else if (match(cast<Instruction>(I), m_Intrinsic<Intrinsic::ctlz>())) { + // Avoid speculating expensive intrinsic calls. + if (!TLI.isCheapToSpeculateCtlz()) + return false; + } else + return false; + + II = cast<IntrinsicInst>(I); + } + + // Look for PHI nodes with 'II' as the incoming value from 'ThenBB'. + BasicBlock *EntryBB = BrInst->getParent(); + for (BasicBlock::iterator I = EndBB->begin(); + PHINode *PN = dyn_cast<PHINode>(I); ++I) { + Value *ThenV = PN->getIncomingValueForBlock(ThenBB); + Value *OrigV = PN->getIncomingValueForBlock(EntryBB); + + if (!OrigV || ThenV != II) + return false; + + if (ConstantInt *CInt = dyn_cast<ConstantInt>(OrigV)) { + unsigned BitWidth = ThenV->getType()->getIntegerBitWidth(); + + // Don't try to simplify this phi node if 'ThenV' is a cttz/ctlz + // intrinsic call, but 'OrigV' is not equal to the 'size-of' in bits + // of the value in input to the cttz/ctlz. + if (CInt->getValue() != BitWidth) + return false; + + // Hoist the call to cttz/ctlz from ThenBB into EntryBB. + EntryBB->getInstList().splice(BrInst, ThenBB->getInstList(), + ThenBB->begin(), std::prev(ThenBB->end())); + + // Update PN setting ThenV as the incoming value from both 'EntryBB' + // and 'ThenBB'. Eventually, method 'OptimizeInst' will fold this + // phi node if all the incoming values are the same. + PN->setIncomingValue(PN->getBasicBlockIndex(EntryBB), ThenV); + PN->setIncomingValue(PN->getBasicBlockIndex(ThenBB), ThenV); + + // Clear the 'undef on zero' flag of the cttz/ctlz intrinsic call. + if (cast<ConstantInt>(II->getArgOperand(1))->isOne()) { + Type *Ty = II->getArgOperand(0)->getType(); + Value *Args[] = { II->getArgOperand(0), + ConstantInt::getFalse(II->getContext()) }; + Module *M = EntryBB->getParent()->getParent(); + Value *IF = Intrinsic::getDeclaration(M, II->getIntrinsicID(), Ty); + IRBuilder<> Builder(BrInst); + Instruction *NewI = Builder.CreateCall(IF, Args); + + // Replace the old call to cttz/ctlz. + II->replaceAllUsesWith(NewI); + II->eraseFromParent(); + } + + // Update BrInst condition so that the branch to EndBB is always taken. + // Later on, method 'ConstantFoldTerminator' will simplify this branch + // replacing it with a direct branch to 'EndBB'. + // As a side effect, CodeGenPrepare will attempt to simplify the control + // flow graph by deleting basic block 'ThenBB' and merging 'EntryBB' into + // 'EndBB' (calling method 'EliminateFallThrough'). + BrInst->setCondition(ConstantInt::getTrue(BrInst->getContext())); + return true; + } + } + + return false; +} + /// Some targets can do store(extractelement) with one instruction. /// Try to push the extractelement towards the stores when the target /// has this feature and this is profitable. @@ -4130,6 +4243,34 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I, bool& ModifiedDT) { if (isa<ExtractElementInst>(I)) return OptimizeExtractElementInst(I); + if (BranchInst *BI = dyn_cast<BranchInst>(I)) { + if (TLI && BI->isConditional() && BI->getCondition()->hasOneUse()) { + // Check if the branch condition compares a value agaist zero. + if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) { + if (ICI->getPredicate() == ICmpInst::ICMP_EQ && + match(ICI->getOperand(1), m_Zero())) { + BasicBlock *ThenBB = BI->getSuccessor(1); + BasicBlock *EndBB = BI->getSuccessor(0); + + // Check if ThenBB is only reachable from this basic block; also, + // check if EndBB has more than one predecessor. + if (ThenBB->getSinglePredecessor() && + !EndBB->getSinglePredecessor()) { + TerminatorInst *TI = ThenBB->getTerminator(); + + if (TI->getNumSuccessors() == 1 && TI->getSuccessor(0) == EndBB && + // Try to speculate calls to intrinsic cttz/ctlz from 'ThenBB'. + OptimizeBranchInst(BI, *TLI)) { + ModifiedDT = true; + return true; + } + } + } + } + } + return false; + } + return false; } |