Diffstat (limited to 'llvm/lib')
 llvm/lib/CodeGen/CodeGenPrepare.cpp             | 167
 llvm/lib/CodeGen/TargetLoweringBase.cpp         |   1
 llvm/lib/Target/AArch64/AArch64ISelLowering.cpp |  15
 llvm/lib/Target/AArch64/AArch64ISelLowering.h   |   2
 llvm/lib/Target/X86/X86ISelLowering.cpp         |   5
 llvm/lib/Target/X86/X86ISelLowering.h           |   2
 6 files changed, 111 insertions(+), 81 deletions(-)
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index fee31bc5844..66ac1798208 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -77,7 +77,6 @@ STATISTIC(NumAndUses, "Number of uses of and mask instructions optimized");
 STATISTIC(NumRetsDup, "Number of return instructions duplicated");
 STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved");
 STATISTIC(NumSelectsExpanded, "Number of selects turned into branches");
-STATISTIC(NumAndCmpsMoved, "Number of and/cmp's pushed into branches");
 STATISTIC(NumStoreExtractExposed, "Number of store(extractelement) exposed");
 
 static cl::opt<bool> DisableBranchOpts(
@@ -217,7 +216,6 @@ class TypePromotionTransaction;
     bool optimizeExtractElementInst(Instruction *Inst);
     bool dupRetToEnableTailCallOpts(BasicBlock *BB);
     bool placeDbgValues(Function &F);
-    bool sinkAndCmp(Function &F);
     bool extLdPromotion(TypePromotionTransaction &TPT, LoadInst *&LI,
                         Instruction *&Inst,
                         const SmallVectorImpl<Instruction *> &Exts,
@@ -295,14 +293,8 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
   // find a node corresponding to the value.
   EverMadeChange |= placeDbgValues(F);
 
-  // If there is a mask, compare against zero, and branch that can be combined
-  // into a single target instruction, push the mask and compare into branch
-  // users. Do this before OptimizeBlock -> OptimizeInst ->
-  // OptimizeCmpExpression, which perturbs the pattern being searched for.
-  if (!DisableBranchOpts) {
-    EverMadeChange |= sinkAndCmp(F);
+  if (!DisableBranchOpts)
     EverMadeChange |= splitBranchCondition(F);
-  }
 
   bool MadeChange = true;
   while (MadeChange) {
@@ -1095,6 +1087,83 @@ static bool OptimizeCmpExpression(CmpInst *CI, const TargetLowering *TLI) {
   return false;
 }
 
+/// Duplicate and sink the given 'and' instruction into user blocks where it is
+/// used in a compare to allow isel to generate better code for targets where
+/// this operation can be combined.
+///
+/// Return true if any changes are made.
+static bool sinkAndCmp0Expression(Instruction *AndI,
+                                  const TargetLowering &TLI,
+                                  SetOfInstrs &InsertedInsts) {
+  // Double-check that we're not trying to optimize an instruction that was
+  // already optimized by some other part of this pass.
+  assert(!InsertedInsts.count(AndI) &&
+         "Attempting to optimize already optimized and instruction");
+  (void) InsertedInsts;
+
+  // Nothing to do for single use in same basic block.
+  if (AndI->hasOneUse() &&
+      AndI->getParent() == cast<Instruction>(*AndI->user_begin())->getParent())
+    return false;
+
+  // Try to avoid cases where sinking/duplicating is likely to increase
+  // register pressure.
+  if (!isa<ConstantInt>(AndI->getOperand(0)) &&
+      !isa<ConstantInt>(AndI->getOperand(1)) &&
+      AndI->getOperand(0)->hasOneUse() && AndI->getOperand(1)->hasOneUse())
+    return false;
+
+  for (auto *U : AndI->users()) {
+    Instruction *User = cast<Instruction>(U);
+
+    // Only sink for and mask feeding icmp with 0.
+    if (!isa<ICmpInst>(User))
+      return false;
+
+    auto *CmpC = dyn_cast<ConstantInt>(User->getOperand(1));
+    if (!CmpC || !CmpC->isZero())
+      return false;
+  }
+
+  if (!TLI.isMaskAndCmp0FoldingBeneficial(*AndI))
+    return false;
+
+  DEBUG(dbgs() << "found 'and' feeding only icmp 0;\n");
+  DEBUG(AndI->getParent()->dump());
+
+  // Push the 'and' into the same block as the icmp 0. There should only be
+  // one (icmp (and, 0)) in each block, since CSE/GVN should have removed any
+  // others, so we don't need to keep track of which BBs we insert into.
+  for (Value::user_iterator UI = AndI->user_begin(), E = AndI->user_end();
+       UI != E; ) {
+    Use &TheUse = UI.getUse();
+    Instruction *User = cast<Instruction>(*UI);
+
+    // Preincrement use iterator so we don't invalidate it.
+    ++UI;
+
+    DEBUG(dbgs() << "sinking 'and' use: " << *User << "\n");
+
+    // Keep the 'and' in the same place if the use is already in the same
+    // block.
+    Instruction *InsertPt =
+        User->getParent() == AndI->getParent() ? AndI : User;
+    Instruction *InsertedAnd =
+        BinaryOperator::Create(Instruction::And, AndI->getOperand(0),
+                               AndI->getOperand(1), "", InsertPt);
+    // Propagate the debug info.
+    InsertedAnd->setDebugLoc(AndI->getDebugLoc());
+
+    // Replace a use of the 'and' with a use of the new 'and'.
+    TheUse = InsertedAnd;
+    ++NumAndUses;
+    DEBUG(User->getParent()->dump());
+  }
+
+  // We removed all uses, nuke the and.
+  AndI->eraseFromParent();
+  return true;
+}
+
 /// Check if the candidates could be combined with a shift instruction, which
 /// includes:
 /// 1. Truncate instruction
@@ -4544,13 +4613,10 @@ bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) {
       !(Load->getType()->isIntegerTy() || Load->getType()->isPointerTy()))
     return false;
 
-  // Skip loads we've already transformed or have no reason to transform.
-  if (Load->hasOneUse()) {
-    User *LoadUser = *Load->user_begin();
-    if (cast<Instruction>(LoadUser)->getParent() == Load->getParent() &&
-        !dyn_cast<PHINode>(LoadUser))
-      return false;
-  }
+  // Skip loads we've already transformed.
+  if (Load->hasOneUse() &&
+      InsertedInsts.count(cast<Instruction>(*Load->user_begin())))
+    return false;
 
   // Look at all uses of Load, looking through phis, to determine how many bits
   // of the loaded value are needed.
@@ -4646,6 +4712,9 @@ bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) {
   IRBuilder<> Builder(Load->getNextNode());
   auto *NewAnd = dyn_cast<Instruction>(
       Builder.CreateAnd(Load, ConstantInt::get(Ctx, DemandBits)));
+  // Mark this instruction as "inserted by CGP", so that other
+  // optimizations don't touch it.
+  InsertedInsts.insert(NewAnd);
 
   // Replace all uses of load with new and (except for the use of load in the
   // new and itself).
@@ -5560,6 +5629,10 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool& ModifiedDT) {
 
   BinaryOperator *BinOp = dyn_cast<BinaryOperator>(I);
 
+  if (BinOp && (BinOp->getOpcode() == Instruction::And) &&
+      EnableAndCmpSinking && TLI)
+    return sinkAndCmp0Expression(BinOp, *TLI, InsertedInsts);
+
   if (BinOp && (BinOp->getOpcode() == Instruction::AShr ||
                 BinOp->getOpcode() == Instruction::LShr)) {
     ConstantInt *CI = dyn_cast<ConstantInt>(BinOp->getOperand(1));
@@ -5689,68 +5762,6 @@ bool CodeGenPrepare::placeDbgValues(Function &F) {
   return MadeChange;
 }
 
-// If there is a sequence that branches based on comparing a single bit
-// against zero that can be combined into a single instruction, and the
-// target supports folding these into a single instruction, sink the
-// mask and compare into the branch uses. Do this before OptimizeBlock ->
-// OptimizeInst -> OptimizeCmpExpression, which perturbs the pattern being
-// searched for.
-bool CodeGenPrepare::sinkAndCmp(Function &F) {
-  if (!EnableAndCmpSinking)
-    return false;
-  if (!TLI || !TLI->isMaskAndBranchFoldingLegal())
-    return false;
-  bool MadeChange = false;
-  for (BasicBlock &BB : F) {
-    // Does this BB end with the following?
-    //   %andVal = and %val, #single-bit-set
-    //   %icmpVal = icmp %andResult, 0
-    //   br i1 %cmpVal label %dest1, label %dest2"
-    BranchInst *Brcc = dyn_cast<BranchInst>(BB.getTerminator());
-    if (!Brcc || !Brcc->isConditional())
-      continue;
-    ICmpInst *Cmp = dyn_cast<ICmpInst>(Brcc->getOperand(0));
-    if (!Cmp || Cmp->getParent() != &BB)
-      continue;
-    ConstantInt *Zero = dyn_cast<ConstantInt>(Cmp->getOperand(1));
-    if (!Zero || !Zero->isZero())
-      continue;
-    Instruction *And = dyn_cast<Instruction>(Cmp->getOperand(0));
-    if (!And || And->getOpcode() != Instruction::And ||
-        And->getParent() != &BB)
-      continue;
-    ConstantInt* Mask = dyn_cast<ConstantInt>(And->getOperand(1));
-    if (!Mask || !Mask->getUniqueInteger().isPowerOf2())
-      continue;
-    DEBUG(dbgs() << "found and; icmp ?,0; brcc\n"); DEBUG(BB.dump());
-
-    // Push the "and; icmp" for any users that are conditional branches.
-    // Since there can only be one branch use per BB, we don't need to keep
-    // track of which BBs we insert into.
-    for (Use &TheUse : Cmp->uses()) {
-      // Find brcc use.
-      BranchInst *BrccUser = dyn_cast<BranchInst>(TheUse);
-      if (!BrccUser || !BrccUser->isConditional())
-        continue;
-      BasicBlock *UserBB = BrccUser->getParent();
-      if (UserBB == &BB) continue;
-      DEBUG(dbgs() << "found Brcc use\n");
-
-      // Sink the "and; icmp" to use.
-      MadeChange = true;
-      BinaryOperator *NewAnd =
-        BinaryOperator::CreateAnd(And->getOperand(0), And->getOperand(1), "",
-                                  BrccUser);
-      CmpInst *NewCmp =
-        CmpInst::Create(Cmp->getOpcode(), Cmp->getPredicate(), NewAnd, Zero,
-                        "", BrccUser);
-      TheUse = NewCmp;
-      ++NumAndCmpsMoved;
-      DEBUG(BrccUser->getParent()->dump());
-    }
-  }
-  return MadeChange;
-}
-
 /// \brief Scale down both weights to fit into uint32_t.
 static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) {
   uint64_t NewMax = (NewTrue > NewFalse) ? NewTrue : NewFalse;
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index f18cdca4861..5f021a17612 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -838,7 +838,6 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) {
   HasExtractBitsInsn = false;
   JumpIsExpensive = JumpIsExpensiveOverride;
   PredictableSelectIsExpensive = false;
-  MaskAndBranchFoldingIsLegal = false;
   EnableExtLdPromotion = false;
   HasFloatingPointExceptions = true;
   StackPointerRegisterToSaveRestore = 0;
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 92094452bf3..196274c50ef 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -555,8 +555,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
 
   setSchedulingPreference(Sched::Hybrid);
 
-  // Enable TBZ/TBNZ
-  MaskAndBranchFoldingIsLegal = true;
   EnableExtLdPromotion = true;
 
   // Set required alignment.
@@ -10644,6 +10642,19 @@ Value *AArch64TargetLowering::getSafeStackPointerLocation(IRBuilder<> &IRB) const
       Type::getInt8PtrTy(IRB.getContext())->getPointerTo(0));
 }
 
+bool AArch64TargetLowering::isMaskAndCmp0FoldingBeneficial(
+    const Instruction &AndI) const {
+  // Only sink 'and' mask to cmp use block if it is masking a single bit, since
+  // this is likely to fold the and/cmp/br into a single tbz instruction. It
+  // may be beneficial to sink in other cases, but we would have to check that
+  // the cmp would not get folded into the br to form a cbz for these to be
+  // beneficial.
+  ConstantInt *Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
+  if (!Mask)
+    return false;
+  return Mask->getUniqueInteger().isPowerOf2();
+}
+
 void AArch64TargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
   // Update IsSplitCSR in AArch64FunctionInfo.
   AArch64FunctionInfo *AFI = Entry->getParent()->getInfo<AArch64FunctionInfo>();
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 167b78dde4b..2cdc754205b 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -412,6 +412,8 @@ public:
     return true;
   }
 
+  bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
+
   bool hasAndNotCompare(SDValue) const override {
     // 'bics'
     return true;
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 42b3b5ad12f..c03d6aff5a8 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -4455,6 +4455,11 @@ bool X86TargetLowering::isCtlzFast() const {
   return Subtarget.hasFastLZCNT();
 }
 
+bool X86TargetLowering::isMaskAndCmp0FoldingBeneficial(
+    const Instruction &AndI) const {
+  return true;
+}
+
 bool X86TargetLowering::hasAndNotCompare(SDValue Y) const {
   if (!Subtarget.hasBMI())
     return false;
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 30ec3aa30c0..e783d9e1e92 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -808,6 +808,8 @@ namespace llvm {
       return false;
     }
 
+    bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
+
     bool hasAndNotCompare(SDValue Y) const override;
 
     /// Return the value type to use for ISD::SETCC.
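
For illustration, here is a hypothetical before/after sketch of the transformation the new sinkAndCmp0Expression performs. The function and value names below are invented for this example; they do not come from the patch. Before the pass, the 'and' is defined in one block while its only user, a compare against zero, sits in another:

    define void @example(i32 %x, i1 %c) {
    entry:
      %and = and i32 %x, 8              ; single-bit mask (bit 3)
      br i1 %c, label %bb1, label %bb2

    bb1:
      %cmp = icmp eq i32 %and, 0        ; sole user: compare against zero
      br i1 %cmp, label %bb2, label %bb3

    bb2:
      ret void

    bb3:
      ret void
    }

After the pass, a copy of the 'and' has been sunk into the block containing its compare and the original has been erased:

    entry:
      br i1 %c, label %bb1, label %bb2

    bb1:
      %and = and i32 %x, 8
      %cmp = icmp eq i32 %and, 0
      br i1 %cmp, label %bb2, label %bb3

With the def and use now in one block, isel can match the whole and/cmp/br chain. On AArch64, a power-of-two mask such as 8 should fold to a single test-bit branch along the lines of tbz w0, #3, <target>, which is why isMaskAndCmp0FoldingBeneficial accepts only single-bit masks there, while the X86 override returns true for any mask since test+jcc handles the general case.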

