diff options
| author | Nikolai Bozhenov <nikolai.bozhenov@intel.com> | 2017-03-02 22:12:15 +0000 |
|---|---|---|
| committer | Nikolai Bozhenov <nikolai.bozhenov@intel.com> | 2017-03-02 22:12:15 +0000 |
| commit | 4a04fb9e9059bff9c00dd698c15a9b5274441ea8 (patch) | |
| tree | 10c626cbbd0389004fc9e64061653878ea19726b /llvm/lib/Transforms/Utils/BypassSlowDivision.cpp | |
| parent | 25f9f927c6eac5c51703342c7072a4b76e6c84bf (diff) | |
| download | bcm5719-llvm-4a04fb9e9059bff9c00dd698c15a9b5274441ea8.tar.gz bcm5719-llvm-4a04fb9e9059bff9c00dd698c15a9b5274441ea8.zip | |
[BypassSlowDivision] Use ValueTracking to simplify run-time checks
ValueTracking is used for more thorough analysis of operands. Based on the
analysis, either run-time checks can be simplified (e.g. check only one operand
instead of two) or the transformation can be avoided. For example, it is quite
often the case that a divisor is promoted from a shorter type and run-time
checks for it are redundant.
With additional compile-time analysis of values, two special cases naturally
arise and are addressed by the patch:
1) Both operands are known to be short enough. Then, the long division can be
simply replaced with a short one without CFG modification.
2) If a division is unsigned and the dividend is known to be short then the
long division is not needed at all. Because if the divisor is too big for
short division then the quotient is obviously zero (and the remainder is
equal to the dividend). Actually, the division is not needed when
(divisor > dividend).
Differential Revision: https://reviews.llvm.org/D29897
llvm-svn: 296832
Diffstat (limited to 'llvm/lib/Transforms/Utils/BypassSlowDivision.cpp')
| -rw-r--r-- | llvm/lib/Transforms/Utils/BypassSlowDivision.cpp | 137 |
1 files changed, 108 insertions, 29 deletions
diff --git a/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp b/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp index ed663de0199..9d8cb3187ee 100644 --- a/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp +++ b/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp @@ -17,6 +17,7 @@ #include "llvm/Transforms/Utils/BypassSlowDivision.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" @@ -83,17 +84,28 @@ namespace llvm { } namespace { +enum ValueRange { + /// Operand definitely fits into BypassType. No runtime checks are needed. + VALRNG_SHORT, + /// A runtime check is required, as value range is unknown. + VALRNG_UNKNOWN, + /// Operand is unlikely to fit into BypassType. The bypassing should be + /// disabled. + VALRNG_LONG +}; + class FastDivInsertionTask { bool IsValidTask = false; Instruction *SlowDivOrRem = nullptr; IntegerType *BypassType = nullptr; BasicBlock *MainBB = nullptr; + ValueRange getValueRange(Value *Op); QuotRemWithBB createSlowBB(BasicBlock *Successor); QuotRemWithBB createFastBB(BasicBlock *Successor); QuotRemPair createDivRemPhiNodes(QuotRemWithBB &LHS, QuotRemWithBB &RHS, BasicBlock *PhiBB); - Value *insertOperandRuntimeCheck(); + Value *insertOperandRuntimeCheck(Value *Op1, Value *Op2); Optional<QuotRemPair> insertFastDivAndRem(); bool isSignedOp() { @@ -175,6 +187,28 @@ Value *FastDivInsertionTask::getReplacement(DivCacheTy &Cache) { return isDivisionOp() ? Value.Quotient : Value.Remainder; } +/// Check if an integer value fits into our bypass type. +ValueRange FastDivInsertionTask::getValueRange(Value *V) { + unsigned ShortLen = BypassType->getBitWidth(); + unsigned LongLen = V->getType()->getIntegerBitWidth(); + + assert(LongLen > ShortLen && "Value type must be wider than BypassType"); + unsigned HiBits = LongLen - ShortLen; + + const DataLayout &DL = SlowDivOrRem->getModule()->getDataLayout(); + APInt Zeros(LongLen, 0), Ones(LongLen, 0); + + computeKnownBits(V, Zeros, Ones, DL); + + if (Zeros.countLeadingOnes() >= HiBits) + return VALRNG_SHORT; + + if (Ones.countLeadingZeros() < HiBits) + return VALRNG_LONG; + + return VALRNG_UNKNOWN; +} + /// Add new basic block for slow div and rem operations and put it before /// SuccessorBB. QuotRemWithBB FastDivInsertionTask::createSlowBB(BasicBlock *SuccessorBB) { @@ -241,22 +275,17 @@ QuotRemPair FastDivInsertionTask::createDivRemPhiNodes(QuotRemWithBB &LHS, /// Creates a runtime check to test whether both the divisor and dividend fit /// into BypassType. The check is inserted at the end of MainBB. True return -/// value means that the operands fit. -Value *FastDivInsertionTask::insertOperandRuntimeCheck() { +/// value means that the operands fit. Either of the operands may be NULL if it +/// doesn't need a runtime check. +Value *FastDivInsertionTask::insertOperandRuntimeCheck(Value *Op1, Value *Op2) { + assert((Op1 || Op2) && "Nothing to check"); IRBuilder<> Builder(MainBB, MainBB->end()); - Value *Dividend = SlowDivOrRem->getOperand(0); - Value *Divisor = SlowDivOrRem->getOperand(1); - - // We should have bailed out above if the divisor is a constant, but the - // dividend may still be a constant. Set OrV to our non-constant operands - // OR'ed together. - assert(!isa<ConstantInt>(Divisor)); Value *OrV; - if (!isa<ConstantInt>(Dividend)) - OrV = Builder.CreateOr(Dividend, Divisor); + if (Op1 && Op2) + OrV = Builder.CreateOr(Op1, Op2); else - OrV = Divisor; + OrV = Op1 ? Op1 : Op2; // BitMask is inverted to check if the operands are // larger than the bypass type @@ -279,22 +308,72 @@ Optional<QuotRemPair> FastDivInsertionTask::insertFastDivAndRem() { return None; } - // If the numerator is a constant, bail if it doesn't fit into BypassType. - if (ConstantInt *ConstDividend = dyn_cast<ConstantInt>(Dividend)) - if (ConstDividend->getValue().getActiveBits() > BypassType->getBitWidth()) - return None; - - // Split the basic block before the div/rem. - BasicBlock *SuccessorBB = MainBB->splitBasicBlock(SlowDivOrRem); - // Remove the unconditional branch from MainBB to SuccessorBB. - MainBB->getInstList().back().eraseFromParent(); - QuotRemWithBB Fast = createFastBB(SuccessorBB); - QuotRemWithBB Slow = createSlowBB(SuccessorBB); - QuotRemPair Result = createDivRemPhiNodes(Fast, Slow, SuccessorBB); - Value *CmpV = insertOperandRuntimeCheck(); - IRBuilder<> Builder(MainBB, MainBB->end()); - Builder.CreateCondBr(CmpV, Fast.BB, Slow.BB); - return Result; + ValueRange DividendRange = getValueRange(Dividend); + if (DividendRange == VALRNG_LONG) + return None; + + ValueRange DivisorRange = getValueRange(Divisor); + if (DivisorRange == VALRNG_LONG) + return None; + + bool DividendShort = (DividendRange == VALRNG_SHORT); + bool DivisorShort = (DivisorRange == VALRNG_SHORT); + + if (DividendShort && DivisorShort) { + // If both operands are known to be short then just replace the long + // division with a short one in-place. + + IRBuilder<> Builder(SlowDivOrRem); + Value *TruncDividend = Builder.CreateTrunc(Dividend, BypassType); + Value *TruncDivisor = Builder.CreateTrunc(Divisor, BypassType); + Value *TruncDiv = Builder.CreateUDiv(TruncDividend, TruncDivisor); + Value *TruncRem = Builder.CreateURem(TruncDividend, TruncDivisor); + Value *ExtDiv = Builder.CreateZExt(TruncDiv, getSlowType()); + Value *ExtRem = Builder.CreateZExt(TruncRem, getSlowType()); + return QuotRemPair(ExtDiv, ExtRem); + } else if (DividendShort && !isSignedOp()) { + // If the division is unsigned and Dividend is known to be short, then + // either + // 1) Divisor is less or equal to Dividend, and the result can be computed + // with a short division. + // 2) Divisor is greater than Dividend. In this case, no division is needed + // at all: The quotient is 0 and the remainder is equal to Dividend. + // + // So instead of checking at runtime whether Divisor fits into BypassType, + // we emit a runtime check to differentiate between these two cases. This + // lets us entirely avoid a long div. + + // Split the basic block before the div/rem. + BasicBlock *SuccessorBB = MainBB->splitBasicBlock(SlowDivOrRem); + // Remove the unconditional branch from MainBB to SuccessorBB. + MainBB->getInstList().back().eraseFromParent(); + QuotRemWithBB Long; + Long.BB = MainBB; + Long.Quotient = ConstantInt::get(getSlowType(), 0); + Long.Remainder = Dividend; + QuotRemWithBB Fast = createFastBB(SuccessorBB); + QuotRemPair Result = createDivRemPhiNodes(Fast, Long, SuccessorBB); + IRBuilder<> Builder(MainBB, MainBB->end()); + Value *CmpV = Builder.CreateICmpUGE(Dividend, Divisor); + Builder.CreateCondBr(CmpV, Fast.BB, SuccessorBB); + return Result; + } else { + // General case. Create both slow and fast div/rem pairs and choose one of + // them at runtime. + + // Split the basic block before the div/rem. + BasicBlock *SuccessorBB = MainBB->splitBasicBlock(SlowDivOrRem); + // Remove the unconditional branch from MainBB to SuccessorBB. + MainBB->getInstList().back().eraseFromParent(); + QuotRemWithBB Fast = createFastBB(SuccessorBB); + QuotRemWithBB Slow = createSlowBB(SuccessorBB); + QuotRemPair Result = createDivRemPhiNodes(Fast, Slow, SuccessorBB); + Value *CmpV = insertOperandRuntimeCheck(DividendShort ? nullptr : Dividend, + DivisorShort ? nullptr : Divisor); + IRBuilder<> Builder(MainBB, MainBB->end()); + Builder.CreateCondBr(CmpV, Fast.BB, Slow.BB); + return Result; + } } /// This optimization identifies DIV/REM instructions in a BB that can be |

