From 4a04fb9e9059bff9c00dd698c15a9b5274441ea8 Mon Sep 17 00:00:00 2001 From: Nikolai Bozhenov Date: Thu, 2 Mar 2017 22:12:15 +0000 Subject: [BypassSlowDivision] Use ValueTracking to simplify run-time checks ValueTracking is used for more thorough analysis of operands. Based on the analysis, either run-time checks can be simplified (e.g. check only one operand instead of two) or the transformation can be avoided. For example, it is quite often the case that a divisor is promoted from a shorter type and run-time checks for it are redundant. With additional compile-time analysis of values, two special cases naturally arise and are addressed by the patch: 1) Both operands are known to be short enough. Then, the long division can be simply replaced with a short one without CFG modification. 2) If a division is unsigned and the dividend is known to be short then the long division is not needed at all. Because if the divisor is too big for short division then the quotient is obviously zero (and the remainder is equal to the dividend). Actually, the division is not needed when (divisor > dividend). Differential Revision: https://reviews.llvm.org/D29897 llvm-svn: 296832 --- llvm/lib/Transforms/Utils/BypassSlowDivision.cpp | 137 ++++++++++++++++++----- 1 file changed, 108 insertions(+), 29 deletions(-) (limited to 'llvm/lib/Transforms/Utils/BypassSlowDivision.cpp') diff --git a/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp b/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp index ed663de0199..9d8cb3187ee 100644 --- a/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp +++ b/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp @@ -17,6 +17,7 @@ #include "llvm/Transforms/Utils/BypassSlowDivision.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" @@ -83,17 +84,28 @@ namespace llvm { } namespace { +enum ValueRange { + /// Operand definitely fits into BypassType. No runtime checks are needed. + VALRNG_SHORT, + /// A runtime check is required, as value range is unknown. + VALRNG_UNKNOWN, + /// Operand is unlikely to fit into BypassType. The bypassing should be + /// disabled. + VALRNG_LONG +}; + class FastDivInsertionTask { bool IsValidTask = false; Instruction *SlowDivOrRem = nullptr; IntegerType *BypassType = nullptr; BasicBlock *MainBB = nullptr; + ValueRange getValueRange(Value *Op); QuotRemWithBB createSlowBB(BasicBlock *Successor); QuotRemWithBB createFastBB(BasicBlock *Successor); QuotRemPair createDivRemPhiNodes(QuotRemWithBB &LHS, QuotRemWithBB &RHS, BasicBlock *PhiBB); - Value *insertOperandRuntimeCheck(); + Value *insertOperandRuntimeCheck(Value *Op1, Value *Op2); Optional insertFastDivAndRem(); bool isSignedOp() { @@ -175,6 +187,28 @@ Value *FastDivInsertionTask::getReplacement(DivCacheTy &Cache) { return isDivisionOp() ? Value.Quotient : Value.Remainder; } +/// Check if an integer value fits into our bypass type. +ValueRange FastDivInsertionTask::getValueRange(Value *V) { + unsigned ShortLen = BypassType->getBitWidth(); + unsigned LongLen = V->getType()->getIntegerBitWidth(); + + assert(LongLen > ShortLen && "Value type must be wider than BypassType"); + unsigned HiBits = LongLen - ShortLen; + + const DataLayout &DL = SlowDivOrRem->getModule()->getDataLayout(); + APInt Zeros(LongLen, 0), Ones(LongLen, 0); + + computeKnownBits(V, Zeros, Ones, DL); + + if (Zeros.countLeadingOnes() >= HiBits) + return VALRNG_SHORT; + + if (Ones.countLeadingZeros() < HiBits) + return VALRNG_LONG; + + return VALRNG_UNKNOWN; +} + /// Add new basic block for slow div and rem operations and put it before /// SuccessorBB. QuotRemWithBB FastDivInsertionTask::createSlowBB(BasicBlock *SuccessorBB) { @@ -241,22 +275,17 @@ QuotRemPair FastDivInsertionTask::createDivRemPhiNodes(QuotRemWithBB &LHS, /// Creates a runtime check to test whether both the divisor and dividend fit /// into BypassType. The check is inserted at the end of MainBB. True return -/// value means that the operands fit. -Value *FastDivInsertionTask::insertOperandRuntimeCheck() { +/// value means that the operands fit. Either of the operands may be NULL if it +/// doesn't need a runtime check. +Value *FastDivInsertionTask::insertOperandRuntimeCheck(Value *Op1, Value *Op2) { + assert((Op1 || Op2) && "Nothing to check"); IRBuilder<> Builder(MainBB, MainBB->end()); - Value *Dividend = SlowDivOrRem->getOperand(0); - Value *Divisor = SlowDivOrRem->getOperand(1); - - // We should have bailed out above if the divisor is a constant, but the - // dividend may still be a constant. Set OrV to our non-constant operands - // OR'ed together. - assert(!isa(Divisor)); Value *OrV; - if (!isa(Dividend)) - OrV = Builder.CreateOr(Dividend, Divisor); + if (Op1 && Op2) + OrV = Builder.CreateOr(Op1, Op2); else - OrV = Divisor; + OrV = Op1 ? Op1 : Op2; // BitMask is inverted to check if the operands are // larger than the bypass type @@ -279,22 +308,72 @@ Optional FastDivInsertionTask::insertFastDivAndRem() { return None; } - // If the numerator is a constant, bail if it doesn't fit into BypassType. - if (ConstantInt *ConstDividend = dyn_cast(Dividend)) - if (ConstDividend->getValue().getActiveBits() > BypassType->getBitWidth()) - return None; - - // Split the basic block before the div/rem. - BasicBlock *SuccessorBB = MainBB->splitBasicBlock(SlowDivOrRem); - // Remove the unconditional branch from MainBB to SuccessorBB. - MainBB->getInstList().back().eraseFromParent(); - QuotRemWithBB Fast = createFastBB(SuccessorBB); - QuotRemWithBB Slow = createSlowBB(SuccessorBB); - QuotRemPair Result = createDivRemPhiNodes(Fast, Slow, SuccessorBB); - Value *CmpV = insertOperandRuntimeCheck(); - IRBuilder<> Builder(MainBB, MainBB->end()); - Builder.CreateCondBr(CmpV, Fast.BB, Slow.BB); - return Result; + ValueRange DividendRange = getValueRange(Dividend); + if (DividendRange == VALRNG_LONG) + return None; + + ValueRange DivisorRange = getValueRange(Divisor); + if (DivisorRange == VALRNG_LONG) + return None; + + bool DividendShort = (DividendRange == VALRNG_SHORT); + bool DivisorShort = (DivisorRange == VALRNG_SHORT); + + if (DividendShort && DivisorShort) { + // If both operands are known to be short then just replace the long + // division with a short one in-place. + + IRBuilder<> Builder(SlowDivOrRem); + Value *TruncDividend = Builder.CreateTrunc(Dividend, BypassType); + Value *TruncDivisor = Builder.CreateTrunc(Divisor, BypassType); + Value *TruncDiv = Builder.CreateUDiv(TruncDividend, TruncDivisor); + Value *TruncRem = Builder.CreateURem(TruncDividend, TruncDivisor); + Value *ExtDiv = Builder.CreateZExt(TruncDiv, getSlowType()); + Value *ExtRem = Builder.CreateZExt(TruncRem, getSlowType()); + return QuotRemPair(ExtDiv, ExtRem); + } else if (DividendShort && !isSignedOp()) { + // If the division is unsigned and Dividend is known to be short, then + // either + // 1) Divisor is less or equal to Dividend, and the result can be computed + // with a short division. + // 2) Divisor is greater than Dividend. In this case, no division is needed + // at all: The quotient is 0 and the remainder is equal to Dividend. + // + // So instead of checking at runtime whether Divisor fits into BypassType, + // we emit a runtime check to differentiate between these two cases. This + // lets us entirely avoid a long div. + + // Split the basic block before the div/rem. + BasicBlock *SuccessorBB = MainBB->splitBasicBlock(SlowDivOrRem); + // Remove the unconditional branch from MainBB to SuccessorBB. + MainBB->getInstList().back().eraseFromParent(); + QuotRemWithBB Long; + Long.BB = MainBB; + Long.Quotient = ConstantInt::get(getSlowType(), 0); + Long.Remainder = Dividend; + QuotRemWithBB Fast = createFastBB(SuccessorBB); + QuotRemPair Result = createDivRemPhiNodes(Fast, Long, SuccessorBB); + IRBuilder<> Builder(MainBB, MainBB->end()); + Value *CmpV = Builder.CreateICmpUGE(Dividend, Divisor); + Builder.CreateCondBr(CmpV, Fast.BB, SuccessorBB); + return Result; + } else { + // General case. Create both slow and fast div/rem pairs and choose one of + // them at runtime. + + // Split the basic block before the div/rem. + BasicBlock *SuccessorBB = MainBB->splitBasicBlock(SlowDivOrRem); + // Remove the unconditional branch from MainBB to SuccessorBB. + MainBB->getInstList().back().eraseFromParent(); + QuotRemWithBB Fast = createFastBB(SuccessorBB); + QuotRemWithBB Slow = createSlowBB(SuccessorBB); + QuotRemPair Result = createDivRemPhiNodes(Fast, Slow, SuccessorBB); + Value *CmpV = insertOperandRuntimeCheck(DividendShort ? nullptr : Dividend, + DivisorShort ? nullptr : Divisor); + IRBuilder<> Builder(MainBB, MainBB->end()); + Builder.CreateCondBr(CmpV, Fast.BB, Slow.BB); + return Result; + } } /// This optimization identifies DIV/REM instructions in a BB that can be -- cgit v1.2.3