diff options
Diffstat (limited to 'llvm/lib/Transforms/Utils/SimplifyCFG.cpp')
-rw-r--r-- | llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 161 |
1 files changed, 71 insertions, 90 deletions
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 524c3708e7b..90b552035af 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -5309,9 +5309,9 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, for (SwitchInst::CaseIt E = SI->case_end(); CI != E; ++CI) { ConstantInt *CaseVal = CI->getCaseValue(); - if (CaseVal->getValue().ult(MinCaseVal->getValue())) + if (CaseVal->getValue().slt(MinCaseVal->getValue())) MinCaseVal = CaseVal; - if (CaseVal->getValue().ugt(MaxCaseVal->getValue())) + if (CaseVal->getValue().sgt(MaxCaseVal->getValue())) MaxCaseVal = CaseVal; // Resulting value at phi nodes for this case value. @@ -5337,7 +5337,8 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, } uint64_t NumResults = ResultLists[PHIs[0]].size(); - uint64_t TableSize = MaxCaseVal->getValue().getLimitedValue() + 1; + APInt RangeSpread = MaxCaseVal->getValue() - MinCaseVal->getValue(); + uint64_t TableSize = RangeSpread.getLimitedValue() + 1; bool TableHasHoles = (NumResults < TableSize); // If the table has holes, we need a constant result for the default case @@ -5372,7 +5373,12 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, // Compute the table index value. Builder.SetInsertPoint(SI); - Value *TableIndex = SI->getCondition(); + Value *TableIndex; + if (MinCaseVal->isNullValue()) + TableIndex = SI->getCondition(); + else + TableIndex = Builder.CreateSub(SI->getCondition(), MinCaseVal, + "switch.tableidx"); // Compute the maximum table size representable by the integer type we are // switching upon. @@ -5412,10 +5418,6 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, LookupBB = BasicBlock::Create(Mod.getContext(), "switch.lookup", CommonDest->getParent(), CommonDest); - // When doing the register-sized hole-check, unconditionally use a - // subtraction. - TableIndex = Builder.CreateSub(TableIndex, MinCaseVal); - // Make the mask's bitwidth at least 8-bit and a power-of-2 to avoid // unnecessary illegal types. uint64_t TableSizePowOf2 = NextPowerOf2(std::max(7ULL, TableSize - 1ULL)); @@ -5459,11 +5461,8 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, // If using a bitmask, use any value to fill the lookup table holes. Constant *DV = NeedMask ? ResultLists[PHI][0].second : DefaultResults[PHI]; StringRef FuncName = Fn->getName(); - // Base is 0 unless using a hole check - ConstantInt *Base = - NeedMask ? MinCaseVal - : ConstantInt::get(Mod.getContext(), APInt(CaseSize, 0)); - SwitchLookupTable Table(Mod, TableSize, Base, ResultList, DV, DL, FuncName); + SwitchLookupTable Table(Mod, TableSize, MinCaseVal, ResultList, DV, DL, + FuncName); Value *Result = Table.BuildLookup(TableIndex, Builder); @@ -5508,6 +5507,17 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, return true; } +static bool isSwitchDense(ArrayRef<int64_t> Values) { + // See also SelectionDAGBuilder::isDense(), which this function was based on. + uint64_t Diff = (uint64_t)Values.back() - (uint64_t)Values.front(); + uint64_t Range = Diff + 1; + uint64_t NumCases = Values.size(); + // 40% is the default density for building a jump table in optsize/minsize mode. + uint64_t MinDensity = 40; + + return NumCases * 100 >= Range * MinDensity; +} + /// Try to transform a switch that has "holes" in it to a contiguous sequence /// of cases. /// @@ -5519,83 +5529,58 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder, const DataLayout &DL, const TargetTransformInfo &TTI) { - // The number of cases that need to be removed by a subtraction operation - // to make it worth using. - const unsigned SubThreshold = (SI->getFunction()->hasOptSize() ? 2 : 8); auto *CondTy = cast<IntegerType>(SI->getCondition()->getType()); - unsigned BitWidth = CondTy->getIntegerBitWidth(); - if (BitWidth > 64 || !DL.fitsInLegalInteger(BitWidth)) + if (CondTy->getIntegerBitWidth() > 64 || + !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth())) return false; // Only bother with this optimization if there are more than 3 switch cases; // SDAG will only bother creating jump tables for 4 or more cases. - // This is also useful when using the LowerSwitch transform, but not with - // so few cases. if (SI->getNumCases() < 4) return false; - // We organize the range to start from 0, if it is not already close. - SmallVector<uint64_t, 4> Values; + // This transform is agnostic to the signedness of the input or case values. We + // can treat the case values as signed or unsigned. We can optimize more common + // cases such as a sequence crossing zero {-4,0,4,8} if we interpret case values + // as signed. + SmallVector<int64_t,4> Values; for (auto &C : SI->cases()) - Values.push_back(C.getCaseValue()->getValue().getLimitedValue()); + Values.push_back(C.getCaseValue()->getValue().getSExtValue()); llvm::sort(Values); - bool MadeChanges = false; - // We must first look find the best start point, for example if we have a - // series that crosses zero: -2, -1, 0, 1, 2. - uint64_t BestDistance = - APInt::getMaxValue(CondTy->getIntegerBitWidth()).getLimitedValue() - - Values.back() + Values.front() + 1; - unsigned BestIndex = 0; - for (unsigned I = 1, E = Values.size(); I != E; I++) { - if (Values[I] - Values[I - 1] > BestDistance) { - BestIndex = I; - BestDistance = Values[I] - Values[I - 1]; - } - } + // If the switch is already dense, there's nothing useful to do here. + if (isSwitchDense(Values)) + return false; + + // First, transform the values such that they start at zero and ascend. + int64_t Base = Values[0]; + for (auto &V : Values) + V -= (uint64_t)(Base); - // This transform can be done speculatively because it is so cheap - it - // results in a single rotate operation being inserted. + // Now we have signed numbers that have been shifted so that, given enough + // precision, there are no negative values. Since the rest of the transform + // is bitwise only, we switch now to an unsigned representation. + uint64_t GCD = 0; + for (auto &V : Values) + GCD = GreatestCommonDivisor64(GCD, (uint64_t)V); + + // This transform can be done speculatively because it is so cheap - it results + // in a single rotate operation being inserted. This can only happen if the + // factor extracted is a power of 2. + // FIXME: If the GCD is an odd number we can multiply by the multiplicative + // inverse of GCD and then perform this transform. // FIXME: It's possible that optimizing a switch on powers of two might also // be beneficial - flag values are often powers of two and we could use a CLZ // as the key function. + if (GCD <= 1 || !isPowerOf2_64(GCD)) + // No common divisor found or too expensive to compute key function. + return false; - // countTrailingZeros(0) returns 64. As Values is guaranteed to have more than - // one element and LLVM disallows duplicate cases, Shift is guaranteed to be - // less than 64. - unsigned Shift = 64; - // We need to store this from _before_ the transform - uint64_t BestIndexXor = Values[BestIndex]; + unsigned Shift = Log2_64(GCD); for (auto &V : Values) - Shift = std::min(Shift, countTrailingZeros(V ^ BestIndexXor)); - assert(Shift < 64); - if (Shift > 0) { - MadeChanges = true; - for (auto &V : Values) - V >>= Shift; - } - - // We Xor against Values[] (any element will do) because the if we do not - // start at zero, but also don't meet the SubThreshold, then we still might - // share common rights bits, and if this transform succeeds - // then we should insert the subtraction anyways, because the rotate trick - // below to avoid a branch needs the shifted away bits to be zero. - - // Now transform the values such that they start at zero and ascend. Do not - // do this if the shift reduces the lowest value to less than SubThreshold, - // or if the subtraction is less than SubThreshold and it does not enable a - // rotate. - uint64_t Base = 0; - if ((BestIndexXor >= SubThreshold && Shift == 0) || - (Shift > countTrailingZeros(BestIndexXor) && - Values[BestIndex] >= SubThreshold)) { - Base = BestIndexXor; - MadeChanges = true; - for (auto &V : Values) - V = (APInt(BitWidth, V) - Base).getLimitedValue(); - } - - if (!MadeChanges) - // We didn't do anything. + V = (int64_t)((uint64_t)V >> Shift); + + if (!isSwitchDense(Values)) + // Transform didn't create a dense switch. return false; // The obvious transform is to shift the switch condition right and emit a @@ -5610,22 +5595,18 @@ static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder, auto *Ty = cast<IntegerType>(SI->getCondition()->getType()); Builder.SetInsertPoint(SI); - Value *Key = SI->getCondition(); - if (Base > 0) - Key = Builder.CreateSub(Key, ConstantInt::get(Ty, Base)); - if (Shift > 0) { - // FIXME replace with fshr? - auto *ShiftC = ConstantInt::get(Ty, Shift); - auto *LShr = Builder.CreateLShr(Key, ShiftC); - auto *Shl = Builder.CreateShl(Key, Ty->getBitWidth() - Shift); - Key = Builder.CreateOr(LShr, Shl); - } - SI->replaceUsesOfWith(SI->getCondition(), Key); + auto *ShiftC = ConstantInt::get(Ty, Shift); + auto *Sub = Builder.CreateSub(SI->getCondition(), ConstantInt::get(Ty, Base)); + auto *LShr = Builder.CreateLShr(Sub, ShiftC); + auto *Shl = Builder.CreateShl(Sub, Ty->getBitWidth() - Shift); + auto *Rot = Builder.CreateOr(LShr, Shl); + SI->replaceUsesOfWith(SI->getCondition(), Rot); for (auto Case : SI->cases()) { auto *Orig = Case.getCaseValue(); - auto Sub = Orig->getValue() - Base; - Case.setValue(cast<ConstantInt>(ConstantInt::get(Ty, Sub.lshr(Shift)))); + auto Sub = Orig->getValue() - APInt(Ty->getBitWidth(), Base); + Case.setValue( + cast<ConstantInt>(ConstantInt::get(Ty, Sub.lshr(ShiftC->getValue())))); } return true; } @@ -5666,9 +5647,6 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) { if (Options.ForwardSwitchCondToPhi && ForwardSwitchConditionToPHI(SI)) return requestResimplify(); - if (ReduceSwitchRange(SI, Builder, DL, TTI)) - return requestResimplify(); - // The conversion from switch to lookup tables results in difficult-to-analyze // code and makes pruning branches much harder. This is a problem if the // switch expression itself can still be restricted as a result of inlining or @@ -5678,6 +5656,9 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) { SwitchToLookupTable(SI, Builder, DL, TTI)) return requestResimplify(); + if (ReduceSwitchRange(SI, Builder, DL, TTI)) + return requestResimplify(); + return false; } |