summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Transforms/Utils/SimplifyCFG.cpp')
-rw-r--r--llvm/lib/Transforms/Utils/SimplifyCFG.cpp161
1 files changed, 71 insertions, 90 deletions
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 524c3708e7b..90b552035af 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -5309,9 +5309,9 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
for (SwitchInst::CaseIt E = SI->case_end(); CI != E; ++CI) {
ConstantInt *CaseVal = CI->getCaseValue();
- if (CaseVal->getValue().ult(MinCaseVal->getValue()))
+ if (CaseVal->getValue().slt(MinCaseVal->getValue()))
MinCaseVal = CaseVal;
- if (CaseVal->getValue().ugt(MaxCaseVal->getValue()))
+ if (CaseVal->getValue().sgt(MaxCaseVal->getValue()))
MaxCaseVal = CaseVal;
// Resulting value at phi nodes for this case value.
@@ -5337,7 +5337,8 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
}
uint64_t NumResults = ResultLists[PHIs[0]].size();
- uint64_t TableSize = MaxCaseVal->getValue().getLimitedValue() + 1;
+ APInt RangeSpread = MaxCaseVal->getValue() - MinCaseVal->getValue();
+ uint64_t TableSize = RangeSpread.getLimitedValue() + 1;
bool TableHasHoles = (NumResults < TableSize);
// If the table has holes, we need a constant result for the default case
@@ -5372,7 +5373,12 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
// Compute the table index value.
Builder.SetInsertPoint(SI);
- Value *TableIndex = SI->getCondition();
+ Value *TableIndex;
+ if (MinCaseVal->isNullValue())
+ TableIndex = SI->getCondition();
+ else
+ TableIndex = Builder.CreateSub(SI->getCondition(), MinCaseVal,
+ "switch.tableidx");
// Compute the maximum table size representable by the integer type we are
// switching upon.
@@ -5412,10 +5418,6 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
LookupBB = BasicBlock::Create(Mod.getContext(), "switch.lookup",
CommonDest->getParent(), CommonDest);
- // When doing the register-sized hole-check, unconditionally use a
- // subtraction.
- TableIndex = Builder.CreateSub(TableIndex, MinCaseVal);
-
// Make the mask's bitwidth at least 8-bit and a power-of-2 to avoid
// unnecessary illegal types.
uint64_t TableSizePowOf2 = NextPowerOf2(std::max(7ULL, TableSize - 1ULL));
@@ -5459,11 +5461,8 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
// If using a bitmask, use any value to fill the lookup table holes.
Constant *DV = NeedMask ? ResultLists[PHI][0].second : DefaultResults[PHI];
StringRef FuncName = Fn->getName();
- // Base is 0 unless using a hole check
- ConstantInt *Base =
- NeedMask ? MinCaseVal
- : ConstantInt::get(Mod.getContext(), APInt(CaseSize, 0));
- SwitchLookupTable Table(Mod, TableSize, Base, ResultList, DV, DL, FuncName);
+ SwitchLookupTable Table(Mod, TableSize, MinCaseVal, ResultList, DV, DL,
+ FuncName);
Value *Result = Table.BuildLookup(TableIndex, Builder);
@@ -5508,6 +5507,17 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
return true;
}
+static bool isSwitchDense(ArrayRef<int64_t> Values) {
+ // See also SelectionDAGBuilder::isDense(), which this function was based on.
+ uint64_t Diff = (uint64_t)Values.back() - (uint64_t)Values.front();
+ uint64_t Range = Diff + 1;
+ uint64_t NumCases = Values.size();
+ // 40% is the default density for building a jump table in optsize/minsize mode.
+ uint64_t MinDensity = 40;
+
+ return NumCases * 100 >= Range * MinDensity;
+}
+
/// Try to transform a switch that has "holes" in it to a contiguous sequence
/// of cases.
///
@@ -5519,83 +5529,58 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
const DataLayout &DL,
const TargetTransformInfo &TTI) {
- // The number of cases that need to be removed by a subtraction operation
- // to make it worth using.
- const unsigned SubThreshold = (SI->getFunction()->hasOptSize() ? 2 : 8);
auto *CondTy = cast<IntegerType>(SI->getCondition()->getType());
- unsigned BitWidth = CondTy->getIntegerBitWidth();
- if (BitWidth > 64 || !DL.fitsInLegalInteger(BitWidth))
+ if (CondTy->getIntegerBitWidth() > 64 ||
+ !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
return false;
// Only bother with this optimization if there are more than 3 switch cases;
// SDAG will only bother creating jump tables for 4 or more cases.
- // This is also useful when using the LowerSwitch transform, but not with
- // so few cases.
if (SI->getNumCases() < 4)
return false;
- // We organize the range to start from 0, if it is not already close.
- SmallVector<uint64_t, 4> Values;
+ // This transform is agnostic to the signedness of the input or case values. We
+ // can treat the case values as signed or unsigned. We can optimize more common
+ // cases such as a sequence crossing zero {-4,0,4,8} if we interpret case values
+ // as signed.
+ SmallVector<int64_t,4> Values;
for (auto &C : SI->cases())
- Values.push_back(C.getCaseValue()->getValue().getLimitedValue());
+ Values.push_back(C.getCaseValue()->getValue().getSExtValue());
llvm::sort(Values);
- bool MadeChanges = false;
- // We must first look find the best start point, for example if we have a
- // series that crosses zero: -2, -1, 0, 1, 2.
- uint64_t BestDistance =
- APInt::getMaxValue(CondTy->getIntegerBitWidth()).getLimitedValue() -
- Values.back() + Values.front() + 1;
- unsigned BestIndex = 0;
- for (unsigned I = 1, E = Values.size(); I != E; I++) {
- if (Values[I] - Values[I - 1] > BestDistance) {
- BestIndex = I;
- BestDistance = Values[I] - Values[I - 1];
- }
- }
+ // If the switch is already dense, there's nothing useful to do here.
+ if (isSwitchDense(Values))
+ return false;
+
+ // First, transform the values such that they start at zero and ascend.
+ int64_t Base = Values[0];
+ for (auto &V : Values)
+ V -= (uint64_t)(Base);
- // This transform can be done speculatively because it is so cheap - it
- // results in a single rotate operation being inserted.
+ // Now we have signed numbers that have been shifted so that, given enough
+ // precision, there are no negative values. Since the rest of the transform
+ // is bitwise only, we switch now to an unsigned representation.
+ uint64_t GCD = 0;
+ for (auto &V : Values)
+ GCD = GreatestCommonDivisor64(GCD, (uint64_t)V);
+
+ // This transform can be done speculatively because it is so cheap - it results
+ // in a single rotate operation being inserted. This can only happen if the
+ // factor extracted is a power of 2.
+ // FIXME: If the GCD is an odd number we can multiply by the multiplicative
+ // inverse of GCD and then perform this transform.
// FIXME: It's possible that optimizing a switch on powers of two might also
// be beneficial - flag values are often powers of two and we could use a CLZ
// as the key function.
+ if (GCD <= 1 || !isPowerOf2_64(GCD))
+ // No common divisor found or too expensive to compute key function.
+ return false;
- // countTrailingZeros(0) returns 64. As Values is guaranteed to have more than
- // one element and LLVM disallows duplicate cases, Shift is guaranteed to be
- // less than 64.
- unsigned Shift = 64;
- // We need to store this from _before_ the transform
- uint64_t BestIndexXor = Values[BestIndex];
+ unsigned Shift = Log2_64(GCD);
for (auto &V : Values)
- Shift = std::min(Shift, countTrailingZeros(V ^ BestIndexXor));
- assert(Shift < 64);
- if (Shift > 0) {
- MadeChanges = true;
- for (auto &V : Values)
- V >>= Shift;
- }
-
- // We Xor against Values[] (any element will do) because the if we do not
- // start at zero, but also don't meet the SubThreshold, then we still might
- // share common rights bits, and if this transform succeeds
- // then we should insert the subtraction anyways, because the rotate trick
- // below to avoid a branch needs the shifted away bits to be zero.
-
- // Now transform the values such that they start at zero and ascend. Do not
- // do this if the shift reduces the lowest value to less than SubThreshold,
- // or if the subtraction is less than SubThreshold and it does not enable a
- // rotate.
- uint64_t Base = 0;
- if ((BestIndexXor >= SubThreshold && Shift == 0) ||
- (Shift > countTrailingZeros(BestIndexXor) &&
- Values[BestIndex] >= SubThreshold)) {
- Base = BestIndexXor;
- MadeChanges = true;
- for (auto &V : Values)
- V = (APInt(BitWidth, V) - Base).getLimitedValue();
- }
-
- if (!MadeChanges)
- // We didn't do anything.
+ V = (int64_t)((uint64_t)V >> Shift);
+
+ if (!isSwitchDense(Values))
+ // Transform didn't create a dense switch.
return false;
// The obvious transform is to shift the switch condition right and emit a
@@ -5610,22 +5595,18 @@ static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
auto *Ty = cast<IntegerType>(SI->getCondition()->getType());
Builder.SetInsertPoint(SI);
- Value *Key = SI->getCondition();
- if (Base > 0)
- Key = Builder.CreateSub(Key, ConstantInt::get(Ty, Base));
- if (Shift > 0) {
- // FIXME replace with fshr?
- auto *ShiftC = ConstantInt::get(Ty, Shift);
- auto *LShr = Builder.CreateLShr(Key, ShiftC);
- auto *Shl = Builder.CreateShl(Key, Ty->getBitWidth() - Shift);
- Key = Builder.CreateOr(LShr, Shl);
- }
- SI->replaceUsesOfWith(SI->getCondition(), Key);
+ auto *ShiftC = ConstantInt::get(Ty, Shift);
+ auto *Sub = Builder.CreateSub(SI->getCondition(), ConstantInt::get(Ty, Base));
+ auto *LShr = Builder.CreateLShr(Sub, ShiftC);
+ auto *Shl = Builder.CreateShl(Sub, Ty->getBitWidth() - Shift);
+ auto *Rot = Builder.CreateOr(LShr, Shl);
+ SI->replaceUsesOfWith(SI->getCondition(), Rot);
for (auto Case : SI->cases()) {
auto *Orig = Case.getCaseValue();
- auto Sub = Orig->getValue() - Base;
- Case.setValue(cast<ConstantInt>(ConstantInt::get(Ty, Sub.lshr(Shift))));
+ auto Sub = Orig->getValue() - APInt(Ty->getBitWidth(), Base);
+ Case.setValue(
+ cast<ConstantInt>(ConstantInt::get(Ty, Sub.lshr(ShiftC->getValue()))));
}
return true;
}
@@ -5666,9 +5647,6 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
if (Options.ForwardSwitchCondToPhi && ForwardSwitchConditionToPHI(SI))
return requestResimplify();
- if (ReduceSwitchRange(SI, Builder, DL, TTI))
- return requestResimplify();
-
// The conversion from switch to lookup tables results in difficult-to-analyze
// code and makes pruning branches much harder. This is a problem if the
// switch expression itself can still be restricted as a result of inlining or
@@ -5678,6 +5656,9 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
SwitchToLookupTable(SI, Builder, DL, TTI))
return requestResimplify();
+ if (ReduceSwitchRange(SI, Builder, DL, TTI))
+ return requestResimplify();
+
return false;
}
OpenPOWER on IntegriCloud