diff options
| -rw-r--r-- | llvm/lib/Analysis/InstructionSimplify.cpp | 11 | ||||
| -rw-r--r-- | llvm/lib/Analysis/ValueTracking.cpp | 145 | ||||
| -rw-r--r-- | llvm/lib/Transforms/InstCombine/InstructionCombining.cpp | 21 | ||||
| -rw-r--r-- | llvm/test/Transforms/InstCombine/all-bits-shift.ll | 46 | ||||
| -rw-r--r-- | llvm/test/Transforms/InstCombine/div.ll | 4 | ||||
| -rw-r--r-- | llvm/test/Transforms/InstCombine/load-combine-metadata.ll | 6 | ||||
| -rw-r--r-- | llvm/test/Transforms/InstSimplify/shift-128-kb.ll | 22 |
7 files changed, 215 insertions, 40 deletions
diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index be5ce2960ea..cf6aa9a1163 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -4145,6 +4145,17 @@ Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout &DL, break; } + // In general, it is possible for computeKnownBits to determine all bits in a + // value even when the operands are not all constants. + if (!Result && I->getType()->isIntegerTy()) { + unsigned BitWidth = I->getType()->getScalarSizeInBits(); + APInt KnownZero(BitWidth, 0); + APInt KnownOne(BitWidth, 0); + computeKnownBits(I, KnownZero, KnownOne, DL, /*Depth*/0, AC, I, DT); + if ((KnownZero | KnownOne).isAllOnesValue()) + Result = ConstantInt::get(I->getContext(), KnownOne); + } + /// If called on unreachable code, the above logic may report that the /// instruction simplified to itself. Make life easier for users by /// detecting that case here, returning a safe value instead. diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index ad6be85f87a..ea2321e832e 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -393,6 +393,12 @@ static bool isEphemeralValueOf(Instruction *I, const Value *E) { SmallPtrSet<const Value *, 32> Visited; SmallPtrSet<const Value *, 16> EphValues; + // The instruction defining an assumption's condition itself is always + // considered ephemeral to that assumption (even if it has other + // non-ephemeral users). See r246696's test case for an example. + if (std::find(I->op_begin(), I->op_end(), E) != I->op_end()) + return true; + while (!WorkSet.empty()) { const Value *V = WorkSet.pop_back_val(); if (!Visited.insert(V).second) @@ -967,6 +973,71 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero, } } +// Compute known bits from a shift operator, including those with a +// non-constant shift amount. KnownZero and KnownOne are the outputs of this +// function. KnownZero2 and KnownOne2 are pre-allocated temporaries with the +// same bit width as KnownZero and KnownOne. KZF and KOF are operator-specific +// functors that, given the known-zero or known-one bits respectively, and a +// shift amount, compute the implied known-zero or known-one bits of the shift +// operator's result respectively for that shift amount. The results from calling +// KZF and KOF are conservatively combined for all permitted shift amounts. +template <typename KZFunctor, typename KOFunctor> +static void computeKnownBitsFromShiftOperator(Operator *I, + APInt &KnownZero, APInt &KnownOne, + APInt &KnownZero2, APInt &KnownOne2, + const DataLayout &DL, unsigned Depth, const Query &Q, + KZFunctor KZF, KOFunctor KOF) { + unsigned BitWidth = KnownZero.getBitWidth(); + + if (auto *SA = dyn_cast<ConstantInt>(I->getOperand(1))) { + unsigned ShiftAmt = SA->getLimitedValue(BitWidth-1); + + computeKnownBits(I->getOperand(0), KnownZero, KnownOne, DL, Depth + 1, Q); + KnownZero = KZF(KnownZero, ShiftAmt); + KnownOne = KOF(KnownOne, ShiftAmt); + return; + } + + computeKnownBits(I->getOperand(1), KnownZero, KnownOne, DL, Depth + 1, Q); + + // Note: We cannot use KnownZero.getLimitedValue() here, because if + // BitWidth > 64 and any upper bits are known, we'll end up returning the + // limit value (which implies all bits are known). + uint64_t ShiftAmtKZ = KnownZero.zextOrTrunc(64).getZExtValue(); + uint64_t ShiftAmtKO = KnownOne.zextOrTrunc(64).getZExtValue(); + + // It would be more-clearly correct to use the two temporaries for this + // calculation. Reusing the APInts here to prevent unnecessary allocations. + KnownZero.clearAllBits(), KnownOne.clearAllBits(); + + // Early exit if we can't constrain any well-defined shift amount. + if (!(ShiftAmtKZ & (BitWidth-1)) && !(ShiftAmtKO & (BitWidth-1))) + return; + + computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, DL, Depth + 1, Q); + + KnownZero = KnownOne = APInt::getAllOnesValue(BitWidth); + for (unsigned ShiftAmt = 0; ShiftAmt < BitWidth; ++ShiftAmt) { + // Combine the shifted known input bits only for those shift amounts + // compatible with its known constraints. + if ((ShiftAmt & ~ShiftAmtKZ) != ShiftAmt) + continue; + if ((ShiftAmt | ShiftAmtKO) != ShiftAmt) + continue; + + KnownZero &= KZF(KnownZero2, ShiftAmt); + KnownOne &= KOF(KnownOne2, ShiftAmt); + } + + // If there are no compatible shift amounts, then we've proven that the shift + // amount must be >= the BitWidth, and the result is undefined. We could + // return anything we'd like, but we need to make sure the sets of known bits + // stay disjoint (it should be better for some other code to actually + // propagate the undef than to pick a value here using known bits). + if ((KnownZero & KnownOne) != 0) + KnownZero.clearAllBits(), KnownOne.clearAllBits(); +} + static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero, APInt &KnownOne, const DataLayout &DL, unsigned Depth, const Query &Q) { @@ -1104,48 +1175,54 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero, KnownOne |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth); break; } - case Instruction::Shl: + case Instruction::Shl: { // (shl X, C1) & C2 == 0 iff (X & C2 >>u C1) == 0 - if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) { - uint64_t ShiftAmt = SA->getLimitedValue(BitWidth); - computeKnownBits(I->getOperand(0), KnownZero, KnownOne, DL, Depth + 1, Q); - KnownZero <<= ShiftAmt; - KnownOne <<= ShiftAmt; - KnownZero |= APInt::getLowBitsSet(BitWidth, ShiftAmt); // low bits known 0 - } + auto KZF = [BitWidth](const APInt &KnownZero, unsigned ShiftAmt) { + return (KnownZero << ShiftAmt) | + APInt::getLowBitsSet(BitWidth, ShiftAmt); // Low bits known 0. + }; + + auto KOF = [BitWidth](const APInt &KnownOne, unsigned ShiftAmt) { + return KnownOne << ShiftAmt; + }; + + computeKnownBitsFromShiftOperator(I, KnownZero, KnownOne, + KnownZero2, KnownOne2, DL, Depth, Q, + KZF, KOF); break; - case Instruction::LShr: + } + case Instruction::LShr: { // (ushr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0 - if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) { - // Compute the new bits that are at the top now. - uint64_t ShiftAmt = SA->getLimitedValue(BitWidth); - - // Unsigned shift right. - computeKnownBits(I->getOperand(0), KnownZero, KnownOne, DL, Depth + 1, Q); - KnownZero = APIntOps::lshr(KnownZero, ShiftAmt); - KnownOne = APIntOps::lshr(KnownOne, ShiftAmt); - // high bits known zero. - KnownZero |= APInt::getHighBitsSet(BitWidth, ShiftAmt); - } + auto KZF = [BitWidth](const APInt &KnownZero, unsigned ShiftAmt) { + return APIntOps::lshr(KnownZero, ShiftAmt) | + // High bits known zero. + APInt::getHighBitsSet(BitWidth, ShiftAmt); + }; + + auto KOF = [BitWidth](const APInt &KnownOne, unsigned ShiftAmt) { + return APIntOps::lshr(KnownOne, ShiftAmt); + }; + + computeKnownBitsFromShiftOperator(I, KnownZero, KnownOne, + KnownZero2, KnownOne2, DL, Depth, Q, + KZF, KOF); break; - case Instruction::AShr: + } + case Instruction::AShr: { // (ashr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0 - if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) { - // Compute the new bits that are at the top now. - uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1); + auto KZF = [BitWidth](const APInt &KnownZero, unsigned ShiftAmt) { + return APIntOps::ashr(KnownZero, ShiftAmt); + }; - // Signed shift right. - computeKnownBits(I->getOperand(0), KnownZero, KnownOne, DL, Depth + 1, Q); - KnownZero = APIntOps::lshr(KnownZero, ShiftAmt); - KnownOne = APIntOps::lshr(KnownOne, ShiftAmt); + auto KOF = [BitWidth](const APInt &KnownOne, unsigned ShiftAmt) { + return APIntOps::ashr(KnownOne, ShiftAmt); + }; - APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt)); - if (KnownZero[BitWidth-ShiftAmt-1]) // New bits are known zero. - KnownZero |= HighBits; - else if (KnownOne[BitWidth-ShiftAmt-1]) // New bits are known one. - KnownOne |= HighBits; - } + computeKnownBitsFromShiftOperator(I, KnownZero, KnownOne, + KnownZero2, KnownOne2, DL, Depth, Q, + KZF, KOF); break; + } case Instruction::Sub: { bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap(); computeKnownBitsAddSub(false, I->getOperand(0), I->getOperand(1), NSW, diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index 9b19732acfc..69a2661f031 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -2734,6 +2734,27 @@ bool InstCombiner::run() { } } + // In general, it is possible for computeKnownBits to determine all bits in a + // value even when the operands are not all constants. + if (!I->use_empty() && I->getType()->isIntegerTy()) { + unsigned BitWidth = I->getType()->getScalarSizeInBits(); + APInt KnownZero(BitWidth, 0); + APInt KnownOne(BitWidth, 0); + computeKnownBits(I, KnownZero, KnownOne, /*Depth*/0, I); + if ((KnownZero | KnownOne).isAllOnesValue()) { + Constant *C = ConstantInt::get(I->getContext(), KnownOne); + DEBUG(dbgs() << "IC: ConstFold (all bits known) to: " << *C << + " from: " << *I << '\n'); + + // Add operands to the worklist. + ReplaceInstUsesWith(*I, C); + ++NumConstProp; + EraseInstFromFunction(*I); + MadeIRChange = true; + continue; + } + } + // See if we can trivially sink this instruction to a successor basic block. if (I->hasOneUse()) { BasicBlock *BB = I->getParent(); diff --git a/llvm/test/Transforms/InstCombine/all-bits-shift.ll b/llvm/test/Transforms/InstCombine/all-bits-shift.ll new file mode 100644 index 00000000000..b9eb19cf2ad --- /dev/null +++ b/llvm/test/Transforms/InstCombine/all-bits-shift.ll @@ -0,0 +1,46 @@ +; RUN: opt -S -instcombine < %s | FileCheck %s +; RUN: opt -S -instsimplify < %s | FileCheck %s +target datalayout = "E-m:e-i64:64-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +@d = global i32 15, align 4 +@b = global i32* @d, align 8 +@a = common global i32 0, align 4 + +; Function Attrs: nounwind +define signext i32 @main() #1 { +entry: + %0 = load i32*, i32** @b, align 8 + %1 = load i32, i32* @a, align 4 + %lnot = icmp eq i32 %1, 0 + %lnot.ext = zext i1 %lnot to i32 + %shr.i = lshr i32 2072, %lnot.ext + %call.lobit = lshr i32 %shr.i, 7 + %2 = and i32 %call.lobit, 1 + %3 = load i32, i32* %0, align 4 + %or = or i32 %2, %3 + store i32 %or, i32* %0, align 4 + %4 = load i32, i32* @a, align 4 + %lnot.1 = icmp eq i32 %4, 0 + %lnot.ext.1 = zext i1 %lnot.1 to i32 + %shr.i.1 = lshr i32 2072, %lnot.ext.1 + %call.lobit.1 = lshr i32 %shr.i.1, 7 + %5 = and i32 %call.lobit.1, 1 + %or.1 = or i32 %5, %or + store i32 %or.1, i32* %0, align 4 + ret i32 %or.1 + +; Check that both InstCombine and InstSimplify can use computeKnownBits to +; realize that: +; ((2072 >> (L == 0)) >> 7) & 1 +; is always zero. + +; CHECK-LABEL: @main +; CHECK: %[[V1:[0-9]+]] = load i32*, i32** @b, align 8 +; CHECK: %[[V2:[0-9]+]] = load i32, i32* %[[V1]], align 4 +; CHECK: ret i32 %[[V2]] +} + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind } + diff --git a/llvm/test/Transforms/InstCombine/div.ll b/llvm/test/Transforms/InstCombine/div.ll index 3194c015fd7..27a316113e5 100644 --- a/llvm/test/Transforms/InstCombine/div.ll +++ b/llvm/test/Transforms/InstCombine/div.ll @@ -270,9 +270,7 @@ define <2 x i32> @test31(<2 x i32> %x) { %div = udiv <2 x i32> %shr, <i32 2147483647, i32 2147483647> ret <2 x i32> %div ; CHECK-LABEL: @test31( -; CHECK-NEXT: %[[shr:.*]] = lshr <2 x i32> %x, <i32 31, i32 31> -; CHECK-NEXT: udiv <2 x i32> %[[shr]], <i32 2147483647, i32 2147483647> -; CHECK-NEXT: ret <2 x i32> +; CHECK-NEXT: ret <2 x i32> zeroinitializer } define i32 @test32(i32 %a, i32 %b) { diff --git a/llvm/test/Transforms/InstCombine/load-combine-metadata.ll b/llvm/test/Transforms/InstCombine/load-combine-metadata.ll index 9b9c1fe607b..24b26fa4213 100644 --- a/llvm/test/Transforms/InstCombine/load-combine-metadata.ll +++ b/llvm/test/Transforms/InstCombine/load-combine-metadata.ll @@ -17,9 +17,9 @@ define void @test_load_load_combine_metadata(i32*, i32*, i32*) { ret void } -; CHECK: ![[RANGE]] = !{i32 0, i32 1, i32 8, i32 9} -!0 = !{ i32 0, i32 1 } -!1 = !{ i32 8, i32 9 } +; CHECK: ![[RANGE]] = !{i32 0, i32 5, i32 7, i32 9} +!0 = !{ i32 0, i32 5 } +!1 = !{ i32 7, i32 9 } !2 = !{!2} !3 = !{!3, !2} !4 = !{!4, !2} diff --git a/llvm/test/Transforms/InstSimplify/shift-128-kb.ll b/llvm/test/Transforms/InstSimplify/shift-128-kb.ll new file mode 100644 index 00000000000..3f69ecccaf5 --- /dev/null +++ b/llvm/test/Transforms/InstSimplify/shift-128-kb.ll @@ -0,0 +1,22 @@ +; RUN: opt -S -instsimplify < %s | FileCheck %s + +target datalayout = "E-m:e-i64:64-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +define zeroext i1 @_Z10isNegativemj(i64 %Val, i32 zeroext %IntegerBitWidth) { +entry: + %conv = zext i32 %IntegerBitWidth to i64 + %sub = sub i64 128, %conv + %conv1 = trunc i64 %sub to i32 + %conv2 = zext i64 %Val to i128 + %sh_prom = zext i32 %conv1 to i128 + %shl = shl i128 %conv2, %sh_prom + %shr = ashr i128 %shl, %sh_prom + %cmp = icmp slt i128 %shr, 0 + ret i1 %cmp +} + +; CHECK-LABEL: @_Z10isNegativemj +; CHECK-NOT: ret i1 false +; CHECK: ret i1 %cmp + |

