author    Sanjay Patel <spatel@rotateright.com>  2017-10-05 21:11:49 +0000
committer Sanjay Patel <spatel@rotateright.com>  2017-10-05 21:11:49 +0000
commit    7ac2db6a489f3f8e588589a69164882df7973d34 (patch)
tree      78f6b05875a1bba27a6bc111b8165826a1df9a57 /llvm/lib
parent    1917176d4796d083441547aaf8da5bac65dbf281 (diff)
[InstCombine] improve folds for icmp gt/lt (shr X, C1), C2
We can always eliminate the shift in: icmp gt/lt (shr X, C1), C2 --> icmp gt/lt X, C'

This patch was supposed to just be an efficiency improvement because we were
doing this 3-step process to fold:

IC: Visiting: %c = icmp ugt i4 %s, 1
IC: ADD: %s = lshr i4 %x, 1
IC: ADD: %1 = udiv i4 %x, 2
IC: Old = %c = icmp ugt i4 %1, 1
    New = <badref> = icmp uge i4 %x, 4
IC: ADD: %c = icmp uge i4 %x, 4
IC: ERASE %2 = icmp ugt i4 %1, 1
IC: Visiting: %c = icmp uge i4 %x, 4
IC: Old = %c = icmp uge i4 %x, 4
    New = <badref> = icmp ugt i4 %x, 3
IC: ADD: %c = icmp ugt i4 %x, 3
IC: ERASE %2 = icmp uge i4 %x, 4
IC: Visiting: %c = icmp ugt i4 %x, 3
IC: DCE: %1 = udiv i4 %x, 2
IC: ERASE %1 = udiv i4 %x, 2
IC: DCE: %s = lshr i4 %x, 1
IC: ERASE %s = lshr i4 %x, 1
IC: Visiting: ret i1 %c

When we could go directly to canonical icmp form:

IC: Visiting: %c = icmp ugt i4 %s, 1
IC: Old = %c = icmp ugt i4 %s, 1
    New = <badref> = icmp ugt i4 %x, 3
IC: ADD: %c = icmp ugt i4 %x, 3
IC: ERASE %1 = icmp ugt i4 %s, 1
IC: ADD: %s = lshr i4 %x, 1
IC: DCE: %s = lshr i4 %x, 1
IC: ERASE %s = lshr i4 %x, 1
IC: Visiting: %c = icmp ugt i4 %x, 3

...but then I noticed that the folds were incomplete too:
https://godbolt.org/g/aB2hLE

Here are attempts to prove the logic with Alive:
https://rise4fun.com/Alive/92o

Name: lshr_ult
Pre: ((C2 << C1) u>> C1) == C2
%sh = lshr i8 %x, C1
%r = icmp ult i8 %sh, C2
  =>
%r = icmp ult i8 %x, (C2 << C1)

Name: ashr_slt
Pre: ((C2 << C1) >> C1) == C2
%sh = ashr i8 %x, C1
%r = icmp slt i8 %sh, C2
  =>
%r = icmp slt i8 %x, (C2 << C1)

Name: lshr_ugt
Pre: (((C2+1) << C1) u>> C1) == (C2+1)
%sh = lshr i8 %x, C1
%r = icmp ugt i8 %sh, C2
  =>
%r = icmp ugt i8 %x, ((C2+1) << C1) - 1

Name: ashr_sgt
Pre: (C2 != 127) && ((C2+1) << C1 != -128) && (((C2+1) << C1) >> C1) == (C2+1)
%sh = ashr i8 %x, C1
%r = icmp sgt i8 %sh, C2
  =>
%r = icmp sgt i8 %x, ((C2+1) << C1) - 1

Name: ashr_exact_sgt
Pre: ((C2 << C1) >> C1) == C2
%sh = ashr exact i8 %x, C1
%r = icmp sgt i8 %sh, C2
  =>
%r = icmp sgt i8 %x, (C2 << C1)

Name: ashr_exact_slt
Pre: ((C2 << C1) >> C1) == C2
%sh = ashr exact i8 %x, C1
%r = icmp slt i8 %sh, C2
  =>
%r = icmp slt i8 %x, (C2 << C1)

Name: lshr_exact_ugt
Pre: ((C2 << C1) u>> C1) == C2
%sh = lshr exact i8 %x, C1
%r = icmp ugt i8 %sh, C2
  =>
%r = icmp ugt i8 %x, (C2 << C1)

Name: lshr_exact_ult
Pre: ((C2 << C1) u>> C1) == C2
%sh = lshr exact i8 %x, C1
%r = icmp ult i8 %sh, C2
  =>
%r = icmp ult i8 %x, (C2 << C1)

We did something similar for 'shl' in D28406.

Differential Revision: https://reviews.llvm.org/D38514

llvm-svn: 315021
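
To make the general pattern concrete, here is a minimal before/after sketch of the lshr_ugt fold above (the @src/@tgt function names and the i8 / C1 = 2 / C2 = 3 choices are illustrative, not taken from the patch):

define i1 @src(i8 %x) {
  %sh = lshr i8 %x, 2          ; C1 = 2
  %r = icmp ugt i8 %sh, 3      ; C2 = 3
  ret i1 %r
}

define i1 @tgt(i8 %x) {
  ; ((C2+1) << C1) - 1 = (4 << 2) - 1 = 15, so the shift disappears
  %r = icmp ugt i8 %x, 15
  ret i1 %r
}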
Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp  77
1 file changed, 40 insertions, 37 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 20d8a2785b8..67b5e5c130b 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -2011,43 +2011,46 @@ Instruction *InstCombiner::foldICmpShrConstant(ICmpInst &Cmp,
     return nullptr;
   bool IsAShr = Shr->getOpcode() == Instruction::AShr;
-  if (!Cmp.isEquality()) {
-    // If we have an unsigned comparison and an ashr, we can't simplify this.
-    // Similarly for signed comparisons with lshr.
-    if (Cmp.isSigned() != IsAShr)
-      return nullptr;
-
-    // Otherwise, all lshr and most exact ashr's are equivalent to a udiv/sdiv
-    // by a power of 2. Since we already have logic to simplify these,
-    // transform to div and then simplify the resultant comparison.
-    if (IsAShr && (!Shr->isExact() || ShAmtVal == TypeBits - 1))
-      return nullptr;
-
-    // Revisit the shift (to delete it).
-    Worklist.Add(Shr);
-
-    Constant *DivCst = ConstantInt::get(
-        Shr->getType(), APInt::getOneBitSet(TypeBits, ShAmtVal));
-
-    Value *Tmp = IsAShr ? Builder.CreateSDiv(X, DivCst, "", Shr->isExact())
-                        : Builder.CreateUDiv(X, DivCst, "", Shr->isExact());
-
-    Cmp.setOperand(0, Tmp);
-
-    // If the builder folded the binop, just return it.
-    BinaryOperator *TheDiv = dyn_cast<BinaryOperator>(Tmp);
-    if (!TheDiv)
-      return &Cmp;
-
-    // Otherwise, fold this div/compare.
-    assert(TheDiv->getOpcode() == Instruction::SDiv ||
-           TheDiv->getOpcode() == Instruction::UDiv);
-
-    Instruction *Res = foldICmpDivConstant(Cmp, TheDiv, C);
-    assert(Res && "This div/cst should have folded!");
-    return Res;
+  bool IsExact = Shr->isExact();
+  Type *ShrTy = Shr->getType();
+  // TODO: If we could guarantee that InstSimplify would handle all of the
+  // constant-value-based preconditions in the folds below, then we could assert
+  // those conditions rather than checking them. This is difficult because of
+  // undef/poison (PR34838).
+  if (IsAShr) {
+    if (Pred == CmpInst::ICMP_SLT || (Pred == CmpInst::ICMP_SGT && IsExact)) {
+      // icmp slt (ashr X, ShAmtC), C --> icmp slt X, (C << ShAmtC)
+      // icmp sgt (ashr exact X, ShAmtC), C --> icmp sgt X, (C << ShAmtC)
+      APInt ShiftedC = C.shl(ShAmtVal);
+      if (ShiftedC.ashr(ShAmtVal) == C)
+        return new ICmpInst(Pred, X, ConstantInt::get(ShrTy, ShiftedC));
+    }
+    if (Pred == CmpInst::ICMP_SGT) {
+      // icmp sgt (ashr X, ShAmtC), C --> icmp sgt X, ((C + 1) << ShAmtC) - 1
+      APInt ShiftedC = (C + 1).shl(ShAmtVal) - 1;
+      if (!C.isMaxSignedValue() && !(C + 1).shl(ShAmtVal).isMinSignedValue() &&
+          (ShiftedC + 1).ashr(ShAmtVal) == (C + 1))
+        return new ICmpInst(Pred, X, ConstantInt::get(ShrTy, ShiftedC));
+    }
+  } else {
+    if (Pred == CmpInst::ICMP_ULT || (Pred == CmpInst::ICMP_UGT && IsExact)) {
+      // icmp ult (lshr X, ShAmtC), C --> icmp ult X, (C << ShAmtC)
+      // icmp ugt (lshr exact X, ShAmtC), C --> icmp ugt X, (C << ShAmtC)
+      APInt ShiftedC = C.shl(ShAmtVal);
+      if (ShiftedC.lshr(ShAmtVal) == C)
+        return new ICmpInst(Pred, X, ConstantInt::get(ShrTy, ShiftedC));
+    }
+    if (Pred == CmpInst::ICMP_UGT) {
+      // icmp ugt (lshr X, ShAmtC), C --> icmp ugt X, ((C + 1) << ShAmtC) - 1
+      APInt ShiftedC = (C + 1).shl(ShAmtVal) - 1;
+      if ((ShiftedC + 1).lshr(ShAmtVal) == (C + 1))
+        return new ICmpInst(Pred, X, ConstantInt::get(ShrTy, ShiftedC));
+    }
   }
+  if (!Cmp.isEquality())
+    return nullptr;
+
   // Handle equality comparisons of shift-by-constant.
   // If the comparison constant changes with the shift, the comparison cannot
@@ -2060,14 +2063,14 @@ Instruction *InstCombiner::foldICmpShrConstant(ICmpInst &Cmp,
   // Check if the bits shifted out are known to be zero. If so, we can compare
   // against the unshifted value:
   //  (X & 4) >> 1 == 2  --> (X & 4) == 4.
-  Constant *ShiftedCmpRHS = ConstantInt::get(Shr->getType(), C << ShAmtVal);
+  Constant *ShiftedCmpRHS = ConstantInt::get(ShrTy, C << ShAmtVal);
   if (Shr->hasOneUse()) {
     if (Shr->isExact())
       return new ICmpInst(Pred, X, ShiftedCmpRHS);
     // Otherwise strength reduce the shift into an 'and'.
     APInt Val(APInt::getHighBitsSet(TypeBits, TypeBits - ShAmtVal));
-    Constant *Mask = ConstantInt::get(Shr->getType(), Val);
+    Constant *Mask = ConstantInt::get(ShrTy, Val);
     Value *And = Builder.CreateAnd(X, Mask, Shr->getName() + ".mask");
     return new ICmpInst(Pred, And, ShiftedCmpRHS);
   }
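
As a sanity check on the new ICMP_SGT path, here is a worked ashr instance (again an illustrative sketch; @src/@tgt and the chosen constants are not part of the patch). With C = 2 and ShAmtVal = 1, ShiftedC = ((2 + 1) << 1) - 1 = 5, and the preconditions hold: 2 is not the maximum signed i8 value, 6 is not the minimum signed i8 value, and (5 + 1) ashr 1 == 3 == C + 1.

define i1 @src(i8 %x) {
  %sh = ashr i8 %x, 1          ; ShAmtVal = 1
  %r = icmp sgt i8 %sh, 2      ; C = 2
  ret i1 %r
}

define i1 @tgt(i8 %x) {
  ; ShiftedC = ((2 + 1) << 1) - 1 = 5
  %r = icmp sgt i8 %x, 5
  ret i1 %r
}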