author    Roman Lebedev <lebedev.ri@gmail.com>    2019-07-19 08:26:37 +0000
committer Roman Lebedev <lebedev.ri@gmail.com>    2019-07-19 08:26:37 +0000
commit    3c212ce3054a94d8f501a8d0184c4e298964e8ec (patch)
tree      d08b1baa0e7a47f0b555db6370d5e45865765c19
parent    2ebe57386d65a60906036cb04d66c84da82c8fff (diff)
[InstCombine] Dropping redundant masking before left-shift [3/5] (PR42563)
Summary:
If we have some pattern that leaves only some low bits set, and then performs
a left-shift of those bits, and none of the bits that remain after the final
shift are modified by the mask, we can omit the mask.

There are many variants of this pattern; this patch handles variant d:
d. `(x & ((-1 << MaskShAmt) >> MaskShAmt)) << ShiftShAmt`

All these patterns can be simplified to just:
`x << ShiftShAmt`
iff:
d. `(ShiftShAmt-MaskShAmt) s>= 0` (i.e. `ShiftShAmt u>= MaskShAmt`)

alive proofs:
d: https://rise4fun.com/Alive/I5Y

For now, let's start with patterns where both shift amounts are variable, with
a trivial constant "offset" between them, since I believe this is both the
simplest case to handle and the most common one. But again, there are likely
other variants where we could use ValueTracking/ConstantRange to handle more
cases.

https://bugs.llvm.org/show_bug.cgi?id=42563

Differential Revision: https://reviews.llvm.org/D64519

llvm-svn: 366538
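(Illustration only, not part of the patch: a minimal standalone C++ sketch of the arithmetic identity behind variant d on 32-bit unsigned values. The function name `maskedShift` and the sample inputs are hypothetical, chosen just for this example; it brute-forces every pair of shift amounts satisfying the `ShiftShAmt u>= MaskShAmt` condition and checks that the mask can indeed be dropped.)

#include <cassert>
#include <cstdint>

// Variant d at the C level: mask x with ((-1 << m) >> m) (logical shifts),
// then shift left by s. When s >= m, every bit cleared by the mask would
// have been shifted out anyway, so the mask is redundant.
static uint32_t maskedShift(uint32_t x, uint32_t m, uint32_t s) {
  uint32_t mask = (~0u << m) >> m; // keeps only the low (32 - m) bits
  return (x & mask) << s;
}

int main() {
  const uint32_t samples[] = {0u, 1u, 0x12345678u, 0xDEADBEEFu, 0xFFFFFFFFu};
  for (uint32_t x : samples)
    for (uint32_t m = 0; m < 32; ++m)
      for (uint32_t s = m; s < 32; ++s) // the iff condition: s >= m
        assert(maskedShift(x, m, s) == x << s);
  return 0;
}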
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp                             8
-rw-r--r--  llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-d.ll  20
2 files changed, 16 insertions(+), 12 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
index b94febf786e..5d7624c1ebc 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -73,11 +73,12 @@ reassociateShiftAmtsOfTwoSameDirectionShifts(BinaryOperator *Sh0,
// a) (x & ((1 << MaskShAmt) - 1)) << ShiftShAmt
// b) (x & (~(-1 << MaskShAmt))) << ShiftShAmt
// c) (x & (-1 >> MaskShAmt)) << ShiftShAmt
+// d) (x & ((-1 << MaskShAmt) >> MaskShAmt)) << ShiftShAmt
// All these patterns can be simplified to just:
// x << ShiftShAmt
// iff:
// a,b) (MaskShAmt+ShiftShAmt) u>= bitwidth(x)
-// c) (ShiftShAmt-MaskShAmt) s>= 0 (i.e. ShiftShAmt u>= MaskShAmt)
+// c,d) (ShiftShAmt-MaskShAmt) s>= 0 (i.e. ShiftShAmt u>= MaskShAmt)
static Instruction *
dropRedundantMaskingOfLeftShiftInput(BinaryOperator *OuterShift,
const SimplifyQuery &SQ) {
@@ -95,6 +96,9 @@ dropRedundantMaskingOfLeftShiftInput(BinaryOperator *OuterShift,
auto MaskB = m_Xor(m_Shl(m_AllOnes(), m_Value(MaskShAmt)), m_AllOnes());
// (-1 >> MaskShAmt)
auto MaskC = m_Shr(m_AllOnes(), m_Value(MaskShAmt));
+ // ((-1 << MaskShAmt) >> MaskShAmt)
+ auto MaskD =
+ m_Shr(m_Shl(m_AllOnes(), m_Value(MaskShAmt)), m_Deferred(MaskShAmt));

Value *X;
if (match(Masked, m_c_And(m_CombineOr(MaskA, MaskB), m_Value(X)))) {
@@ -111,7 +115,7 @@ dropRedundantMaskingOfLeftShiftInput(BinaryOperator *OuterShift,
APInt(BitWidth, BitWidth))))
return nullptr;
// All good, we can do this fold.
- } else if (match(Masked, m_c_And(MaskC, m_Value(X)))) {
+ } else if (match(Masked, m_c_And(m_CombineOr(MaskC, MaskD), m_Value(X)))) {
// Can we simplify (ShiftShAmt-MaskShAmt) ?
Value *ShAmtsDiff =
SimplifySubInst(ShiftShAmt, MaskShAmt, /*IsNSW=*/false, /*IsNUW=*/false,
diff --git a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-d.ll b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-d.ll
index 601e149274c..1f0446cb0ec 100644
--- a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-d.ll
+++ b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-d.ll
@@ -23,7 +23,7 @@ define i32 @t0_basic(i32 %x, i32 %nbits) {
; CHECK-NEXT: call void @use32(i32 [[T0]])
; CHECK-NEXT: call void @use32(i32 [[T1]])
; CHECK-NEXT: call void @use32(i32 [[T2]])
-; CHECK-NEXT: [[T4:%.*]] = shl i32 [[T2]], [[NBITS]]
+; CHECK-NEXT: [[T4:%.*]] = shl i32 [[X]], [[NBITS]]
; CHECK-NEXT: ret i32 [[T4]]
;
%t0 = shl i32 -1, %nbits
@@ -46,7 +46,7 @@ define i32 @t1_bigger_shift(i32 %x, i32 %nbits) {
; CHECK-NEXT: call void @use32(i32 [[T1]])
; CHECK-NEXT: call void @use32(i32 [[T2]])
; CHECK-NEXT: call void @use32(i32 [[T3]])
-; CHECK-NEXT: [[T4:%.*]] = shl i32 [[T2]], [[T3]]
+; CHECK-NEXT: [[T4:%.*]] = shl i32 [[X]], [[T3]]
; CHECK-NEXT: ret i32 [[T4]]
;
%t0 = shl i32 -1, %nbits
@@ -75,7 +75,7 @@ define <3 x i32> @t2_vec_splat(<3 x i32> %x, <3 x i32> %nbits) {
; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T1]])
; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T2]])
; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T3]])
-; CHECK-NEXT: [[T4:%.*]] = shl <3 x i32> [[T2]], [[T3]]
+; CHECK-NEXT: [[T4:%.*]] = shl <3 x i32> [[X]], [[T3]]
; CHECK-NEXT: ret <3 x i32> [[T4]]
;
%t0 = shl <3 x i32> <i32 -1, i32 -1, i32 -1>, %nbits
@@ -100,7 +100,7 @@ define <3 x i32> @t3_vec_nonsplat(<3 x i32> %x, <3 x i32> %nbits) {
; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T1]])
; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T2]])
; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T3]])
-; CHECK-NEXT: [[T4:%.*]] = shl <3 x i32> [[T2]], [[T3]]
+; CHECK-NEXT: [[T4:%.*]] = shl <3 x i32> [[X]], [[T3]]
; CHECK-NEXT: ret <3 x i32> [[T4]]
;
%t0 = shl <3 x i32> <i32 -1, i32 -1, i32 -1>, %nbits
@@ -124,7 +124,7 @@ define <3 x i32> @t4_vec_undef(<3 x i32> %x, <3 x i32> %nbits) {
; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T1]])
; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T2]])
; CHECK-NEXT: call void @use3xi32(<3 x i32> [[NBITS]])
-; CHECK-NEXT: [[T4:%.*]] = shl <3 x i32> [[T2]], [[NBITS]]
+; CHECK-NEXT: [[T4:%.*]] = shl <3 x i32> [[X]], [[NBITS]]
; CHECK-NEXT: ret <3 x i32> [[T4]]
;
%t0 = shl <3 x i32> <i32 -1, i32 undef, i32 -1>, %nbits
@@ -152,7 +152,7 @@ define i32 @t5_commutativity0(i32 %nbits) {
; CHECK-NEXT: call void @use32(i32 [[T0]])
; CHECK-NEXT: call void @use32(i32 [[T1]])
; CHECK-NEXT: call void @use32(i32 [[T2]])
-; CHECK-NEXT: [[T3:%.*]] = shl i32 [[T2]], [[NBITS]]
+; CHECK-NEXT: [[T3:%.*]] = shl i32 [[X]], [[NBITS]]
; CHECK-NEXT: ret i32 [[T3]]
;
%x = call i32 @gen32()
@@ -178,7 +178,7 @@ define i32 @t6_commutativity1(i32 %nbits0, i32 %nbits1) {
; CHECK-NEXT: call void @use32(i32 [[T2]])
; CHECK-NEXT: call void @use32(i32 [[T3]])
; CHECK-NEXT: call void @use32(i32 [[T4]])
-; CHECK-NEXT: [[T5:%.*]] = shl i32 [[T4]], [[NBITS0]]
+; CHECK-NEXT: [[T5:%.*]] = shl i32 [[T3]], [[NBITS0]]
; CHECK-NEXT: ret i32 [[T5]]
;
%t0 = shl i32 -1, %nbits0
@@ -233,7 +233,7 @@ define i32 @t8_nuw(i32 %x, i32 %nbits) {
; CHECK-NEXT: call void @use32(i32 [[T0]])
; CHECK-NEXT: call void @use32(i32 [[T1]])
; CHECK-NEXT: call void @use32(i32 [[T2]])
-; CHECK-NEXT: [[T3:%.*]] = shl nuw i32 [[T2]], [[NBITS]]
+; CHECK-NEXT: [[T3:%.*]] = shl i32 [[X]], [[NBITS]]
; CHECK-NEXT: ret i32 [[T3]]
;
%t0 = shl i32 -1, %nbits
@@ -254,7 +254,7 @@ define i32 @t9_nsw(i32 %x, i32 %nbits) {
; CHECK-NEXT: call void @use32(i32 [[T0]])
; CHECK-NEXT: call void @use32(i32 [[T1]])
; CHECK-NEXT: call void @use32(i32 [[T2]])
-; CHECK-NEXT: [[T3:%.*]] = shl nsw i32 [[T2]], [[NBITS]]
+; CHECK-NEXT: [[T3:%.*]] = shl i32 [[X]], [[NBITS]]
; CHECK-NEXT: ret i32 [[T3]]
;
%t0 = shl i32 -1, %nbits
@@ -275,7 +275,7 @@ define i32 @t10_nuw_nsw(i32 %x, i32 %nbits) {
; CHECK-NEXT: call void @use32(i32 [[T0]])
; CHECK-NEXT: call void @use32(i32 [[T1]])
; CHECK-NEXT: call void @use32(i32 [[T2]])
-; CHECK-NEXT: [[T3:%.*]] = shl nuw nsw i32 [[T2]], [[NBITS]]
+; CHECK-NEXT: [[T3:%.*]] = shl i32 [[X]], [[NBITS]]
; CHECK-NEXT: ret i32 [[T3]]
;
%t0 = shl i32 -1, %nbits