summaryrefslogtreecommitdiffstats
path: root/llvm
diff options
context:
space:
mode:
Diffstat (limited to 'llvm')
-rw-r--r--llvm/lib/Analysis/InstructionSimplify.cpp17
-rw-r--r--llvm/test/Transforms/InstSimplify/shift.ll12
2 files changed, 19 insertions, 10 deletions
diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index 6ebae37a6a8..7fc7c15a0c2 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -1325,6 +1325,23 @@ static Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
if (match(Op0, m_NUWShl(m_Value(X), m_Specific(Op1))))
return X;
+ // ((X << A) | Y) >> A -> X if effective width of Y is not larger than A.
+ // We can return X as we do in the above case since OR alters no bits in X.
+ // SimplifyDemandedBits in InstCombine can do more general optimization for
+ // bit manipulation. This pattern aims to provide opportunities for other
+ // optimizers by supporting a simple but common case in InstSimplify.
+ Value *Y;
+ const APInt *ShRAmt, *ShLAmt;
+ if (match(Op1, m_APInt(ShRAmt)) &&
+ match(Op0, m_c_Or(m_NUWShl(m_Value(X), m_APInt(ShLAmt)), m_Value(Y))) &&
+ *ShRAmt == *ShLAmt) {
+ const KnownBits YKnown = computeKnownBits(Y, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
+ const unsigned Width = Op0->getType()->getScalarSizeInBits();
+ const unsigned EffWidthY = Width - YKnown.countMinLeadingZeros();
+ if (EffWidthY <= ShRAmt->getZExtValue())
+ return X;
+ }
+
return nullptr;
}
diff --git a/llvm/test/Transforms/InstSimplify/shift.ll b/llvm/test/Transforms/InstSimplify/shift.ll
index f49c76bf625..cbffd371853 100644
--- a/llvm/test/Transforms/InstSimplify/shift.ll
+++ b/llvm/test/Transforms/InstSimplify/shift.ll
@@ -178,11 +178,7 @@ define <2 x i8> @shl_by_sext_bool_vec(<2 x i1> %x, <2 x i8> %y) {
define i64 @shl_or_shr(i32 %a, i32 %b) {
; CHECK-LABEL: @shl_or_shr(
; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[A:%.*]] to i64
-; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[B:%.*]] to i64
-; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP1]], 32
-; CHECK-NEXT: [[TMP4:%.*]] = or i64 [[TMP2]], [[TMP3]]
-; CHECK-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP4]], 32
-; CHECK-NEXT: ret i64 [[TMP5]]
+; CHECK-NEXT: ret i64 [[TMP1]]
;
%tmp1 = zext i32 %a to i64
%tmp2 = zext i32 %b to i64
@@ -214,11 +210,7 @@ define i64 @shl_or_shr2(i32 %a, i32 %b) {
define <2 x i64> @shl_or_shr1v(<2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: @shl_or_shr1v(
; CHECK-NEXT: [[TMP1:%.*]] = zext <2 x i32> [[A:%.*]] to <2 x i64>
-; CHECK-NEXT: [[TMP2:%.*]] = zext <2 x i32> [[B:%.*]] to <2 x i64>
-; CHECK-NEXT: [[TMP3:%.*]] = shl nuw <2 x i64> [[TMP1]], <i64 32, i64 32>
-; CHECK-NEXT: [[TMP4:%.*]] = or <2 x i64> [[TMP3]], [[TMP2]]
-; CHECK-NEXT: [[TMP5:%.*]] = lshr <2 x i64> [[TMP4]], <i64 32, i64 32>
-; CHECK-NEXT: ret <2 x i64> [[TMP5]]
+; CHECK-NEXT: ret <2 x i64> [[TMP1]]
;
%tmp1 = zext <2 x i32> %a to <2 x i64>
%tmp2 = zext <2 x i32> %b to <2 x i64>
OpenPOWER on IntegriCloud