author     Nikita Popov <nikita.ppv@gmail.com>    2018-11-23 22:45:08 +0000
committer  Nikita Popov <nikita.ppv@gmail.com>    2018-11-23 22:45:08 +0000
commit     6e81d421e1738e4e5934f141de580fc546e02f0d (patch)
tree       9a5a8a4c66519fc6c707743e8380bf6f25b1fb35
parent     079bf4b7b4fc3c2ffeb57b02a580408cd7259bf8 (diff)
[InstCombine] Simplify funnel shift with zero/undef operand to shift
The following simplifications are implemented:

* `fshl(X, 0, C) -> shl X, C%BW`
* `fshl(X, undef, C) -> shl X, C%BW` (assuming undef = 0)
* `fshl(0, X, C) -> lshr X, BW-C%BW`
* `fshl(undef, X, C) -> lshr X, BW-C%BW` (assuming undef = 0)
* `fshr(X, 0, C) -> shl X, BW-C%BW`
* `fshr(X, undef, C) -> shl X, BW-C%BW` (assuming undef = 0)
* `fshr(0, X, C) -> lshr X, C%BW`
* `fshr(undef, X, C) -> lshr X, C%BW` (assuming undef = 0)

The simplification is only performed if the shift amount C is constant, because we can explicitly compute C%BW and BW-C%BW in this case.

Differential Revision: https://reviews.llvm.org/D54778

llvm-svn: 347505
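To illustrate the identities above, here is a minimal standalone C++ sketch of the funnel-shift semantics for BW = 32, checking each zero-operand case against a plain shift. The ref_fshl/ref_fshr helpers are hypothetical reference models of the intrinsics written for this note, not LLVM API:

#include <cassert>
#include <cstdint>

// Hypothetical reference model of the 32-bit funnel shifts: concatenate
// X:Y (X in the high half), shift by C modulo 32, and keep the high
// (fshl) or low (fshr) 32 bits of the result.
static uint32_t ref_fshl(uint32_t X, uint32_t Y, uint32_t C) {
  uint64_t Concat = (uint64_t(X) << 32) | Y;
  return uint32_t((Concat << (C % 32)) >> 32);
}
static uint32_t ref_fshr(uint32_t X, uint32_t Y, uint32_t C) {
  uint64_t Concat = (uint64_t(X) << 32) | Y;
  return uint32_t(Concat >> (C % 32));
}

int main() {
  const uint32_t BW = 32;
  for (uint32_t X : {0u, 1u, 0xdeadbeefu, ~0u}) {
    for (uint32_t C = 1; C < BW; ++C) {              // nonzero constant shift, as in the patch
      assert(ref_fshl(X, 0, C) == X << (C % BW));        // fshl(X, 0, C) -> shl X, C%BW
      assert(ref_fshl(0, X, C) == X >> (BW - C % BW));   // fshl(0, X, C) -> lshr X, BW-C%BW
      assert(ref_fshr(X, 0, C) == X << (BW - C % BW));   // fshr(X, 0, C) -> shl X, BW-C%BW
      assert(ref_fshr(0, X, C) == X >> (C % BW));        // fshr(0, X, C) -> lshr X, C%BW
    }
  }
}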
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp  23
-rw-r--r--  llvm/test/Transforms/InstCombine/fsh.ll               54
2 files changed, 68 insertions, 9 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index a99eaf013e6..cbaa0ddaa28 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1992,6 +1992,29 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::fshl:
case Intrinsic::fshr: {
+ const APInt *SA;
+ if (match(II->getArgOperand(2), m_APInt(SA))) {
+ Value *Op0 = II->getArgOperand(0), *Op1 = II->getArgOperand(1);
+ unsigned BitWidth = SA->getBitWidth();
+ uint64_t ShiftAmt = SA->urem(BitWidth);
+ assert(ShiftAmt != 0 && "SimplifyCall should have handled zero shift");
+ // Normalize to funnel shift left.
+ if (II->getIntrinsicID() == Intrinsic::fshr)
+ ShiftAmt = BitWidth - ShiftAmt;
+
+ // fshl(X, 0, C) -> shl X, C
+ // fshl(X, undef, C) -> shl X, C
+ if (match(Op1, m_Zero()) || match(Op1, m_Undef()))
+ return BinaryOperator::CreateShl(
+ Op0, ConstantInt::get(II->getType(), ShiftAmt));
+
+ // fshl(0, X, C) -> lshr X, (BW-C)
+ // fshl(undef, X, C) -> lshr X, (BW-C)
+ if (match(Op0, m_Zero()) || match(Op0, m_Undef()))
+ return BinaryOperator::CreateLShr(
+ Op1, ConstantInt::get(II->getType(), BitWidth - ShiftAmt));
+ }
+
// The shift amount (operand 2) of a funnel shift is modulo the bitwidth,
// so only the low bits of the shift amount are demanded if the bitwidth is
// a power-of-2.
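As a side note on the demanded-bits comment in the context above: when the bitwidth is a power of two, C % BW equals C & (BW - 1), which is why only the low log2(BW) bits of the shift amount matter. A small sketch (not part of the patch) checking that identity for BW = 32:

#include <cassert>
#include <cstdint>

int main() {
  // For a power-of-two bitwidth, the modulo reduces to masking the low
  // log2(BW) bits, so higher bits of the shift amount are not demanded.
  const uint32_t BW = 32;
  for (uint32_t C = 0; C < 4 * BW; ++C)
    assert(C % BW == (C & (BW - 1)));
}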
diff --git a/llvm/test/Transforms/InstCombine/fsh.ll b/llvm/test/Transforms/InstCombine/fsh.ll
index 0d85613d348..db9b92e7076 100644
--- a/llvm/test/Transforms/InstCombine/fsh.ll
+++ b/llvm/test/Transforms/InstCombine/fsh.ll
@@ -141,11 +141,11 @@ define <2 x i31> @fshl_set_but_not_demanded_vec(<2 x i31> %x, <2 x i31> %y, <2 x
ret <2 x i31> %r
}
-; Simplify one undef operand and constant shift amount.
+; Simplify one undef or zero operand and constant shift amount.
define i32 @fshl_op0_undef(i32 %x) {
; CHECK-LABEL: @fshl_op0_undef(
-; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.fshl.i32(i32 undef, i32 [[X:%.*]], i32 7)
+; CHECK-NEXT: [[R:%.*]] = lshr i32 [[X:%.*]], 25
; CHECK-NEXT: ret i32 [[R]]
;
%r = call i32 @llvm.fshl.i32(i32 undef, i32 %x, i32 7)
@@ -154,7 +154,7 @@ define i32 @fshl_op0_undef(i32 %x) {
define i32 @fshl_op0_zero(i32 %x) {
; CHECK-LABEL: @fshl_op0_zero(
-; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.fshl.i32(i32 0, i32 [[X:%.*]], i32 7)
+; CHECK-NEXT: [[R:%.*]] = lshr i32 [[X:%.*]], 25
; CHECK-NEXT: ret i32 [[R]]
;
%r = call i32 @llvm.fshl.i32(i32 0, i32 %x, i32 7)
@@ -163,7 +163,7 @@ define i32 @fshl_op0_zero(i32 %x) {
define i33 @fshr_op0_undef(i33 %x) {
; CHECK-LABEL: @fshr_op0_undef(
-; CHECK-NEXT: [[R:%.*]] = call i33 @llvm.fshr.i33(i33 undef, i33 [[X:%.*]], i33 7)
+; CHECK-NEXT: [[R:%.*]] = lshr i33 [[X:%.*]], 7
; CHECK-NEXT: ret i33 [[R]]
;
%r = call i33 @llvm.fshr.i33(i33 undef, i33 %x, i33 7)
@@ -172,7 +172,7 @@ define i33 @fshr_op0_undef(i33 %x) {
define i33 @fshr_op0_zero(i33 %x) {
; CHECK-LABEL: @fshr_op0_zero(
-; CHECK-NEXT: [[R:%.*]] = call i33 @llvm.fshr.i33(i33 0, i33 [[X:%.*]], i33 7)
+; CHECK-NEXT: [[R:%.*]] = lshr i33 [[X:%.*]], 7
; CHECK-NEXT: ret i33 [[R]]
;
%r = call i33 @llvm.fshr.i33(i33 0, i33 %x, i33 7)
@@ -181,7 +181,7 @@ define i33 @fshr_op0_zero(i33 %x) {
define i32 @fshl_op1_undef(i32 %x) {
; CHECK-LABEL: @fshl_op1_undef(
-; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 undef, i32 7)
+; CHECK-NEXT: [[R:%.*]] = shl i32 [[X:%.*]], 7
; CHECK-NEXT: ret i32 [[R]]
;
%r = call i32 @llvm.fshl.i32(i32 %x, i32 undef, i32 7)
@@ -190,7 +190,7 @@ define i32 @fshl_op1_undef(i32 %x) {
define i32 @fshl_op1_zero(i32 %x) {
; CHECK-LABEL: @fshl_op1_zero(
-; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 0, i32 7)
+; CHECK-NEXT: [[R:%.*]] = shl i32 [[X:%.*]], 7
; CHECK-NEXT: ret i32 [[R]]
;
%r = call i32 @llvm.fshl.i32(i32 %x, i32 0, i32 7)
@@ -199,7 +199,7 @@ define i32 @fshl_op1_zero(i32 %x) {
define i33 @fshr_op1_undef(i33 %x) {
; CHECK-LABEL: @fshr_op1_undef(
-; CHECK-NEXT: [[R:%.*]] = call i33 @llvm.fshr.i33(i33 [[X:%.*]], i33 undef, i33 7)
+; CHECK-NEXT: [[R:%.*]] = shl i33 [[X:%.*]], 26
; CHECK-NEXT: ret i33 [[R]]
;
%r = call i33 @llvm.fshr.i33(i33 %x, i33 undef, i33 7)
@@ -208,13 +208,49 @@ define i33 @fshr_op1_undef(i33 %x) {
define i33 @fshr_op1_zero(i33 %x) {
; CHECK-LABEL: @fshr_op1_zero(
-; CHECK-NEXT: [[R:%.*]] = call i33 @llvm.fshr.i33(i33 [[X:%.*]], i33 0, i33 7)
+; CHECK-NEXT: [[R:%.*]] = shl i33 [[X:%.*]], 26
; CHECK-NEXT: ret i33 [[R]]
;
%r = call i33 @llvm.fshr.i33(i33 %x, i33 0, i33 7)
ret i33 %r
}
+define <2 x i31> @fshl_op0_zero_vec(<2 x i31> %x) {
+; CHECK-LABEL: @fshl_op0_zero_vec(
+; CHECK-NEXT: [[R:%.*]] = lshr <2 x i31> [[X:%.*]], <i31 24, i31 24>
+; CHECK-NEXT: ret <2 x i31> [[R]]
+;
+ %r = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> zeroinitializer, <2 x i31> %x, <2 x i31> <i31 7, i31 7>)
+ ret <2 x i31> %r
+}
+
+define <2 x i31> @fshl_op1_undef_vec(<2 x i31> %x) {
+; CHECK-LABEL: @fshl_op1_undef_vec(
+; CHECK-NEXT: [[R:%.*]] = shl <2 x i31> [[X:%.*]], <i31 7, i31 7>
+; CHECK-NEXT: ret <2 x i31> [[R]]
+;
+ %r = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> %x, <2 x i31> undef, <2 x i31> <i31 7, i31 7>)
+ ret <2 x i31> %r
+}
+
+define <2 x i32> @fshr_op0_undef_vec(<2 x i32> %x) {
+; CHECK-LABEL: @fshr_op0_undef_vec(
+; CHECK-NEXT: [[R:%.*]] = lshr <2 x i32> [[X:%.*]], <i32 7, i32 7>
+; CHECK-NEXT: ret <2 x i32> [[R]]
+;
+ %r = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> undef, <2 x i32> %x, <2 x i32> <i32 7, i32 7>)
+ ret <2 x i32> %r
+}
+
+define <2 x i32> @fshr_op1_zero_vec(<2 x i32> %x) {
+; CHECK-LABEL: @fshr_op1_zero_vec(
+; CHECK-NEXT: [[R:%.*]] = shl <2 x i32> [[X:%.*]], <i32 25, i32 25>
+; CHECK-NEXT: ret <2 x i32> [[R]]
+;
+ %r = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> zeroinitializer, <2 x i32> <i32 7, i32 7>)
+ ret <2 x i32> %r
+}
+
; Only demand bits from one of the operands.
define i32 @fshl_only_op0_demanded(i32 %x, i32 %y) {