author    Simon Pilgrim <llvm-dev@redking.me.uk>    2019-02-10 17:04:00 +0000
committer Simon Pilgrim <llvm-dev@redking.me.uk>    2019-02-10 17:04:00 +0000
commit    5a82a788a28e3b255d4b8448e338e5d3566a2010 (patch)
tree      11edff1dd0e8378ec7f7392c7321efa442c746f3
parent    06a61b0b2b38ff47fc8bdee37ce4e089a821bc9f (diff)
[DAGCombine] Simplify funnel shifts with undef/zero args to bitshifts
Now that we have SimplifyDemandedBits support for funnel shifts (rL353539), we need to simplify funnel shifts back to bitshifts in cases where either argument has been folded to undef/zero.

Differential Revision: https://reviews.llvm.org/D58009

llvm-svn: 353645
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 43
 llvm/test/CodeGen/X86/funnel-shift.ll         | 76
 2 files changed, 77 insertions, 42 deletions
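Before the diff, a minimal LLVM IR sketch of the constant-amount fold (not part of the commit; the function name fshl_undef0_example is made up for illustration). fshl concatenates its first two operands high:low and shifts left, so an undef or zero first operand contributes nothing and only a logical shift right of the second operand by BitWidth - C remains:

; fshl(undef, %x, 9) takes the top 32 bits of (concat(undef, %x) << 9);
; the undef high half drops out, leaving lshr(%x, 32 - 9).
define i32 @fshl_undef0_example(i32 %x) {
  %r = call i32 @llvm.fshl.i32(i32 undef, i32 %x, i32 9)
  ; after this combine: %r = lshr i32 %x, 23
  ret i32 %r
}
declare i32 @llvm.fshl.i32(i32, i32, i32)

This matches the fshl_i32_undef0_cst test below, which now selects a plain shrl $23 instead of a shldl.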
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 96b6aec78d5..be867b24ba6 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -7127,13 +7127,52 @@ SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1)))
return IsFSHL ? N0 : N1;
- // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
+ auto IsUndefOrZero = [](SDValue V) {
+ if (V.isUndef())
+ return true;
+ if (ConstantSDNode *Cst = isConstOrConstSplat(V, /*AllowUndefs*/true))
+ return Cst->getAPIntValue() == 0;
+ return false;
+ };
+
if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) {
+ EVT ShAmtTy = N2.getValueType();
+
+ // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
if (Cst->getAPIntValue().uge(BitWidth)) {
uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth);
return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0, N1,
- DAG.getConstant(RotAmt, SDLoc(N), N2.getValueType()));
+ DAG.getConstant(RotAmt, SDLoc(N), ShAmtTy));
}
+
+ unsigned ShAmt = Cst->getZExtValue();
+ if (ShAmt == 0)
+ return IsFSHL ? N0 : N1;
+
+ // fold fshl(undef_or_zero, N1, C) -> lshr(N1, BW-C)
+ // fold fshr(undef_or_zero, N1, C) -> lshr(N1, C)
+ // fold fshl(N0, undef_or_zero, C) -> shl(N0, C)
+ // fold fshr(N0, undef_or_zero, C) -> shl(N0, BW-C)
+ if (IsUndefOrZero(N0))
+ return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1,
+ DAG.getConstant(IsFSHL ? BitWidth - ShAmt : ShAmt,
+ SDLoc(N), ShAmtTy));
+ if (IsUndefOrZero(N1))
+ return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
+ DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt,
+ SDLoc(N), ShAmtTy));
+ }
+
+ // fold fshr(undef_or_zero, N1, N2) -> lshr(N1, N2)
+ // fold fshl(N0, undef_or_zero, N2) -> shl(N0, N2)
+ // iff we know the shift amount is in range.
+ // TODO: when is it worth doing SUB(BW, N2) as well?
+ if (isPowerOf2_32(BitWidth)) {
+ APInt ModuloBits(N2.getScalarValueSizeInBits(), BitWidth - 1);
+ if (IsUndefOrZero(N0) && !IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
+ return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1, N2);
+ if (IsUndefOrZero(N1) && IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
+ return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N2);
}
// fold (fshl N0, N0, N2) -> (rotl N0, N2)
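The variable-amount folds above only fire when the shift amount is provably in range; for a power-of-two bit width, masking with any subset of BitWidth - 1 is enough for MaskedValueIsZero to prove that. A hedged sketch of the shape that qualifies (the function name fshl_zero1_masked is made up for illustration):

; %m = %n & 7 is provably < 32, so fshl(%a, 0, %m) folds straight to
; shl(%a, %m), with no modulo and no (32 - %m) term needed.
define i32 @fshl_zero1_masked(i32 %a, i32 %n) {
  %m = and i32 %n, 7
  %r = call i32 @llvm.fshl.i32(i32 %a, i32 0, i32 %m)
  ; after this combine: %r = shl i32 %a, %m
  ret i32 %r
}
declare i32 @llvm.fshl.i32(i32, i32, i32)

This is the same shape as the fshl_i32_undef1_msk test below, with zero in place of undef (both are covered by IsUndefOrZero).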
diff --git a/llvm/test/CodeGen/X86/funnel-shift.ll b/llvm/test/CodeGen/X86/funnel-shift.ll
index 531885d3e9d..517880fb88e 100644
--- a/llvm/test/CodeGen/X86/funnel-shift.ll
+++ b/llvm/test/CodeGen/X86/funnel-shift.ll
@@ -404,12 +404,13 @@ define i32 @fshl_i32_undef0_cst(i32 %a0) nounwind {
; X32-SSE2-LABEL: fshl_i32_undef0_cst:
; X32-SSE2: # %bb.0:
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-SSE2-NEXT: shldl $9, %eax, %eax
+; X32-SSE2-NEXT: shrl $23, %eax
; X32-SSE2-NEXT: retl
;
; X64-AVX2-LABEL: fshl_i32_undef0_cst:
; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: shldl $9, %edi, %eax
+; X64-AVX2-NEXT: movl %edi, %eax
+; X64-AVX2-NEXT: shrl $23, %eax
; X64-AVX2-NEXT: retq
%res = call i32 @llvm.fshl.i32(i32 undef, i32 %a0, i32 9)
ret i32 %res
@@ -438,19 +439,18 @@ define i32 @fshl_i32_undef1_msk(i32 %a0, i32 %a1) nounwind {
; X32-SSE2-LABEL: fshl_i32_undef1_msk:
; X32-SSE2: # %bb.0:
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-SSE2-NEXT: andl $7, %ecx
-; X32-SSE2-NEXT: # kill: def $cl killed $cl killed $ecx
-; X32-SSE2-NEXT: shldl %cl, %eax, %eax
+; X32-SSE2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X32-SSE2-NEXT: andb $7, %cl
+; X32-SSE2-NEXT: shll %cl, %eax
; X32-SSE2-NEXT: retl
;
; X64-AVX2-LABEL: fshl_i32_undef1_msk:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: movl %esi, %ecx
; X64-AVX2-NEXT: movl %edi, %eax
-; X64-AVX2-NEXT: andl $7, %ecx
+; X64-AVX2-NEXT: andb $7, %cl
; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx
-; X64-AVX2-NEXT: shldl %cl, %eax, %eax
+; X64-AVX2-NEXT: shll %cl, %eax
; X64-AVX2-NEXT: retq
%m = and i32 %a1, 7
%res = call i32 @llvm.fshl.i32(i32 %a0, i32 undef, i32 %m)
@@ -461,13 +461,13 @@ define i32 @fshl_i32_undef1_cst(i32 %a0) nounwind {
; X32-SSE2-LABEL: fshl_i32_undef1_cst:
; X32-SSE2: # %bb.0:
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-SSE2-NEXT: shldl $9, %eax, %eax
+; X32-SSE2-NEXT: shll $9, %eax
; X32-SSE2-NEXT: retl
;
; X64-AVX2-LABEL: fshl_i32_undef1_cst:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: movl %edi, %eax
-; X64-AVX2-NEXT: shldl $9, %eax, %eax
+; X64-AVX2-NEXT: shll $9, %eax
; X64-AVX2-NEXT: retq
%res = call i32 @llvm.fshl.i32(i32 %a0, i32 undef, i32 9)
ret i32 %res
@@ -513,19 +513,18 @@ define i32 @fshr_i32_undef0_msk(i32 %a0, i32 %a1) nounwind {
; X32-SSE2-LABEL: fshr_i32_undef0_msk:
; X32-SSE2: # %bb.0:
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-SSE2-NEXT: andl $7, %ecx
-; X32-SSE2-NEXT: # kill: def $cl killed $cl killed $ecx
-; X32-SSE2-NEXT: shrdl %cl, %eax, %eax
+; X32-SSE2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X32-SSE2-NEXT: andb $7, %cl
+; X32-SSE2-NEXT: shrl %cl, %eax
; X32-SSE2-NEXT: retl
;
; X64-AVX2-LABEL: fshr_i32_undef0_msk:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: movl %esi, %ecx
; X64-AVX2-NEXT: movl %edi, %eax
-; X64-AVX2-NEXT: andl $7, %ecx
+; X64-AVX2-NEXT: andb $7, %cl
; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx
-; X64-AVX2-NEXT: shrdl %cl, %eax, %eax
+; X64-AVX2-NEXT: shrl %cl, %eax
; X64-AVX2-NEXT: retq
%m = and i32 %a1, 7
%res = call i32 @llvm.fshr.i32(i32 undef, i32 %a0, i32 %m)
@@ -536,13 +535,13 @@ define i32 @fshr_i32_undef0_cst(i32 %a0) nounwind {
; X32-SSE2-LABEL: fshr_i32_undef0_cst:
; X32-SSE2: # %bb.0:
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-SSE2-NEXT: shrdl $9, %eax, %eax
+; X32-SSE2-NEXT: shrl $9, %eax
; X32-SSE2-NEXT: retl
;
; X64-AVX2-LABEL: fshr_i32_undef0_cst:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: movl %edi, %eax
-; X64-AVX2-NEXT: shrdl $9, %eax, %eax
+; X64-AVX2-NEXT: shrl $9, %eax
; X64-AVX2-NEXT: retq
%res = call i32 @llvm.fshr.i32(i32 undef, i32 %a0, i32 9)
ret i32 %res
@@ -592,12 +591,13 @@ define i32 @fshr_i32_undef1_cst(i32 %a0) nounwind {
; X32-SSE2-LABEL: fshr_i32_undef1_cst:
; X32-SSE2: # %bb.0:
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-SSE2-NEXT: shrdl $9, %eax, %eax
+; X32-SSE2-NEXT: shll $23, %eax
; X32-SSE2-NEXT: retl
;
; X64-AVX2-LABEL: fshr_i32_undef1_cst:
; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: shrdl $9, %edi, %eax
+; X64-AVX2-NEXT: movl %edi, %eax
+; X64-AVX2-NEXT: shll $23, %eax
; X64-AVX2-NEXT: retq
%res = call i32 @llvm.fshr.i32(i32 %a0, i32 undef, i32 9)
ret i32 %res
@@ -645,15 +645,14 @@ define i32 @fshl_i32_zero0(i32 %a0, i32 %a1) nounwind {
define i32 @fshl_i32_zero0_cst(i32 %a0) nounwind {
; X32-SSE2-LABEL: fshl_i32_zero0_cst:
; X32-SSE2: # %bb.0:
-; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-SSE2-NEXT: xorl %eax, %eax
-; X32-SSE2-NEXT: shldl $9, %ecx, %eax
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT: shrl $23, %eax
; X32-SSE2-NEXT: retl
;
; X64-AVX2-LABEL: fshl_i32_zero0_cst:
; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: xorl %eax, %eax
-; X64-AVX2-NEXT: shldl $9, %edi, %eax
+; X64-AVX2-NEXT: movl %edi, %eax
+; X64-AVX2-NEXT: shrl $23, %eax
; X64-AVX2-NEXT: retq
%res = call i32 @llvm.fshl.i32(i32 0, i32 %a0, i32 9)
ret i32 %res
@@ -683,15 +682,14 @@ define i32 @fshl_i32_zero1(i32 %a0, i32 %a1) nounwind {
define i32 @fshl_i32_zero1_cst(i32 %a0) nounwind {
; X32-SSE2-LABEL: fshl_i32_zero1_cst:
; X32-SSE2: # %bb.0:
-; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-SSE2-NEXT: xorl %eax, %eax
-; X32-SSE2-NEXT: shrdl $23, %ecx, %eax
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT: shll $9, %eax
; X32-SSE2-NEXT: retl
;
; X64-AVX2-LABEL: fshl_i32_zero1_cst:
; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: xorl %eax, %eax
-; X64-AVX2-NEXT: shrdl $23, %edi, %eax
+; X64-AVX2-NEXT: movl %edi, %eax
+; X64-AVX2-NEXT: shll $9, %eax
; X64-AVX2-NEXT: retq
%res = call i32 @llvm.fshl.i32(i32 %a0, i32 0, i32 9)
ret i32 %res
@@ -721,15 +719,14 @@ define i32 @fshr_i32_zero0(i32 %a0, i32 %a1) nounwind {
define i32 @fshr_i32_zero0_cst(i32 %a0) nounwind {
; X32-SSE2-LABEL: fshr_i32_zero0_cst:
; X32-SSE2: # %bb.0:
-; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-SSE2-NEXT: xorl %eax, %eax
-; X32-SSE2-NEXT: shldl $23, %ecx, %eax
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT: shrl $9, %eax
; X32-SSE2-NEXT: retl
;
; X64-AVX2-LABEL: fshr_i32_zero0_cst:
; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: xorl %eax, %eax
-; X64-AVX2-NEXT: shldl $23, %edi, %eax
+; X64-AVX2-NEXT: movl %edi, %eax
+; X64-AVX2-NEXT: shrl $9, %eax
; X64-AVX2-NEXT: retq
%res = call i32 @llvm.fshr.i32(i32 0, i32 %a0, i32 9)
ret i32 %res
@@ -758,15 +755,14 @@ define i32 @fshr_i32_zero1(i32 %a0, i32 %a1) nounwind {
define i32 @fshr_i32_zero1_cst(i32 %a0) nounwind {
; X32-SSE2-LABEL: fshr_i32_zero1_cst:
; X32-SSE2: # %bb.0:
-; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-SSE2-NEXT: xorl %eax, %eax
-; X32-SSE2-NEXT: shrdl $9, %ecx, %eax
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT: shll $23, %eax
; X32-SSE2-NEXT: retl
;
; X64-AVX2-LABEL: fshr_i32_zero1_cst:
; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: xorl %eax, %eax
-; X64-AVX2-NEXT: shrdl $9, %edi, %eax
+; X64-AVX2-NEXT: movl %edi, %eax
+; X64-AVX2-NEXT: shll $23, %eax
; X64-AVX2-NEXT: retq
%res = call i32 @llvm.fshr.i32(i32 %a0, i32 0, i32 9)
ret i32 %res