[DAGCombiner] fold select-of-constants based on sign-bit test

Examples:
  i32 X > -1 ? C1 : -1 --> (X >>s 31) | C1
  i8 X < 0 ? C1 : 0 --> (X >>s 7) & C1

This is a small generalization of a fold requested in PR43650:
https://bugs.llvm.org/show_bug.cgi?id=43650

The sign-bit of the condition operand can be used as a mask for the true operand:
https://rise4fun.com/Alive/paT

Note that we already handle some of the patterns (isNegative + scalar) because there's an over-specialized, yet over-reaching fold for that in foldSelectCCToShiftAnd(). It doesn't use any TLI hooks, so I can't easily rip out that code even though we're duplicating part of it here.

This fold is guarded by TLI.convertSelectOfConstantsToMath(), so it should not cause problems for targets that prefer select over shift.

Also worth noting: I thought we could generalize this further to include the case where the true operand of the select is not constant, but Alive says that may allow poison to pass through where it does not in the original select form of the code.

Differential Revision: https://reviews.llvm.org/D68949

llvm-svn: 374902
author: Sanjay Patel <spatel@rotateright.com> 2019-10-15 15:23:57 +0000
committer: Sanjay Patel <spatel@rotateright.com> 2019-10-15 15:23:57 +0000
commit: d545c9056e00988d2d146f8f1440b2dd192f306b (patch)
tree: 0d688949f278c727605fefed671b867d104b6d8c
parent: b18170660e726cc97caa35cb0c406db184d4a8a2 (diff)
download: bcm5719-llvm-d545c9056e00988d2d146f8f1440b2dd192f306b.tar.gz
bcm5719-llvm-d545c9056e00988d2d146f8f1440b2dd192f306b.zip
2 files changed, 68 insertions, 54 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 7fa95ce5cf9..43fa86b1182 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -8173,6 +8173,43 @@ static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
   }
 }
 
+/// If a (v)select has a condition value that is a sign-bit test, try to smear
+/// the condition operand sign-bit across the value width and use it as a mask.
+static SDValue foldSelectOfConstantsUsingSra(SDNode *N, SelectionDAG &DAG) {
+  SDValue Cond = N->getOperand(0);
+  SDValue C1 = N->getOperand(1);
+  SDValue C2 = N->getOperand(2);
+  assert(isConstantOrConstantVector(C1) && isConstantOrConstantVector(C2) &&
+         "Expected select-of-constants");
+
+  EVT VT = N->getValueType(0);
+  if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse() ||
+      VT != Cond.getOperand(0).getValueType())
+    return SDValue();
+
+  // The inverted-condition + commuted-select variants of these patterns are
+  // canonicalized to these forms in IR.
+  SDValue X = Cond.getOperand(0);
+  SDValue CondC = Cond.getOperand(1);
+  ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
+  if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CondC) &&
+      isAllOnesOrAllOnesSplat(C2)) {
+    // i32 X > -1 ? C1 : -1 --> (X >>s 31) | C1
+    SDLoc DL(N);
+    SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
+    SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
+    return DAG.getNode(ISD::OR, DL, VT, Sra, C1);
+  }
+  if (CC == ISD::SETLT && isNullOrNullSplat(CondC) && isNullOrNullSplat(C2)) {
+    // i8 X < 0 ? C1 : 0 --> (X >>s 7) & C1
+    SDLoc DL(N);
+    SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
+    SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
+    return DAG.getNode(ISD::AND, DL, VT, Sra, C1);
+  }
+  return SDValue();
+}
+
 SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
   SDValue Cond = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
@@ -8248,6 +8285,9 @@ SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
         SDValue ShAmtC = DAG.getConstant(C1Val.exactLogBase2(), DL, VT);
         return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC);
       }
+
+      if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
+        return V;
     }
 
     return SDValue();
@@ -8623,6 +8663,9 @@ SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
     return DAG.getNode(ISD::SHL, DL, VT, ZextCond, ShAmtC);
   }
 
+  if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
+    return V;
+
   // The general case for select-of-constants:
   // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
   // ...but that only makes sense if a vselect is slower than 2 logic ops, so
diff --git a/llvm/test/CodeGen/X86/select-sra.ll b/llvm/test/CodeGen/X86/select-sra.ll
index f0b6401a837..4d984aab06b 100644
--- a/llvm/test/CodeGen/X86/select-sra.ll
+++ b/llvm/test/CodeGen/X86/select-sra.ll
@@ -4,10 +4,9 @@
 define i8 @isnonneg_i8(i8 %x) {
 ; CHECK-LABEL: isnonneg_i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    testb %dil, %dil
-; CHECK-NEXT:    movl $42, %ecx
-; CHECK-NEXT:    movl $255, %eax
-; CHECK-NEXT:    cmovnsl %ecx, %eax
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    sarb $7, %al
+; CHECK-NEXT:    orb $42, %al
 ; CHECK-NEXT:    # kill: def $al killed $al killed $eax
 ; CHECK-NEXT:    retq
   %cond = icmp sgt i8 %x, -1
@@ -18,10 +17,9 @@ define i8 @isnonneg_i8(i8 %x) {
 define i16 @isnonneg_i16(i16 %x) {
 ; CHECK-LABEL: isnonneg_i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    testw %di, %di
-; CHECK-NEXT:    movl $542, %ecx # imm = 0x21E
-; CHECK-NEXT:    movl $65535, %eax # imm = 0xFFFF
-; CHECK-NEXT:    cmovnsl %ecx, %eax
+; CHECK-NEXT:    movswl %di, %eax
+; CHECK-NEXT:    sarl $15, %eax
+; CHECK-NEXT:    orl $542, %eax # imm = 0x21E
 ; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
 ; CHECK-NEXT:    retq
   %cond = icmp sgt i16 %x, -1
@@ -32,10 +30,9 @@ define i16 @isnonneg_i16(i16 %x) {
 define i32 @isnonneg_i32(i32 %x) {
 ; CHECK-LABEL: isnonneg_i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    testl %edi, %edi
-; CHECK-NEXT:    movl $-42, %ecx
-; CHECK-NEXT:    movl $-1, %eax
-; CHECK-NEXT:    cmovnsl %ecx, %eax
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    sarl $31, %eax
+; CHECK-NEXT:    orl $-42, %eax
 ; CHECK-NEXT:    retq
   %cond = icmp sgt i32 %x, -1
   %r = select i1 %cond, i32 -42, i32 -1
@@ -45,10 +42,9 @@ define i32 @isnonneg_i32(i32 %x) {
 define i64 @isnonneg_i64(i64 %x) {
 ; CHECK-LABEL: isnonneg_i64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    testq %rdi, %rdi
-; CHECK-NEXT:    movl $2342342, %ecx # imm = 0x23BDC6
-; CHECK-NEXT:    movq $-1, %rax
-; CHECK-NEXT:    cmovnsq %rcx, %rax
+; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    sarq $63, %rax
+; CHECK-NEXT:    orq $2342342, %rax # imm = 0x23BDC6
 ; CHECK-NEXT:    retq
   %cond = icmp sgt i64 %x, -1
   %r = select i1 %cond, i64 2342342, i64 -1
@@ -58,10 +54,10 @@ define i64 @isnonneg_i64(i64 %x) {
 define <16 x i8> @isnonneg_v16i8(<16 x i8> %x) {
 ; CHECK-LABEL: isnonneg_v16i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    pcmpeqd %xmm1, %xmm1
-; CHECK-NEXT:    pcmpgtb %xmm1, %xmm0
-; CHECK-NEXT:    pxor %xmm1, %xmm0
-; CHECK-NEXT:    por {{.*}}(%rip), %xmm0
+; CHECK-NEXT:    pxor %xmm1, %xmm1
+; CHECK-NEXT:    pcmpgtb %xmm0, %xmm1
+; CHECK-NEXT:    por {{.*}}(%rip), %xmm1
+; CHECK-NEXT:    movdqa %xmm1, %xmm0
 ; CHECK-NEXT:    retq
   %cond = icmp sgt <16 x i8> %x, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
   %r = select <16 x i1> %cond, <16 x i8> <i8 12, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
@@ -71,9 +67,7 @@ define <16 x i8> @isnonneg_v16i8(<16 x i8> %x) {
 define <8 x i16> @isnonneg_v8i16(<8 x i16> %x) {
 ; CHECK-LABEL: isnonneg_v8i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    pcmpeqd %xmm1, %xmm1
-; CHECK-NEXT:    pcmpgtw %xmm1, %xmm0
-; CHECK-NEXT:    pxor %xmm1, %xmm0
+; CHECK-NEXT:    psraw $15, %xmm0
 ; CHECK-NEXT:    por {{.*}}(%rip), %xmm0
 ; CHECK-NEXT:    retq
   %cond = icmp sgt <8 x i16> %x, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
@@ -84,9 +78,7 @@ define <8 x i16> @isnonneg_v8i16(<8 x i16> %x) {
 define <4 x i32> @isnonneg_v4i32(<4 x i32> %x) {
 ; CHECK-LABEL: isnonneg_v4i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    pcmpeqd %xmm1, %xmm1
-; CHECK-NEXT:    pcmpgtd %xmm1, %xmm0
-; CHECK-NEXT:    pxor %xmm1, %xmm0
+; CHECK-NEXT:    psrad $31, %xmm0
 ; CHECK-NEXT:    por {{.*}}(%rip), %xmm0
 ; CHECK-NEXT:    retq
   %cond = icmp sgt <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -97,18 +89,8 @@ define <4 x i32> @isnonneg_v4i32(<4 x i32> %x) {
 define <2 x i64> @isnonneg_v2i64(<2 x i64> %x) {
 ; CHECK-LABEL: isnonneg_v2i64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    pxor {{.*}}(%rip), %xmm0
-; CHECK-NEXT:    movdqa {{.*#+}} xmm1 = [18446744071562067967,18446744071562067967]
-; CHECK-NEXT:    movdqa %xmm0, %xmm2
-; CHECK-NEXT:    pcmpgtd %xmm1, %xmm2
-; CHECK-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
-; CHECK-NEXT:    pcmpeqd %xmm1, %xmm0
+; CHECK-NEXT:    psrad $31, %xmm0
 ; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
-; CHECK-NEXT:    pand %xmm3, %xmm0
-; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; CHECK-NEXT:    por %xmm0, %xmm1
-; CHECK-NEXT:    pcmpeqd %xmm0, %xmm0
-; CHECK-NEXT:    pxor %xmm1, %xmm0
 ; CHECK-NEXT:    por {{.*}}(%rip), %xmm0
 ; CHECK-NEXT:    retq
   %cond = icmp sgt <2 x i64> %x, <i64 -1, i64 -1>
@@ -182,10 +164,8 @@ define <16 x i8> @isneg_v16i8(<16 x i8> %x) {
 define <8 x i16> @isneg_v8i16(<8 x i16> %x) {
 ; CHECK-LABEL: isneg_v8i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    pxor %xmm1, %xmm1
-; CHECK-NEXT:    pcmpgtw %xmm0, %xmm1
-; CHECK-NEXT:    pand {{.*}}(%rip), %xmm1
-; CHECK-NEXT:    movdqa %xmm1, %xmm0
+; CHECK-NEXT:    psraw $15, %xmm0
+; CHECK-NEXT:    pand {{.*}}(%rip), %xmm0
 ; CHECK-NEXT:    retq
   %cond = icmp slt <8 x i16> %x, zeroinitializer
   %r = select <8 x i1> %cond, <8 x i16> <i16 1, i16 542, i16 542, i16 542, i16 542, i16 542, i16 542, i16 1>, <8 x i16> zeroinitializer
@@ -195,10 +175,8 @@ define <8 x i16> @isneg_v8i16(<8 x i16> %x) {
 define <4 x i32> @isneg_v4i32(<4 x i32> %x) {
 ; CHECK-LABEL: isneg_v4i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    pxor %xmm1, %xmm1
-; CHECK-NEXT:    pcmpgtd %xmm0, %xmm1
-; CHECK-NEXT:    pand {{.*}}(%rip), %xmm1
-; CHECK-NEXT:    movdqa %xmm1, %xmm0
+; CHECK-NEXT:    psrad $31, %xmm0
+; CHECK-NEXT:    pand {{.*}}(%rip), %xmm0
 ; CHECK-NEXT:    retq
   %cond = icmp slt <4 x i32> %x, zeroinitializer
   %r = select <4 x i1> %cond, <4 x i32> <i32 0, i32 42, i32 -42, i32 1>, <4 x i32> zeroinitializer
@@ -208,15 +186,8 @@ define <4 x i32> @isneg_v4i32(<4 x i32> %x) {
 define <2 x i64> @isneg_v2i64(<2 x i64> %x) {
 ; CHECK-LABEL: isneg_v2i64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movdqa {{.*#+}} xmm1 = [2147483648,2147483648]
-; CHECK-NEXT:    pxor %xmm1, %xmm0
-; CHECK-NEXT:    movdqa %xmm1, %xmm2
-; CHECK-NEXT:    pcmpgtd %xmm0, %xmm2
-; CHECK-NEXT:    pcmpeqd %xmm1, %xmm0
-; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-NEXT:    pand %xmm2, %xmm1
-; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
-; CHECK-NEXT:    por %xmm1, %xmm0
+; CHECK-NEXT:    psrad $31, %xmm0
+; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
 ; CHECK-NEXT:    pand {{.*}}(%rip), %xmm0
 ; CHECK-NEXT:    retq
   %cond = icmp slt <2 x i64> %x, zeroinitializer
author	Sanjay Patel <spatel@rotateright.com>	2019-10-15 15:23:57 +0000
committer	Sanjay Patel <spatel@rotateright.com>	2019-10-15 15:23:57 +0000
commit	d545c9056e00988d2d146f8f1440b2dd192f306b (patch)
tree	0d688949f278c727605fefed671b867d104b6d8c
parent	b18170660e726cc97caa35cb0c406db184d4a8a2 (diff)
download	bcm5719-llvm-d545c9056e00988d2d146f8f1440b2dd192f306b.tar.gz bcm5719-llvm-d545c9056e00988d2d146f8f1440b2dd192f306b.zip