summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp 43
-rw-r--r-- llvm/test/CodeGen/X86/select-sra.ll 79
2 files changed, 68 insertions, 54 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 7fa95ce5cf9..43fa86b1182 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -8173,6 +8173,43 @@ static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
}
}
+/// If a (v)select of constants has a condition that is a sign-bit test of X, try
+/// to smear X's sign-bit across the value width (X >>s BW-1) and use it as a mask.
+static SDValue foldSelectOfConstantsUsingSra(SDNode *N, SelectionDAG &DAG) {
+ SDValue Cond = N->getOperand(0); // select condition
+ SDValue C1 = N->getOperand(1); // value chosen when Cond is true
+ SDValue C2 = N->getOperand(2); // value chosen when Cond is false
+ assert(isConstantOrConstantVector(C1) && isConstantOrConstantVector(C2) &&
+ "Expected select-of-constants"); // caller contract: both arms are constants
+ // Only a single-use setcc comparing values of the result type can be folded.
+ EVT VT = N->getValueType(0);
+ if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse() ||
+ VT != Cond.getOperand(0).getValueType())
+ return SDValue(); // empty SDValue: no fold performed
+
+ // The inverted-condition + commuted-select variants of these patterns are
+ // canonicalized to these forms in IR, so they are not matched separately here.
+ SDValue X = Cond.getOperand(0); // value whose sign-bit is tested
+ SDValue CondC = Cond.getOperand(1); // right-hand side of the comparison
+ ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
+ if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CondC) &&
+ isAllOnesOrAllOnesSplat(C2)) {
+ // i32 X > -1 ? C1 : -1 --> (X >>s 31) | C1  (the sra is 0 if X >= 0, else -1)
+ SDLoc DL(N);
+ SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
+ SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
+ return DAG.getNode(ISD::OR, DL, VT, Sra, C1);
+ }
+ if (CC == ISD::SETLT && isNullOrNullSplat(CondC) && isNullOrNullSplat(C2)) {
+ // i8 X < 0 ? C1 : 0 --> (X >>s 7) & C1  (the sra is -1 if X < 0, else 0)
+ SDLoc DL(N);
+ SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
+ SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
+ return DAG.getNode(ISD::AND, DL, VT, Sra, C1);
+ }
+ return SDValue(); // neither sign-bit pattern matched
+}
+
SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
SDValue Cond = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -8248,6 +8285,9 @@ SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
SDValue ShAmtC = DAG.getConstant(C1Val.exactLogBase2(), DL, VT);
return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC);
}
+
+ if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
+ return V;
}
return SDValue();
@@ -8623,6 +8663,9 @@ SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
return DAG.getNode(ISD::SHL, DL, VT, ZextCond, ShAmtC);
}
+ if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
+ return V;
+
// The general case for select-of-constants:
// vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
// ...but that only makes sense if a vselect is slower than 2 logic ops, so
diff --git a/llvm/test/CodeGen/X86/select-sra.ll b/llvm/test/CodeGen/X86/select-sra.ll
index f0b6401a837..4d984aab06b 100644
--- a/llvm/test/CodeGen/X86/select-sra.ll
+++ b/llvm/test/CodeGen/X86/select-sra.ll
@@ -4,10 +4,9 @@
define i8 @isnonneg_i8(i8 %x) {
; CHECK-LABEL: isnonneg_i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: testb %dil, %dil
-; CHECK-NEXT: movl $42, %ecx
-; CHECK-NEXT: movl $255, %eax
-; CHECK-NEXT: cmovnsl %ecx, %eax
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: sarb $7, %al
+; CHECK-NEXT: orb $42, %al
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: retq
%cond = icmp sgt i8 %x, -1
@@ -18,10 +17,9 @@ define i8 @isnonneg_i8(i8 %x) {
define i16 @isnonneg_i16(i16 %x) {
; CHECK-LABEL: isnonneg_i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: testw %di, %di
-; CHECK-NEXT: movl $542, %ecx # imm = 0x21E
-; CHECK-NEXT: movl $65535, %eax # imm = 0xFFFF
-; CHECK-NEXT: cmovnsl %ecx, %eax
+; CHECK-NEXT: movswl %di, %eax
+; CHECK-NEXT: sarl $15, %eax
+; CHECK-NEXT: orl $542, %eax # imm = 0x21E
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-NEXT: retq
%cond = icmp sgt i16 %x, -1
@@ -32,10 +30,9 @@ define i16 @isnonneg_i16(i16 %x) {
define i32 @isnonneg_i32(i32 %x) {
; CHECK-LABEL: isnonneg_i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: testl %edi, %edi
-; CHECK-NEXT: movl $-42, %ecx
-; CHECK-NEXT: movl $-1, %eax
-; CHECK-NEXT: cmovnsl %ecx, %eax
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: sarl $31, %eax
+; CHECK-NEXT: orl $-42, %eax
; CHECK-NEXT: retq
%cond = icmp sgt i32 %x, -1
%r = select i1 %cond, i32 -42, i32 -1
@@ -45,10 +42,9 @@ define i32 @isnonneg_i32(i32 %x) {
define i64 @isnonneg_i64(i64 %x) {
; CHECK-LABEL: isnonneg_i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: testq %rdi, %rdi
-; CHECK-NEXT: movl $2342342, %ecx # imm = 0x23BDC6
-; CHECK-NEXT: movq $-1, %rax
-; CHECK-NEXT: cmovnsq %rcx, %rax
+; CHECK-NEXT: movq %rdi, %rax
+; CHECK-NEXT: sarq $63, %rax
+; CHECK-NEXT: orq $2342342, %rax # imm = 0x23BDC6
; CHECK-NEXT: retq
%cond = icmp sgt i64 %x, -1
%r = select i1 %cond, i64 2342342, i64 -1
@@ -58,10 +54,10 @@ define i64 @isnonneg_i64(i64 %x) {
define <16 x i8> @isnonneg_v16i8(<16 x i8> %x) {
; CHECK-LABEL: isnonneg_v16i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcmpeqd %xmm1, %xmm1
-; CHECK-NEXT: pcmpgtb %xmm1, %xmm0
-; CHECK-NEXT: pxor %xmm1, %xmm0
-; CHECK-NEXT: por {{.*}}(%rip), %xmm0
+; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: pcmpgtb %xmm0, %xmm1
+; CHECK-NEXT: por {{.*}}(%rip), %xmm1
+; CHECK-NEXT: movdqa %xmm1, %xmm0
; CHECK-NEXT: retq
%cond = icmp sgt <16 x i8> %x, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
%r = select <16 x i1> %cond, <16 x i8> <i8 12, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
@@ -71,9 +67,7 @@ define <16 x i8> @isnonneg_v16i8(<16 x i8> %x) {
define <8 x i16> @isnonneg_v8i16(<8 x i16> %x) {
; CHECK-LABEL: isnonneg_v8i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcmpeqd %xmm1, %xmm1
-; CHECK-NEXT: pcmpgtw %xmm1, %xmm0
-; CHECK-NEXT: pxor %xmm1, %xmm0
+; CHECK-NEXT: psraw $15, %xmm0
; CHECK-NEXT: por {{.*}}(%rip), %xmm0
; CHECK-NEXT: retq
%cond = icmp sgt <8 x i16> %x, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
@@ -84,9 +78,7 @@ define <8 x i16> @isnonneg_v8i16(<8 x i16> %x) {
define <4 x i32> @isnonneg_v4i32(<4 x i32> %x) {
; CHECK-LABEL: isnonneg_v4i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcmpeqd %xmm1, %xmm1
-; CHECK-NEXT: pcmpgtd %xmm1, %xmm0
-; CHECK-NEXT: pxor %xmm1, %xmm0
+; CHECK-NEXT: psrad $31, %xmm0
; CHECK-NEXT: por {{.*}}(%rip), %xmm0
; CHECK-NEXT: retq
%cond = icmp sgt <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -97,18 +89,8 @@ define <4 x i32> @isnonneg_v4i32(<4 x i32> %x) {
define <2 x i64> @isnonneg_v2i64(<2 x i64> %x) {
; CHECK-LABEL: isnonneg_v2i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: pxor {{.*}}(%rip), %xmm0
-; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [18446744071562067967,18446744071562067967]
-; CHECK-NEXT: movdqa %xmm0, %xmm2
-; CHECK-NEXT: pcmpgtd %xmm1, %xmm2
-; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
-; CHECK-NEXT: pcmpeqd %xmm1, %xmm0
+; CHECK-NEXT: psrad $31, %xmm0
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
-; CHECK-NEXT: pand %xmm3, %xmm0
-; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; CHECK-NEXT: por %xmm0, %xmm1
-; CHECK-NEXT: pcmpeqd %xmm0, %xmm0
-; CHECK-NEXT: pxor %xmm1, %xmm0
; CHECK-NEXT: por {{.*}}(%rip), %xmm0
; CHECK-NEXT: retq
%cond = icmp sgt <2 x i64> %x, <i64 -1, i64 -1>
@@ -182,10 +164,8 @@ define <16 x i8> @isneg_v16i8(<16 x i8> %x) {
define <8 x i16> @isneg_v8i16(<8 x i16> %x) {
; CHECK-LABEL: isneg_v8i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: pxor %xmm1, %xmm1
-; CHECK-NEXT: pcmpgtw %xmm0, %xmm1
-; CHECK-NEXT: pand {{.*}}(%rip), %xmm1
-; CHECK-NEXT: movdqa %xmm1, %xmm0
+; CHECK-NEXT: psraw $15, %xmm0
+; CHECK-NEXT: pand {{.*}}(%rip), %xmm0
; CHECK-NEXT: retq
%cond = icmp slt <8 x i16> %x, zeroinitializer
%r = select <8 x i1> %cond, <8 x i16> <i16 1, i16 542, i16 542, i16 542, i16 542, i16 542, i16 542, i16 1>, <8 x i16> zeroinitializer
@@ -195,10 +175,8 @@ define <8 x i16> @isneg_v8i16(<8 x i16> %x) {
define <4 x i32> @isneg_v4i32(<4 x i32> %x) {
; CHECK-LABEL: isneg_v4i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: pxor %xmm1, %xmm1
-; CHECK-NEXT: pcmpgtd %xmm0, %xmm1
-; CHECK-NEXT: pand {{.*}}(%rip), %xmm1
-; CHECK-NEXT: movdqa %xmm1, %xmm0
+; CHECK-NEXT: psrad $31, %xmm0
+; CHECK-NEXT: pand {{.*}}(%rip), %xmm0
; CHECK-NEXT: retq
%cond = icmp slt <4 x i32> %x, zeroinitializer
%r = select <4 x i1> %cond, <4 x i32> <i32 0, i32 42, i32 -42, i32 1>, <4 x i32> zeroinitializer
@@ -208,15 +186,8 @@ define <4 x i32> @isneg_v4i32(<4 x i32> %x) {
define <2 x i64> @isneg_v2i64(<2 x i64> %x) {
; CHECK-LABEL: isneg_v2i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,2147483648]
-; CHECK-NEXT: pxor %xmm1, %xmm0
-; CHECK-NEXT: movdqa %xmm1, %xmm2
-; CHECK-NEXT: pcmpgtd %xmm0, %xmm2
-; CHECK-NEXT: pcmpeqd %xmm1, %xmm0
-; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-NEXT: pand %xmm2, %xmm1
-; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
-; CHECK-NEXT: por %xmm1, %xmm0
+; CHECK-NEXT: psrad $31, %xmm0
+; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; CHECK-NEXT: pand {{.*}}(%rip), %xmm0
; CHECK-NEXT: retq
%cond = icmp slt <2 x i64> %x, zeroinitializer
OpenPOWER on IntegriCloud