diff options
author | Sanjay Patel <spatel@rotateright.com> | 2019-10-15 15:23:57 +0000 |
---|---|---|
committer | Sanjay Patel <spatel@rotateright.com> | 2019-10-15 15:23:57 +0000 |
commit | d545c9056e00988d2d146f8f1440b2dd192f306b (patch) | |
tree | 0d688949f278c727605fefed671b867d104b6d8c | |
parent | b18170660e726cc97caa35cb0c406db184d4a8a2 (diff) | |
download | bcm5719-llvm-d545c9056e00988d2d146f8f1440b2dd192f306b.tar.gz bcm5719-llvm-d545c9056e00988d2d146f8f1440b2dd192f306b.zip |
[DAGCombiner] fold select-of-constants based on sign-bit test
Examples:
i32 X > -1 ? C1 : -1 --> (X >>s 31) | C1
i8 X < 0 ? C1 : 0 --> (X >>s 7) & C1
This is a small generalization of a fold requested in PR43650:
https://bugs.llvm.org/show_bug.cgi?id=43650
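The sign-bit of the compared value (X), smeared across the full value width by an arithmetic shift right, is all-ones when X is negative and zero otherwise, so it can be used as a mask for the true operand: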
https://rise4fun.com/Alive/paT
Note that we already handle some of these patterns (isNegative + scalar) because
there's an over-specialized, yet over-reaching, fold for that in foldSelectCCToShiftAnd().
That existing fold doesn't use any TLI hooks, so I can't easily rip it out even though we're
duplicating part of it here. The new fold is guarded by TLI.convertSelectOfConstantsToMath(),
so it should not cause problems for targets that prefer select over shift.
Also worth noting: I thought we could generalize this further to include the case where
the true operand of the select is not constant, but Alive shows that doing so may allow poison to
pass through the shift-and-mask form in cases where the original select form would have blocked it.
Differential Revision: https://reviews.llvm.org/D68949
llvm-svn: 374902
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 43 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/select-sra.ll | 79 |
2 files changed, 68 insertions, 54 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 7fa95ce5cf9..43fa86b1182 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -8173,6 +8173,43 @@ static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS, } } +/// If a (v)select has a condition value that is a sign-bit test, try to smear +/// the condition operand sign-bit across the value width and use it as a mask. +static SDValue foldSelectOfConstantsUsingSra(SDNode *N, SelectionDAG &DAG) { + SDValue Cond = N->getOperand(0); + SDValue C1 = N->getOperand(1); + SDValue C2 = N->getOperand(2); + assert(isConstantOrConstantVector(C1) && isConstantOrConstantVector(C2) && + "Expected select-of-constants"); + + EVT VT = N->getValueType(0); + if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse() || + VT != Cond.getOperand(0).getValueType()) + return SDValue(); + + // The inverted-condition + commuted-select variants of these patterns are + // canonicalized to these forms in IR. + SDValue X = Cond.getOperand(0); + SDValue CondC = Cond.getOperand(1); + ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get(); + if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CondC) && + isAllOnesOrAllOnesSplat(C2)) { + // i32 X > -1 ? C1 : -1 --> (X >>s 31) | C1 + SDLoc DL(N); + SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT); + SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC); + return DAG.getNode(ISD::OR, DL, VT, Sra, C1); + } + if (CC == ISD::SETLT && isNullOrNullSplat(CondC) && isNullOrNullSplat(C2)) { + // i8 X < 0 ? 
C1 : 0 --> (X >>s 7) & C1 + SDLoc DL(N); + SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT); + SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC); + return DAG.getNode(ISD::AND, DL, VT, Sra, C1); + } + return SDValue(); +} + SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) { SDValue Cond = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -8248,6 +8285,9 @@ SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) { SDValue ShAmtC = DAG.getConstant(C1Val.exactLogBase2(), DL, VT); return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC); } + + if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG)) + return V; } return SDValue(); @@ -8623,6 +8663,9 @@ SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) { return DAG.getNode(ISD::SHL, DL, VT, ZextCond, ShAmtC); } + if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG)) + return V; + // The general case for select-of-constants: // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2 // ...but that only makes sense if a vselect is slower than 2 logic ops, so diff --git a/llvm/test/CodeGen/X86/select-sra.ll b/llvm/test/CodeGen/X86/select-sra.ll index f0b6401a837..4d984aab06b 100644 --- a/llvm/test/CodeGen/X86/select-sra.ll +++ b/llvm/test/CodeGen/X86/select-sra.ll @@ -4,10 +4,9 @@ define i8 @isnonneg_i8(i8 %x) { ; CHECK-LABEL: isnonneg_i8: ; CHECK: # %bb.0: -; CHECK-NEXT: testb %dil, %dil -; CHECK-NEXT: movl $42, %ecx -; CHECK-NEXT: movl $255, %eax -; CHECK-NEXT: cmovnsl %ecx, %eax +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: sarb $7, %al +; CHECK-NEXT: orb $42, %al ; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: retq %cond = icmp sgt i8 %x, -1 @@ -18,10 +17,9 @@ define i8 @isnonneg_i8(i8 %x) { define i16 @isnonneg_i16(i16 %x) { ; CHECK-LABEL: isnonneg_i16: ; CHECK: # %bb.0: -; CHECK-NEXT: testw %di, %di -; CHECK-NEXT: movl $542, %ecx # imm = 0x21E -; CHECK-NEXT: movl $65535, %eax # imm = 0xFFFF -; CHECK-NEXT: cmovnsl %ecx, %eax +; CHECK-NEXT: movswl %di, 
%eax +; CHECK-NEXT: sarl $15, %eax +; CHECK-NEXT: orl $542, %eax # imm = 0x21E ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq %cond = icmp sgt i16 %x, -1 @@ -32,10 +30,9 @@ define i16 @isnonneg_i16(i16 %x) { define i32 @isnonneg_i32(i32 %x) { ; CHECK-LABEL: isnonneg_i32: ; CHECK: # %bb.0: -; CHECK-NEXT: testl %edi, %edi -; CHECK-NEXT: movl $-42, %ecx -; CHECK-NEXT: movl $-1, %eax -; CHECK-NEXT: cmovnsl %ecx, %eax +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: sarl $31, %eax +; CHECK-NEXT: orl $-42, %eax ; CHECK-NEXT: retq %cond = icmp sgt i32 %x, -1 %r = select i1 %cond, i32 -42, i32 -1 @@ -45,10 +42,9 @@ define i32 @isnonneg_i32(i32 %x) { define i64 @isnonneg_i64(i64 %x) { ; CHECK-LABEL: isnonneg_i64: ; CHECK: # %bb.0: -; CHECK-NEXT: testq %rdi, %rdi -; CHECK-NEXT: movl $2342342, %ecx # imm = 0x23BDC6 -; CHECK-NEXT: movq $-1, %rax -; CHECK-NEXT: cmovnsq %rcx, %rax +; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: sarq $63, %rax +; CHECK-NEXT: orq $2342342, %rax # imm = 0x23BDC6 ; CHECK-NEXT: retq %cond = icmp sgt i64 %x, -1 %r = select i1 %cond, i64 2342342, i64 -1 @@ -58,10 +54,10 @@ define i64 @isnonneg_i64(i64 %x) { define <16 x i8> @isnonneg_v16i8(<16 x i8> %x) { ; CHECK-LABEL: isnonneg_v16i8: ; CHECK: # %bb.0: -; CHECK-NEXT: pcmpeqd %xmm1, %xmm1 -; CHECK-NEXT: pcmpgtb %xmm1, %xmm0 -; CHECK-NEXT: pxor %xmm1, %xmm0 -; CHECK-NEXT: por {{.*}}(%rip), %xmm0 +; CHECK-NEXT: pxor %xmm1, %xmm1 +; CHECK-NEXT: pcmpgtb %xmm0, %xmm1 +; CHECK-NEXT: por {{.*}}(%rip), %xmm1 +; CHECK-NEXT: movdqa %xmm1, %xmm0 ; CHECK-NEXT: retq %cond = icmp sgt <16 x i8> %x, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> %r = select <16 x i1> %cond, <16 x i8> <i8 12, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> @@ -71,9 +67,7 @@ 
define <16 x i8> @isnonneg_v16i8(<16 x i8> %x) { define <8 x i16> @isnonneg_v8i16(<8 x i16> %x) { ; CHECK-LABEL: isnonneg_v8i16: ; CHECK: # %bb.0: -; CHECK-NEXT: pcmpeqd %xmm1, %xmm1 -; CHECK-NEXT: pcmpgtw %xmm1, %xmm0 -; CHECK-NEXT: pxor %xmm1, %xmm0 +; CHECK-NEXT: psraw $15, %xmm0 ; CHECK-NEXT: por {{.*}}(%rip), %xmm0 ; CHECK-NEXT: retq %cond = icmp sgt <8 x i16> %x, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> @@ -84,9 +78,7 @@ define <8 x i16> @isnonneg_v8i16(<8 x i16> %x) { define <4 x i32> @isnonneg_v4i32(<4 x i32> %x) { ; CHECK-LABEL: isnonneg_v4i32: ; CHECK: # %bb.0: -; CHECK-NEXT: pcmpeqd %xmm1, %xmm1 -; CHECK-NEXT: pcmpgtd %xmm1, %xmm0 -; CHECK-NEXT: pxor %xmm1, %xmm0 +; CHECK-NEXT: psrad $31, %xmm0 ; CHECK-NEXT: por {{.*}}(%rip), %xmm0 ; CHECK-NEXT: retq %cond = icmp sgt <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> @@ -97,18 +89,8 @@ define <4 x i32> @isnonneg_v4i32(<4 x i32> %x) { define <2 x i64> @isnonneg_v2i64(<2 x i64> %x) { ; CHECK-LABEL: isnonneg_v2i64: ; CHECK: # %bb.0: -; CHECK-NEXT: pxor {{.*}}(%rip), %xmm0 -; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [18446744071562067967,18446744071562067967] -; CHECK-NEXT: movdqa %xmm0, %xmm2 -; CHECK-NEXT: pcmpgtd %xmm1, %xmm2 -; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] -; CHECK-NEXT: pcmpeqd %xmm1, %xmm0 +; CHECK-NEXT: psrad $31, %xmm0 ; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] -; CHECK-NEXT: pand %xmm3, %xmm0 -; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3] -; CHECK-NEXT: por %xmm0, %xmm1 -; CHECK-NEXT: pcmpeqd %xmm0, %xmm0 -; CHECK-NEXT: pxor %xmm1, %xmm0 ; CHECK-NEXT: por {{.*}}(%rip), %xmm0 ; CHECK-NEXT: retq %cond = icmp sgt <2 x i64> %x, <i64 -1, i64 -1> @@ -182,10 +164,8 @@ define <16 x i8> @isneg_v16i8(<16 x i8> %x) { define <8 x i16> @isneg_v8i16(<8 x i16> %x) { ; CHECK-LABEL: isneg_v8i16: ; CHECK: # %bb.0: -; CHECK-NEXT: pxor %xmm1, %xmm1 -; CHECK-NEXT: pcmpgtw %xmm0, %xmm1 -; CHECK-NEXT: pand {{.*}}(%rip), %xmm1 -; CHECK-NEXT: movdqa %xmm1, %xmm0 +; CHECK-NEXT: 
psraw $15, %xmm0 +; CHECK-NEXT: pand {{.*}}(%rip), %xmm0 ; CHECK-NEXT: retq %cond = icmp slt <8 x i16> %x, zeroinitializer %r = select <8 x i1> %cond, <8 x i16> <i16 1, i16 542, i16 542, i16 542, i16 542, i16 542, i16 542, i16 1>, <8 x i16> zeroinitializer @@ -195,10 +175,8 @@ define <8 x i16> @isneg_v8i16(<8 x i16> %x) { define <4 x i32> @isneg_v4i32(<4 x i32> %x) { ; CHECK-LABEL: isneg_v4i32: ; CHECK: # %bb.0: -; CHECK-NEXT: pxor %xmm1, %xmm1 -; CHECK-NEXT: pcmpgtd %xmm0, %xmm1 -; CHECK-NEXT: pand {{.*}}(%rip), %xmm1 -; CHECK-NEXT: movdqa %xmm1, %xmm0 +; CHECK-NEXT: psrad $31, %xmm0 +; CHECK-NEXT: pand {{.*}}(%rip), %xmm0 ; CHECK-NEXT: retq %cond = icmp slt <4 x i32> %x, zeroinitializer %r = select <4 x i1> %cond, <4 x i32> <i32 0, i32 42, i32 -42, i32 1>, <4 x i32> zeroinitializer @@ -208,15 +186,8 @@ define <4 x i32> @isneg_v4i32(<4 x i32> %x) { define <2 x i64> @isneg_v2i64(<2 x i64> %x) { ; CHECK-LABEL: isneg_v2i64: ; CHECK: # %bb.0: -; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,2147483648] -; CHECK-NEXT: pxor %xmm1, %xmm0 -; CHECK-NEXT: movdqa %xmm1, %xmm2 -; CHECK-NEXT: pcmpgtd %xmm0, %xmm2 -; CHECK-NEXT: pcmpeqd %xmm1, %xmm0 -; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; CHECK-NEXT: pand %xmm2, %xmm1 -; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] -; CHECK-NEXT: por %xmm1, %xmm0 +; CHECK-NEXT: psrad $31, %xmm0 +; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] ; CHECK-NEXT: pand {{.*}}(%rip), %xmm0 ; CHECK-NEXT: retq %cond = icmp slt <2 x i64> %x, zeroinitializer |