diff options
| -rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 45 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AArch64/signbit-shift.ll | 34 | ||||
| -rw-r--r-- | llvm/test/CodeGen/PowerPC/signbit-shift.ll | 21 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/signbit-shift.ll | 43 |
4 files changed, 84 insertions, 59 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 3fdea2d0d8d..7a99687757f 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -1997,6 +1997,45 @@ static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) { return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit); } +/// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into +/// a shift and add with a different constant. +static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) { + assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) && + "Expecting add or sub"); + + // We need a constant operand for the add/sub, and the other operand is a + // logical shift right: add (srl), C or sub C, (srl). + bool IsAdd = N->getOpcode() == ISD::ADD; + SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0); + SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1); + ConstantSDNode *C = isConstOrConstSplat(ConstantOp); + if (!C || ShiftOp.getOpcode() != ISD::SRL) + return SDValue(); + + // The shift must be of a 'not' value. + // TODO: Use isBitwiseNot() if it works with vectors. + SDValue Not = ShiftOp.getOperand(0); + if (!Not.hasOneUse() || Not.getOpcode() != ISD::XOR || + !isAllOnesConstantOrAllOnesSplatConstant(Not.getOperand(1))) + return SDValue(); + + // The shift must be moving the sign bit to the least-significant-bit. + EVT VT = ShiftOp.getValueType(); + SDValue ShAmt = ShiftOp.getOperand(1); + ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt); + if (!ShAmtC || ShAmtC->getZExtValue() != VT.getScalarSizeInBits() - 1) + return SDValue(); + + // Eliminate the 'not' by adjusting the shift and add/sub constant: + // add (srl (not X), 31), C --> add (sra X, 31), (C + 1) + // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1) + SDLoc DL(N); + auto ShOpcode = IsAdd ? ISD::SRA : ISD::SRL; + SDValue NewShift = DAG.getNode(ShOpcode, DL, VT, Not.getOperand(0), ShAmt); + APInt NewC = IsAdd ? C->getAPIntValue() + 1 : C->getAPIntValue() - 1; + return DAG.getNode(ISD::ADD, DL, VT, NewShift, DAG.getConstant(NewC, DL, VT)); +} + SDValue DAGCombiner::visitADD(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -2131,6 +2170,9 @@ SDValue DAGCombiner::visitADD(SDNode *N) { if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG)) return V; + if (SDValue V = foldAddSubOfSignBit(N, DAG)) + return V; + if (SimplifyDemandedBits(SDValue(N, 0))) return SDValue(N, 0); @@ -2656,6 +2698,9 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG)) return V; + if (SDValue V = foldAddSubOfSignBit(N, DAG)) + return V; + // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X) if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) { if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) { diff --git a/llvm/test/CodeGen/AArch64/signbit-shift.ll b/llvm/test/CodeGen/AArch64/signbit-shift.ll index 34d5ad4ebfb..521a0b8c149 100644 --- a/llvm/test/CodeGen/AArch64/signbit-shift.ll +++ b/llvm/test/CodeGen/AArch64/signbit-shift.ll @@ -17,9 +17,8 @@ define i32 @zext_ifpos(i32 %x) { define i32 @add_zext_ifpos(i32 %x) { ; CHECK-LABEL: add_zext_ifpos: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn w8, w0 -; CHECK-NEXT: lsr w8, w8, #31 -; CHECK-NEXT: add w0, w8, #41 // =41 +; CHECK-NEXT: asr w8, w0, #31 +; CHECK-NEXT: add w0, w8, #42 // =42 ; CHECK-NEXT: ret %c = icmp sgt i32 %x, -1 %e = zext i1 %c to i32 @@ -67,9 +66,8 @@ define i32 @sext_ifpos(i32 %x) { define i32 @add_sext_ifpos(i32 %x) { ; CHECK-LABEL: add_sext_ifpos: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn w8, w0 -; CHECK-NEXT: mov w9, #42 -; CHECK-NEXT: sub w0, w9, w8, lsr #31 +; CHECK-NEXT: lsr w8, w0, #31 +; CHECK-NEXT: add w0, w8, #41 // =41 ; CHECK-NEXT: ret %c = icmp sgt i32 %x, -1 %e = sext i1 %c to i32 @@ -176,9 +174,8 @@ define i32 @sel_ifneg_fval_bigger(i32 %x) { define i32 @add_lshr_not(i32 %x) { ; CHECK-LABEL: add_lshr_not: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn w8, w0 -; CHECK-NEXT: lsr w8, w8, #31 -; CHECK-NEXT: add w0, w8, #41 // =41 +; CHECK-NEXT: asr w8, w0, #31 +; CHECK-NEXT: add w0, w8, #42 // =42 ; CHECK-NEXT: ret %not = xor i32 %x, -1 %sh = lshr i32 %not, 31 @@ -189,9 +186,9 @@ define i32 @add_lshr_not(i32 %x) { define <4 x i32> @add_lshr_not_vec_splat(<4 x i32> %x) { ; CHECK-LABEL: add_lshr_not_vec_splat: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v0.16b, v0.16b -; CHECK-NEXT: ushr v0.4s, v0.4s, #31 -; CHECK-NEXT: orr v0.4s, #42 +; CHECK-NEXT: movi v1.4s, #43 +; CHECK-NEXT: ssra v1.4s, v0.4s, #31 +; CHECK-NEXT: mov v0.16b, v1.16b ; CHECK-NEXT: ret %c = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> %e = lshr <4 x i32> %c, <i32 31, i32 31, i32 31, i32 31> @@ -202,9 +199,9 @@ define <4 x i32> @add_lshr_not_vec_splat(<4 x i32> %x) { define i32 @sub_lshr_not(i32 %x) { ; CHECK-LABEL: sub_lshr_not: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn w8, w0 -; CHECK-NEXT: mov w9, #43 -; CHECK-NEXT: sub w0, w9, w8, lsr #31 +; CHECK-NEXT: mov w8, #42 +; CHECK-NEXT: bfxil w8, w0, #31, #1 +; CHECK-NEXT: mov w0, w8 ; CHECK-NEXT: ret %not = xor i32 %x, -1 %sh = lshr i32 %not, 31 @@ -215,10 +212,9 @@ define i32 @sub_lshr_not(i32 %x) { define <4 x i32> @sub_lshr_not_vec_splat(<4 x i32> %x) { ; CHECK-LABEL: sub_lshr_not_vec_splat: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v0.16b, v0.16b -; CHECK-NEXT: ushr v0.4s, v0.4s, #31 -; CHECK-NEXT: movi v1.4s, #42 -; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s +; CHECK-NEXT: movi v1.4s, #41 +; CHECK-NEXT: usra v1.4s, v0.4s, #31 +; CHECK-NEXT: mov v0.16b, v1.16b ; CHECK-NEXT: ret %c = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> %e = lshr <4 x i32> %c, <i32 31, i32 31, i32 31, i32 31> diff --git a/llvm/test/CodeGen/PowerPC/signbit-shift.ll b/llvm/test/CodeGen/PowerPC/signbit-shift.ll index 63d7ca7c661..7bc9cef9590 100644 --- a/llvm/test/CodeGen/PowerPC/signbit-shift.ll +++ b/llvm/test/CodeGen/PowerPC/signbit-shift.ll @@ -17,9 +17,8 @@ define i32 @zext_ifpos(i32 %x) { define i32 @add_zext_ifpos(i32 %x) { ; CHECK-LABEL: add_zext_ifpos: ; CHECK: # %bb.0: -; CHECK-NEXT: nor 3, 3, 3 -; CHECK-NEXT: srwi 3, 3, 31 -; CHECK-NEXT: addi 3, 3, 41 +; CHECK-NEXT: srawi 3, 3, 31 +; CHECK-NEXT: addi 3, 3, 42 ; CHECK-NEXT: blr %c = icmp sgt i32 %x, -1 %e = zext i1 %c to i32 @@ -184,9 +183,8 @@ define i32 @sel_ifneg_fval_bigger(i32 %x) { define i32 @add_lshr_not(i32 %x) { ; CHECK-LABEL: add_lshr_not: ; CHECK: # %bb.0: -; CHECK-NEXT: nor 3, 3, 3 -; CHECK-NEXT: srwi 3, 3, 31 -; CHECK-NEXT: addi 3, 3, 41 +; CHECK-NEXT: srawi 3, 3, 31 +; CHECK-NEXT: addi 3, 3, 42 ; CHECK-NEXT: blr %not = xor i32 %x, -1 %sh = lshr i32 %not, 31 @@ -200,12 +198,11 @@ define <4 x i32> @add_lshr_not_vec_splat(<4 x i32> %x) { ; CHECK-NEXT: vspltisw 3, -16 ; CHECK-NEXT: vspltisw 4, 15 ; CHECK-NEXT: addis 3, 2, .LCPI15_0@toc@ha -; CHECK-NEXT: xxlnor 34, 34, 34 ; CHECK-NEXT: addi 3, 3, .LCPI15_0@toc@l ; CHECK-NEXT: vsubuwm 3, 4, 3 -; CHECK-NEXT: vsrw 2, 2, 3 +; CHECK-NEXT: vsraw 2, 2, 3 ; CHECK-NEXT: lvx 3, 0, 3 -; CHECK-NEXT: xxlor 34, 34, 35 +; CHECK-NEXT: vadduwm 2, 2, 3 ; CHECK-NEXT: blr %c = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> %e = lshr <4 x i32> %c, <i32 31, i32 31, i32 31, i32 31> @@ -216,9 +213,8 @@ define <4 x i32> @add_lshr_not_vec_splat(<4 x i32> %x) { define i32 @sub_lshr_not(i32 %x) { ; CHECK-LABEL: sub_lshr_not: ; CHECK: # %bb.0: -; CHECK-NEXT: nor 3, 3, 3 ; CHECK-NEXT: srwi 3, 3, 31 -; CHECK-NEXT: subfic 3, 3, 43 +; CHECK-NEXT: ori 3, 3, 42 ; CHECK-NEXT: blr %not = xor i32 %x, -1 %sh = lshr i32 %not, 31 @@ -232,12 +228,11 @@ define <4 x i32> @sub_lshr_not_vec_splat(<4 x i32> %x) { ; CHECK-NEXT: vspltisw 3, -16 ; CHECK-NEXT: vspltisw 4, 15 ; CHECK-NEXT: addis 3, 2, .LCPI17_0@toc@ha -; CHECK-NEXT: xxlnor 34, 34, 34 ; CHECK-NEXT: addi 3, 3, .LCPI17_0@toc@l ; CHECK-NEXT: vsubuwm 3, 4, 3 ; CHECK-NEXT: vsrw 2, 2, 3 ; CHECK-NEXT: lvx 3, 0, 3 -; CHECK-NEXT: vsubuwm 2, 3, 2 +; CHECK-NEXT: vadduwm 2, 2, 3 ; CHECK-NEXT: blr %c = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> %e = lshr <4 x i32> %c, <i32 31, i32 31, i32 31, i32 31> diff --git a/llvm/test/CodeGen/X86/signbit-shift.ll b/llvm/test/CodeGen/X86/signbit-shift.ll index e631da8f39c..b22c1a34a3d 100644 --- a/llvm/test/CodeGen/X86/signbit-shift.ll +++ b/llvm/test/CodeGen/X86/signbit-shift.ll @@ -19,9 +19,8 @@ define i32 @add_zext_ifpos(i32 %x) { ; CHECK-LABEL: add_zext_ifpos: ; CHECK: # %bb.0: ; CHECK-NEXT: # kill: def $edi killed $edi def $rdi -; CHECK-NEXT: notl %edi -; CHECK-NEXT: shrl $31, %edi -; CHECK-NEXT: leal 41(%rdi), %eax +; CHECK-NEXT: sarl $31, %edi +; CHECK-NEXT: leal 42(%rdi), %eax ; CHECK-NEXT: retq %c = icmp sgt i32 %x, -1 %e = zext i1 %c to i32 @@ -47,9 +46,8 @@ define i32 @sel_ifpos_tval_bigger(i32 %x) { ; CHECK-LABEL: sel_ifpos_tval_bigger: ; CHECK: # %bb.0: ; CHECK-NEXT: # kill: def $edi killed $edi def $rdi -; CHECK-NEXT: notl %edi -; CHECK-NEXT: shrl $31, %edi -; CHECK-NEXT: leal 41(%rdi), %eax +; CHECK-NEXT: sarl $31, %edi +; CHECK-NEXT: leal 42(%rdi), %eax ; CHECK-NEXT: retq %c = icmp sgt i32 %x, -1 %r = select i1 %c, i32 42, i32 41 @@ -71,10 +69,9 @@ define i32 @sext_ifpos(i32 %x) { define i32 @add_sext_ifpos(i32 %x) { ; CHECK-LABEL: add_sext_ifpos: ; CHECK: # %bb.0: -; CHECK-NEXT: notl %edi +; CHECK-NEXT: # kill: def $edi killed $edi def $rdi ; CHECK-NEXT: shrl $31, %edi -; CHECK-NEXT: movl $42, %eax -; CHECK-NEXT: subl %edi, %eax +; CHECK-NEXT: leal 41(%rdi), %eax ; CHECK-NEXT: retq %c = icmp sgt i32 %x, -1 %e = sext i1 %c to i32 @@ -98,10 +95,9 @@ define <4 x i32> @add_sext_ifpos_vec_splat(<4 x i32> %x) { define i32 @sel_ifpos_fval_bigger(i32 %x) { ; CHECK-LABEL: sel_ifpos_fval_bigger: ; CHECK: # %bb.0: -; CHECK-NEXT: notl %edi +; CHECK-NEXT: # kill: def $edi killed $edi def $rdi ; CHECK-NEXT: shrl $31, %edi -; CHECK-NEXT: movl $42, %eax -; CHECK-NEXT: subl %edi, %eax +; CHECK-NEXT: leal 41(%rdi), %eax ; CHECK-NEXT: retq %c = icmp sgt i32 %x, -1 %r = select i1 %c, i32 41, i32 42 @@ -186,9 +182,8 @@ define i32 @add_lshr_not(i32 %x) { ; CHECK-LABEL: add_lshr_not: ; CHECK: # %bb.0: ; CHECK-NEXT: # kill: def $edi killed $edi def $rdi -; CHECK-NEXT: notl %edi -; CHECK-NEXT: shrl $31, %edi -; CHECK-NEXT: leal 41(%rdi), %eax +; CHECK-NEXT: sarl $31, %edi +; CHECK-NEXT: leal 42(%rdi), %eax ; CHECK-NEXT: retq %not = xor i32 %x, -1 %sh = lshr i32 %not, 31 @@ -199,10 +194,8 @@ define i32 @add_lshr_not(i32 %x) { define <4 x i32> @add_lshr_not_vec_splat(<4 x i32> %x) { ; CHECK-LABEL: add_lshr_not_vec_splat: ; CHECK: # %bb.0: -; CHECK-NEXT: pcmpeqd %xmm1, %xmm1 -; CHECK-NEXT: pxor %xmm1, %xmm0 -; CHECK-NEXT: psrld $31, %xmm0 -; CHECK-NEXT: por {{.*}}(%rip), %xmm0 +; CHECK-NEXT: psrad $31, %xmm0 +; CHECK-NEXT: paddd {{.*}}(%rip), %xmm0 ; CHECK-NEXT: retq %c = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> %e = lshr <4 x i32> %c, <i32 31, i32 31, i32 31, i32 31> @@ -213,10 +206,9 @@ define <4 x i32> @add_lshr_not_vec_splat(<4 x i32> %x) { define i32 @sub_lshr_not(i32 %x) { ; CHECK-LABEL: sub_lshr_not: ; CHECK: # %bb.0: -; CHECK-NEXT: notl %edi +; CHECK-NEXT: # kill: def $edi killed $edi def $rdi ; CHECK-NEXT: shrl $31, %edi -; CHECK-NEXT: xorl $43, %edi -; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: leal 42(%rdi), %eax ; CHECK-NEXT: retq %not = xor i32 %x, -1 %sh = lshr i32 %not, 31 @@ -227,11 +219,8 @@ define i32 @sub_lshr_not(i32 %x) { define <4 x i32> @sub_lshr_not_vec_splat(<4 x i32> %x) { ; CHECK-LABEL: sub_lshr_not_vec_splat: ; CHECK: # %bb.0: -; CHECK-NEXT: pcmpeqd %xmm1, %xmm1 -; CHECK-NEXT: pxor %xmm0, %xmm1 -; CHECK-NEXT: psrld $31, %xmm1 -; CHECK-NEXT: movdqa {{.*#+}} xmm0 = [42,42,42,42] -; CHECK-NEXT: psubd %xmm1, %xmm0 +; CHECK-NEXT: psrld $31, %xmm0 +; CHECK-NEXT: paddd {{.*}}(%rip), %xmm0 ; CHECK-NEXT: retq %c = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> %e = lshr <4 x i32> %c, <i32 31, i32 31, i32 31, i32 31> |

