summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp45
-rw-r--r--llvm/test/CodeGen/AArch64/signbit-shift.ll34
-rw-r--r--llvm/test/CodeGen/PowerPC/signbit-shift.ll21
-rw-r--r--llvm/test/CodeGen/X86/signbit-shift.ll43
4 files changed, 84 insertions, 59 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 3fdea2d0d8d..7a99687757f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -1997,6 +1997,45 @@ static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
}
+/// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
+/// a shift and add with a different constant.
+static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
+ assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
+ "Expecting add or sub");
+
+ // We need a constant operand for the add/sub, and the other operand is a
+ // logical shift right: add (srl), C or sub C, (srl).
+ bool IsAdd = N->getOpcode() == ISD::ADD;
+ SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
+ SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
+ ConstantSDNode *C = isConstOrConstSplat(ConstantOp);
+ if (!C || ShiftOp.getOpcode() != ISD::SRL)
+ return SDValue();
+
+ // The shift must be of a 'not' value.
+ // TODO: Use isBitwiseNot() if it works with vectors.
+ SDValue Not = ShiftOp.getOperand(0);
+ if (!Not.hasOneUse() || Not.getOpcode() != ISD::XOR ||
+ !isAllOnesConstantOrAllOnesSplatConstant(Not.getOperand(1)))
+ return SDValue();
+
+ // The shift must be moving the sign bit to the least-significant-bit.
+ EVT VT = ShiftOp.getValueType();
+ SDValue ShAmt = ShiftOp.getOperand(1);
+ ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
+ if (!ShAmtC || ShAmtC->getZExtValue() != VT.getScalarSizeInBits() - 1)
+ return SDValue();
+
+ // Eliminate the 'not' by adjusting the shift and add/sub constant:
+ // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
+ // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
+ SDLoc DL(N);
+ auto ShOpcode = IsAdd ? ISD::SRA : ISD::SRL;
+ SDValue NewShift = DAG.getNode(ShOpcode, DL, VT, Not.getOperand(0), ShAmt);
+ APInt NewC = IsAdd ? C->getAPIntValue() + 1 : C->getAPIntValue() - 1;
+ return DAG.getNode(ISD::ADD, DL, VT, NewShift, DAG.getConstant(NewC, DL, VT));
+}
+
SDValue DAGCombiner::visitADD(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -2131,6 +2170,9 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
return V;
+ if (SDValue V = foldAddSubOfSignBit(N, DAG))
+ return V;
+
if (SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
@@ -2656,6 +2698,9 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
return V;
+ if (SDValue V = foldAddSubOfSignBit(N, DAG))
+ return V;
+
// fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
diff --git a/llvm/test/CodeGen/AArch64/signbit-shift.ll b/llvm/test/CodeGen/AArch64/signbit-shift.ll
index 34d5ad4ebfb..521a0b8c149 100644
--- a/llvm/test/CodeGen/AArch64/signbit-shift.ll
+++ b/llvm/test/CodeGen/AArch64/signbit-shift.ll
@@ -17,9 +17,8 @@ define i32 @zext_ifpos(i32 %x) {
define i32 @add_zext_ifpos(i32 %x) {
; CHECK-LABEL: add_zext_ifpos:
; CHECK: // %bb.0:
-; CHECK-NEXT: mvn w8, w0
-; CHECK-NEXT: lsr w8, w8, #31
-; CHECK-NEXT: add w0, w8, #41 // =41
+; CHECK-NEXT: asr w8, w0, #31
+; CHECK-NEXT: add w0, w8, #42 // =42
; CHECK-NEXT: ret
%c = icmp sgt i32 %x, -1
%e = zext i1 %c to i32
@@ -67,9 +66,8 @@ define i32 @sext_ifpos(i32 %x) {
define i32 @add_sext_ifpos(i32 %x) {
; CHECK-LABEL: add_sext_ifpos:
; CHECK: // %bb.0:
-; CHECK-NEXT: mvn w8, w0
-; CHECK-NEXT: mov w9, #42
-; CHECK-NEXT: sub w0, w9, w8, lsr #31
+; CHECK-NEXT: lsr w8, w0, #31
+; CHECK-NEXT: add w0, w8, #41 // =41
; CHECK-NEXT: ret
%c = icmp sgt i32 %x, -1
%e = sext i1 %c to i32
@@ -176,9 +174,8 @@ define i32 @sel_ifneg_fval_bigger(i32 %x) {
define i32 @add_lshr_not(i32 %x) {
; CHECK-LABEL: add_lshr_not:
; CHECK: // %bb.0:
-; CHECK-NEXT: mvn w8, w0
-; CHECK-NEXT: lsr w8, w8, #31
-; CHECK-NEXT: add w0, w8, #41 // =41
+; CHECK-NEXT: asr w8, w0, #31
+; CHECK-NEXT: add w0, w8, #42 // =42
; CHECK-NEXT: ret
%not = xor i32 %x, -1
%sh = lshr i32 %not, 31
@@ -189,9 +186,9 @@ define i32 @add_lshr_not(i32 %x) {
define <4 x i32> @add_lshr_not_vec_splat(<4 x i32> %x) {
; CHECK-LABEL: add_lshr_not_vec_splat:
; CHECK: // %bb.0:
-; CHECK-NEXT: mvn v0.16b, v0.16b
-; CHECK-NEXT: ushr v0.4s, v0.4s, #31
-; CHECK-NEXT: orr v0.4s, #42
+; CHECK-NEXT: movi v1.4s, #43
+; CHECK-NEXT: ssra v1.4s, v0.4s, #31
+; CHECK-NEXT: mov v0.16b, v1.16b
; CHECK-NEXT: ret
%c = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
%e = lshr <4 x i32> %c, <i32 31, i32 31, i32 31, i32 31>
@@ -202,9 +199,9 @@ define <4 x i32> @add_lshr_not_vec_splat(<4 x i32> %x) {
define i32 @sub_lshr_not(i32 %x) {
; CHECK-LABEL: sub_lshr_not:
; CHECK: // %bb.0:
-; CHECK-NEXT: mvn w8, w0
-; CHECK-NEXT: mov w9, #43
-; CHECK-NEXT: sub w0, w9, w8, lsr #31
+; CHECK-NEXT: mov w8, #42
+; CHECK-NEXT: bfxil w8, w0, #31, #1
+; CHECK-NEXT: mov w0, w8
; CHECK-NEXT: ret
%not = xor i32 %x, -1
%sh = lshr i32 %not, 31
@@ -215,10 +212,9 @@ define i32 @sub_lshr_not(i32 %x) {
define <4 x i32> @sub_lshr_not_vec_splat(<4 x i32> %x) {
; CHECK-LABEL: sub_lshr_not_vec_splat:
; CHECK: // %bb.0:
-; CHECK-NEXT: mvn v0.16b, v0.16b
-; CHECK-NEXT: ushr v0.4s, v0.4s, #31
-; CHECK-NEXT: movi v1.4s, #42
-; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s
+; CHECK-NEXT: movi v1.4s, #41
+; CHECK-NEXT: usra v1.4s, v0.4s, #31
+; CHECK-NEXT: mov v0.16b, v1.16b
; CHECK-NEXT: ret
%c = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
%e = lshr <4 x i32> %c, <i32 31, i32 31, i32 31, i32 31>
diff --git a/llvm/test/CodeGen/PowerPC/signbit-shift.ll b/llvm/test/CodeGen/PowerPC/signbit-shift.ll
index 63d7ca7c661..7bc9cef9590 100644
--- a/llvm/test/CodeGen/PowerPC/signbit-shift.ll
+++ b/llvm/test/CodeGen/PowerPC/signbit-shift.ll
@@ -17,9 +17,8 @@ define i32 @zext_ifpos(i32 %x) {
define i32 @add_zext_ifpos(i32 %x) {
; CHECK-LABEL: add_zext_ifpos:
; CHECK: # %bb.0:
-; CHECK-NEXT: nor 3, 3, 3
-; CHECK-NEXT: srwi 3, 3, 31
-; CHECK-NEXT: addi 3, 3, 41
+; CHECK-NEXT: srawi 3, 3, 31
+; CHECK-NEXT: addi 3, 3, 42
; CHECK-NEXT: blr
%c = icmp sgt i32 %x, -1
%e = zext i1 %c to i32
@@ -184,9 +183,8 @@ define i32 @sel_ifneg_fval_bigger(i32 %x) {
define i32 @add_lshr_not(i32 %x) {
; CHECK-LABEL: add_lshr_not:
; CHECK: # %bb.0:
-; CHECK-NEXT: nor 3, 3, 3
-; CHECK-NEXT: srwi 3, 3, 31
-; CHECK-NEXT: addi 3, 3, 41
+; CHECK-NEXT: srawi 3, 3, 31
+; CHECK-NEXT: addi 3, 3, 42
; CHECK-NEXT: blr
%not = xor i32 %x, -1
%sh = lshr i32 %not, 31
@@ -200,12 +198,11 @@ define <4 x i32> @add_lshr_not_vec_splat(<4 x i32> %x) {
; CHECK-NEXT: vspltisw 3, -16
; CHECK-NEXT: vspltisw 4, 15
; CHECK-NEXT: addis 3, 2, .LCPI15_0@toc@ha
-; CHECK-NEXT: xxlnor 34, 34, 34
; CHECK-NEXT: addi 3, 3, .LCPI15_0@toc@l
; CHECK-NEXT: vsubuwm 3, 4, 3
-; CHECK-NEXT: vsrw 2, 2, 3
+; CHECK-NEXT: vsraw 2, 2, 3
; CHECK-NEXT: lvx 3, 0, 3
-; CHECK-NEXT: xxlor 34, 34, 35
+; CHECK-NEXT: vadduwm 2, 2, 3
; CHECK-NEXT: blr
%c = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
%e = lshr <4 x i32> %c, <i32 31, i32 31, i32 31, i32 31>
@@ -216,9 +213,8 @@ define <4 x i32> @add_lshr_not_vec_splat(<4 x i32> %x) {
define i32 @sub_lshr_not(i32 %x) {
; CHECK-LABEL: sub_lshr_not:
; CHECK: # %bb.0:
-; CHECK-NEXT: nor 3, 3, 3
; CHECK-NEXT: srwi 3, 3, 31
-; CHECK-NEXT: subfic 3, 3, 43
+; CHECK-NEXT: ori 3, 3, 42
; CHECK-NEXT: blr
%not = xor i32 %x, -1
%sh = lshr i32 %not, 31
@@ -232,12 +228,11 @@ define <4 x i32> @sub_lshr_not_vec_splat(<4 x i32> %x) {
; CHECK-NEXT: vspltisw 3, -16
; CHECK-NEXT: vspltisw 4, 15
; CHECK-NEXT: addis 3, 2, .LCPI17_0@toc@ha
-; CHECK-NEXT: xxlnor 34, 34, 34
; CHECK-NEXT: addi 3, 3, .LCPI17_0@toc@l
; CHECK-NEXT: vsubuwm 3, 4, 3
; CHECK-NEXT: vsrw 2, 2, 3
; CHECK-NEXT: lvx 3, 0, 3
-; CHECK-NEXT: vsubuwm 2, 3, 2
+; CHECK-NEXT: vadduwm 2, 2, 3
; CHECK-NEXT: blr
%c = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
%e = lshr <4 x i32> %c, <i32 31, i32 31, i32 31, i32 31>
diff --git a/llvm/test/CodeGen/X86/signbit-shift.ll b/llvm/test/CodeGen/X86/signbit-shift.ll
index e631da8f39c..b22c1a34a3d 100644
--- a/llvm/test/CodeGen/X86/signbit-shift.ll
+++ b/llvm/test/CodeGen/X86/signbit-shift.ll
@@ -19,9 +19,8 @@ define i32 @add_zext_ifpos(i32 %x) {
; CHECK-LABEL: add_zext_ifpos:
; CHECK: # %bb.0:
; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
-; CHECK-NEXT: notl %edi
-; CHECK-NEXT: shrl $31, %edi
-; CHECK-NEXT: leal 41(%rdi), %eax
+; CHECK-NEXT: sarl $31, %edi
+; CHECK-NEXT: leal 42(%rdi), %eax
; CHECK-NEXT: retq
%c = icmp sgt i32 %x, -1
%e = zext i1 %c to i32
@@ -47,9 +46,8 @@ define i32 @sel_ifpos_tval_bigger(i32 %x) {
; CHECK-LABEL: sel_ifpos_tval_bigger:
; CHECK: # %bb.0:
; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
-; CHECK-NEXT: notl %edi
-; CHECK-NEXT: shrl $31, %edi
-; CHECK-NEXT: leal 41(%rdi), %eax
+; CHECK-NEXT: sarl $31, %edi
+; CHECK-NEXT: leal 42(%rdi), %eax
; CHECK-NEXT: retq
%c = icmp sgt i32 %x, -1
%r = select i1 %c, i32 42, i32 41
@@ -71,10 +69,9 @@ define i32 @sext_ifpos(i32 %x) {
define i32 @add_sext_ifpos(i32 %x) {
; CHECK-LABEL: add_sext_ifpos:
; CHECK: # %bb.0:
-; CHECK-NEXT: notl %edi
+; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
; CHECK-NEXT: shrl $31, %edi
-; CHECK-NEXT: movl $42, %eax
-; CHECK-NEXT: subl %edi, %eax
+; CHECK-NEXT: leal 41(%rdi), %eax
; CHECK-NEXT: retq
%c = icmp sgt i32 %x, -1
%e = sext i1 %c to i32
@@ -98,10 +95,9 @@ define <4 x i32> @add_sext_ifpos_vec_splat(<4 x i32> %x) {
define i32 @sel_ifpos_fval_bigger(i32 %x) {
; CHECK-LABEL: sel_ifpos_fval_bigger:
; CHECK: # %bb.0:
-; CHECK-NEXT: notl %edi
+; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
; CHECK-NEXT: shrl $31, %edi
-; CHECK-NEXT: movl $42, %eax
-; CHECK-NEXT: subl %edi, %eax
+; CHECK-NEXT: leal 41(%rdi), %eax
; CHECK-NEXT: retq
%c = icmp sgt i32 %x, -1
%r = select i1 %c, i32 41, i32 42
@@ -186,9 +182,8 @@ define i32 @add_lshr_not(i32 %x) {
; CHECK-LABEL: add_lshr_not:
; CHECK: # %bb.0:
; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
-; CHECK-NEXT: notl %edi
-; CHECK-NEXT: shrl $31, %edi
-; CHECK-NEXT: leal 41(%rdi), %eax
+; CHECK-NEXT: sarl $31, %edi
+; CHECK-NEXT: leal 42(%rdi), %eax
; CHECK-NEXT: retq
%not = xor i32 %x, -1
%sh = lshr i32 %not, 31
@@ -199,10 +194,8 @@ define i32 @add_lshr_not(i32 %x) {
define <4 x i32> @add_lshr_not_vec_splat(<4 x i32> %x) {
; CHECK-LABEL: add_lshr_not_vec_splat:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcmpeqd %xmm1, %xmm1
-; CHECK-NEXT: pxor %xmm1, %xmm0
-; CHECK-NEXT: psrld $31, %xmm0
-; CHECK-NEXT: por {{.*}}(%rip), %xmm0
+; CHECK-NEXT: psrad $31, %xmm0
+; CHECK-NEXT: paddd {{.*}}(%rip), %xmm0
; CHECK-NEXT: retq
%c = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
%e = lshr <4 x i32> %c, <i32 31, i32 31, i32 31, i32 31>
@@ -213,10 +206,9 @@ define <4 x i32> @add_lshr_not_vec_splat(<4 x i32> %x) {
define i32 @sub_lshr_not(i32 %x) {
; CHECK-LABEL: sub_lshr_not:
; CHECK: # %bb.0:
-; CHECK-NEXT: notl %edi
+; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
; CHECK-NEXT: shrl $31, %edi
-; CHECK-NEXT: xorl $43, %edi
-; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: leal 42(%rdi), %eax
; CHECK-NEXT: retq
%not = xor i32 %x, -1
%sh = lshr i32 %not, 31
@@ -227,11 +219,8 @@ define i32 @sub_lshr_not(i32 %x) {
define <4 x i32> @sub_lshr_not_vec_splat(<4 x i32> %x) {
; CHECK-LABEL: sub_lshr_not_vec_splat:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcmpeqd %xmm1, %xmm1
-; CHECK-NEXT: pxor %xmm0, %xmm1
-; CHECK-NEXT: psrld $31, %xmm1
-; CHECK-NEXT: movdqa {{.*#+}} xmm0 = [42,42,42,42]
-; CHECK-NEXT: psubd %xmm1, %xmm0
+; CHECK-NEXT: psrld $31, %xmm0
+; CHECK-NEXT: paddd {{.*}}(%rip), %xmm0
; CHECK-NEXT: retq
%c = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
%e = lshr <4 x i32> %c, <i32 31, i32 31, i32 31, i32 31>
OpenPOWER on IntegriCloud