author    Sanjay Patel <spatel@rotateright.com>  2018-07-27 16:42:55 +0000
committer Sanjay Patel <spatel@rotateright.com>  2018-07-27 16:42:55 +0000
commit    c7abb416dcc8b58fe9a4233f2af0e31b0c342ca6 (patch)
tree      b9894c248265286ef3b73acbaebb9345111caa99
parent    1812d33e22beb15da22aadfffc57567989a19110 (diff)
[DAGCombiner] fold 'not' with signbit math
This is a follow-up suggested in D48970.
Alive proofs: https://rise4fun.com/Alive/sII

We can eliminate an instruction in the usual select-of-constants to bit hack
transform by adjusting the add/sub with constant. This is always a win.

There are more transforms that are likely wins, but they may need target hooks
in case some targets do not benefit.

This is another step towards making up for canonicalizing to
select-of-constants in rL331486.

llvm-svn: 338132
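[Editor's illustration, not part of the commit message.] As a concrete scalar example (this mirrors the add_lshr_not test updated below), the fold rewrites a pattern of this shape, written as LLVM IR for readability:

  %not = xor i32 %x, -1
  %sh = lshr i32 %not, 31
  %r = add i32 %sh, 41

into the DAG-level equivalent of:

  %s = ashr i32 %x, 31
  %r = add i32 %s, 42

The 'not' disappears because inverting %x flips its sign bit: lshr of the inverted value is 1 - (x < 0), so bumping the add constant by one absorbs the difference.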
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 45
-rw-r--r--  llvm/test/CodeGen/AArch64/signbit-shift.ll    | 34
-rw-r--r--  llvm/test/CodeGen/PowerPC/signbit-shift.ll    | 21
-rw-r--r--  llvm/test/CodeGen/X86/signbit-shift.ll        | 43
4 files changed, 84 insertions(+), 59 deletions(-)
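[Editor's illustration, not part of the patch.] For anyone who wants to sanity-check the two identities outside of the Alive proofs, here is a minimal standalone C++ sketch; the file and variable names are made up:

// signbit_fold_check.cpp - illustrative sketch, not part of this patch.
// Verifies the two scalar identities used by the fold below:
//   add (srl (not X), 31), C == add (sra X, 31), (C + 1)
//   sub C, (srl (not X), 31) == add (srl X, 31), (C - 1)
#include <cassert>
#include <cstdint>

int main() {
  const int32_t C = 42; // any constant works; 42 matches the tests in this patch
  for (int64_t I = INT32_MIN; I <= INT32_MAX; I += 65537) {
    int32_t X = static_cast<int32_t>(I);
    uint32_t NotX = ~static_cast<uint32_t>(X);
    // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
    // (X >> 31 is an arithmetic shift for signed X on mainstream targets)
    assert(static_cast<int32_t>(NotX >> 31) + C == (X >> 31) + (C + 1));
    // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
    assert(C - static_cast<int32_t>(NotX >> 31) ==
           static_cast<int32_t>(static_cast<uint32_t>(X) >> 31) + (C - 1));
  }
  return 0;
}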
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 3fdea2d0d8d..7a99687757f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -1997,6 +1997,45 @@ static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
   return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
 }
 
+/// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
+/// a shift and add with a different constant.
+static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
+  assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
+         "Expecting add or sub");
+
+  // We need a constant operand for the add/sub, and the other operand is a
+  // logical shift right: add (srl), C or sub C, (srl).
+  bool IsAdd = N->getOpcode() == ISD::ADD;
+  SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
+  SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
+  ConstantSDNode *C = isConstOrConstSplat(ConstantOp);
+  if (!C || ShiftOp.getOpcode() != ISD::SRL)
+    return SDValue();
+
+  // The shift must be of a 'not' value.
+  // TODO: Use isBitwiseNot() if it works with vectors.
+  SDValue Not = ShiftOp.getOperand(0);
+  if (!Not.hasOneUse() || Not.getOpcode() != ISD::XOR ||
+      !isAllOnesConstantOrAllOnesSplatConstant(Not.getOperand(1)))
+    return SDValue();
+
+  // The shift must be moving the sign bit to the least-significant-bit.
+  EVT VT = ShiftOp.getValueType();
+  SDValue ShAmt = ShiftOp.getOperand(1);
+  ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
+  if (!ShAmtC || ShAmtC->getZExtValue() != VT.getScalarSizeInBits() - 1)
+    return SDValue();
+
+  // Eliminate the 'not' by adjusting the shift and add/sub constant:
+  // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
+  // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
+  SDLoc DL(N);
+  auto ShOpcode = IsAdd ? ISD::SRA : ISD::SRL;
+  SDValue NewShift = DAG.getNode(ShOpcode, DL, VT, Not.getOperand(0), ShAmt);
+  APInt NewC = IsAdd ? C->getAPIntValue() + 1 : C->getAPIntValue() - 1;
+  return DAG.getNode(ISD::ADD, DL, VT, NewShift, DAG.getConstant(NewC, DL, VT));
+}
+
 SDValue DAGCombiner::visitADD(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
@@ -2131,6 +2170,9 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
   if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
     return V;
 
+  if (SDValue V = foldAddSubOfSignBit(N, DAG))
+    return V;
+
   if (SimplifyDemandedBits(SDValue(N, 0)))
     return SDValue(N, 0);
 
@@ -2656,6 +2698,9 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
   if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
     return V;
 
+  if (SDValue V = foldAddSubOfSignBit(N, DAG))
+    return V;
+
   // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
   if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
     if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
diff --git a/llvm/test/CodeGen/AArch64/signbit-shift.ll b/llvm/test/CodeGen/AArch64/signbit-shift.ll
index 34d5ad4ebfb..521a0b8c149 100644
--- a/llvm/test/CodeGen/AArch64/signbit-shift.ll
+++ b/llvm/test/CodeGen/AArch64/signbit-shift.ll
@@ -17,9 +17,8 @@ define i32 @zext_ifpos(i32 %x) {
define i32 @add_zext_ifpos(i32 %x) {
; CHECK-LABEL: add_zext_ifpos:
; CHECK: // %bb.0:
-; CHECK-NEXT: mvn w8, w0
-; CHECK-NEXT: lsr w8, w8, #31
-; CHECK-NEXT: add w0, w8, #41 // =41
+; CHECK-NEXT: asr w8, w0, #31
+; CHECK-NEXT: add w0, w8, #42 // =42
; CHECK-NEXT: ret
%c = icmp sgt i32 %x, -1
%e = zext i1 %c to i32
@@ -67,9 +66,8 @@ define i32 @sext_ifpos(i32 %x) {
define i32 @add_sext_ifpos(i32 %x) {
; CHECK-LABEL: add_sext_ifpos:
; CHECK: // %bb.0:
-; CHECK-NEXT: mvn w8, w0
-; CHECK-NEXT: mov w9, #42
-; CHECK-NEXT: sub w0, w9, w8, lsr #31
+; CHECK-NEXT: lsr w8, w0, #31
+; CHECK-NEXT: add w0, w8, #41 // =41
; CHECK-NEXT: ret
%c = icmp sgt i32 %x, -1
%e = sext i1 %c to i32
@@ -176,9 +174,8 @@ define i32 @sel_ifneg_fval_bigger(i32 %x) {
define i32 @add_lshr_not(i32 %x) {
; CHECK-LABEL: add_lshr_not:
; CHECK: // %bb.0:
-; CHECK-NEXT: mvn w8, w0
-; CHECK-NEXT: lsr w8, w8, #31
-; CHECK-NEXT: add w0, w8, #41 // =41
+; CHECK-NEXT: asr w8, w0, #31
+; CHECK-NEXT: add w0, w8, #42 // =42
; CHECK-NEXT: ret
%not = xor i32 %x, -1
%sh = lshr i32 %not, 31
@@ -189,9 +186,9 @@ define i32 @add_lshr_not(i32 %x) {
define <4 x i32> @add_lshr_not_vec_splat(<4 x i32> %x) {
; CHECK-LABEL: add_lshr_not_vec_splat:
; CHECK: // %bb.0:
-; CHECK-NEXT: mvn v0.16b, v0.16b
-; CHECK-NEXT: ushr v0.4s, v0.4s, #31
-; CHECK-NEXT: orr v0.4s, #42
+; CHECK-NEXT: movi v1.4s, #43
+; CHECK-NEXT: ssra v1.4s, v0.4s, #31
+; CHECK-NEXT: mov v0.16b, v1.16b
; CHECK-NEXT: ret
%c = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
%e = lshr <4 x i32> %c, <i32 31, i32 31, i32 31, i32 31>
@@ -202,9 +199,9 @@ define <4 x i32> @add_lshr_not_vec_splat(<4 x i32> %x) {
define i32 @sub_lshr_not(i32 %x) {
; CHECK-LABEL: sub_lshr_not:
; CHECK: // %bb.0:
-; CHECK-NEXT: mvn w8, w0
-; CHECK-NEXT: mov w9, #43
-; CHECK-NEXT: sub w0, w9, w8, lsr #31
+; CHECK-NEXT: mov w8, #42
+; CHECK-NEXT: bfxil w8, w0, #31, #1
+; CHECK-NEXT: mov w0, w8
; CHECK-NEXT: ret
%not = xor i32 %x, -1
%sh = lshr i32 %not, 31
@@ -215,10 +212,9 @@ define i32 @sub_lshr_not(i32 %x) {
define <4 x i32> @sub_lshr_not_vec_splat(<4 x i32> %x) {
; CHECK-LABEL: sub_lshr_not_vec_splat:
; CHECK: // %bb.0:
-; CHECK-NEXT: mvn v0.16b, v0.16b
-; CHECK-NEXT: ushr v0.4s, v0.4s, #31
-; CHECK-NEXT: movi v1.4s, #42
-; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s
+; CHECK-NEXT: movi v1.4s, #41
+; CHECK-NEXT: usra v1.4s, v0.4s, #31
+; CHECK-NEXT: mov v0.16b, v1.16b
; CHECK-NEXT: ret
%c = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
%e = lshr <4 x i32> %c, <i32 31, i32 31, i32 31, i32 31>
diff --git a/llvm/test/CodeGen/PowerPC/signbit-shift.ll b/llvm/test/CodeGen/PowerPC/signbit-shift.ll
index 63d7ca7c661..7bc9cef9590 100644
--- a/llvm/test/CodeGen/PowerPC/signbit-shift.ll
+++ b/llvm/test/CodeGen/PowerPC/signbit-shift.ll
@@ -17,9 +17,8 @@ define i32 @zext_ifpos(i32 %x) {
define i32 @add_zext_ifpos(i32 %x) {
; CHECK-LABEL: add_zext_ifpos:
; CHECK: # %bb.0:
-; CHECK-NEXT: nor 3, 3, 3
-; CHECK-NEXT: srwi 3, 3, 31
-; CHECK-NEXT: addi 3, 3, 41
+; CHECK-NEXT: srawi 3, 3, 31
+; CHECK-NEXT: addi 3, 3, 42
; CHECK-NEXT: blr
%c = icmp sgt i32 %x, -1
%e = zext i1 %c to i32
@@ -184,9 +183,8 @@ define i32 @sel_ifneg_fval_bigger(i32 %x) {
define i32 @add_lshr_not(i32 %x) {
; CHECK-LABEL: add_lshr_not:
; CHECK: # %bb.0:
-; CHECK-NEXT: nor 3, 3, 3
-; CHECK-NEXT: srwi 3, 3, 31
-; CHECK-NEXT: addi 3, 3, 41
+; CHECK-NEXT: srawi 3, 3, 31
+; CHECK-NEXT: addi 3, 3, 42
; CHECK-NEXT: blr
%not = xor i32 %x, -1
%sh = lshr i32 %not, 31
@@ -200,12 +198,11 @@ define <4 x i32> @add_lshr_not_vec_splat(<4 x i32> %x) {
; CHECK-NEXT: vspltisw 3, -16
; CHECK-NEXT: vspltisw 4, 15
; CHECK-NEXT: addis 3, 2, .LCPI15_0@toc@ha
-; CHECK-NEXT: xxlnor 34, 34, 34
; CHECK-NEXT: addi 3, 3, .LCPI15_0@toc@l
; CHECK-NEXT: vsubuwm 3, 4, 3
-; CHECK-NEXT: vsrw 2, 2, 3
+; CHECK-NEXT: vsraw 2, 2, 3
; CHECK-NEXT: lvx 3, 0, 3
-; CHECK-NEXT: xxlor 34, 34, 35
+; CHECK-NEXT: vadduwm 2, 2, 3
; CHECK-NEXT: blr
%c = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
%e = lshr <4 x i32> %c, <i32 31, i32 31, i32 31, i32 31>
@@ -216,9 +213,8 @@ define <4 x i32> @add_lshr_not_vec_splat(<4 x i32> %x) {
define i32 @sub_lshr_not(i32 %x) {
; CHECK-LABEL: sub_lshr_not:
; CHECK: # %bb.0:
-; CHECK-NEXT: nor 3, 3, 3
; CHECK-NEXT: srwi 3, 3, 31
-; CHECK-NEXT: subfic 3, 3, 43
+; CHECK-NEXT: ori 3, 3, 42
; CHECK-NEXT: blr
%not = xor i32 %x, -1
%sh = lshr i32 %not, 31
@@ -232,12 +228,11 @@ define <4 x i32> @sub_lshr_not_vec_splat(<4 x i32> %x) {
; CHECK-NEXT: vspltisw 3, -16
; CHECK-NEXT: vspltisw 4, 15
; CHECK-NEXT: addis 3, 2, .LCPI17_0@toc@ha
-; CHECK-NEXT: xxlnor 34, 34, 34
; CHECK-NEXT: addi 3, 3, .LCPI17_0@toc@l
; CHECK-NEXT: vsubuwm 3, 4, 3
; CHECK-NEXT: vsrw 2, 2, 3
; CHECK-NEXT: lvx 3, 0, 3
-; CHECK-NEXT: vsubuwm 2, 3, 2
+; CHECK-NEXT: vadduwm 2, 2, 3
; CHECK-NEXT: blr
%c = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
%e = lshr <4 x i32> %c, <i32 31, i32 31, i32 31, i32 31>
diff --git a/llvm/test/CodeGen/X86/signbit-shift.ll b/llvm/test/CodeGen/X86/signbit-shift.ll
index e631da8f39c..b22c1a34a3d 100644
--- a/llvm/test/CodeGen/X86/signbit-shift.ll
+++ b/llvm/test/CodeGen/X86/signbit-shift.ll
@@ -19,9 +19,8 @@ define i32 @add_zext_ifpos(i32 %x) {
; CHECK-LABEL: add_zext_ifpos:
; CHECK: # %bb.0:
; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
-; CHECK-NEXT: notl %edi
-; CHECK-NEXT: shrl $31, %edi
-; CHECK-NEXT: leal 41(%rdi), %eax
+; CHECK-NEXT: sarl $31, %edi
+; CHECK-NEXT: leal 42(%rdi), %eax
; CHECK-NEXT: retq
%c = icmp sgt i32 %x, -1
%e = zext i1 %c to i32
@@ -47,9 +46,8 @@ define i32 @sel_ifpos_tval_bigger(i32 %x) {
; CHECK-LABEL: sel_ifpos_tval_bigger:
; CHECK: # %bb.0:
; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
-; CHECK-NEXT: notl %edi
-; CHECK-NEXT: shrl $31, %edi
-; CHECK-NEXT: leal 41(%rdi), %eax
+; CHECK-NEXT: sarl $31, %edi
+; CHECK-NEXT: leal 42(%rdi), %eax
; CHECK-NEXT: retq
%c = icmp sgt i32 %x, -1
%r = select i1 %c, i32 42, i32 41
@@ -71,10 +69,9 @@ define i32 @sext_ifpos(i32 %x) {
define i32 @add_sext_ifpos(i32 %x) {
; CHECK-LABEL: add_sext_ifpos:
; CHECK: # %bb.0:
-; CHECK-NEXT: notl %edi
+; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
; CHECK-NEXT: shrl $31, %edi
-; CHECK-NEXT: movl $42, %eax
-; CHECK-NEXT: subl %edi, %eax
+; CHECK-NEXT: leal 41(%rdi), %eax
; CHECK-NEXT: retq
%c = icmp sgt i32 %x, -1
%e = sext i1 %c to i32
@@ -98,10 +95,9 @@ define <4 x i32> @add_sext_ifpos_vec_splat(<4 x i32> %x) {
define i32 @sel_ifpos_fval_bigger(i32 %x) {
; CHECK-LABEL: sel_ifpos_fval_bigger:
; CHECK: # %bb.0:
-; CHECK-NEXT: notl %edi
+; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
; CHECK-NEXT: shrl $31, %edi
-; CHECK-NEXT: movl $42, %eax
-; CHECK-NEXT: subl %edi, %eax
+; CHECK-NEXT: leal 41(%rdi), %eax
; CHECK-NEXT: retq
%c = icmp sgt i32 %x, -1
%r = select i1 %c, i32 41, i32 42
@@ -186,9 +182,8 @@ define i32 @add_lshr_not(i32 %x) {
; CHECK-LABEL: add_lshr_not:
; CHECK: # %bb.0:
; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
-; CHECK-NEXT: notl %edi
-; CHECK-NEXT: shrl $31, %edi
-; CHECK-NEXT: leal 41(%rdi), %eax
+; CHECK-NEXT: sarl $31, %edi
+; CHECK-NEXT: leal 42(%rdi), %eax
; CHECK-NEXT: retq
%not = xor i32 %x, -1
%sh = lshr i32 %not, 31
@@ -199,10 +194,8 @@ define i32 @add_lshr_not(i32 %x) {
define <4 x i32> @add_lshr_not_vec_splat(<4 x i32> %x) {
; CHECK-LABEL: add_lshr_not_vec_splat:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcmpeqd %xmm1, %xmm1
-; CHECK-NEXT: pxor %xmm1, %xmm0
-; CHECK-NEXT: psrld $31, %xmm0
-; CHECK-NEXT: por {{.*}}(%rip), %xmm0
+; CHECK-NEXT: psrad $31, %xmm0
+; CHECK-NEXT: paddd {{.*}}(%rip), %xmm0
; CHECK-NEXT: retq
%c = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
%e = lshr <4 x i32> %c, <i32 31, i32 31, i32 31, i32 31>
@@ -213,10 +206,9 @@ define <4 x i32> @add_lshr_not_vec_splat(<4 x i32> %x) {
define i32 @sub_lshr_not(i32 %x) {
; CHECK-LABEL: sub_lshr_not:
; CHECK: # %bb.0:
-; CHECK-NEXT: notl %edi
+; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
; CHECK-NEXT: shrl $31, %edi
-; CHECK-NEXT: xorl $43, %edi
-; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: leal 42(%rdi), %eax
; CHECK-NEXT: retq
%not = xor i32 %x, -1
%sh = lshr i32 %not, 31
@@ -227,11 +219,8 @@ define i32 @sub_lshr_not(i32 %x) {
define <4 x i32> @sub_lshr_not_vec_splat(<4 x i32> %x) {
; CHECK-LABEL: sub_lshr_not_vec_splat:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcmpeqd %xmm1, %xmm1
-; CHECK-NEXT: pxor %xmm0, %xmm1
-; CHECK-NEXT: psrld $31, %xmm1
-; CHECK-NEXT: movdqa {{.*#+}} xmm0 = [42,42,42,42]
-; CHECK-NEXT: psubd %xmm1, %xmm0
+; CHECK-NEXT: psrld $31, %xmm0
+; CHECK-NEXT: paddd {{.*}}(%rip), %xmm0
; CHECK-NEXT: retq
%c = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
%e = lshr <4 x i32> %c, <i32 31, i32 31, i32 31, i32 31>