commit    a0547c3d9f71ae721eab6fb9f2dd4bd2c8d75223
author    Sanjay Patel <spatel@rotateright.com>  2017-04-26 20:26:46 +0000
committer Sanjay Patel <spatel@rotateright.com>  2017-04-26 20:26:46 +0000
tree      5aba469bc6d533368442262979c3fbdac0fb6c61
parent    2a906e1b34c39280947a1c9afbf2605c081c4376
[DAGCombiner] add (sext i1 X), 1 --> zext (not i1 X)
Besides better codegen, the motivation is to be able to canonicalize this
pattern in IR (currently we don't), knowing that the backend is prepared
for that. This may also allow removing code for special constant cases in
DAGCombiner::foldSelectOfConstants() that was added in D30180.

Differential Revision: https://reviews.llvm.org/D31944

llvm-svn: 301457
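To see the identity concretely (a minimal sketch in IR terms, mirroring the
sext_inc test below; the %not name is illustrative): for an i1 value X,
sext X is 0 or -1, so (sext X) + 1 is 1 or 0, which is exactly
zext (xor X, true).

    %ext = sext i1 %x to i32      ; 0 or -1
    %add = add i32 %ext, 1        ; 1 or 0

becomes

    %not = xor i1 %x, true        ; logical not of the bool
    %add = zext i1 %not to i32    ; 1 or 0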
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 29
-rw-r--r--  llvm/test/CodeGen/ARM/bool-ext-inc.ll         | 23
-rw-r--r--  llvm/test/CodeGen/X86/bool-ext-inc.ll         | 42
3 files changed, 50 insertions(+), 44 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 1251ae6262b..0c289714c8f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -1866,14 +1866,31 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
  if (isNullConstant(N1))
    return N0;

-  // fold ((c1-A)+c2) -> (c1+c2)-A
  if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
-    if (N0.getOpcode() == ISD::SUB)
-      if (isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
-        return DAG.getNode(ISD::SUB, DL, VT,
-                           DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
-                           N0.getOperand(1));
+    // fold ((c1-A)+c2) -> (c1+c2)-A
+    if (N0.getOpcode() == ISD::SUB &&
+        isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
+      // FIXME: Adding 2 constants should be handled by FoldConstantArithmetic.
+      return DAG.getNode(ISD::SUB, DL, VT,
+                         DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
+                         N0.getOperand(1));
+    }
+
+    // add (sext i1 X), 1 -> zext (not i1 X)
+    // We don't transform this pattern:
+    //   add (zext i1 X), -1 -> sext (not i1 X)
+    // because most (?) targets generate better code for the zext form.
+    if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
+        isOneConstantOrOneSplatConstant(N1)) {
+      SDValue X = N0.getOperand(0);
+      if ((!LegalOperations ||
+           (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
+            TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
+          X.getScalarValueSizeInBits() == 1) {
+        SDValue Not = DAG.getNOT(DL, X, X.getValueType());
+        return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
      }
+    }
  }

  if (SDValue NewSel = foldBinOpIntoSelect(N))
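The comment in the hunk above names the mirror-image pattern that is
deliberately not transformed; the identity itself still holds (a sketch
with illustrative names): for an i1 value X, zext X is 0 or 1, so
(zext X) + (-1) is -1 or 0, i.e. sext (xor X, true). The fold is
one-directional because, per the comment, most targets generate better
code for the zext form.

    %ext = zext i1 %x to i32      ; 0 or 1
    %add = add i32 %ext, -1       ; -1 or 0

is equivalent to, but is not rewritten as,

    %not = xor i1 %x, true
    %add = sext i1 %not to i32    ; -1 or 0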
diff --git a/llvm/test/CodeGen/ARM/bool-ext-inc.ll b/llvm/test/CodeGen/ARM/bool-ext-inc.ll
index b91b9b25899..5f2ba8b109a 100644
--- a/llvm/test/CodeGen/ARM/bool-ext-inc.ll
+++ b/llvm/test/CodeGen/ARM/bool-ext-inc.ll
@@ -4,7 +4,7 @@
define i32 @sext_inc(i1 zeroext %x) {
; CHECK-LABEL: sext_inc:
; CHECK: @ BB#0:
-; CHECK-NEXT: rsb r0, r0, #1
+; CHECK-NEXT: eor r0, r0, #1
; CHECK-NEXT: mov pc, lr
%ext = sext i1 %x to i32
%add = add i32 %ext, 1
@@ -14,14 +14,12 @@ define i32 @sext_inc(i1 zeroext %x) {
define <4 x i32> @sext_inc_vec(<4 x i1> %x) {
; CHECK-LABEL: sext_inc_vec:
; CHECK: @ BB#0:
-; CHECK-NEXT: vmov d16, r0, r1
-; CHECK-NEXT: vmov.i32 q9, #0x1f
-; CHECK-NEXT: vmov.i32 q10, #0x1
+; CHECK-NEXT: vmov.i16 d16, #0x1
+; CHECK-NEXT: vmov d17, r0, r1
+; CHECK-NEXT: vmov.i32 q9, #0x1
+; CHECK-NEXT: veor d16, d17, d16
; CHECK-NEXT: vmovl.u16 q8, d16
-; CHECK-NEXT: vneg.s32 q9, q9
-; CHECK-NEXT: vshl.i32 q8, q8, #31
-; CHECK-NEXT: vshl.s32 q8, q8, q9
-; CHECK-NEXT: vadd.i32 q8, q8, q10
+; CHECK-NEXT: vand q8, q8, q9
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: vmov r2, r3, d17
; CHECK-NEXT: mov pc, lr
@@ -38,8 +36,8 @@ define <4 x i32> @cmpgt_sext_inc_vec(<4 x i32> %x, <4 x i32> %y) {
; CHECK-NEXT: vmov.i32 q10, #0x1
; CHECK-NEXT: vld1.64 {d16, d17}, [r12]
; CHECK-NEXT: vmov d18, r0, r1
-; CHECK-NEXT: vcgt.s32 q8, q9, q8
-; CHECK-NEXT: vadd.i32 q8, q8, q10
+; CHECK-NEXT: vcge.s32 q8, q8, q9
+; CHECK-NEXT: vand q8, q8, q10
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: vmov r2, r3, d17
; CHECK-NEXT: mov pc, lr
@@ -54,12 +52,11 @@ define <4 x i32> @cmpne_sext_inc_vec(<4 x i32> %x, <4 x i32> %y) {
; CHECK: @ BB#0:
; CHECK-NEXT: mov r12, sp
; CHECK-NEXT: vmov d19, r2, r3
+; CHECK-NEXT: vmov.i32 q10, #0x1
; CHECK-NEXT: vld1.64 {d16, d17}, [r12]
; CHECK-NEXT: vmov d18, r0, r1
; CHECK-NEXT: vceq.i32 q8, q9, q8
-; CHECK-NEXT: vmov.i32 q9, #0x1
-; CHECK-NEXT: vmvn q8, q8
-; CHECK-NEXT: vadd.i32 q8, q8, q9
+; CHECK-NEXT: vand q8, q8, q10
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: vmov r2, r3, d17
; CHECK-NEXT: mov pc, lr
diff --git a/llvm/test/CodeGen/X86/bool-ext-inc.ll b/llvm/test/CodeGen/X86/bool-ext-inc.ll
index 1b69b554255..e292ccd0be1 100644
--- a/llvm/test/CodeGen/X86/bool-ext-inc.ll
+++ b/llvm/test/CodeGen/X86/bool-ext-inc.ll
@@ -1,29 +1,26 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2 | FileCheck %s
-; FIXME: add (sext i1 X), 1 -> zext (not i1 X)
+; add (sext i1 X), 1 -> zext (not i1 X)
define i32 @sext_inc(i1 zeroext %x) nounwind {
; CHECK-LABEL: sext_inc:
; CHECK: # BB#0:
-; CHECK-NEXT: movzbl %dil, %ecx
-; CHECK-NEXT: movl $1, %eax
-; CHECK-NEXT: subl %ecx, %eax
+; CHECK-NEXT: xorb $1, %dil
+; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: retq
%ext = sext i1 %x to i32
%add = add i32 %ext, 1
ret i32 %add
}
-; FIXME: add (sext i1 X), 1 -> zext (not i1 X)
+; add (sext i1 X), 1 -> zext (not i1 X)
define <4 x i32> @sext_inc_vec(<4 x i1> %x) nounwind {
; CHECK-LABEL: sext_inc_vec:
; CHECK: # BB#0:
-; CHECK-NEXT: vpslld $31, %xmm0, %xmm0
-; CHECK-NEXT: vpsrad $31, %xmm0, %xmm0
-; CHECK-NEXT: vpbroadcastd {{.*}}(%rip), %xmm1
-; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vbroadcastss {{.*}}(%rip), %xmm1
+; CHECK-NEXT: vandnps %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%ext = sext <4 x i1> %x to <4 x i32>
%add = add <4 x i32> %ext, <i32 1, i32 1, i32 1, i32 1>
@@ -35,7 +32,7 @@ define <4 x i32> @cmpgt_sext_inc_vec(<4 x i32> %x, <4 x i32> %y) nounwind {
; CHECK: # BB#0:
; CHECK-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpbroadcastd {{.*}}(%rip), %xmm1
-; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vpandn %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%cmp = icmp sgt <4 x i32> %x, %y
%ext = sext <4 x i1> %cmp to <4 x i32>
@@ -47,10 +44,7 @@ define <4 x i32> @cmpne_sext_inc_vec(<4 x i32> %x, <4 x i32> %y) nounwind {
; CHECK-LABEL: cmpne_sext_inc_vec:
; CHECK: # BB#0:
; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
-; CHECK-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
-; CHECK-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; CHECK-NEXT: vpbroadcastd {{.*}}(%rip), %xmm1
-; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vpsrld $31, %xmm0, %xmm0
; CHECK-NEXT: retq
%cmp = icmp ne <4 x i32> %x, %y
%ext = sext <4 x i1> %cmp to <4 x i32>
@@ -63,7 +57,7 @@ define <4 x i64> @cmpgt_sext_inc_vec256(<4 x i64> %x, <4 x i64> %y) nounwind {
; CHECK: # BB#0:
; CHECK-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
; CHECK-NEXT: vpbroadcastq {{.*}}(%rip), %ymm1
-; CHECK-NEXT: vpaddq %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vpandn %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retq
%cmp = icmp sgt <4 x i64> %x, %y
%ext = sext <4 x i1> %cmp to <4 x i64>
@@ -75,13 +69,11 @@ define i32 @bool_logic_and_math(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
; CHECK-LABEL: bool_logic_and_math:
; CHECK: # BB#0:
; CHECK-NEXT: cmpl %esi, %edi
-; CHECK-NEXT: setne %al
+; CHECK-NEXT: sete %al
; CHECK-NEXT: cmpl %ecx, %edx
-; CHECK-NEXT: setne %cl
-; CHECK-NEXT: andb %al, %cl
-; CHECK-NEXT: movzbl %cl, %ecx
-; CHECK-NEXT: movl $1, %eax
-; CHECK-NEXT: subl %ecx, %eax
+; CHECK-NEXT: sete %cl
+; CHECK-NEXT: orb %al, %cl
+; CHECK-NEXT: movzbl %cl, %eax
; CHECK-NEXT: retq
%cmp1 = icmp ne i32 %a, %b
%cmp2 = icmp ne i32 %c, %d
@@ -95,12 +87,12 @@ define <4 x i32> @bool_logic_and_math_vec(<4 x i32> %a, <4 x i32> %b, <4 x i32>
; CHECK-LABEL: bool_logic_and_math_vec:
; CHECK: # BB#0:
; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
-; CHECK-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
-; CHECK-NEXT: vpcmpeqd %xmm3, %xmm2, %xmm2
-; CHECK-NEXT: vpxor %xmm1, %xmm2, %xmm1
+; CHECK-NEXT: vpcmpeqd %xmm3, %xmm2, %xmm1
+; CHECK-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpxor %xmm2, %xmm1, %xmm1
; CHECK-NEXT: vpandn %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpbroadcastd {{.*}}(%rip), %xmm1
-; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vpandn %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%cmp1 = icmp ne <4 x i32> %a, %b
%cmp2 = icmp ne <4 x i32> %c, %d