summary | refs | log | tree | commit | diff | stats
path: root/llvm
diff options
context:
space:
mode:
Diffstat (limited to 'llvm')
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 29
-rw-r--r-- llvm/test/CodeGen/ARM/bool-ext-inc.ll | 23
-rw-r--r-- llvm/test/CodeGen/X86/bool-ext-inc.ll | 42
3 files changed, 50 insertions, 44 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 1251ae6262b..0c289714c8f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -1866,14 +1866,31 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
if (isNullConstant(N1))
return N0;
- // fold ((c1-A)+c2) -> (c1+c2)-A
if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
- if (N0.getOpcode() == ISD::SUB)
- if (isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
- return DAG.getNode(ISD::SUB, DL, VT,
- DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
- N0.getOperand(1));
+ // fold ((c1-A)+c2) -> (c1+c2)-A
+ if (N0.getOpcode() == ISD::SUB &&
+ isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
+ // FIXME: Adding 2 constants should be handled by FoldConstantArithmetic.
+ return DAG.getNode(ISD::SUB, DL, VT,
+ DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
+ N0.getOperand(1));
+ }
+
+ // add (sext i1 X), 1 -> zext (not i1 X)
+ // We don't transform this pattern:
+ // add (zext i1 X), -1 -> sext (not i1 X)
+ // because most (?) targets generate better code for the zext form.
+ if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
+ isOneConstantOrOneSplatConstant(N1)) {
+ SDValue X = N0.getOperand(0);
+ if ((!LegalOperations ||
+ (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
+ TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
+ X.getScalarValueSizeInBits() == 1) {
+ SDValue Not = DAG.getNOT(DL, X, X.getValueType());
+ return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
}
+ }
}
if (SDValue NewSel = foldBinOpIntoSelect(N))
diff --git a/llvm/test/CodeGen/ARM/bool-ext-inc.ll b/llvm/test/CodeGen/ARM/bool-ext-inc.ll
index b91b9b25899..5f2ba8b109a 100644
--- a/llvm/test/CodeGen/ARM/bool-ext-inc.ll
+++ b/llvm/test/CodeGen/ARM/bool-ext-inc.ll
@@ -4,7 +4,7 @@
define i32 @sext_inc(i1 zeroext %x) {
; CHECK-LABEL: sext_inc:
; CHECK: @ BB#0:
-; CHECK-NEXT: rsb r0, r0, #1
+; CHECK-NEXT: eor r0, r0, #1
; CHECK-NEXT: mov pc, lr
%ext = sext i1 %x to i32
%add = add i32 %ext, 1
@@ -14,14 +14,12 @@ define i32 @sext_inc(i1 zeroext %x) {
define <4 x i32> @sext_inc_vec(<4 x i1> %x) {
; CHECK-LABEL: sext_inc_vec:
; CHECK: @ BB#0:
-; CHECK-NEXT: vmov d16, r0, r1
-; CHECK-NEXT: vmov.i32 q9, #0x1f
-; CHECK-NEXT: vmov.i32 q10, #0x1
+; CHECK-NEXT: vmov.i16 d16, #0x1
+; CHECK-NEXT: vmov d17, r0, r1
+; CHECK-NEXT: vmov.i32 q9, #0x1
+; CHECK-NEXT: veor d16, d17, d16
; CHECK-NEXT: vmovl.u16 q8, d16
-; CHECK-NEXT: vneg.s32 q9, q9
-; CHECK-NEXT: vshl.i32 q8, q8, #31
-; CHECK-NEXT: vshl.s32 q8, q8, q9
-; CHECK-NEXT: vadd.i32 q8, q8, q10
+; CHECK-NEXT: vand q8, q8, q9
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: vmov r2, r3, d17
; CHECK-NEXT: mov pc, lr
@@ -38,8 +36,8 @@ define <4 x i32> @cmpgt_sext_inc_vec(<4 x i32> %x, <4 x i32> %y) {
; CHECK-NEXT: vmov.i32 q10, #0x1
; CHECK-NEXT: vld1.64 {d16, d17}, [r12]
; CHECK-NEXT: vmov d18, r0, r1
-; CHECK-NEXT: vcgt.s32 q8, q9, q8
-; CHECK-NEXT: vadd.i32 q8, q8, q10
+; CHECK-NEXT: vcge.s32 q8, q8, q9
+; CHECK-NEXT: vand q8, q8, q10
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: vmov r2, r3, d17
; CHECK-NEXT: mov pc, lr
@@ -54,12 +52,11 @@ define <4 x i32> @cmpne_sext_inc_vec(<4 x i32> %x, <4 x i32> %y) {
; CHECK: @ BB#0:
; CHECK-NEXT: mov r12, sp
; CHECK-NEXT: vmov d19, r2, r3
+; CHECK-NEXT: vmov.i32 q10, #0x1
; CHECK-NEXT: vld1.64 {d16, d17}, [r12]
; CHECK-NEXT: vmov d18, r0, r1
; CHECK-NEXT: vceq.i32 q8, q9, q8
-; CHECK-NEXT: vmov.i32 q9, #0x1
-; CHECK-NEXT: vmvn q8, q8
-; CHECK-NEXT: vadd.i32 q8, q8, q9
+; CHECK-NEXT: vand q8, q8, q10
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: vmov r2, r3, d17
; CHECK-NEXT: mov pc, lr
diff --git a/llvm/test/CodeGen/X86/bool-ext-inc.ll b/llvm/test/CodeGen/X86/bool-ext-inc.ll
index 1b69b554255..e292ccd0be1 100644
--- a/llvm/test/CodeGen/X86/bool-ext-inc.ll
+++ b/llvm/test/CodeGen/X86/bool-ext-inc.ll
@@ -1,29 +1,26 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2 | FileCheck %s
-; FIXME: add (sext i1 X), 1 -> zext (not i1 X)
+; add (sext i1 X), 1 -> zext (not i1 X)
define i32 @sext_inc(i1 zeroext %x) nounwind {
; CHECK-LABEL: sext_inc:
; CHECK: # BB#0:
-; CHECK-NEXT: movzbl %dil, %ecx
-; CHECK-NEXT: movl $1, %eax
-; CHECK-NEXT: subl %ecx, %eax
+; CHECK-NEXT: xorb $1, %dil
+; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: retq
%ext = sext i1 %x to i32
%add = add i32 %ext, 1
ret i32 %add
}
-; FIXME: add (sext i1 X), 1 -> zext (not i1 X)
+; add (sext i1 X), 1 -> zext (not i1 X)
define <4 x i32> @sext_inc_vec(<4 x i1> %x) nounwind {
; CHECK-LABEL: sext_inc_vec:
; CHECK: # BB#0:
-; CHECK-NEXT: vpslld $31, %xmm0, %xmm0
-; CHECK-NEXT: vpsrad $31, %xmm0, %xmm0
-; CHECK-NEXT: vpbroadcastd {{.*}}(%rip), %xmm1
-; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vbroadcastss {{.*}}(%rip), %xmm1
+; CHECK-NEXT: vandnps %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%ext = sext <4 x i1> %x to <4 x i32>
%add = add <4 x i32> %ext, <i32 1, i32 1, i32 1, i32 1>
@@ -35,7 +32,7 @@ define <4 x i32> @cmpgt_sext_inc_vec(<4 x i32> %x, <4 x i32> %y) nounwind {
; CHECK: # BB#0:
; CHECK-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpbroadcastd {{.*}}(%rip), %xmm1
-; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vpandn %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%cmp = icmp sgt <4 x i32> %x, %y
%ext = sext <4 x i1> %cmp to <4 x i32>
@@ -47,10 +44,7 @@ define <4 x i32> @cmpne_sext_inc_vec(<4 x i32> %x, <4 x i32> %y) nounwind {
; CHECK-LABEL: cmpne_sext_inc_vec:
; CHECK: # BB#0:
; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
-; CHECK-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
-; CHECK-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; CHECK-NEXT: vpbroadcastd {{.*}}(%rip), %xmm1
-; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vpsrld $31, %xmm0, %xmm0
; CHECK-NEXT: retq
%cmp = icmp ne <4 x i32> %x, %y
%ext = sext <4 x i1> %cmp to <4 x i32>
@@ -63,7 +57,7 @@ define <4 x i64> @cmpgt_sext_inc_vec256(<4 x i64> %x, <4 x i64> %y) nounwind {
; CHECK: # BB#0:
; CHECK-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
; CHECK-NEXT: vpbroadcastq {{.*}}(%rip), %ymm1
-; CHECK-NEXT: vpaddq %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vpandn %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retq
%cmp = icmp sgt <4 x i64> %x, %y
%ext = sext <4 x i1> %cmp to <4 x i64>
@@ -75,13 +69,11 @@ define i32 @bool_logic_and_math(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
; CHECK-LABEL: bool_logic_and_math:
; CHECK: # BB#0:
; CHECK-NEXT: cmpl %esi, %edi
-; CHECK-NEXT: setne %al
+; CHECK-NEXT: sete %al
; CHECK-NEXT: cmpl %ecx, %edx
-; CHECK-NEXT: setne %cl
-; CHECK-NEXT: andb %al, %cl
-; CHECK-NEXT: movzbl %cl, %ecx
-; CHECK-NEXT: movl $1, %eax
-; CHECK-NEXT: subl %ecx, %eax
+; CHECK-NEXT: sete %cl
+; CHECK-NEXT: orb %al, %cl
+; CHECK-NEXT: movzbl %cl, %eax
; CHECK-NEXT: retq
%cmp1 = icmp ne i32 %a, %b
%cmp2 = icmp ne i32 %c, %d
@@ -95,12 +87,12 @@ define <4 x i32> @bool_logic_and_math_vec(<4 x i32> %a, <4 x i32> %b, <4 x i32>
; CHECK-LABEL: bool_logic_and_math_vec:
; CHECK: # BB#0:
; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
-; CHECK-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
-; CHECK-NEXT: vpcmpeqd %xmm3, %xmm2, %xmm2
-; CHECK-NEXT: vpxor %xmm1, %xmm2, %xmm1
+; CHECK-NEXT: vpcmpeqd %xmm3, %xmm2, %xmm1
+; CHECK-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpxor %xmm2, %xmm1, %xmm1
; CHECK-NEXT: vpandn %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpbroadcastd {{.*}}(%rip), %xmm1
-; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vpandn %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%cmp1 = icmp ne <4 x i32> %a, %b
%cmp2 = icmp ne <4 x i32> %c, %d
OpenPOWER on IntegriCloud