Diffstat (limited to 'llvm')
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp                      10
-rw-r--r--  llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll         12
-rw-r--r--  llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll       36
-rw-r--r--  llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll    8
-rw-r--r--  llvm/test/CodeGen/X86/align-down-const.ll                          37
5 files changed, 43 insertions(+), 60 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index cd6d33ffa9e..dad47944a0f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -3119,6 +3119,16 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
}
}
+ // A - (A & C) -> A & (~C)
+ if (N1.getOpcode() == ISD::AND && N1.getOperand(0) == N0 &&
+ isConstantOrConstantVector(N1.getOperand(1), /*NoOpaques=*/true)) {
+ SDValue InvC =
+ DAG.FoldConstantArithmetic(ISD::XOR, DL, VT, N1.getOperand(1).getNode(),
+ DAG.getAllOnesConstant(DL, VT).getNode());
+ assert(InvC && "Constant folding failed");
+ return DAG.getNode(ISD::AND, DL, VT, N0, InvC);
+ }
+
// fold (X - (-Y * Z)) -> (X + (Y * Z))
if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) {
if (N1.getOperand(0).getOpcode() == ISD::SUB &&
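
The new combine relies on the arithmetic identity A - (A & C) == A & ~C: the subtrahend A & C only ever contains bits that are already set in A, so the subtraction never borrows and is equivalent to clearing the bits selected by C. This is why the test changes below replace sub sequences with bic (Thumb2) and andl with a negated mask (X86). A minimal standalone sanity check of the identity (not part of the patch; the masks and test values are illustrative) might look like this:

    #include <cassert>
    #include <cstdint>

    // Sanity check of the identity exploited by the DAG combine:
    // for unsigned A and any constant mask C,  A - (A & C) == A & ~C.
    // This is not LLVM code, just a demonstration of the algebra.
    int main() {
      const uint32_t Masks[] = {15u, 16u, 42u, 0u, 0xFFFFFFFFu};
      for (uint32_t C : Masks)
        for (uint32_t I = 0; I <= 0xFFFF; ++I) {
          uint32_t A = I * 0x9E3779B9u; // spread test values across the range
          assert(A - (A & C) == (A & ~C));
        }
      return 0;
    }
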
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll
index 567aaf20799..ddf51b785ff 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll
@@ -4,7 +4,7 @@
define arm_aapcs_vfpcc void @fast_float_mul(float* nocapture %a, float* nocapture readonly %b, float* nocapture readonly %c, i32 %N) {
; CHECK-LABEL: fast_float_mul:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr}
+; CHECK-NEXT: push {r4, r5, r6, r7, lr}
; CHECK-NEXT: cmp r3, #0
; CHECK-NEXT: beq.w .LBB0_11
; CHECK-NEXT: @ %bb.1: @ %vector.memcheck
@@ -32,7 +32,6 @@ define arm_aapcs_vfpcc void @fast_float_mul(float* nocapture %a, float* nocaptur
; CHECK-NEXT: cmp r6, #3
; CHECK-NEXT: bhs .LBB0_6
; CHECK-NEXT: @ %bb.3:
-; CHECK-NEXT: mov r8, r7
; CHECK-NEXT: mov.w r12, #0
; CHECK-NEXT: b .LBB0_8
; CHECK-NEXT: .LBB0_4: @ %vector.ph
@@ -46,8 +45,7 @@ define arm_aapcs_vfpcc void @fast_float_mul(float* nocapture %a, float* nocaptur
; CHECK-NEXT: letp lr, .LBB0_5
; CHECK-NEXT: b .LBB0_11
; CHECK-NEXT: .LBB0_6: @ %for.body.preheader.new
-; CHECK-NEXT: subs r3, r3, r7
-; CHECK-NEXT: mov r8, r7
+; CHECK-NEXT: bic r3, r3, #3
; CHECK-NEXT: subs r3, #4
; CHECK-NEXT: add.w lr, r12, r3, lsr #2
; CHECK-NEXT: movs r3, #0
@@ -78,10 +76,10 @@ define arm_aapcs_vfpcc void @fast_float_mul(float* nocapture %a, float* nocaptur
; CHECK-NEXT: vstr s0, [r6, #12]
; CHECK-NEXT: le lr, .LBB0_7
; CHECK-NEXT: .LBB0_8: @ %for.cond.cleanup.loopexit.unr-lcssa
-; CHECK-NEXT: wls lr, r8, .LBB0_11
+; CHECK-NEXT: wls lr, r7, .LBB0_11
; CHECK-NEXT: @ %bb.9: @ %for.body.epil.preheader
; CHECK-NEXT: mvn r3, #3
-; CHECK-NEXT: mov lr, r8
+; CHECK-NEXT: mov lr, r7
; CHECK-NEXT: add.w r3, r3, r12, lsl #2
; CHECK-NEXT: add r1, r3
; CHECK-NEXT: add r2, r3
@@ -97,7 +95,7 @@ define arm_aapcs_vfpcc void @fast_float_mul(float* nocapture %a, float* nocaptur
; CHECK-NEXT: adds r0, #4
; CHECK-NEXT: le lr, .LBB0_10
; CHECK-NEXT: .LBB0_11: @ %for.cond.cleanup
-; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc}
+; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
entry:
%cmp8 = icmp eq i32 %N, 0
br i1 %cmp8, label %for.cond.cleanup, label %vector.memcheck
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll
index 44548484ec1..ebb041d9372 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll
@@ -1518,7 +1518,7 @@ for.body: ; preds = %for.body.preheader1
define arm_aapcs_vfpcc float @half_half_mac(half* nocapture readonly %a, half* nocapture readonly %b, i32 %N) {
; CHECK-LABEL: half_half_mac:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: push {r4, r5, r7, lr}
+; CHECK-NEXT: push {r4, lr}
; CHECK-NEXT: cbz r2, .LBB9_3
; CHECK-NEXT: @ %bb.1: @ %for.body.preheader
; CHECK-NEXT: subs r3, r2, #1
@@ -1527,18 +1527,16 @@ define arm_aapcs_vfpcc float @half_half_mac(half* nocapture readonly %a, half* n
; CHECK-NEXT: bhs .LBB9_4
; CHECK-NEXT: @ %bb.2:
; CHECK-NEXT: vldr s0, .LCPI9_0
-; CHECK-NEXT: mov r5, r4
; CHECK-NEXT: mov.w r12, #0
; CHECK-NEXT: b .LBB9_6
; CHECK-NEXT: .LBB9_3:
; CHECK-NEXT: vldr s0, .LCPI9_0
; CHECK-NEXT: b .LBB9_9
; CHECK-NEXT: .LBB9_4: @ %for.body.preheader.new
-; CHECK-NEXT: subs r2, r2, r4
+; CHECK-NEXT: bic r2, r2, #3
; CHECK-NEXT: movs r3, #1
; CHECK-NEXT: subs r2, #4
; CHECK-NEXT: vldr s0, .LCPI9_0
-; CHECK-NEXT: mov r5, r4
; CHECK-NEXT: mov.w r12, #0
; CHECK-NEXT: add.w lr, r3, r2, lsr #2
; CHECK-NEXT: sub.w r3, r0, #8
@@ -1571,10 +1569,10 @@ define arm_aapcs_vfpcc float @half_half_mac(half* nocapture readonly %a, half* n
; CHECK-NEXT: vadd.f32 s0, s0, s2
; CHECK-NEXT: le lr, .LBB9_5
; CHECK-NEXT: .LBB9_6: @ %for.cond.cleanup.loopexit.unr-lcssa
-; CHECK-NEXT: wls lr, r5, .LBB9_9
+; CHECK-NEXT: wls lr, r4, .LBB9_9
; CHECK-NEXT: @ %bb.7: @ %for.body.epil.preheader
; CHECK-NEXT: mvn r2, #1
-; CHECK-NEXT: mov lr, r5
+; CHECK-NEXT: mov lr, r4
; CHECK-NEXT: add.w r2, r2, r12, lsl #1
; CHECK-NEXT: add r0, r2
; CHECK-NEXT: add r1, r2
@@ -1589,7 +1587,7 @@ define arm_aapcs_vfpcc float @half_half_mac(half* nocapture readonly %a, half* n
; CHECK-NEXT: vadd.f32 s0, s0, s2
; CHECK-NEXT: le lr, .LBB9_8
; CHECK-NEXT: .LBB9_9: @ %for.cond.cleanup
-; CHECK-NEXT: pop {r4, r5, r7, pc}
+; CHECK-NEXT: pop {r4, pc}
; CHECK-NEXT: .p2align 2
; CHECK-NEXT: @ %bb.10:
; CHECK-NEXT: .LCPI9_0:
@@ -1679,7 +1677,7 @@ for.body: ; preds = %for.body, %for.body
define arm_aapcs_vfpcc float @half_half_acc(half* nocapture readonly %a, half* nocapture readonly %b, i32 %N) {
; CHECK-LABEL: half_half_acc:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: push {r4, r5, r7, lr}
+; CHECK-NEXT: push {r4, lr}
; CHECK-NEXT: cbz r2, .LBB10_3
; CHECK-NEXT: @ %bb.1: @ %for.body.preheader
; CHECK-NEXT: subs r3, r2, #1
@@ -1688,18 +1686,16 @@ define arm_aapcs_vfpcc float @half_half_acc(half* nocapture readonly %a, half* n
; CHECK-NEXT: bhs .LBB10_4
; CHECK-NEXT: @ %bb.2:
; CHECK-NEXT: vldr s0, .LCPI10_0
-; CHECK-NEXT: mov r5, r4
; CHECK-NEXT: mov.w r12, #0
; CHECK-NEXT: b .LBB10_6
; CHECK-NEXT: .LBB10_3:
; CHECK-NEXT: vldr s0, .LCPI10_0
; CHECK-NEXT: b .LBB10_9
; CHECK-NEXT: .LBB10_4: @ %for.body.preheader.new
-; CHECK-NEXT: subs r2, r2, r4
+; CHECK-NEXT: bic r2, r2, #3
; CHECK-NEXT: movs r3, #1
; CHECK-NEXT: subs r2, #4
; CHECK-NEXT: vldr s0, .LCPI10_0
-; CHECK-NEXT: mov r5, r4
; CHECK-NEXT: mov.w r12, #0
; CHECK-NEXT: add.w lr, r3, r2, lsr #2
; CHECK-NEXT: sub.w r3, r0, #8
@@ -1732,10 +1728,10 @@ define arm_aapcs_vfpcc float @half_half_acc(half* nocapture readonly %a, half* n
; CHECK-NEXT: vadd.f32 s0, s0, s2
; CHECK-NEXT: le lr, .LBB10_5
; CHECK-NEXT: .LBB10_6: @ %for.cond.cleanup.loopexit.unr-lcssa
-; CHECK-NEXT: wls lr, r5, .LBB10_9
+; CHECK-NEXT: wls lr, r4, .LBB10_9
; CHECK-NEXT: @ %bb.7: @ %for.body.epil.preheader
; CHECK-NEXT: mvn r2, #1
-; CHECK-NEXT: mov lr, r5
+; CHECK-NEXT: mov lr, r4
; CHECK-NEXT: add.w r2, r2, r12, lsl #1
; CHECK-NEXT: add r0, r2
; CHECK-NEXT: add r1, r2
@@ -1750,7 +1746,7 @@ define arm_aapcs_vfpcc float @half_half_acc(half* nocapture readonly %a, half* n
; CHECK-NEXT: vadd.f32 s0, s0, s2
; CHECK-NEXT: le lr, .LBB10_8
; CHECK-NEXT: .LBB10_9: @ %for.cond.cleanup
-; CHECK-NEXT: pop {r4, r5, r7, pc}
+; CHECK-NEXT: pop {r4, pc}
; CHECK-NEXT: .p2align 2
; CHECK-NEXT: @ %bb.10:
; CHECK-NEXT: .LCPI10_0:
@@ -1840,7 +1836,7 @@ for.body: ; preds = %for.body, %for.body
define arm_aapcs_vfpcc float @half_short_mac(half* nocapture readonly %a, i16* nocapture readonly %b, i32 %N) {
; CHECK-LABEL: half_short_mac:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr}
+; CHECK-NEXT: push {r4, r5, r6, r7, lr}
; CHECK-NEXT: cbz r2, .LBB11_3
; CHECK-NEXT: @ %bb.1: @ %for.body.preheader
; CHECK-NEXT: subs r3, r2, #1
@@ -1849,18 +1845,16 @@ define arm_aapcs_vfpcc float @half_short_mac(half* nocapture readonly %a, i16* n
; CHECK-NEXT: bhs .LBB11_4
; CHECK-NEXT: @ %bb.2:
; CHECK-NEXT: vldr s0, .LCPI11_0
-; CHECK-NEXT: mov r8, r7
; CHECK-NEXT: mov.w r12, #0
; CHECK-NEXT: b .LBB11_6
; CHECK-NEXT: .LBB11_3:
; CHECK-NEXT: vldr s0, .LCPI11_0
; CHECK-NEXT: b .LBB11_9
; CHECK-NEXT: .LBB11_4: @ %for.body.preheader.new
-; CHECK-NEXT: subs r2, r2, r7
+; CHECK-NEXT: bic r2, r2, #3
; CHECK-NEXT: movs r3, #1
; CHECK-NEXT: subs r2, #4
; CHECK-NEXT: vldr s0, .LCPI11_0
-; CHECK-NEXT: mov r8, r7
; CHECK-NEXT: mov.w r12, #0
; CHECK-NEXT: add.w lr, r3, r2, lsr #2
; CHECK-NEXT: movs r3, #0
@@ -1901,13 +1895,13 @@ define arm_aapcs_vfpcc float @half_short_mac(half* nocapture readonly %a, i16* n
; CHECK-NEXT: vadd.f32 s0, s0, s2
; CHECK-NEXT: le lr, .LBB11_5
; CHECK-NEXT: .LBB11_6: @ %for.cond.cleanup.loopexit.unr-lcssa
-; CHECK-NEXT: wls lr, r8, .LBB11_9
+; CHECK-NEXT: wls lr, r7, .LBB11_9
; CHECK-NEXT: @ %bb.7: @ %for.body.epil.preheader
; CHECK-NEXT: mvn r3, #1
; CHECK-NEXT: add.w r2, r3, r12, lsl #1
; CHECK-NEXT: add r0, r2
; CHECK-NEXT: add r1, r2
-; CHECK-NEXT: mov lr, r8
+; CHECK-NEXT: mov lr, r7
; CHECK-NEXT: .LBB11_8: @ %for.body.epil
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldrsh r2, [r1, #2]!
@@ -1920,7 +1914,7 @@ define arm_aapcs_vfpcc float @half_short_mac(half* nocapture readonly %a, i16* n
; CHECK-NEXT: vadd.f32 s0, s0, s2
; CHECK-NEXT: le lr, .LBB11_8
; CHECK-NEXT: .LBB11_9: @ %for.cond.cleanup
-; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc}
+; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
; CHECK-NEXT: .p2align 2
; CHECK-NEXT: @ %bb.10:
; CHECK-NEXT: .LCPI11_0:
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll
index 46d6fb2635a..f9c83b74bdd 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll
@@ -966,11 +966,10 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_int(i32* nocapture readonly
; CHECK-NEXT: beq .LBB9_4
; CHECK-NEXT: @ %bb.2: @ %for.body.preheader
; CHECK-NEXT: sub.w r4, r12, #1
-; CHECK-NEXT: and r5, r12, #3
+; CHECK-NEXT: and r10, r12, #3
; CHECK-NEXT: cmp r4, #3
; CHECK-NEXT: bhs .LBB9_6
; CHECK-NEXT: @ %bb.3:
-; CHECK-NEXT: mov r10, r5
; CHECK-NEXT: mov.w r12, #0
; CHECK-NEXT: b .LBB9_8
; CHECK-NEXT: .LBB9_4: @ %vector.ph
@@ -986,10 +985,9 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_int(i32* nocapture readonly
; CHECK-NEXT: letp lr, .LBB9_5
; CHECK-NEXT: b .LBB9_11
; CHECK-NEXT: .LBB9_6: @ %for.body.preheader.new
-; CHECK-NEXT: sub.w r7, r12, r5
-; CHECK-NEXT: mov r10, r5
-; CHECK-NEXT: subs r7, #4
+; CHECK-NEXT: bic r7, r12, #3
; CHECK-NEXT: movs r4, #0
+; CHECK-NEXT: subs r7, #4
; CHECK-NEXT: mov.w r12, #0
; CHECK-NEXT: add.w lr, lr, r7, lsr #2
; CHECK-NEXT: dls lr, lr
diff --git a/llvm/test/CodeGen/X86/align-down-const.ll b/llvm/test/CodeGen/X86/align-down-const.ll
index 1181f20f935..86f2dacbac1 100644
--- a/llvm/test/CodeGen/X86/align-down-const.ll
+++ b/llvm/test/CodeGen/X86/align-down-const.ll
@@ -19,17 +19,13 @@ define i32 @t0_32(i32 %ptr) nounwind {
; X86-LABEL: t0_32:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: andl $15, %ecx
-; X86-NEXT: subl %ecx, %eax
+; X86-NEXT: andl $-16, %eax
; X86-NEXT: retl
;
; X64-LABEL: t0_32:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
-; X64-NEXT: movl %edi, %ecx
-; X64-NEXT: andl $15, %ecx
-; X64-NEXT: subl %ecx, %eax
+; X64-NEXT: andl $-16, %eax
; X64-NEXT: retq
%bias = and i32 %ptr, 15
%r = sub i32 %ptr, %bias
@@ -40,18 +36,13 @@ define i64 @t1_64(i64 %ptr) nounwind {
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: andl $15, %ecx
-; X86-NEXT: subl %ecx, %eax
-; X86-NEXT: sbbl $0, %edx
+; X86-NEXT: andl $-16, %eax
; X86-NEXT: retl
;
; X64-LABEL: t1_64:
; X64: # %bb.0:
; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: movl %eax, %ecx
-; X64-NEXT: andl $15, %ecx
-; X64-NEXT: subq %rcx, %rax
+; X64-NEXT: andq $-16, %rax
; X64-NEXT: retq
%bias = and i64 %ptr, 15
%r = sub i64 %ptr, %bias
@@ -62,17 +53,13 @@ define i32 @t2_powerof2(i32 %ptr) nounwind {
; X86-LABEL: t2_powerof2:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: andl $16, %ecx
-; X86-NEXT: subl %ecx, %eax
+; X86-NEXT: andl $-17, %eax
; X86-NEXT: retl
;
; X64-LABEL: t2_powerof2:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
-; X64-NEXT: movl %edi, %ecx
-; X64-NEXT: andl $16, %ecx
-; X64-NEXT: subl %ecx, %eax
+; X64-NEXT: andl $-17, %eax
; X64-NEXT: retq
%bias = and i32 %ptr, 16
%r = sub i32 %ptr, %bias
@@ -82,17 +69,13 @@ define i32 @t3_random_constant(i32 %ptr) nounwind {
; X86-LABEL: t3_random_constant:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: andl $42, %ecx
-; X86-NEXT: subl %ecx, %eax
+; X86-NEXT: andl $-43, %eax
; X86-NEXT: retl
;
; X64-LABEL: t3_random_constant:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
-; X64-NEXT: movl %edi, %ecx
-; X64-NEXT: andl $42, %ecx
-; X64-NEXT: subl %ecx, %eax
+; X64-NEXT: andl $-43, %eax
; X64-NEXT: retq
%bias = and i32 %ptr, 42
%r = sub i32 %ptr, %bias
@@ -109,7 +92,7 @@ define i32 @t4_extrause(i32 %ptr, i32* %bias_storage) nounwind {
; X86-NEXT: movl %eax, %edx
; X86-NEXT: andl $15, %edx
; X86-NEXT: movl %edx, (%ecx)
-; X86-NEXT: subl %edx, %eax
+; X86-NEXT: andl $-16, %eax
; X86-NEXT: retl
;
; X64-LABEL: t4_extrause:
@@ -118,7 +101,7 @@ define i32 @t4_extrause(i32 %ptr, i32* %bias_storage) nounwind {
; X64-NEXT: movl %edi, %ecx
; X64-NEXT: andl $15, %ecx
; X64-NEXT: movl %ecx, (%rsi)
-; X64-NEXT: subl %ecx, %eax
+; X64-NEXT: andl $-16, %eax
; X64-NEXT: retq
%bias = and i32 %ptr, 15
store i32 %bias, i32* %bias_storage