Diffstat (limited to 'llvm'): 5 files changed, 43 insertions, 60 deletions
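The functional change is the single DAGCombiner fold below; the test deltas are fallout from it (the align-down idiom ptr - (ptr & 15) now lowers to a single mask such as andl $-16). The rewrite is sound because A & C can only contain bits that are already set in A, so the subtraction never borrows and A - (A & C) == A & ~C. A minimal standalone C++ sketch of that identity (helper names here are illustrative, not taken from the patch):

  #include <cassert>
  #include <cstdint>

  // Checks the fold "A - (A & C) -> A & (~C)" on a few values.
  static uint32_t alignDownViaSub(uint32_t A, uint32_t C) { return A - (A & C); }
  static uint32_t alignDownViaAnd(uint32_t A, uint32_t C) { return A & ~C; }

  int main() {
    for (uint32_t C : {15u, 16u, 42u})   // the masks used in align-down-const.ll
      for (uint32_t A : {0u, 1u, 7u, 31u, 0xDEADBEEFu, 0xFFFFFFFFu})
        assert(alignDownViaSub(A, C) == alignDownViaAnd(A, C));
    return 0;
  }

With C = 15 this is exactly the pattern in t0_32 below: x - (x & 15) becomes x & ~15, which the X86 backend emits as andl $-16.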
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index cd6d33ffa9e..dad47944a0f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -3119,6 +3119,16 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
     }
   }
 
+  // A - (A & C) -> A & (~C)
+  if (N1.getOpcode() == ISD::AND && N1.getOperand(0) == N0 &&
+      isConstantOrConstantVector(N1.getOperand(1), /*NoOpaques=*/true)) {
+    SDValue InvC =
+        DAG.FoldConstantArithmetic(ISD::XOR, DL, VT, N1.getOperand(1).getNode(),
+                                   DAG.getAllOnesConstant(DL, VT).getNode());
+    assert(InvC && "Constant folding failed");
+    return DAG.getNode(ISD::AND, DL, VT, N0, InvC);
+  }
+
   // fold (X - (-Y * Z)) -> (X + (Y * Z))
   if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) {
     if (N1.getOperand(0).getOpcode() == ISD::SUB &&
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll
index 567aaf20799..ddf51b785ff 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll
@@ -4,7 +4,7 @@ define arm_aapcs_vfpcc void @fast_float_mul(float* nocapture %a, float* nocapture readonly %b, float* nocapture readonly %c, i32 %N) {
 ; CHECK-LABEL: fast_float_mul:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    push.w {r4, r5, r6, r7, r8, lr}
+; CHECK-NEXT:    push {r4, r5, r6, r7, lr}
 ; CHECK-NEXT:    cmp r3, #0
 ; CHECK-NEXT:    beq.w .LBB0_11
 ; CHECK-NEXT:    @ %bb.1: @ %vector.memcheck
@@ -32,7 +32,6 @@ define arm_aapcs_vfpcc void @fast_float_mul(float* nocapture %a, float* nocaptur
 ; CHECK-NEXT:    cmp r6, #3
 ; CHECK-NEXT:    bhs .LBB0_6
 ; CHECK-NEXT:    @ %bb.3:
-; CHECK-NEXT:    mov r8, r7
 ; CHECK-NEXT:    mov.w r12, #0
 ; CHECK-NEXT:    b .LBB0_8
 ; CHECK-NEXT:    .LBB0_4: @ %vector.ph
@@ -46,8 +45,7 @@ define arm_aapcs_vfpcc void @fast_float_mul(float* nocapture %a, float* nocaptur
 ; CHECK-NEXT:    letp lr, .LBB0_5
 ; CHECK-NEXT:    b .LBB0_11
 ; CHECK-NEXT:    .LBB0_6: @ %for.body.preheader.new
-; CHECK-NEXT:    subs r3, r3, r7
-; CHECK-NEXT:    mov r8, r7
+; CHECK-NEXT:    bic r3, r3, #3
 ; CHECK-NEXT:    subs r3, #4
 ; CHECK-NEXT:    add.w lr, r12, r3, lsr #2
 ; CHECK-NEXT:    movs r3, #0
@@ -78,10 +76,10 @@ define arm_aapcs_vfpcc void @fast_float_mul(float* nocapture %a, float* nocaptur
 ; CHECK-NEXT:    vstr s0, [r6, #12]
 ; CHECK-NEXT:    le lr, .LBB0_7
 ; CHECK-NEXT:    .LBB0_8: @ %for.cond.cleanup.loopexit.unr-lcssa
-; CHECK-NEXT:    wls lr, r8, .LBB0_11
+; CHECK-NEXT:    wls lr, r7, .LBB0_11
 ; CHECK-NEXT:    @ %bb.9: @ %for.body.epil.preheader
 ; CHECK-NEXT:    mvn r3, #3
-; CHECK-NEXT:    mov lr, r8
+; CHECK-NEXT:    mov lr, r7
 ; CHECK-NEXT:    add.w r3, r3, r12, lsl #2
 ; CHECK-NEXT:    add r1, r3
 ; CHECK-NEXT:    add r2, r3
@@ -97,7 +95,7 @@ define arm_aapcs_vfpcc void @fast_float_mul(float* nocapture %a, float* nocaptur
 ; CHECK-NEXT:    adds r0, #4
 ; CHECK-NEXT:    le lr, .LBB0_10
 ; CHECK-NEXT:    .LBB0_11: @ %for.cond.cleanup
-; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, pc}
+; CHECK-NEXT:    pop {r4, r5, r6, r7, pc}
 entry:
   %cmp8 = icmp eq i32 %N, 0
   br i1 %cmp8, label %for.cond.cleanup, label %vector.memcheck
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll
index 44548484ec1..ebb041d9372 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll
@@ -1518,7 +1518,7 @@ for.body: ; preds = %for.body.preheader1
 define arm_aapcs_vfpcc float @half_half_mac(half* nocapture readonly %a, half* nocapture readonly %b, i32 %N) {
 ; CHECK-LABEL: half_half_mac:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    push {r4, r5, r7, lr}
+; CHECK-NEXT:    push {r4, lr}
 ; CHECK-NEXT:    cbz r2, .LBB9_3
 ; CHECK-NEXT:    @ %bb.1: @ %for.body.preheader
 ; CHECK-NEXT:    subs r3, r2, #1
@@ -1527,18 +1527,16 @@ define arm_aapcs_vfpcc float @half_half_mac(half* nocapture readonly %a, half* n
 ; CHECK-NEXT:    bhs .LBB9_4
 ; CHECK-NEXT:    @ %bb.2:
 ; CHECK-NEXT:    vldr s0, .LCPI9_0
-; CHECK-NEXT:    mov r5, r4
 ; CHECK-NEXT:    mov.w r12, #0
 ; CHECK-NEXT:    b .LBB9_6
 ; CHECK-NEXT:    .LBB9_3:
 ; CHECK-NEXT:    vldr s0, .LCPI9_0
 ; CHECK-NEXT:    b .LBB9_9
 ; CHECK-NEXT:    .LBB9_4: @ %for.body.preheader.new
-; CHECK-NEXT:    subs r2, r2, r4
+; CHECK-NEXT:    bic r2, r2, #3
 ; CHECK-NEXT:    movs r3, #1
 ; CHECK-NEXT:    subs r2, #4
 ; CHECK-NEXT:    vldr s0, .LCPI9_0
-; CHECK-NEXT:    mov r5, r4
 ; CHECK-NEXT:    mov.w r12, #0
 ; CHECK-NEXT:    add.w lr, r3, r2, lsr #2
 ; CHECK-NEXT:    sub.w r3, r0, #8
@@ -1571,10 +1569,10 @@ define arm_aapcs_vfpcc float @half_half_mac(half* nocapture readonly %a, half* n
 ; CHECK-NEXT:    vadd.f32 s0, s0, s2
 ; CHECK-NEXT:    le lr, .LBB9_5
 ; CHECK-NEXT:    .LBB9_6: @ %for.cond.cleanup.loopexit.unr-lcssa
-; CHECK-NEXT:    wls lr, r5, .LBB9_9
+; CHECK-NEXT:    wls lr, r4, .LBB9_9
 ; CHECK-NEXT:    @ %bb.7: @ %for.body.epil.preheader
 ; CHECK-NEXT:    mvn r2, #1
-; CHECK-NEXT:    mov lr, r5
+; CHECK-NEXT:    mov lr, r4
 ; CHECK-NEXT:    add.w r2, r2, r12, lsl #1
 ; CHECK-NEXT:    add r0, r2
 ; CHECK-NEXT:    add r1, r2
@@ -1589,7 +1587,7 @@ define arm_aapcs_vfpcc float @half_half_mac(half* nocapture readonly %a, half* n
 ; CHECK-NEXT:    vadd.f32 s0, s0, s2
 ; CHECK-NEXT:    le lr, .LBB9_8
 ; CHECK-NEXT:    .LBB9_9: @ %for.cond.cleanup
-; CHECK-NEXT:    pop {r4, r5, r7, pc}
+; CHECK-NEXT:    pop {r4, pc}
 ; CHECK-NEXT:    .p2align 2
 ; CHECK-NEXT:    @ %bb.10:
 ; CHECK-NEXT:    .LCPI9_0:
@@ -1679,7 +1677,7 @@ for.body: ; preds = %for.body, %for.body
 define arm_aapcs_vfpcc float @half_half_acc(half* nocapture readonly %a, half* nocapture readonly %b, i32 %N) {
 ; CHECK-LABEL: half_half_acc:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    push {r4, r5, r7, lr}
+; CHECK-NEXT:    push {r4, lr}
 ; CHECK-NEXT:    cbz r2, .LBB10_3
 ; CHECK-NEXT:    @ %bb.1: @ %for.body.preheader
 ; CHECK-NEXT:    subs r3, r2, #1
@@ -1688,18 +1686,16 @@ define arm_aapcs_vfpcc float @half_half_acc(half* nocapture readonly %a, half* n
 ; CHECK-NEXT:    bhs .LBB10_4
 ; CHECK-NEXT:    @ %bb.2:
 ; CHECK-NEXT:    vldr s0, .LCPI10_0
-; CHECK-NEXT:    mov r5, r4
 ; CHECK-NEXT:    mov.w r12, #0
 ; CHECK-NEXT:    b .LBB10_6
 ; CHECK-NEXT:    .LBB10_3:
 ; CHECK-NEXT:    vldr s0, .LCPI10_0
 ; CHECK-NEXT:    b .LBB10_9
 ; CHECK-NEXT:    .LBB10_4: @ %for.body.preheader.new
-; CHECK-NEXT:    subs r2, r2, r4
+; CHECK-NEXT:    bic r2, r2, #3
 ; CHECK-NEXT:    movs r3, #1
 ; CHECK-NEXT:    subs r2, #4
 ; CHECK-NEXT:    vldr s0, .LCPI10_0
-; CHECK-NEXT:    mov r5, r4
 ; CHECK-NEXT:    mov.w r12, #0
 ; CHECK-NEXT:    add.w lr, r3, r2, lsr #2
 ; CHECK-NEXT:    sub.w r3, r0, #8
@@ -1732,10 +1728,10 @@ define arm_aapcs_vfpcc float @half_half_acc(half* nocapture readonly %a, half* n
 ; CHECK-NEXT:    vadd.f32 s0, s0, s2
 ; CHECK-NEXT:    le lr, .LBB10_5
 ; CHECK-NEXT:    .LBB10_6: @ %for.cond.cleanup.loopexit.unr-lcssa
-; CHECK-NEXT:    wls lr, r5, .LBB10_9
+; CHECK-NEXT:    wls lr, r4, .LBB10_9
 ; CHECK-NEXT:    @ %bb.7: @ %for.body.epil.preheader
 ; CHECK-NEXT:    mvn r2, #1
-; CHECK-NEXT:    mov lr, r5
+; CHECK-NEXT:    mov lr, r4
 ; CHECK-NEXT:    add.w r2, r2, r12, lsl #1
 ; CHECK-NEXT:    add r0, r2
 ; CHECK-NEXT:    add r1, r2
@@ -1750,7 +1746,7 @@ define arm_aapcs_vfpcc float @half_half_acc(half* nocapture readonly %a, half* n
 ; CHECK-NEXT:    vadd.f32 s0, s0, s2
 ; CHECK-NEXT:    le lr, .LBB10_8
 ; CHECK-NEXT:    .LBB10_9: @ %for.cond.cleanup
-; CHECK-NEXT:    pop {r4, r5, r7, pc}
+; CHECK-NEXT:    pop {r4, pc}
 ; CHECK-NEXT:    .p2align 2
 ; CHECK-NEXT:    @ %bb.10:
 ; CHECK-NEXT:    .LCPI10_0:
@@ -1840,7 +1836,7 @@ for.body: ; preds = %for.body, %for.body
 define arm_aapcs_vfpcc float @half_short_mac(half* nocapture readonly %a, i16* nocapture readonly %b, i32 %N) {
 ; CHECK-LABEL: half_short_mac:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    push.w {r4, r5, r6, r7, r8, lr}
+; CHECK-NEXT:    push {r4, r5, r6, r7, lr}
 ; CHECK-NEXT:    cbz r2, .LBB11_3
 ; CHECK-NEXT:    @ %bb.1: @ %for.body.preheader
 ; CHECK-NEXT:    subs r3, r2, #1
@@ -1849,18 +1845,16 @@ define arm_aapcs_vfpcc float @half_short_mac(half* nocapture readonly %a, i16* n
 ; CHECK-NEXT:    bhs .LBB11_4
 ; CHECK-NEXT:    @ %bb.2:
 ; CHECK-NEXT:    vldr s0, .LCPI11_0
-; CHECK-NEXT:    mov r8, r7
 ; CHECK-NEXT:    mov.w r12, #0
 ; CHECK-NEXT:    b .LBB11_6
 ; CHECK-NEXT:    .LBB11_3:
 ; CHECK-NEXT:    vldr s0, .LCPI11_0
 ; CHECK-NEXT:    b .LBB11_9
 ; CHECK-NEXT:    .LBB11_4: @ %for.body.preheader.new
-; CHECK-NEXT:    subs r2, r2, r7
+; CHECK-NEXT:    bic r2, r2, #3
 ; CHECK-NEXT:    movs r3, #1
 ; CHECK-NEXT:    subs r2, #4
 ; CHECK-NEXT:    vldr s0, .LCPI11_0
-; CHECK-NEXT:    mov r8, r7
 ; CHECK-NEXT:    mov.w r12, #0
 ; CHECK-NEXT:    add.w lr, r3, r2, lsr #2
 ; CHECK-NEXT:    movs r3, #0
@@ -1901,13 +1895,13 @@ define arm_aapcs_vfpcc float @half_short_mac(half* nocapture readonly %a, i16* n
 ; CHECK-NEXT:    vadd.f32 s0, s0, s2
 ; CHECK-NEXT:    le lr, .LBB11_5
 ; CHECK-NEXT:    .LBB11_6: @ %for.cond.cleanup.loopexit.unr-lcssa
-; CHECK-NEXT:    wls lr, r8, .LBB11_9
+; CHECK-NEXT:    wls lr, r7, .LBB11_9
 ; CHECK-NEXT:    @ %bb.7: @ %for.body.epil.preheader
 ; CHECK-NEXT:    mvn r3, #1
 ; CHECK-NEXT:    add.w r2, r3, r12, lsl #1
 ; CHECK-NEXT:    add r0, r2
 ; CHECK-NEXT:    add r1, r2
-; CHECK-NEXT:    mov lr, r8
+; CHECK-NEXT:    mov lr, r7
 ; CHECK-NEXT:    .LBB11_8: @ %for.body.epil
 ; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    ldrsh r2, [r1, #2]!
@@ -1920,7 +1914,7 @@ define arm_aapcs_vfpcc float @half_short_mac(half* nocapture readonly %a, i16* n
 ; CHECK-NEXT:    vadd.f32 s0, s0, s2
 ; CHECK-NEXT:    le lr, .LBB11_8
 ; CHECK-NEXT:    .LBB11_9: @ %for.cond.cleanup
-; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, pc}
+; CHECK-NEXT:    pop {r4, r5, r6, r7, pc}
 ; CHECK-NEXT:    .p2align 2
 ; CHECK-NEXT:    @ %bb.10:
 ; CHECK-NEXT:    .LCPI11_0:
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll
index 46d6fb2635a..f9c83b74bdd 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll
@@ -966,11 +966,10 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_int(i32* nocapture readonly
 ; CHECK-NEXT:    beq .LBB9_4
 ; CHECK-NEXT:    @ %bb.2: @ %for.body.preheader
 ; CHECK-NEXT:    sub.w r4, r12, #1
-; CHECK-NEXT:    and r5, r12, #3
+; CHECK-NEXT:    and r10, r12, #3
 ; CHECK-NEXT:    cmp r4, #3
 ; CHECK-NEXT:    bhs .LBB9_6
 ; CHECK-NEXT:    @ %bb.3:
-; CHECK-NEXT:    mov r10, r5
 ; CHECK-NEXT:    mov.w r12, #0
 ; CHECK-NEXT:    b .LBB9_8
 ; CHECK-NEXT:    .LBB9_4: @ %vector.ph
@@ -986,10 +985,9 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_int(i32* nocapture readonly
 ; CHECK-NEXT:    letp lr, .LBB9_5
 ; CHECK-NEXT:    b .LBB9_11
 ; CHECK-NEXT:    .LBB9_6: @ %for.body.preheader.new
-; CHECK-NEXT:    sub.w r7, r12, r5
-; CHECK-NEXT:    mov r10, r5
-; CHECK-NEXT:    subs r7, #4
+; CHECK-NEXT:    bic r7, r12, #3
 ; CHECK-NEXT:    movs r4, #0
+; CHECK-NEXT:    subs r7, #4
 ; CHECK-NEXT:    mov.w r12, #0
 ; CHECK-NEXT:    add.w lr, lr, r7, lsr #2
 ; CHECK-NEXT:    dls lr, lr
diff --git a/llvm/test/CodeGen/X86/align-down-const.ll b/llvm/test/CodeGen/X86/align-down-const.ll
index 1181f20f935..86f2dacbac1 100644
--- a/llvm/test/CodeGen/X86/align-down-const.ll
+++ b/llvm/test/CodeGen/X86/align-down-const.ll
@@ -19,17 +19,13 @@ define i32 @t0_32(i32 %ptr) nounwind {
 ; X86-LABEL: t0_32:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl %eax, %ecx
-; X86-NEXT:    andl $15, %ecx
-; X86-NEXT:    subl %ecx, %eax
+; X86-NEXT:    andl $-16, %eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: t0_32:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    movl %edi, %ecx
-; X64-NEXT:    andl $15, %ecx
-; X64-NEXT:    subl %ecx, %eax
+; X64-NEXT:    andl $-16, %eax
 ; X64-NEXT:    retq
   %bias = and i32 %ptr, 15
   %r = sub i32 %ptr, %bias
@@ -40,18 +36,13 @@ define i64 @t1_64(i64 %ptr) nounwind {
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    movl %eax, %ecx
-; X86-NEXT:    andl $15, %ecx
-; X86-NEXT:    subl %ecx, %eax
-; X86-NEXT:    sbbl $0, %edx
+; X86-NEXT:    andl $-16, %eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: t1_64:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    movl %eax, %ecx
-; X64-NEXT:    andl $15, %ecx
-; X64-NEXT:    subq %rcx, %rax
+; X64-NEXT:    andq $-16, %rax
 ; X64-NEXT:    retq
   %bias = and i64 %ptr, 15
   %r = sub i64 %ptr, %bias
@@ -62,17 +53,13 @@ define i32 @t2_powerof2(i32 %ptr) nounwind {
 ; X86-LABEL: t2_powerof2:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl %eax, %ecx
-; X86-NEXT:    andl $16, %ecx
-; X86-NEXT:    subl %ecx, %eax
+; X86-NEXT:    andl $-17, %eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: t2_powerof2:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    movl %edi, %ecx
-; X64-NEXT:    andl $16, %ecx
-; X64-NEXT:    subl %ecx, %eax
+; X64-NEXT:    andl $-17, %eax
 ; X64-NEXT:    retq
   %bias = and i32 %ptr, 16
   %r = sub i32 %ptr, %bias
@@ -82,17 +69,13 @@ define i32 @t3_random_constant(i32 %ptr) nounwind {
 ; X86-LABEL: t3_random_constant:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl %eax, %ecx
-; X86-NEXT:    andl $42, %ecx
-; X86-NEXT:    subl %ecx, %eax
+; X86-NEXT:    andl $-43, %eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: t3_random_constant:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    movl %edi, %ecx
-; X64-NEXT:    andl $42, %ecx
-; X64-NEXT:    subl %ecx, %eax
+; X64-NEXT:    andl $-43, %eax
 ; X64-NEXT:    retq
   %bias = and i32 %ptr, 42
   %r = sub i32 %ptr, %bias
@@ -109,7 +92,7 @@ define i32 @t4_extrause(i32 %ptr, i32* %bias_storage) nounwind {
 ; X86-NEXT:    movl %eax, %edx
 ; X86-NEXT:    andl $15, %edx
 ; X86-NEXT:    movl %edx, (%ecx)
-; X86-NEXT:    subl %edx, %eax
+; X86-NEXT:    andl $-16, %eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: t4_extrause:
@@ -118,7 +101,7 @@ define i32 @t4_extrause(i32 %ptr, i32* %bias_storage) nounwind {
 ; X64-NEXT:    movl %edi, %ecx
 ; X64-NEXT:    andl $15, %ecx
 ; X64-NEXT:    movl %ecx, (%rsi)
-; X64-NEXT:    subl %ecx, %eax
+; X64-NEXT:    andl $-16, %eax
 ; X64-NEXT:    retq
   %bias = and i32 %ptr, 15
   store i32 %bias, i32* %bias_storage