diff options
| -rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 11 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AArch64/signbit-shift.ll | 18 | ||||
| -rw-r--r-- | llvm/test/CodeGen/PowerPC/signbit-shift.ll | 16 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/signbit-shift.ll | 36 |
4 files changed, 45 insertions, 36 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 6c6b225bc14..5ce5bab71b4 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -2743,6 +2743,17 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { } } + // Prefer an add for more folding potential and possibly better codegen: + // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1) + if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) { + SDValue ShAmt = N1.getOperand(1); + ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt); + if (ShAmtC && ShAmtC->getZExtValue() == N1.getScalarValueSizeInBits() - 1) { + SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt); + return DAG.getNode(ISD::ADD, DL, VT, N0, SRA); + } + } + return SDValue(); } diff --git a/llvm/test/CodeGen/AArch64/signbit-shift.ll b/llvm/test/CodeGen/AArch64/signbit-shift.ll index b554ce15872..250290aa234 100644 --- a/llvm/test/CodeGen/AArch64/signbit-shift.ll +++ b/llvm/test/CodeGen/AArch64/signbit-shift.ll @@ -150,8 +150,8 @@ define i32 @sext_ifneg(i32 %x) { define i32 @add_sext_ifneg(i32 %x) { ; CHECK-LABEL: add_sext_ifneg: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #42 -; CHECK-NEXT: sub w0, w8, w0, lsr #31 +; CHECK-NEXT: asr w8, w0, #31 +; CHECK-NEXT: add w0, w8, #42 // =42 ; CHECK-NEXT: ret %c = icmp slt i32 %x, 0 %e = sext i1 %c to i32 @@ -225,7 +225,7 @@ define <4 x i32> @sub_lshr_not_vec_splat(<4 x i32> %x) { define i32 @sub_lshr(i32 %x, i32 %y) { ; CHECK-LABEL: sub_lshr: ; CHECK: // %bb.0: -; CHECK-NEXT: sub w0, w1, w0, lsr #31 +; CHECK-NEXT: add w0, w1, w0, asr #31 ; CHECK-NEXT: ret %sh = lshr i32 %x, 31 %r = sub i32 %y, %sh @@ -235,8 +235,8 @@ define i32 @sub_lshr(i32 %x, i32 %y) { define <4 x i32> @sub_lshr_vec(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: sub_lshr_vec: ; CHECK: // %bb.0: -; CHECK-NEXT: ushr v0.4s, v0.4s, #31 -; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s +; CHECK-NEXT: ssra v1.4s, v0.4s, #31 +; CHECK-NEXT: mov v0.16b, v1.16b ; CHECK-NEXT: ret %sh = lshr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31> %r = sub <4 x i32> %y, %sh @@ -246,8 +246,8 @@ define <4 x i32> @sub_lshr_vec(<4 x i32> %x, <4 x i32> %y) { define i32 @sub_const_op_lshr(i32 %x) { ; CHECK-LABEL: sub_const_op_lshr: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #43 -; CHECK-NEXT: sub w0, w8, w0, lsr #31 +; CHECK-NEXT: asr w8, w0, #31 +; CHECK-NEXT: add w0, w8, #43 // =43 ; CHECK-NEXT: ret %sh = lshr i32 %x, 31 %r = sub i32 43, %sh @@ -257,9 +257,9 @@ define i32 @sub_const_op_lshr(i32 %x) { define <4 x i32> @sub_const_op_lshr_vec(<4 x i32> %x) { ; CHECK-LABEL: sub_const_op_lshr_vec: ; CHECK: // %bb.0: -; CHECK-NEXT: ushr v0.4s, v0.4s, #31 ; CHECK-NEXT: movi v1.4s, #42 -; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s +; CHECK-NEXT: ssra v1.4s, v0.4s, #31 +; CHECK-NEXT: mov v0.16b, v1.16b ; CHECK-NEXT: ret %sh = lshr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31> %r = sub <4 x i32> <i32 42, i32 42, i32 42, i32 42>, %sh diff --git a/llvm/test/CodeGen/PowerPC/signbit-shift.ll b/llvm/test/CodeGen/PowerPC/signbit-shift.ll index 75875878144..41d250e924e 100644 --- a/llvm/test/CodeGen/PowerPC/signbit-shift.ll +++ b/llvm/test/CodeGen/PowerPC/signbit-shift.ll @@ -243,8 +243,8 @@ define <4 x i32> @sub_lshr_not_vec_splat(<4 x i32> %x) { define i32 @sub_lshr(i32 %x, i32 %y) { ; CHECK-LABEL: sub_lshr: ; CHECK: # %bb.0: -; CHECK-NEXT: srwi 3, 3, 31 -; CHECK-NEXT: subf 3, 3, 4 +; CHECK-NEXT: srawi 3, 3, 31 +; CHECK-NEXT: add 3, 4, 3 ; CHECK-NEXT: blr %sh = lshr i32 %x, 31 %r = sub i32 %y, %sh @@ -257,8 +257,8 @@ define <4 x i32> @sub_lshr_vec(<4 x i32> %x, <4 x i32> %y) { ; CHECK-NEXT: vspltisw 4, -16 ; CHECK-NEXT: vspltisw 5, 15 ; CHECK-NEXT: vsubuwm 4, 5, 4 -; CHECK-NEXT: vsrw 2, 2, 4 -; CHECK-NEXT: vsubuwm 2, 3, 2 +; CHECK-NEXT: vsraw 2, 2, 4 +; CHECK-NEXT: vadduwm 2, 3, 2 ; CHECK-NEXT: blr %sh = lshr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31> %r = sub <4 x i32> %y, %sh @@ -268,8 +268,8 @@ define <4 x i32> @sub_lshr_vec(<4 x i32> %x, <4 x i32> %y) { define i32 @sub_const_op_lshr(i32 %x) { ; CHECK-LABEL: sub_const_op_lshr: ; CHECK: # %bb.0: -; CHECK-NEXT: srwi 3, 3, 31 -; CHECK-NEXT: subfic 3, 3, 43 +; CHECK-NEXT: srawi 3, 3, 31 +; CHECK-NEXT: addi 3, 3, 43 ; CHECK-NEXT: blr %sh = lshr i32 %x, 31 %r = sub i32 43, %sh @@ -284,9 +284,9 @@ define <4 x i32> @sub_const_op_lshr_vec(<4 x i32> %x) { ; CHECK-NEXT: addis 3, 2, .LCPI21_0@toc@ha ; CHECK-NEXT: addi 3, 3, .LCPI21_0@toc@l ; CHECK-NEXT: vsubuwm 3, 4, 3 -; CHECK-NEXT: vsrw 2, 2, 3 +; CHECK-NEXT: vsraw 2, 2, 3 ; CHECK-NEXT: lvx 3, 0, 3 -; CHECK-NEXT: vsubuwm 2, 3, 2 +; CHECK-NEXT: vadduwm 2, 2, 3 ; CHECK-NEXT: blr %sh = lshr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31> %r = sub <4 x i32> <i32 42, i32 42, i32 42, i32 42>, %sh diff --git a/llvm/test/CodeGen/X86/signbit-shift.ll b/llvm/test/CodeGen/X86/signbit-shift.ll index cee647931bc..1579a77a2e9 100644 --- a/llvm/test/CodeGen/X86/signbit-shift.ll +++ b/llvm/test/CodeGen/X86/signbit-shift.ll @@ -156,9 +156,9 @@ define i32 @sext_ifneg(i32 %x) { define i32 @add_sext_ifneg(i32 %x) { ; CHECK-LABEL: add_sext_ifneg: ; CHECK: # %bb.0: -; CHECK-NEXT: shrl $31, %edi -; CHECK-NEXT: movl $42, %eax -; CHECK-NEXT: subl %edi, %eax +; CHECK-NEXT: # kill: def $edi killed $edi def $rdi +; CHECK-NEXT: sarl $31, %edi +; CHECK-NEXT: leal 42(%rdi), %eax ; CHECK-NEXT: retq %c = icmp slt i32 %x, 0 %e = sext i1 %c to i32 @@ -169,9 +169,9 @@ define i32 @add_sext_ifneg(i32 %x) { define i32 @sel_ifneg_fval_bigger(i32 %x) { ; CHECK-LABEL: sel_ifneg_fval_bigger: ; CHECK: # %bb.0: -; CHECK-NEXT: shrl $31, %edi -; CHECK-NEXT: movl $42, %eax -; CHECK-NEXT: subl %edi, %eax +; CHECK-NEXT: # kill: def $edi killed $edi def $rdi +; CHECK-NEXT: sarl $31, %edi +; CHECK-NEXT: leal 42(%rdi), %eax ; CHECK-NEXT: retq %c = icmp slt i32 %x, 0 %r = select i1 %c, i32 41, i32 42 @@ -231,9 +231,10 @@ define <4 x i32> @sub_lshr_not_vec_splat(<4 x i32> %x) { define i32 @sub_lshr(i32 %x, i32 %y) { ; CHECK-LABEL: sub_lshr: ; CHECK: # %bb.0: -; CHECK-NEXT: shrl $31, %edi -; CHECK-NEXT: subl %edi, %esi -; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: # kill: def $esi killed $esi def $rsi +; CHECK-NEXT: # kill: def $edi killed $edi def $rdi +; CHECK-NEXT: sarl $31, %edi +; CHECK-NEXT: leal (%rdi,%rsi), %eax ; CHECK-NEXT: retq %sh = lshr i32 %x, 31 %r = sub i32 %y, %sh @@ -243,9 +244,8 @@ define i32 @sub_lshr(i32 %x, i32 %y) { define <4 x i32> @sub_lshr_vec(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: sub_lshr_vec: ; CHECK: # %bb.0: -; CHECK-NEXT: psrld $31, %xmm0 -; CHECK-NEXT: psubd %xmm0, %xmm1 -; CHECK-NEXT: movdqa %xmm1, %xmm0 +; CHECK-NEXT: psrad $31, %xmm0 +; CHECK-NEXT: paddd %xmm1, %xmm0 ; CHECK-NEXT: retq %sh = lshr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31> %r = sub <4 x i32> %y, %sh @@ -255,9 +255,9 @@ define <4 x i32> @sub_lshr_vec(<4 x i32> %x, <4 x i32> %y) { define i32 @sub_const_op_lshr(i32 %x) { ; CHECK-LABEL: sub_const_op_lshr: ; CHECK: # %bb.0: -; CHECK-NEXT: shrl $31, %edi -; CHECK-NEXT: xorl $43, %edi -; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: # kill: def $edi killed $edi def $rdi +; CHECK-NEXT: sarl $31, %edi +; CHECK-NEXT: leal 43(%rdi), %eax ; CHECK-NEXT: retq %sh = lshr i32 %x, 31 %r = sub i32 43, %sh @@ -267,10 +267,8 @@ define i32 @sub_const_op_lshr(i32 %x) { define <4 x i32> @sub_const_op_lshr_vec(<4 x i32> %x) { ; CHECK-LABEL: sub_const_op_lshr_vec: ; CHECK: # %bb.0: -; CHECK-NEXT: psrld $31, %xmm0 -; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [42,42,42,42] -; CHECK-NEXT: psubd %xmm0, %xmm1 -; CHECK-NEXT: movdqa %xmm1, %xmm0 +; CHECK-NEXT: psrad $31, %xmm0 +; CHECK-NEXT: paddd {{.*}}(%rip), %xmm0 ; CHECK-NEXT: retq %sh = lshr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31> %r = sub <4 x i32> <i32 42, i32 42, i32 42, i32 42>, %sh |

