summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp55
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.h3
-rw-r--r--llvm/test/CodeGen/X86/combine-sdiv.ll28
-rw-r--r--llvm/test/CodeGen/X86/combine-srem.ll37
-rw-r--r--llvm/test/CodeGen/X86/rem.ll7
-rw-r--r--llvm/test/CodeGen/X86/srem-seteq.ll30
6 files changed, 104 insertions, 56 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index f4a1e33030b..d7efb0a126d 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -20080,6 +20080,61 @@ unsigned X86TargetLowering::combineRepeatedFPDivisors() const {
return 2;
}
+/// X86 expansion of a signed divide by a power-of-two constant (or by the
+/// negation of one).
+///
+/// Builds `(select (setlt X, 0), (add X, Pow2 - 1), X) >>s Lg2` and, for a
+/// negative divisor, negates the shifted value.
+///
+/// \param N        The SDIV node being expanded; operand 0 is the dividend.
+/// \param Divisor  The constant divisor; asserted to be +/- a power of two.
+/// \param DAG      The DAG in which the replacement nodes are created.
+/// \param Created  Receives every intermediate node built here (the node
+///                 that is returned is not appended).
+/// \returns \p N itself when integer division is reported as cheap, an empty
+///          SDValue when this expansion is declined (no CMOV, a type other
+///          than i32 / 64-bit-mode i64, or a divisor of +/-2), and otherwise
+///          the root of the expansion.
+SDValue
+X86TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
+                                 SelectionDAG &DAG,
+                                 SmallVectorImpl<SDNode *> &Created) const {
+  EVT VT = N->getValueType(0);
+
+  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
+  if (isIntDivCheap(VT, Attr))
+    return SDValue(N, 0); // Lower SDIV as SDIV
+
+  assert((Divisor.isPowerOf2() || (-Divisor).isPowerOf2()) &&
+         "Unexpected divisor!");
+
+  // Only perform this transform if CMOV is supported otherwise the select
+  // below will become a branch.
+  if (!Subtarget.hasCMov())
+    return SDValue();
+
+  // fold (sdiv X, pow2)
+  // FIXME: Support i8/i16.
+  if (VT != MVT::i32 && !(Subtarget.is64Bit() && VT == MVT::i64))
+    return SDValue();
+
+  unsigned Lg2 = Divisor.countTrailingZeros();
+
+  // If the divisor is 2 or -2, the default expansion is better.
+  if (Lg2 == 1)
+    return SDValue();
+
+  SDLoc DL(N);
+  SDValue N0 = N->getOperand(0);
+  SDValue Zero = DAG.getConstant(0, DL, VT);
+  // Lg2 is at most 63 here because VT is limited to i32/i64, so the 64-bit
+  // shift below cannot overflow.
+  SDValue Pow2MinusOne = DAG.getConstant((1ULL << Lg2) - 1, DL, VT);
+
+  // If N0 is negative, we need to add (Pow2 - 1) to it before shifting right
+  // so that the arithmetic shift rounds toward zero like sdiv does.
+  SDValue Cmp = DAG.getSetCC(DL, MVT::i8, N0, Zero, ISD::SETLT);
+  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
+  SDValue CMov = DAG.getNode(ISD::SELECT, DL, VT, Cmp, Add, N0);
+
+  Created.push_back(Cmp.getNode());
+  Created.push_back(Add.getNode());
+  Created.push_back(CMov.getNode());
+
+  // Divide by pow2. The shift amount is built as i8, the type X86 uses for
+  // scalar shift amounts; an i64 constant here would be an illegal type on
+  // 32-bit targets if this hook runs after type legalization.
+  SDValue SRA =
+      DAG.getNode(ISD::SRA, DL, VT, CMov, DAG.getConstant(Lg2, DL, MVT::i8));
+
+  // If we're dividing by a positive value, we're done. Otherwise, we must
+  // negate the result.
+  if (Divisor.isNonNegative())
+    return SRA;
+
+  Created.push_back(SRA.getNode());
+  return DAG.getNode(ISD::SUB, DL, VT, Zero, SRA);
+}
+
/// Result of 'and' is compared against zero. Change to a BT node if possible.
/// Returns the BT node and the condition code needed to use it.
static SDValue LowerAndToBT(SDValue And, ISD::CondCode CC,
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 05e4f16fc49..fbc9509f058 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1478,6 +1478,9 @@ namespace llvm {
/// Reassociate floating point divisions into multiply by reciprocal.
unsigned combineRepeatedFPDivisors() const override;
+
+ /// Expand a signed divide by a power-of-two constant (or its negation).
+ /// Intermediate nodes built for the expansion are appended to \p Created.
+ SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
+ SmallVectorImpl<SDNode *> &Created) const override;
};
namespace X86 {
diff --git a/llvm/test/CodeGen/X86/combine-sdiv.ll b/llvm/test/CodeGen/X86/combine-sdiv.ll
index 4f5b4124cb0..59e13e7cea4 100644
--- a/llvm/test/CodeGen/X86/combine-sdiv.ll
+++ b/llvm/test/CodeGen/X86/combine-sdiv.ll
@@ -3200,10 +3200,9 @@ define i32 @combine_i32_sdiv_pow2(i32 %x) {
; CHECK-LABEL: combine_i32_sdiv_pow2:
; CHECK: # %bb.0:
; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
-; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: sarl $31, %eax
-; CHECK-NEXT: shrl $28, %eax
-; CHECK-NEXT: addl %edi, %eax
+; CHECK-NEXT: leal 15(%rdi), %eax
+; CHECK-NEXT: testl %edi, %edi
+; CHECK-NEXT: cmovnsl %edi, %eax
; CHECK-NEXT: sarl $4, %eax
; CHECK-NEXT: retq
%1 = sdiv i32 %x, 16
@@ -3214,10 +3213,9 @@ define i32 @combine_i32_sdiv_negpow2(i32 %x) {
; CHECK-LABEL: combine_i32_sdiv_negpow2:
; CHECK: # %bb.0:
; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
-; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: sarl $31, %eax
-; CHECK-NEXT: shrl $24, %eax
-; CHECK-NEXT: addl %edi, %eax
+; CHECK-NEXT: leal 255(%rdi), %eax
+; CHECK-NEXT: testl %edi, %edi
+; CHECK-NEXT: cmovnsl %edi, %eax
; CHECK-NEXT: sarl $8, %eax
; CHECK-NEXT: negl %eax
; CHECK-NEXT: retq
@@ -3228,10 +3226,9 @@ define i32 @combine_i32_sdiv_negpow2(i32 %x) {
define i64 @combine_i64_sdiv_pow2(i64 %x) {
; CHECK-LABEL: combine_i64_sdiv_pow2:
; CHECK: # %bb.0:
-; CHECK-NEXT: movq %rdi, %rax
-; CHECK-NEXT: sarq $63, %rax
-; CHECK-NEXT: shrq $60, %rax
-; CHECK-NEXT: addq %rdi, %rax
+; CHECK-NEXT: leaq 15(%rdi), %rax
+; CHECK-NEXT: testq %rdi, %rdi
+; CHECK-NEXT: cmovnsq %rdi, %rax
; CHECK-NEXT: sarq $4, %rax
; CHECK-NEXT: retq
%1 = sdiv i64 %x, 16
@@ -3241,10 +3238,9 @@ define i64 @combine_i64_sdiv_pow2(i64 %x) {
define i64 @combine_i64_sdiv_negpow2(i64 %x) {
; CHECK-LABEL: combine_i64_sdiv_negpow2:
; CHECK: # %bb.0:
-; CHECK-NEXT: movq %rdi, %rax
-; CHECK-NEXT: sarq $63, %rax
-; CHECK-NEXT: shrq $56, %rax
-; CHECK-NEXT: addq %rdi, %rax
+; CHECK-NEXT: leaq 255(%rdi), %rax
+; CHECK-NEXT: testq %rdi, %rdi
+; CHECK-NEXT: cmovnsq %rdi, %rax
; CHECK-NEXT: sarq $8, %rax
; CHECK-NEXT: negq %rax
; CHECK-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/combine-srem.ll b/llvm/test/CodeGen/X86/combine-srem.ll
index 8bb0ec95d00..4148e5b6d35 100644
--- a/llvm/test/CodeGen/X86/combine-srem.ll
+++ b/llvm/test/CodeGen/X86/combine-srem.ll
@@ -56,10 +56,9 @@ define i32 @combine_srem_by_minsigned(i32 %x) {
; CHECK-LABEL: combine_srem_by_minsigned:
; CHECK: # %bb.0:
; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
-; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: sarl $31, %eax
-; CHECK-NEXT: shrl %eax
-; CHECK-NEXT: addl %edi, %eax
+; CHECK-NEXT: leal 2147483647(%rdi), %eax
+; CHECK-NEXT: testl %edi, %edi
+; CHECK-NEXT: cmovnsl %edi, %eax
; CHECK-NEXT: andl $-2147483648, %eax # imm = 0x80000000
; CHECK-NEXT: addl %edi, %eax
; CHECK-NEXT: retq
@@ -513,12 +512,12 @@ define i32 @combine_srem_pow2(i32 %x) {
; CHECK-LABEL: combine_srem_pow2:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: movl %edi, %ecx
-; CHECK-NEXT: sarl $31, %ecx
-; CHECK-NEXT: shrl $28, %ecx
-; CHECK-NEXT: addl %edi, %ecx
+; CHECK-NEXT: leal 15(%rax), %ecx
+; CHECK-NEXT: testl %edi, %edi
+; CHECK-NEXT: cmovnsl %edi, %ecx
; CHECK-NEXT: andl $-16, %ecx
; CHECK-NEXT: subl %ecx, %eax
+; CHECK-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-NEXT: retq
%1 = srem i32 %x, 16
ret i32 %1
@@ -528,12 +527,12 @@ define i32 @combine_srem_negpow2(i32 %x) {
; CHECK-LABEL: combine_srem_negpow2:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: movl %edi, %ecx
-; CHECK-NEXT: sarl $31, %ecx
-; CHECK-NEXT: shrl $24, %ecx
-; CHECK-NEXT: addl %edi, %ecx
+; CHECK-NEXT: leal 255(%rax), %ecx
+; CHECK-NEXT: testl %edi, %edi
+; CHECK-NEXT: cmovnsl %edi, %ecx
; CHECK-NEXT: andl $-256, %ecx
; CHECK-NEXT: subl %ecx, %eax
+; CHECK-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-NEXT: retq
%1 = srem i32 %x, -256
ret i32 %1
@@ -543,10 +542,9 @@ define i64 @combine_i64_srem_pow2(i64 %x) {
; CHECK-LABEL: combine_i64_srem_pow2:
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rdi, %rax
-; CHECK-NEXT: movq %rdi, %rcx
-; CHECK-NEXT: sarq $63, %rcx
-; CHECK-NEXT: shrq $60, %rcx
-; CHECK-NEXT: addq %rdi, %rcx
+; CHECK-NEXT: leaq 15(%rdi), %rcx
+; CHECK-NEXT: testq %rdi, %rdi
+; CHECK-NEXT: cmovnsq %rdi, %rcx
; CHECK-NEXT: andq $-16, %rcx
; CHECK-NEXT: subq %rcx, %rax
; CHECK-NEXT: retq
@@ -558,10 +556,9 @@ define i64 @combine_i64_srem_negpow2(i64 %x) {
; CHECK-LABEL: combine_i64_srem_negpow2:
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rdi, %rax
-; CHECK-NEXT: movq %rdi, %rcx
-; CHECK-NEXT: sarq $63, %rcx
-; CHECK-NEXT: shrq $56, %rcx
-; CHECK-NEXT: addq %rdi, %rcx
+; CHECK-NEXT: leaq 255(%rdi), %rcx
+; CHECK-NEXT: testq %rdi, %rdi
+; CHECK-NEXT: cmovnsq %rdi, %rcx
; CHECK-NEXT: andq $-256, %rcx
; CHECK-NEXT: subq %rcx, %rax
; CHECK-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/rem.ll b/llvm/test/CodeGen/X86/rem.ll
index 395a91ef361..c3f6d61f4ab 100644
--- a/llvm/test/CodeGen/X86/rem.ll
+++ b/llvm/test/CodeGen/X86/rem.ll
@@ -27,10 +27,9 @@ define i32 @test2(i32 %X) {
; CHECK-LABEL: test2:
; CHECK: # %bb.0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT: movl %eax, %ecx
-; CHECK-NEXT: sarl $31, %ecx
-; CHECK-NEXT: shrl $24, %ecx
-; CHECK-NEXT: addl %eax, %ecx
+; CHECK-NEXT: leal 255(%eax), %ecx
+; CHECK-NEXT: testl %eax, %eax
+; CHECK-NEXT: cmovnsl %eax, %ecx
; CHECK-NEXT: andl $-256, %ecx
; CHECK-NEXT: subl %ecx, %eax
; CHECK-NEXT: retl
diff --git a/llvm/test/CodeGen/X86/srem-seteq.ll b/llvm/test/CodeGen/X86/srem-seteq.ll
index f27e279931c..67fe5f4c5e4 100644
--- a/llvm/test/CodeGen/X86/srem-seteq.ll
+++ b/llvm/test/CodeGen/X86/srem-seteq.ll
@@ -318,10 +318,9 @@ define i32 @test_srem_pow2(i32 %X) nounwind {
; X86-LABEL: test_srem_pow2:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %ecx, %edx
-; X86-NEXT: sarl $31, %edx
-; X86-NEXT: shrl $28, %edx
-; X86-NEXT: addl %ecx, %edx
+; X86-NEXT: leal 15(%ecx), %edx
+; X86-NEXT: testl %ecx, %ecx
+; X86-NEXT: cmovnsl %ecx, %edx
; X86-NEXT: andl $-16, %edx
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: cmpl %edx, %ecx
@@ -330,10 +329,10 @@ define i32 @test_srem_pow2(i32 %X) nounwind {
;
; X64-LABEL: test_srem_pow2:
; X64: # %bb.0:
-; X64-NEXT: movl %edi, %ecx
-; X64-NEXT: sarl $31, %ecx
-; X64-NEXT: shrl $28, %ecx
-; X64-NEXT: addl %edi, %ecx
+; X64-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-NEXT: leal 15(%rdi), %ecx
+; X64-NEXT: testl %edi, %edi
+; X64-NEXT: cmovnsl %edi, %ecx
; X64-NEXT: andl $-16, %ecx
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpl %ecx, %edi
@@ -350,10 +349,9 @@ define i32 @test_srem_int_min(i32 %X) nounwind {
; X86-LABEL: test_srem_int_min:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %ecx, %edx
-; X86-NEXT: sarl $31, %edx
-; X86-NEXT: shrl %edx
-; X86-NEXT: addl %ecx, %edx
+; X86-NEXT: leal 2147483647(%ecx), %edx
+; X86-NEXT: testl %ecx, %ecx
+; X86-NEXT: cmovnsl %ecx, %edx
; X86-NEXT: andl $-2147483648, %edx # imm = 0x80000000
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: addl %ecx, %edx
@@ -362,10 +360,10 @@ define i32 @test_srem_int_min(i32 %X) nounwind {
;
; X64-LABEL: test_srem_int_min:
; X64: # %bb.0:
-; X64-NEXT: movl %edi, %ecx
-; X64-NEXT: sarl $31, %ecx
-; X64-NEXT: shrl %ecx
-; X64-NEXT: addl %edi, %ecx
+; X64-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-NEXT: leal 2147483647(%rdi), %ecx
+; X64-NEXT: testl %edi, %edi
+; X64-NEXT: cmovnsl %edi, %ecx
; X64-NEXT: andl $-2147483648, %ecx # imm = 0x80000000
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: addl %edi, %ecx
OpenPOWER on IntegriCloud