summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorCraig Topper <craig.topper@intel.com>2018-08-21 17:15:33 +0000
committerCraig Topper <craig.topper@intel.com>2018-08-21 17:15:33 +0000
commitb172b8884ac58dd8ca8f3ab63704c35fc7638458 (patch)
treeaa876fd8d371a038da3b7f8c8252e1fbd2207bec
parent98eb4ae499892bc0d6dc273b37af417d8053098c (diff)
downloadbcm5719-llvm-b172b8884ac58dd8ca8f3ab63704c35fc7638458.tar.gz
bcm5719-llvm-b172b8884ac58dd8ca8f3ab63704c35fc7638458.zip
[BypassSlowDivision] Teach bypass slow division not to interfere with div by constant where constants have been constant hoisted, but not moved from their basic block
DAGCombiner doesn't pay attention to whether constants are opaque before doing the div by constant optimization. So BypassSlowDivision shouldn't introduce control flow that would make DAGCombiner unable to see an opaque constant. This can occur when a div and a rem of the same constant are used in the same basic block: the constant will be hoisted, but it will not leave the block. Longer term, we probably need to look into the X86 immediate cost model used by constant hoisting, and maybe not mark div/rem immediates for hoisting at all. This fixes the case from PR38649. Differential Revision: https://reviews.llvm.org/D51000 llvm-svn: 340303
-rw-r--r--llvm/lib/Transforms/Utils/BypassSlowDivision.cpp9
-rw-r--r--llvm/test/CodeGen/X86/divide-by-constant.ll94
2 files changed, 35 insertions, 68 deletions
diff --git a/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp b/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp
index 05512a6dff3..e7828af648a 100644
--- a/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp
+++ b/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp
@@ -388,6 +388,15 @@ Optional<QuotRemPair> FastDivInsertionTask::insertFastDivAndRem() {
return None;
}
+ // After Constant Hoisting pass, long constants may be represented as
+ // bitcast instructions. As a result, some constants may look like an
+ // instruction at first, and an additional check is necessary to find out if
+ // an operand is actually a constant.
+ if (auto *BCI = dyn_cast<BitCastInst>(Divisor))
+ if (BCI->getParent() == SlowDivOrRem->getParent() &&
+ isa<ConstantInt>(BCI->getOperand(0)))
+ return None;
+
if (DividendShort && !isSignedOp()) {
// If the division is unsigned and Dividend is known to be short, then
// either
diff --git a/llvm/test/CodeGen/X86/divide-by-constant.ll b/llvm/test/CodeGen/X86/divide-by-constant.ll
index 0c29edf4840..00a0e31ea0d 100644
--- a/llvm/test/CodeGen/X86/divide-by-constant.ll
+++ b/llvm/test/CodeGen/X86/divide-by-constant.ll
@@ -384,39 +384,18 @@ define { i64, i32 } @PR38622(i64) nounwind {
; X32-NEXT: popl %ebp
; X32-NEXT: retl
;
-; X64-FAST-LABEL: PR38622:
-; X64-FAST: # %bb.0:
-; X64-FAST-NEXT: movq %rdi, %rax
-; X64-FAST-NEXT: shrq $11, %rax
-; X64-FAST-NEXT: movabsq $4835703278458517, %rcx # imm = 0x112E0BE826D695
-; X64-FAST-NEXT: mulq %rcx
-; X64-FAST-NEXT: shrq $9, %rdx
-; X64-FAST-NEXT: imull $-294967296, %edx, %eax # imm = 0xEE6B2800
-; X64-FAST-NEXT: subl %eax, %edi
-; X64-FAST-NEXT: movq %rdx, %rax
-; X64-FAST-NEXT: movl %edi, %edx
-; X64-FAST-NEXT: retq
-;
-; X64-SLOW-LABEL: PR38622:
-; X64-SLOW: # %bb.0:
-; X64-SLOW-NEXT: movl $4000000000, %ecx # imm = 0xEE6B2800
-; X64-SLOW-NEXT: movq %rdi, %rax
-; X64-SLOW-NEXT: shrq $32, %rax
-; X64-SLOW-NEXT: je .LBB14_1
-; X64-SLOW-NEXT: # %bb.2:
-; X64-SLOW-NEXT: xorl %edx, %edx
-; X64-SLOW-NEXT: movq %rdi, %rax
-; X64-SLOW-NEXT: divq %rcx
-; X64-SLOW-NEXT: # kill: def $edx killed $edx killed $rdx
-; X64-SLOW-NEXT: retq
-; X64-SLOW-NEXT: .LBB14_1:
-; X64-SLOW-NEXT: xorl %edx, %edx
-; X64-SLOW-NEXT: movl %edi, %eax
-; X64-SLOW-NEXT: divl %ecx
-; X64-SLOW-NEXT: # kill: def $edx killed $edx def $rdx
-; X64-SLOW-NEXT: # kill: def $eax killed $eax def $rax
-; X64-SLOW-NEXT: # kill: def $edx killed $edx killed $rdx
-; X64-SLOW-NEXT: retq
+; X64-LABEL: PR38622:
+; X64: # %bb.0:
+; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: shrq $11, %rax
+; X64-NEXT: movabsq $4835703278458517, %rcx # imm = 0x112E0BE826D695
+; X64-NEXT: mulq %rcx
+; X64-NEXT: shrq $9, %rdx
+; X64-NEXT: imull $-294967296, %edx, %eax # imm = 0xEE6B2800
+; X64-NEXT: subl %eax, %edi
+; X64-NEXT: movq %rdx, %rax
+; X64-NEXT: movl %edi, %edx
+; X64-NEXT: retq
%2 = udiv i64 %0, 4000000000
%3 = urem i64 %0, 4000000000
%4 = trunc i64 %3 to i32
@@ -459,41 +438,20 @@ define { i64, i32 } @PR38622_signed(i64) nounwind {
; X32-NEXT: popl %ebp
; X32-NEXT: retl
;
-; X64-FAST-LABEL: PR38622_signed:
-; X64-FAST: # %bb.0:
-; X64-FAST-NEXT: movabsq $1237940039285380275, %rcx # imm = 0x112E0BE826D694B3
-; X64-FAST-NEXT: movq %rdi, %rax
-; X64-FAST-NEXT: imulq %rcx
-; X64-FAST-NEXT: movq %rdx, %rcx
-; X64-FAST-NEXT: shrq $63, %rcx
-; X64-FAST-NEXT: sarq $28, %rdx
-; X64-FAST-NEXT: leaq (%rdx,%rcx), %rax
-; X64-FAST-NEXT: addl %ecx, %edx
-; X64-FAST-NEXT: imull $-294967296, %edx, %ecx # imm = 0xEE6B2800
-; X64-FAST-NEXT: subl %ecx, %edi
-; X64-FAST-NEXT: movl %edi, %edx
-; X64-FAST-NEXT: retq
-;
-; X64-SLOW-LABEL: PR38622_signed:
-; X64-SLOW: # %bb.0:
-; X64-SLOW-NEXT: movl $4000000000, %ecx # imm = 0xEE6B2800
-; X64-SLOW-NEXT: movq %rdi, %rax
-; X64-SLOW-NEXT: shrq $32, %rax
-; X64-SLOW-NEXT: je .LBB15_1
-; X64-SLOW-NEXT: # %bb.2:
-; X64-SLOW-NEXT: movq %rdi, %rax
-; X64-SLOW-NEXT: cqto
-; X64-SLOW-NEXT: idivq %rcx
-; X64-SLOW-NEXT: # kill: def $edx killed $edx killed $rdx
-; X64-SLOW-NEXT: retq
-; X64-SLOW-NEXT: .LBB15_1:
-; X64-SLOW-NEXT: xorl %edx, %edx
-; X64-SLOW-NEXT: movl %edi, %eax
-; X64-SLOW-NEXT: divl %ecx
-; X64-SLOW-NEXT: # kill: def $edx killed $edx def $rdx
-; X64-SLOW-NEXT: # kill: def $eax killed $eax def $rax
-; X64-SLOW-NEXT: # kill: def $edx killed $edx killed $rdx
-; X64-SLOW-NEXT: retq
+; X64-LABEL: PR38622_signed:
+; X64: # %bb.0:
+; X64-NEXT: movabsq $1237940039285380275, %rcx # imm = 0x112E0BE826D694B3
+; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: imulq %rcx
+; X64-NEXT: movq %rdx, %rcx
+; X64-NEXT: shrq $63, %rcx
+; X64-NEXT: sarq $28, %rdx
+; X64-NEXT: leaq (%rdx,%rcx), %rax
+; X64-NEXT: addl %ecx, %edx
+; X64-NEXT: imull $-294967296, %edx, %ecx # imm = 0xEE6B2800
+; X64-NEXT: subl %ecx, %edi
+; X64-NEXT: movl %edi, %edx
+; X64-NEXT: retq
%2 = sdiv i64 %0, 4000000000
%3 = srem i64 %0, 4000000000
%4 = trunc i64 %3 to i32
OpenPOWER on IntegriCloud