summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/CodeGen/CodeGenPrepare.cpp7
-rw-r--r--llvm/test/CodeGen/X86/bypass-slow-division-tune.ll28
2 files changed, 32 insertions, 3 deletions
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index d6633a508f5..12351cd3fde 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -353,9 +353,9 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
OptSize = F.optForSize();
+ ProfileSummaryInfo *PSI =
+ getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
if (ProfileGuidedSectionPrefix) {
- ProfileSummaryInfo *PSI =
- getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
if (PSI->isFunctionHotInCallGraph(&F))
F.setSectionPrefix(".hot");
else if (PSI->isFunctionColdInCallGraph(&F))
@@ -364,7 +364,8 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
/// This optimization identifies DIV instructions that can be
/// profitably bypassed and carried out with a shorter, faster divide.
- if (!OptSize && TLI && TLI->isSlowDivBypassed()) {
+ if (!OptSize && !PSI->hasHugeWorkingSetSize() && TLI &&
+ TLI->isSlowDivBypassed()) {
const DenseMap<unsigned int, unsigned int> &BypassWidths =
TLI->getBypassSlowDivWidths();
BasicBlock* BB = &*F.begin();
diff --git a/llvm/test/CodeGen/X86/bypass-slow-division-tune.ll b/llvm/test/CodeGen/X86/bypass-slow-division-tune.ll
index b6a53130cf2..2439f468952 100644
--- a/llvm/test/CodeGen/X86/bypass-slow-division-tune.ll
+++ b/llvm/test/CodeGen/X86/bypass-slow-division-tune.ll
@@ -2,6 +2,7 @@
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mcpu=atom < %s | FileCheck -check-prefixes=ATOM,CHECK %s
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mcpu=silvermont < %s | FileCheck -check-prefixes=REST,CHECK %s
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mcpu=skylake < %s | FileCheck -check-prefixes=REST,CHECK %s
+; RUN: llc -profile-summary-huge-working-set-size-threshold=1 -mtriple=x86_64-unknown-linux-gnu -mcpu=skylake < %s | FileCheck -check-prefixes=HUGEWS %s
; Verify that div32 is bypassed only for Atoms.
define i32 @div32(i32 %a, i32 %b) {
@@ -36,6 +37,15 @@ entry:
define i64 @div64_optsize(i64 %a, i64 %b) optsize {
; CHECK-LABEL: div64_optsize:
; CHECK-NOT: divl
+; CHECK: ret
+ %div = sdiv i64 %a, %b
+ ret i64 %div
+}
+
+define i64 @div64_hugews(i64 %a, i64 %b) {
+; HUGEWS-LABEL: div64_hugews:
+; HUGEWS-NOT: divl
+; HUGEWS: ret
%div = sdiv i64 %a, %b
ret i64 %div
}
@@ -43,6 +53,7 @@ define i64 @div64_optsize(i64 %a, i64 %b) optsize {
define i32 @div32_optsize(i32 %a, i32 %b) optsize {
; CHECK-LABEL: div32_optsize:
; CHECK-NOT: divb
+; CHECK: ret
%div = sdiv i32 %a, %b
ret i32 %div
}
@@ -50,6 +61,23 @@ define i32 @div32_optsize(i32 %a, i32 %b) optsize {
define i32 @div32_minsize(i32 %a, i32 %b) minsize {
; CHECK-LABEL: div32_minsize:
; CHECK-NOT: divb
+; CHECK: ret
%div = sdiv i32 %a, %b
ret i32 %div
}
+
+!llvm.module.flags = !{!1}
+!1 = !{i32 1, !"ProfileSummary", !2}
+!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
+!3 = !{!"ProfileFormat", !"InstrProf"}
+!4 = !{!"TotalCount", i64 10000}
+!5 = !{!"MaxCount", i64 1000}
+!6 = !{!"MaxInternalCount", i64 1}
+!7 = !{!"MaxFunctionCount", i64 1000}
+!8 = !{!"NumCounts", i64 3}
+!9 = !{!"NumFunctions", i64 3}
+!10 = !{!"DetailedSummary", !11}
+!11 = !{!12, !13, !14}
+!12 = !{i32 10000, i64 1000, i32 1}
+!13 = !{i32 999000, i64 1000, i32 3}
+!14 = !{i32 999999, i64 5, i32 3}
OpenPOWER on IntegriCloud