[CodeGenPrepare] Disable div bypass when working set size is huge.

Summary: Bypass of slow divs based on operand values is currently disabled for -Os. Do the same when profile summary is available and the working set size of the application is huge. This is similar to how loop peeling is guarded by hasHugeWorkingSetSize. In the div bypass case, the generated extra code (and the extra branch) tends to outweigh the benefits of the bypass. This results in noticeable performance improvement on an internal application. Reviewers: davidxl Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D39992 llvm-svn: 318179
author: Easwaran Raman <eraman@google.com> 2017-11-14 19:31:51 +0000
committer: Easwaran Raman <eraman@google.com> 2017-11-14 19:31:51 +0000
commit: 0d55b55bb616ce28b5570d488edfa294ea039879 (patch)
tree: 7c4579c94f2853960a8a47e6302589290704457c /llvm
parent: 55b8590e03cf211563e6a7f86a48cf4c2c2ab10f (diff)
download: bcm5719-llvm-0d55b55bb616ce28b5570d488edfa294ea039879.tar.gz
bcm5719-llvm-0d55b55bb616ce28b5570d488edfa294ea039879.zip
2 files changed, 32 insertions, 3 deletions
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index d6633a508f5..12351cd3fde 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -353,9 +353,9 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
   LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
   OptSize = F.optForSize();
 
+  ProfileSummaryInfo *PSI =
+      getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
   if (ProfileGuidedSectionPrefix) {
-    ProfileSummaryInfo *PSI =
-        getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
     if (PSI->isFunctionHotInCallGraph(&F))
       F.setSectionPrefix(".hot");
     else if (PSI->isFunctionColdInCallGraph(&F))
@@ -364,7 +364,8 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
 
   /// This optimization identifies DIV instructions that can be
   /// profitably bypassed and carried out with a shorter, faster divide.
-  if (!OptSize && TLI && TLI->isSlowDivBypassed()) {
+  if (!OptSize && !PSI->hasHugeWorkingSetSize() && TLI &&
+      TLI->isSlowDivBypassed()) {
     const DenseMap<unsigned int, unsigned int> &BypassWidths =
        TLI->getBypassSlowDivWidths();
     BasicBlock* BB = &*F.begin();
diff --git a/llvm/test/CodeGen/X86/bypass-slow-division-tune.ll b/llvm/test/CodeGen/X86/bypass-slow-division-tune.ll
index b6a53130cf2..2439f468952 100644
--- a/llvm/test/CodeGen/X86/bypass-slow-division-tune.ll
+++ b/llvm/test/CodeGen/X86/bypass-slow-division-tune.ll
@@ -2,6 +2,7 @@
 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mcpu=atom       < %s | FileCheck -check-prefixes=ATOM,CHECK %s
 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mcpu=silvermont < %s | FileCheck -check-prefixes=REST,CHECK %s
 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mcpu=skylake    < %s | FileCheck -check-prefixes=REST,CHECK %s
+; RUN: llc -profile-summary-huge-working-set-size-threshold=1 -mtriple=x86_64-unknown-linux-gnu -mcpu=skylake    < %s | FileCheck -check-prefixes=HUGEWS %s
 
 ; Verify that div32 is bypassed only for Atoms.
 define i32 @div32(i32 %a, i32 %b) {
@@ -36,6 +37,15 @@ entry:
 define i64 @div64_optsize(i64 %a, i64 %b) optsize {
 ; CHECK-LABEL: div64_optsize:
 ; CHECK-NOT: divl
+; CHECK: ret
+  %div = sdiv i64 %a, %b
+  ret i64 %div
+}
+
+define i64 @div64_hugews(i64 %a, i64 %b) {
+; HUGEWS-LABEL: div64_hugews:
+; HUGEWS-NOT: divl
+; HUGEWS: ret
   %div = sdiv i64 %a, %b
   ret i64 %div
 }
@@ -43,6 +53,7 @@ define i64 @div64_optsize(i64 %a, i64 %b) optsize {
 define i32 @div32_optsize(i32 %a, i32 %b) optsize {
 ; CHECK-LABEL: div32_optsize:
 ; CHECK-NOT: divb
+; CHECK: ret
   %div = sdiv i32 %a, %b
   ret i32 %div
 }
@@ -50,6 +61,23 @@ define i32 @div32_optsize(i32 %a, i32 %b) optsize {
 define i32 @div32_minsize(i32 %a, i32 %b) minsize {
 ; CHECK-LABEL: div32_minsize:
 ; CHECK-NOT: divb
+; CHECK: ret
   %div = sdiv i32 %a, %b
   ret i32 %div
 }
+
+!llvm.module.flags = !{!1}
+!1 = !{i32 1, !"ProfileSummary", !2}
+!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
+!3 = !{!"ProfileFormat", !"InstrProf"}
+!4 = !{!"TotalCount", i64 10000}
+!5 = !{!"MaxCount", i64 1000}
+!6 = !{!"MaxInternalCount", i64 1}
+!7 = !{!"MaxFunctionCount", i64 1000}
+!8 = !{!"NumCounts", i64 3}
+!9 = !{!"NumFunctions", i64 3}
+!10 = !{!"DetailedSummary", !11}
+!11 = !{!12, !13, !14}
+!12 = !{i32 10000, i64 1000, i32 1}
+!13 = !{i32 999000, i64 1000, i32 3}
+!14 = !{i32 999999, i64 5, i32 3}
author	Easwaran Raman <eraman@google.com>	2017-11-14 19:31:51 +0000
committer	Easwaran Raman <eraman@google.com>	2017-11-14 19:31:51 +0000
commit	0d55b55bb616ce28b5570d488edfa294ea039879 (patch)
tree	7c4579c94f2853960a8a47e6302589290704457c /llvm
parent	55b8590e03cf211563e6a7f86a48cf4c2c2ab10f (diff)
download	bcm5719-llvm-0d55b55bb616ce28b5570d488edfa294ea039879.tar.gz bcm5719-llvm-0d55b55bb616ce28b5570d488edfa294ea039879.zip