summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- llvm/lib/Transforms/IPO/HotColdSplitting.cpp 44
-rw-r--r-- llvm/test/Transforms/HotColdSplit/X86/lit.local.cfg 3
-rw-r--r-- llvm/test/Transforms/HotColdSplit/X86/outline-expensive.ll 25
-rw-r--r-- llvm/test/Transforms/HotColdSplit/do-not-split.ll 3
-rw-r--r-- llvm/test/Transforms/HotColdSplit/minsize.ll 4
-rw-r--r-- llvm/test/Transforms/HotColdSplit/split-out-dbg-val-of-arg.ll 3
6 files changed, 63 insertions, 19 deletions
diff --git a/llvm/lib/Transforms/IPO/HotColdSplitting.cpp b/llvm/lib/Transforms/IPO/HotColdSplitting.cpp
index ce8a5060a3a..621ac7dc8ab 100644
--- a/llvm/lib/Transforms/IPO/HotColdSplitting.cpp
+++ b/llvm/lib/Transforms/IPO/HotColdSplitting.cpp
@@ -66,10 +66,10 @@ using namespace llvm;
static cl::opt<bool> EnableStaticAnalyis("hot-cold-static-analysis",
cl::init(true), cl::Hidden);
-static cl::opt<unsigned> MinOutliningInstCount(
- "min-outlining-inst-count", cl::init(3), cl::Hidden,
- cl::desc("Minimum number of instructions needed for a single-block region "
- "to be an outlining candidate"));
+static cl::opt<int>
+ MinOutliningThreshold("min-outlining-thresh", cl::init(3), cl::Hidden,
+ cl::desc("Code size threshold for outlining within a "
+ "single BB (as a multiple of TCC_Basic)"));
namespace {
@@ -135,14 +135,18 @@ static bool mayExtractBlock(const BasicBlock &BB) {
return !BB.hasAddressTaken();
}
-/// Check whether \p BB has at least \p Min non-debug, non-terminator
-/// instructions.
-static bool hasMinimumInstCount(const BasicBlock &BB, unsigned Min) {
- unsigned Count = 0;
+/// Check whether \p BB is profitable to outline (i.e. its code size cost meets
+/// the threshold set in \p MinOutliningThreshold).
+static bool isProfitableToOutline(const BasicBlock &BB,
+ TargetTransformInfo &TTI) {
+ int Cost = 0;
for (const Instruction &I : BB) {
if (isa<DbgInfoIntrinsic>(&I) || &I == BB.getTerminator())
continue;
- if (++Count >= Min)
+
+ Cost += TTI.getInstructionCost(&I, TargetTransformInfo::TCK_CodeSize);
+
+ if (Cost >= (MinOutliningThreshold * TargetTransformInfo::TCC_Basic))
return true;
}
return false;
@@ -156,8 +160,10 @@ static bool hasMinimumInstCount(const BasicBlock &BB, unsigned Min) {
///
/// Return an empty sequence if the cold region is too small to outline, or if
/// the cold region has no warm predecessors.
-static BlockSequence
-findMaximalColdRegion(BasicBlock &SinkBB, DominatorTree &DT, PostDomTree &PDT) {
+static BlockSequence findMaximalColdRegion(BasicBlock &SinkBB,
+ TargetTransformInfo &TTI,
+ DominatorTree &DT,
+ PostDomTree &PDT) {
// The maximal cold region.
BlockSequence ColdRegion = {};
@@ -241,8 +247,7 @@ findMaximalColdRegion(BasicBlock &SinkBB, DominatorTree &DT, PostDomTree &PDT) {
++SuccIt;
}
- if (ColdRegion.size() == 1 &&
- !hasMinimumInstCount(*ColdRegion[0], MinOutliningInstCount))
+ if (ColdRegion.size() == 1 && !isProfitableToOutline(*ColdRegion[0], TTI))
return {};
return ColdRegion;
@@ -251,6 +256,7 @@ findMaximalColdRegion(BasicBlock &SinkBB, DominatorTree &DT, PostDomTree &PDT) {
/// Get the largest cold region in \p F.
static BlockSequence getLargestColdRegion(Function &F, ProfileSummaryInfo &PSI,
BlockFrequencyInfo *BFI,
+ TargetTransformInfo &TTI,
DominatorTree &DT, PostDomTree &PDT) {
// Keep track of the largest cold region.
BlockSequence LargestColdRegion = {};
@@ -270,7 +276,7 @@ static BlockSequence getLargestColdRegion(Function &F, ProfileSummaryInfo &PSI,
});
// Find a maximal cold region we can outline.
- BlockSequence ColdRegion = findMaximalColdRegion(BB, DT, PDT);
+ BlockSequence ColdRegion = findMaximalColdRegion(BB, TTI, DT, PDT);
if (ColdRegion.empty()) {
LLVM_DEBUG(dbgs() << " Skipping (block not profitable to extract)\n");
continue;
@@ -305,7 +311,7 @@ public:
private:
bool shouldOutlineFrom(const Function &F) const;
Function *extractColdRegion(const BlockSequence &Region, DominatorTree &DT,
- BlockFrequencyInfo *BFI,
+ BlockFrequencyInfo *BFI, TargetTransformInfo &TTI,
OptimizationRemarkEmitter &ORE, unsigned Count);
SmallPtrSet<const Function *, 2> OutlinedFunctions;
ProfileSummaryInfo *PSI;
@@ -365,6 +371,7 @@ bool HotColdSplitting::shouldOutlineFrom(const Function &F) const {
Function *HotColdSplitting::extractColdRegion(const BlockSequence &Region,
DominatorTree &DT,
BlockFrequencyInfo *BFI,
+ TargetTransformInfo &TTI,
OptimizationRemarkEmitter &ORE,
unsigned Count) {
assert(!Region.empty());
@@ -393,7 +400,7 @@ Function *HotColdSplitting::extractColdRegion(const BlockSequence &Region,
CallInst *CI = cast<CallInst>(U);
CallSite CS(CI);
NumColdRegionsOutlined++;
- if (GetTTI(*OutF).useColdCCForColdCall(*OutF)) {
+ if (TTI.useColdCCForColdCall(*OutF)) {
OutF->setCallingConv(CallingConv::Cold);
CS.setCallingConv(CallingConv::Cold);
}
@@ -437,14 +444,15 @@ bool HotColdSplitting::run(Module &M) {
PostDomTree PDT(F);
PDT.recalculate(F);
BlockFrequencyInfo *BFI = GetBFI(F);
+ TargetTransformInfo &TTI = GetTTI(F);
- BlockSequence ColdRegion = getLargestColdRegion(F, *PSI, BFI, DT, PDT);
+ BlockSequence ColdRegion = getLargestColdRegion(F, *PSI, BFI, TTI, DT, PDT);
if (ColdRegion.empty())
continue;
OptimizationRemarkEmitter &ORE = (*GetORE)(F);
Function *Outlined =
- extractColdRegion(ColdRegion, DT, BFI, ORE, /*Count=*/1);
+ extractColdRegion(ColdRegion, DT, BFI, TTI, ORE, /*Count=*/1);
if (Outlined) {
OutlinedFunctions.insert(Outlined);
Changed = true;
diff --git a/llvm/test/Transforms/HotColdSplit/X86/lit.local.cfg b/llvm/test/Transforms/HotColdSplit/X86/lit.local.cfg
new file mode 100644
index 00000000000..e71f3cc4c41
--- /dev/null
+++ b/llvm/test/Transforms/HotColdSplit/X86/lit.local.cfg
@@ -0,0 +1,3 @@
+if not 'X86' in config.root.targets:
+ config.unsupported = True
+
diff --git a/llvm/test/Transforms/HotColdSplit/X86/outline-expensive.ll b/llvm/test/Transforms/HotColdSplit/X86/outline-expensive.ll
new file mode 100644
index 00000000000..5b0cceae2af
--- /dev/null
+++ b/llvm/test/Transforms/HotColdSplit/X86/outline-expensive.ll
@@ -0,0 +1,25 @@
+; The magic number 6 comes from (1 * TCC_Expensive) + (1 * CostOfCallX86).
+; RUN: opt -hotcoldsplit -min-outlining-thresh=6 -S < %s | FileCheck %s
+
+; Test that we outline even though there are only two cold instructions. TTI
+; should determine that they are expensive in terms of code size.
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.14.0"
+
+; CHECK-LABEL: @fun
+; CHECK: call void @fun.cold.1
+define void @fun(i32 %x) {
+entry:
+ br i1 undef, label %if.then, label %if.else
+
+if.then:
+ ret void
+
+if.else:
+ %y = sdiv i32 %x, 111
+ call void @sink(i32 %y)
+ ret void
+}
+
+declare void @sink(i32 %x) cold
diff --git a/llvm/test/Transforms/HotColdSplit/do-not-split.ll b/llvm/test/Transforms/HotColdSplit/do-not-split.ll
index 213681383ea..d5a8c44cc04 100644
--- a/llvm/test/Transforms/HotColdSplit/do-not-split.ll
+++ b/llvm/test/Transforms/HotColdSplit/do-not-split.ll
@@ -1,6 +1,9 @@
; RUN: opt -hotcoldsplit -S < %s | FileCheck %s
; RUN: opt -passes=hotcoldsplit -S < %s | FileCheck %s
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.14.0"
+
; Check that these functions are not split. Outlined functions are called from a
; basic block named codeRepl.
diff --git a/llvm/test/Transforms/HotColdSplit/minsize.ll b/llvm/test/Transforms/HotColdSplit/minsize.ll
index eb42ad14af2..69cd0979b94 100644
--- a/llvm/test/Transforms/HotColdSplit/minsize.ll
+++ b/llvm/test/Transforms/HotColdSplit/minsize.ll
@@ -1,8 +1,10 @@
; RUN: opt -hotcoldsplit -S < %s | FileCheck %s
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.14.0"
+
; CHECK-LABEL: @fun
; CHECK: call void @fun.cold.1
-
define void @fun() {
entry:
br i1 undef, label %if.then, label %if.else
diff --git a/llvm/test/Transforms/HotColdSplit/split-out-dbg-val-of-arg.ll b/llvm/test/Transforms/HotColdSplit/split-out-dbg-val-of-arg.ll
index b77201fe0d3..becfaf8e63d 100644
--- a/llvm/test/Transforms/HotColdSplit/split-out-dbg-val-of-arg.ll
+++ b/llvm/test/Transforms/HotColdSplit/split-out-dbg-val-of-arg.ll
@@ -1,5 +1,8 @@
; RUN: opt -hotcoldsplit -S < %s | FileCheck %s
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.14.0"
+
; CHECK-LABEL: define {{.*}}@foo.cold
; CHECK-NOT: llvm.dbg.value
OpenPOWER on IntegriCloud