summary refs log tree commit diff stats
path: root/llvm
diff options
context:
space:
mode:
Diffstat (limited to 'llvm')
-rw-r--r-- llvm/lib/Transforms/IPO/HotColdSplitting.cpp 127
-rw-r--r-- llvm/test/Transforms/HotColdSplit/X86/extraction-subregion-breaks-phis.ll 63
-rw-r--r-- llvm/test/Transforms/HotColdSplit/X86/outline-expensive.ll 25
-rw-r--r-- llvm/test/Transforms/HotColdSplit/addr-taken.ll 2
-rw-r--r-- llvm/test/Transforms/HotColdSplit/apply-noreturn-bonus.ll 26
-rw-r--r-- llvm/test/Transforms/HotColdSplit/apply-penalty-for-inputs.ll 19
-rw-r--r-- llvm/test/Transforms/HotColdSplit/apply-penalty-for-outputs.ll 22
-rw-r--r-- llvm/test/Transforms/HotColdSplit/apply-successor-penalty.ll 53
-rw-r--r-- llvm/test/Transforms/HotColdSplit/outline-disjoint-diamonds.ll 2
-rw-r--r-- llvm/test/Transforms/HotColdSplit/resume.ll 2
-rw-r--r-- llvm/test/Transforms/HotColdSplit/split-cold-2.ll 4
11 files changed, 216 insertions, 129 deletions
diff --git a/llvm/lib/Transforms/IPO/HotColdSplitting.cpp b/llvm/lib/Transforms/IPO/HotColdSplitting.cpp
index 8250db706e3..36dd6fa4be7 100644
--- a/llvm/lib/Transforms/IPO/HotColdSplitting.cpp
+++ b/llvm/lib/Transforms/IPO/HotColdSplitting.cpp
@@ -80,9 +80,9 @@ static cl::opt<bool> EnableStaticAnalyis("hot-cold-static-analysis",
cl::init(true), cl::Hidden);
static cl::opt<int>
- SplittingThreshold("hotcoldsplit-threshold", cl::init(3), cl::Hidden,
- cl::desc("Code size threshold for splitting cold code "
- "(as a multiple of TCC_Basic)"));
+ SplittingThreshold("hotcoldsplit-threshold", cl::init(2), cl::Hidden,
+ cl::desc("Base penalty for splitting cold code (as a "
+ "multiple of TCC_Basic)"));
namespace {
@@ -139,31 +139,6 @@ static bool mayExtractBlock(const BasicBlock &BB) {
!isa<InvokeInst>(BB.getTerminator());
}
-/// Check whether \p Region is profitable to outline.
-static bool isProfitableToOutline(const BlockSequence &Region,
- TargetTransformInfo &TTI) {
- // If the splitting threshold is set at or below zero, skip the usual
- // profitability check.
- if (SplittingThreshold <= 0)
- return true;
-
- if (Region.size() > 1)
- return true;
-
- int Cost = 0;
- const BasicBlock &BB = *Region[0];
- for (const Instruction &I : BB) {
- if (isa<DbgInfoIntrinsic>(&I) || &I == BB.getTerminator())
- continue;
-
- Cost += TTI.getInstructionCost(&I, TargetTransformInfo::TCK_CodeSize);
-
- if (Cost >= (SplittingThreshold * TargetTransformInfo::TCC_Basic))
- return true;
- }
- return false;
-}
-
/// Mark \p F cold. Based on this assumption, also optimize it for minimum size.
/// Return true if the function is changed.
static bool markFunctionCold(Function &F) {
@@ -247,6 +222,82 @@ bool HotColdSplitting::shouldOutlineFrom(const Function &F) const {
return true;
}
+/// Get the benefit score of outlining \p Region: the summed TCK_CodeSize cost, as reported by \p TTI, of its non-debug, non-terminator instructions.
+static int getOutliningBenefit(ArrayRef<BasicBlock *> Region,
+ TargetTransformInfo &TTI) {
+ // Sum up the code size costs of non-terminator instructions. Terminators are
+ // skipped here; their cost is modeled by \ref getOutliningPenalty, so keep the two in sync.
+ int Benefit = 0;
+ for (BasicBlock *BB : Region)
+ for (Instruction &I : BB->instructionsWithoutDebug())
+ if (&I != BB->getTerminator())
+ Benefit +=
+ TTI.getInstructionCost(&I, TargetTransformInfo::TCK_CodeSize);
+
+ return Benefit;
+}
+
+/// Get the penalty score for outlining \p Region, given the number of inputs (\p NumInputs) and outputs (\p NumOutputs) of the region.
+static int getOutliningPenalty(ArrayRef<BasicBlock *> Region,
+ unsigned NumInputs, unsigned NumOutputs) {
+ int Penalty = SplittingThreshold; // Base penalty, taken from -hotcoldsplit-threshold.
+ LLVM_DEBUG(dbgs() << "Applying penalty for splitting: " << Penalty << "\n");
+
+ // If the splitting threshold is set at or below zero, return the base penalty
+ // as-is: none of the input/output/successor adjustments below are applied.
+ if (SplittingThreshold <= 0)
+ return Penalty;
+
+ // The typical code size cost for materializing an argument for the outlined
+ // call, charged once per region input.
+ LLVM_DEBUG(dbgs() << "Applying penalty for: " << NumInputs << " inputs\n");
+ const int CostForArgMaterialization = TargetTransformInfo::TCC_Basic;
+ Penalty += CostForArgMaterialization * NumInputs;
+
+ // The typical code size cost for an output alloca, its associated store, and
+ // its associated reload (three basic-cost operations per region output).
+ LLVM_DEBUG(dbgs() << "Applying penalty for: " << NumOutputs << " outputs\n");
+ const int CostForRegionOutput = 3 * TargetTransformInfo::TCC_Basic;
+ Penalty += CostForRegionOutput * NumOutputs;
+
+ // Collect the distinct exit blocks (successors outside the region). Use a
+ // conservative check to determine whether control returns from the region.
+ bool NoBlocksReturn = true;
+ SmallPtrSet<BasicBlock *, 2> SuccsOutsideRegion;
+ for (BasicBlock *BB : Region) {
+ // A block with no successors is only treated as non-returning if its
+ // terminator is `unreachable`; any other terminator is assumed to return.
+ if (succ_empty(BB)) {
+ NoBlocksReturn &= isa<UnreachableInst>(BB->getTerminator());
+ continue;
+ }
+
+ for (BasicBlock *SuccBB : successors(BB)) {
+ if (find(Region, SuccBB) == Region.end()) {
+ NoBlocksReturn = false;
+ SuccsOutsideRegion.insert(SuccBB);
+ }
+ }
+ }
+
+ // Apply a `noreturn` bonus: subtract one per block in the region (a raw count, not multiplied by TCC_Basic).
+ if (NoBlocksReturn) {
+ LLVM_DEBUG(dbgs() << "Applying bonus for: " << Region.size()
+ << " non-returning terminators\n");
+ Penalty -= Region.size();
+ }
+
+ // Apply a penalty for each distinct successor outside of the region beyond the
+ // first. This penalty accounts for the switch needed in the caller.
+ if (!SuccsOutsideRegion.empty()) {
+ LLVM_DEBUG(dbgs() << "Applying penalty for: " << SuccsOutsideRegion.size()
+ << " non-region successors\n");
+ Penalty += (SuccsOutsideRegion.size() - 1) * TargetTransformInfo::TCC_Basic;
+ }
+
+ return Penalty;
+}
+
Function *HotColdSplitting::extractColdRegion(const BlockSequence &Region,
DominatorTree &DT,
BlockFrequencyInfo *BFI,
@@ -261,6 +312,18 @@ Function *HotColdSplitting::extractColdRegion(const BlockSequence &Region,
/* AllowAlloca */ false,
/* Suffix */ "cold." + std::to_string(Count));
+ // Perform a simple cost/benefit analysis to decide whether or not to permit
+ // splitting.
+ SetVector<Value *> Inputs, Outputs, Sinks;
+ CE.findInputsOutputs(Inputs, Outputs, Sinks);
+ int OutliningBenefit = getOutliningBenefit(Region, TTI);
+ int OutliningPenalty =
+ getOutliningPenalty(Region, Inputs.size(), Outputs.size());
+ LLVM_DEBUG(dbgs() << "Split profitability: benefit = " << OutliningBenefit
+ << ", penalty = " << OutliningPenalty << "\n");
+ if (OutliningBenefit <= OutliningPenalty)
+ return nullptr;
+
Function *OrigF = Region[0]->getParent();
if (Function *OutF = CE.extractCodeRegion()) {
User *U = *OutF->user_begin();
@@ -556,14 +619,6 @@ bool HotColdSplitting::outlineColdRegions(Function &F, bool HasProfileSummary) {
assert(!Region.empty() && "Empty outlining region in worklist");
do {
BlockSequence SubRegion = Region.takeSingleEntrySubRegion(*DT);
- if (!isProfitableToOutline(SubRegion, TTI)) {
- LLVM_DEBUG({
- dbgs() << "Skipping outlining; not profitable to outline\n";
- SubRegion[0]->dump();
- });
- continue;
- }
-
LLVM_DEBUG({
dbgs() << "Hot/cold splitting attempting to outline these blocks:\n";
for (BasicBlock *BB : SubRegion)
diff --git a/llvm/test/Transforms/HotColdSplit/X86/extraction-subregion-breaks-phis.ll b/llvm/test/Transforms/HotColdSplit/X86/extraction-subregion-breaks-phis.ll
deleted file mode 100644
index 9a751e3b28d..00000000000
--- a/llvm/test/Transforms/HotColdSplit/X86/extraction-subregion-breaks-phis.ll
+++ /dev/null
@@ -1,63 +0,0 @@
-; RUN: opt -S -hotcoldsplit -hotcoldsplit-threshold=1 < %s | FileCheck %s
-
-target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-apple-macosx10.14.0"
-
-; CHECK-LABEL: define {{.*}}@foo(
-; CHECK: call {{.*}}@foo.cold.1(
-; CHECK: unreachable
-
-; CHECK-LABEL: define {{.*}}@foo.cold.1(
-; CHECK: switch i32 undef, label %sw.epilog.i
-define void @foo(i32 %QMM) {
-entry:
- switch i32 %QMM, label %entry.if.end16_crit_edge [
- i32 1, label %if.then
- ]
-
-entry.if.end16_crit_edge: ; preds = %entry
- br label %if.end16
-
-if.then: ; preds = %entry
- br i1 undef, label %cond.true.i.i, label %_ZN10StringView8popFrontEv.exit.i
-
-cond.true.i.i: ; preds = %if.then
- ret void
-
-_ZN10StringView8popFrontEv.exit.i: ; preds = %if.then
- switch i32 undef, label %sw.epilog.i [
- i32 81, label %if.end16
- i32 82, label %sw.bb4.i
- i32 83, label %sw.bb8.i
- i32 84, label %sw.bb12.i
- i32 65, label %if.end16
- i32 66, label %sw.bb20.i
- i32 67, label %sw.bb24.i
- i32 68, label %sw.bb28.i
- ]
-
-sw.bb4.i: ; preds = %_ZN10StringView8popFrontEv.exit.i
- br label %if.end16
-
-sw.bb8.i: ; preds = %_ZN10StringView8popFrontEv.exit.i
- br label %if.end16
-
-sw.bb12.i: ; preds = %_ZN10StringView8popFrontEv.exit.i
- br label %if.end16
-
-sw.bb20.i: ; preds = %_ZN10StringView8popFrontEv.exit.i
- br label %if.end16
-
-sw.bb24.i: ; preds = %_ZN10StringView8popFrontEv.exit.i
- br label %if.end16
-
-sw.bb28.i: ; preds = %_ZN10StringView8popFrontEv.exit.i
- br label %if.end16
-
-sw.epilog.i: ; preds = %_ZN10StringView8popFrontEv.exit.i
- br label %if.end16
-
-if.end16: ; preds = %sw.epilog.i, %sw.bb28.i, %sw.bb24.i, %sw.bb20.i, %sw.bb12.i, %sw.bb8.i, %sw.bb4.i, %_ZN10StringView8popFrontEv.exit.i, %_ZN10StringView8popFrontEv.exit.i, %entry.if.end16_crit_edge
- %0 = phi i8 [ 0, %entry.if.end16_crit_edge ], [ 0, %_ZN10StringView8popFrontEv.exit.i ], [ 0, %_ZN10StringView8popFrontEv.exit.i ], [ 1, %sw.bb4.i ], [ 2, %sw.bb8.i ], [ 3, %sw.bb12.i ], [ 1, %sw.bb20.i ], [ 2, %sw.bb24.i ], [ 3, %sw.bb28.i ], [ 0, %sw.epilog.i ]
- unreachable
-}
diff --git a/llvm/test/Transforms/HotColdSplit/X86/outline-expensive.ll b/llvm/test/Transforms/HotColdSplit/X86/outline-expensive.ll
deleted file mode 100644
index 3f04283b0c1..00000000000
--- a/llvm/test/Transforms/HotColdSplit/X86/outline-expensive.ll
+++ /dev/null
@@ -1,25 +0,0 @@
-; The magic number 6 comes from (1 * TCC_Expensive) + (1 * CostOfCallX86).
-; RUN: opt -hotcoldsplit -hotcoldsplit-threshold=6 -S < %s | FileCheck %s
-
-; Test that we outline even though there are only two cold instructions. TTI
-; should determine that they are expensive in terms of code size.
-
-target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-apple-macosx10.14.0"
-
-; CHECK-LABEL: @fun
-; CHECK: call void @fun.cold.1
-define void @fun(i32 %x) {
-entry:
- br i1 undef, label %if.then, label %if.else
-
-if.then:
- ret void
-
-if.else:
- %y = sdiv i32 %x, 111
- call void @sink(i32 %y)
- ret void
-}
-
-declare void @sink(i32 %x) cold
diff --git a/llvm/test/Transforms/HotColdSplit/addr-taken.ll b/llvm/test/Transforms/HotColdSplit/addr-taken.ll
index f2f448c8a46..19f1d4f1974 100644
--- a/llvm/test/Transforms/HotColdSplit/addr-taken.ll
+++ b/llvm/test/Transforms/HotColdSplit/addr-taken.ll
@@ -1,4 +1,4 @@
-; RUN: opt -hotcoldsplit -hotcoldsplit-threshold=0 -S < %s | FileCheck %s
+; RUN: opt -hotcoldsplit -hotcoldsplit-threshold=-1 -S < %s | FileCheck %s
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.14.0"
diff --git a/llvm/test/Transforms/HotColdSplit/apply-noreturn-bonus.ll b/llvm/test/Transforms/HotColdSplit/apply-noreturn-bonus.ll
new file mode 100644
index 00000000000..c1d9af88595
--- /dev/null
+++ b/llvm/test/Transforms/HotColdSplit/apply-noreturn-bonus.ll
@@ -0,0 +1,26 @@
+; REQUIRES: asserts
+; RUN: opt -hotcoldsplit -debug-only=hotcoldsplit -S < %s -o /dev/null 2>&1 | FileCheck %s
+
+declare void @sink() cold
+
+define void @foo(i32 %arg) {
+entry:
+ br i1 undef, label %cold1, label %exit
+
+cold1:
+ ; CHECK: Applying bonus for: 4 non-returning terminators
+ call void @sink()
+ br i1 undef, label %cold2, label %cold3
+
+cold2:
+ br label %cold4
+
+cold3:
+ br label %cold4
+
+cold4:
+ unreachable
+
+exit:
+ ret void
+}
diff --git a/llvm/test/Transforms/HotColdSplit/apply-penalty-for-inputs.ll b/llvm/test/Transforms/HotColdSplit/apply-penalty-for-inputs.ll
new file mode 100644
index 00000000000..fffd6f9f5dc
--- /dev/null
+++ b/llvm/test/Transforms/HotColdSplit/apply-penalty-for-inputs.ll
@@ -0,0 +1,19 @@
+; REQUIRES: asserts
+; RUN: opt -hotcoldsplit -debug-only=hotcoldsplit -S < %s -o /dev/null 2>&1 | FileCheck %s
+
+declare void @sink(i32*, i32, i32) cold
+
+@g = global i32 0
+
+define void @foo(i32 %arg) {
+ %local = load i32, i32* @g
+ br i1 undef, label %cold, label %exit
+
+cold:
+ ; CHECK: Applying penalty for: 2 inputs
+ call void @sink(i32* @g, i32 %arg, i32 %local)
+ ret void
+
+exit:
+ ret void
+}
diff --git a/llvm/test/Transforms/HotColdSplit/apply-penalty-for-outputs.ll b/llvm/test/Transforms/HotColdSplit/apply-penalty-for-outputs.ll
new file mode 100644
index 00000000000..a7d9f97ab03
--- /dev/null
+++ b/llvm/test/Transforms/HotColdSplit/apply-penalty-for-outputs.ll
@@ -0,0 +1,22 @@
+; REQUIRES: asserts
+; RUN: opt -hotcoldsplit -debug-only=hotcoldsplit -S < %s -o /dev/null 2>&1 | FileCheck %s
+
+declare void @sink() cold
+
+@g = global i32 0
+
+define i32 @foo(i32 %arg) {
+entry:
+ br i1 undef, label %cold, label %exit
+
+cold:
+ ; CHECK: Applying penalty for: 1 output
+ ; CHECK: Applying penalty for: 1 non-region successors
+ %local = load i32, i32* @g
+ call void @sink()
+ br label %exit
+
+exit:
+ %p = phi i32 [ %local, %cold ], [ 0, %entry ]
+ ret i32 %p
+}
diff --git a/llvm/test/Transforms/HotColdSplit/apply-successor-penalty.ll b/llvm/test/Transforms/HotColdSplit/apply-successor-penalty.ll
new file mode 100644
index 00000000000..3886d76da01
--- /dev/null
+++ b/llvm/test/Transforms/HotColdSplit/apply-successor-penalty.ll
@@ -0,0 +1,53 @@
+; REQUIRES: asserts
+; RUN: opt -hotcoldsplit -debug-only=hotcoldsplit -S < %s -o /dev/null 2>&1 | FileCheck %s
+
+declare void @sink() cold
+
+; CHECK-LABEL: Outlining in one_non_region_successor
+define void @one_non_region_successor(i32 %arg) {
+entry:
+ br i1 undef, label %cold1, label %exit
+
+cold1:
+ ; CHECK: Applying penalty for: 1 non-region successor
+ call void @sink()
+ br i1 undef, label %cold2, label %cold3
+
+cold2:
+ br i1 undef, label %cold4, label %exit
+
+cold3:
+ br i1 undef, label %cold4, label %exit
+
+cold4:
+ unreachable
+
+exit:
+ ret void
+}
+
+; CHECK-LABEL: Outlining in two_non_region_successor
+define void @two_non_region_successors(i32 %arg) {
+entry:
+ br i1 undef, label %cold1, label %exit1
+
+cold1:
+ ; CHECK: Applying penalty for: 2 non-region successors
+ call void @sink()
+ br i1 undef, label %cold2, label %cold3
+
+cold2:
+ br i1 undef, label %cold4, label %exit1
+
+cold3:
+ br i1 undef, label %cold4, label %exit2
+
+cold4:
+ unreachable
+
+exit1:
+ br label %exit2
+
+exit2:
+ ret void
+}
diff --git a/llvm/test/Transforms/HotColdSplit/outline-disjoint-diamonds.ll b/llvm/test/Transforms/HotColdSplit/outline-disjoint-diamonds.ll
index 64bc94ebd54..b33454b5c4e 100644
--- a/llvm/test/Transforms/HotColdSplit/outline-disjoint-diamonds.ll
+++ b/llvm/test/Transforms/HotColdSplit/outline-disjoint-diamonds.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -hotcoldsplit -hotcoldsplit-threshold=0 < %s 2>&1 | FileCheck %s
+; RUN: opt -S -hotcoldsplit -hotcoldsplit-threshold=-1 < %s 2>&1 | FileCheck %s
; CHECK-LABEL: define {{.*}}@fun
; CHECK: call {{.*}}@fun.cold.2(
diff --git a/llvm/test/Transforms/HotColdSplit/resume.ll b/llvm/test/Transforms/HotColdSplit/resume.ll
index cbda078da90..2b8ea7d91d9 100644
--- a/llvm/test/Transforms/HotColdSplit/resume.ll
+++ b/llvm/test/Transforms/HotColdSplit/resume.ll
@@ -1,4 +1,4 @@
-; RUN: opt -hotcoldsplit -hotcoldsplit-threshold=0 -S < %s | FileCheck %s
+; RUN: opt -hotcoldsplit -hotcoldsplit-threshold=-1 -S < %s | FileCheck %s
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.14.0"
diff --git a/llvm/test/Transforms/HotColdSplit/split-cold-2.ll b/llvm/test/Transforms/HotColdSplit/split-cold-2.ll
index 0ce16817930..0b228a58897 100644
--- a/llvm/test/Transforms/HotColdSplit/split-cold-2.ll
+++ b/llvm/test/Transforms/HotColdSplit/split-cold-2.ll
@@ -1,5 +1,5 @@
-; RUN: opt -hotcoldsplit -hotcoldsplit-threshold=0 -pass-remarks=hotcoldsplit -S < %s 2>&1 | FileCheck %s
-; RUN: opt -hotcoldsplit-threshold=0 -passes=hotcoldsplit -pass-remarks=hotcoldsplit -S < %s 2>&1 | FileCheck %s
+; RUN: opt -hotcoldsplit -hotcoldsplit-threshold=-1 -pass-remarks=hotcoldsplit -S < %s 2>&1 | FileCheck %s
+; RUN: opt -passes=hotcoldsplit -hotcoldsplit-threshold=-1 -pass-remarks=hotcoldsplit -S < %s 2>&1 | FileCheck %s
; Make sure this compiles. This test used to fail with an invalid phi node: the
; two predecessors were outlined and the SSA representation was invalid.
OpenPOWER on IntegriCloud