summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Transforms
diff options
context:
space:
mode:
authorZaara Syeda <syzaara@ca.ibm.com>2018-01-30 16:17:22 +0000
committerZaara Syeda <syzaara@ca.ibm.com>2018-01-30 16:17:22 +0000
commit1f59ae311bc234f718624d72483152e9b1e160b3 (patch)
tree387e83a2cda94680a8cb8e06e63348558eb8995f /llvm/lib/Transforms
parent8c345dcb9b1d3a5b0f8b6a81c7c8531b435ff3e2 (diff)
downloadbcm5719-llvm-1f59ae311bc234f718624d72483152e9b1e160b3.tar.gz
bcm5719-llvm-1f59ae311bc234f718624d72483152e9b1e160b3.zip
Re-commit : [PowerPC] Add handling for ColdCC calling convention and a pass to mark
candidates with coldcc attribute. This recommits r322721 reverted due to sanitizer memory leak build bot failures. Original commit message: This patch adds support for the coldcc calling convention for Power. This changes the set of non-volatile registers. It includes a pass to stress test the implementation by marking all static directly called functions with the coldcc attribute through the option -enable-coldcc-stress-test. It also includes an option, -ppc-enable-coldcc, to add the coldcc attribute to functions which are cold at all call sites based on BlockFrequencyInfo when the containing function does not call any non cold functions. Differential Revision: https://reviews.llvm.org/D38413 llvm-svn: 323778
Diffstat (limited to 'llvm/lib/Transforms')
-rw-r--r--llvm/lib/Transforms/IPO/GlobalOpt.cpp164
1 files changed, 158 insertions, 6 deletions
diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
index e66709fae57..2a2dc7b0739 100644
--- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -22,9 +22,11 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/Twine.h"
#include "llvm/ADT/iterator_range.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
@@ -55,6 +57,7 @@
#include "llvm/Pass.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
@@ -88,6 +91,21 @@ STATISTIC(NumNestRemoved , "Number of nest attributes removed");
STATISTIC(NumAliasesResolved, "Number of global aliases resolved");
STATISTIC(NumAliasesRemoved, "Number of global aliases eliminated");
STATISTIC(NumCXXDtorsRemoved, "Number of global C++ destructors removed");
+STATISTIC(NumInternalFunc, "Number of internal functions");
+STATISTIC(NumColdCC, "Number of functions marked coldcc");
+
+// Stress-test switch: when set, OptimizeFunctions gives the coldcc calling
+// convention to every internal function that passes its changeable-CC,
+// non-vararg and address-not-taken checks, without consulting block
+// frequencies or the target.
+static cl::opt<bool>
+    EnableColdCCStressTest("enable-coldcc-stress-test",
+                           cl::desc("Enable stress test of coldcc by adding "
+                                    "calling conv to all internal functions."),
+                           cl::init(false), cl::Hidden);
+
+// Threshold read by isColdCallSite: a call site counts as cold when its
+// block frequency is below this percentage of the caller's entry frequency.
+// Note the trailing space after "enabling": adjacent string literals are
+// concatenated verbatim, so without it the help text reads "enablingcoldcc".
+static cl::opt<int> ColdCCRelFreq(
+    "coldcc-rel-freq", cl::Hidden, cl::init(2), cl::ZeroOrMore,
+    cl::desc(
+        "Maximum block frequency, expressed as a percentage of caller's "
+        "entry frequency, for a call site to be considered cold for enabling "
+        "coldcc"));
/// Is this global variable possibly used by a leak checker as a root? If so,
/// we might not really want to eliminate the stores to it.
@@ -2095,20 +2113,114 @@ static void RemoveNestAttribute(Function *F) {
/// idea here is that we don't want to mess with the convention if the user
/// explicitly requested something with performance implications like coldcc,
/// GHC, or anyregcc.
-static bool isProfitableToMakeFastCC(Function *F) {
+static bool hasChangeableCC(Function *F) {
CallingConv::ID CC = F->getCallingConv();
// FIXME: Is it worth transforming x86_stdcallcc and x86_fastcallcc?
return CC == CallingConv::C || CC == CallingConv::X86_ThisCall;
}
+/// Decide whether a call site is cold: the frequency of the block holding the
+/// call must be strictly below ColdCCRelFreq percent of the frequency of the
+/// caller's entry block, both taken from \p CallerBFI.
+static bool isColdCallSite(CallSite CS, BlockFrequencyInfo &CallerBFI) {
+  const BranchProbability ColdFraction(ColdCCRelFreq, 100);
+  BasicBlock *CallBlock = CS.getInstruction()->getParent();
+  BasicBlock &EntryBlock = CS.getCaller()->getEntryBlock();
+  auto CallBlockFreq = CallerBFI.getBlockFreq(CallBlock);
+  // Scale the entry frequency down to the cold threshold before comparing.
+  auto ColdThreshold = CallerBFI.getBlockFreq(&EntryBlock) * ColdFraction;
+  return CallBlockFreq < ColdThreshold;
+}
+
+// Returns true when F is cold at every one of its call sites and each
+// function containing such a call site is listed in AllCallsCold, i.e. the
+// caller contains no other non-cold calls. A function with no users is never
+// a candidate. AllCallsCold holds the functions whose call sites are all in
+// cold blocks; GetBFI supplies the block frequency info of a given caller.
+static bool
+isValidCandidateForColdCC(Function &F,
+                          function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
+                          const std::vector<Function *> &AllCallsCold) {
+  if (F.user_empty())
+    return false;
+
+  for (User *FnUser : F.users()) {
+    // Block addresses are not calls. Every remaining user is cast to an
+    // Instruction and treated as a call site.
+    if (!isa<BlockAddress>(FnUser)) {
+      CallSite Call(cast<Instruction>(FnUser));
+      Function *Caller = Call.getInstruction()->getParent()->getParent();
+
+      // The call itself has to sit in a cold block of its caller...
+      BlockFrequencyInfo &BFI = GetBFI(*Caller);
+      if (!isColdCallSite(Call, BFI))
+        return false;
+
+      // ...and the caller must be known to contain only cold calls.
+      const bool CallerOnlyHasColdCalls =
+          std::find(AllCallsCold.begin(), AllCallsCold.end(), Caller) !=
+          AllCallsCold.end();
+      if (!CallerOnlyHasColdCalls)
+        return false;
+    }
+  }
+  return true;
+}
+
+/// Set the calling convention of every call site of \p F to
+/// CallingConv::Cold. BlockAddress users are left alone; every other user is
+/// cast to an Instruction and wrapped in a CallSite.
+static void changeCallSitesToColdCC(Function *F) {
+  for (User *FnUser : F->users()) {
+    if (!isa<BlockAddress>(FnUser)) {
+      CallSite Call(cast<Instruction>(FnUser));
+      Call.setCallingConv(CallingConv::Cold);
+    }
+  }
+}
+
+// Walks every CallInst in F and returns true only if each real function call
+// is a direct call to a local-linkage function that may use the coldcc
+// calling convention and whose call site lies in a cold block of F. Inline
+// asm and intrinsic calls are ignored. GetBFI supplies F's block frequency
+// info and is only queried once a call actually needs a frequency check.
+static bool
+hasOnlyColdCalls(Function &F,
+                 function_ref<BlockFrequencyInfo &(Function &)> GetBFI) {
+  for (BasicBlock &BB : F) {
+    for (Instruction &I : BB) {
+      CallInst *CI = dyn_cast<CallInst>(&I);
+      if (!CI)
+        continue;
+      // Skip over inline asm instructions since they aren't function calls.
+      if (CI->isInlineAsm())
+        continue;
+      // An indirect call has no known callee, so nothing can be proven.
+      Function *CalledFn = CI->getCalledFunction();
+      if (!CalledFn)
+        return false;
+      // Skip over intrinsics since they won't remain as function calls.
+      // This must come before the linkage check below: intrinsics are
+      // external declarations, so testing linkage first would reject every
+      // caller that uses one (including debug intrinsics) and make this
+      // skip unreachable.
+      if (CalledFn->getIntrinsicID() != Intrinsic::not_intrinsic)
+        continue;
+      if (!CalledFn->hasLocalLinkage())
+        return false;
+      // Check if it's valid to use coldcc calling convention.
+      if (!hasChangeableCC(CalledFn) || CalledFn->isVarArg() ||
+          CalledFn->hasAddressTaken())
+        return false;
+      CallSite CS(CI);
+      BlockFrequencyInfo &CallerBFI = GetBFI(F);
+      if (!isColdCallSite(CS, CallerBFI))
+        return false;
+    }
+  }
+  return true;
+}
+
static bool
OptimizeFunctions(Module &M, TargetLibraryInfo *TLI,
+ function_ref<TargetTransformInfo &(Function &)> GetTTI,
+ function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
function_ref<DominatorTree &(Function &)> LookupDomTree,
SmallSet<const Comdat *, 8> &NotDiscardableComdats) {
+
bool Changed = false;
+
+ std::vector<Function *> AllCallsCold;
+ for (Module::iterator FI = M.begin(), E = M.end(); FI != E;) {
+ Function *F = &*FI++;
+ if (hasOnlyColdCalls(*F, GetBFI))
+ AllCallsCold.push_back(F);
+ }
+
// Optimize functions.
for (Module::iterator FI = M.begin(), E = M.end(); FI != E; ) {
Function *F = &*FI++;
+
// Functions without names cannot be referenced outside this module.
if (!F->hasName() && !F->isDeclaration() && !F->hasLocalLinkage())
F->setLinkage(GlobalValue::InternalLinkage);
@@ -2140,7 +2252,25 @@ OptimizeFunctions(Module &M, TargetLibraryInfo *TLI,
if (!F->hasLocalLinkage())
continue;
- if (isProfitableToMakeFastCC(F) && !F->isVarArg() &&
+
+ if (hasChangeableCC(F) && !F->isVarArg() && !F->hasAddressTaken()) {
+ NumInternalFunc++;
+ TargetTransformInfo &TTI = GetTTI(*F);
+ // Change the calling convention to coldcc if either stress testing is
+ // enabled or the target would like to use coldcc on functions which are
+ // cold at all call sites and the callers contain no other non coldcc
+ // calls.
+ if (EnableColdCCStressTest ||
+ (isValidCandidateForColdCC(*F, GetBFI, AllCallsCold) &&
+ TTI.useColdCCForColdCall(*F))) {
+ F->setCallingConv(CallingConv::Cold);
+ changeCallSitesToColdCC(F);
+ Changed = true;
+ NumColdCC++;
+ }
+ }
+
+ if (hasChangeableCC(F) && !F->isVarArg() &&
!F->hasAddressTaken()) {
// If this function has a calling convention worth changing, is not a
// varargs function, and is only called directly, promote it to use the
@@ -2618,6 +2748,8 @@ static bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn) {
static bool optimizeGlobalsInModule(
Module &M, const DataLayout &DL, TargetLibraryInfo *TLI,
+ function_ref<TargetTransformInfo &(Function &)> GetTTI,
+ function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
function_ref<DominatorTree &(Function &)> LookupDomTree) {
SmallSet<const Comdat *, 8> NotDiscardableComdats;
bool Changed = false;
@@ -2640,8 +2772,8 @@ static bool optimizeGlobalsInModule(
NotDiscardableComdats.insert(C);
// Delete functions that are trivially dead, ccc -> fastcc
- LocalChange |=
- OptimizeFunctions(M, TLI, LookupDomTree, NotDiscardableComdats);
+ LocalChange |= OptimizeFunctions(M, TLI, GetTTI, GetBFI, LookupDomTree,
+ NotDiscardableComdats);
// Optimize global_ctors list.
LocalChange |= optimizeGlobalCtorsList(M, [&](Function *F) {
@@ -2678,7 +2810,15 @@ PreservedAnalyses GlobalOptPass::run(Module &M, ModuleAnalysisManager &AM) {
auto LookupDomTree = [&FAM](Function &F) -> DominatorTree &{
return FAM.getResult<DominatorTreeAnalysis>(F);
};
- if (!optimizeGlobalsInModule(M, DL, &TLI, LookupDomTree))
+ auto GetTTI = [&FAM](Function &F) -> TargetTransformInfo & {
+ return FAM.getResult<TargetIRAnalysis>(F);
+ };
+
+ auto GetBFI = [&FAM](Function &F) -> BlockFrequencyInfo & {
+ return FAM.getResult<BlockFrequencyAnalysis>(F);
+ };
+
+ if (!optimizeGlobalsInModule(M, DL, &TLI, GetTTI, GetBFI, LookupDomTree))
return PreservedAnalyses::all();
return PreservedAnalyses::none();
}
@@ -2701,12 +2841,22 @@ struct GlobalOptLegacyPass : public ModulePass {
auto LookupDomTree = [this](Function &F) -> DominatorTree & {
return this->getAnalysis<DominatorTreeWrapperPass>(F).getDomTree();
};
- return optimizeGlobalsInModule(M, DL, TLI, LookupDomTree);
+ auto GetTTI = [this](Function &F) -> TargetTransformInfo & {
+ return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ };
+
+ auto GetBFI = [this](Function &F) -> BlockFrequencyInfo & {
+ return this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI();
+ };
+
+ return optimizeGlobalsInModule(M, DL, TLI, GetTTI, GetBFI, LookupDomTree);
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<BlockFrequencyInfoWrapperPass>();
}
};
@@ -2717,6 +2867,8 @@ char GlobalOptLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(GlobalOptLegacyPass, "globalopt",
"Global Variable Optimizer", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(GlobalOptLegacyPass, "globalopt",
"Global Variable Optimizer", false, false)
OpenPOWER on IntegriCloud