diff options
-rw-r--r-- | llvm/include/llvm/Analysis/TargetTransformInfo.h | 9 | ||||
-rw-r--r-- | llvm/include/llvm/Analysis/TargetTransformInfoImpl.h | 2 | ||||
-rw-r--r-- | llvm/lib/Analysis/TargetTransformInfo.cpp | 4 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCCallingConv.td | 53 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCFastISel.cpp | 2 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCFrameLowering.cpp | 14 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 17 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp | 18 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp | 13 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h | 2 | ||||
-rw-r--r-- | llvm/lib/Transforms/IPO/GlobalOpt.cpp | 164 | ||||
-rw-r--r-- | llvm/test/CodeGen/PowerPC/coldcc.ll | 46 | ||||
-rw-r--r-- | llvm/test/CodeGen/PowerPC/coldcc2.ll | 42 | ||||
-rw-r--r-- | llvm/test/Other/pass-pipelines.ll | 2 | ||||
-rw-r--r-- | llvm/test/Transforms/GlobalOpt/PowerPC/coldcc_coldsites.ll | 81 | ||||
-rw-r--r-- | llvm/test/Transforms/GlobalOpt/PowerPC/lit.local.cfg | 3 | ||||
-rw-r--r-- | llvm/test/Transforms/GlobalOpt/coldcc_stress_test.ll | 48 |
17 files changed, 13 insertions, 507 deletions
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h index d74de16ffcc..c20f20cfbe4 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -541,10 +541,6 @@ public: /// containing this constant value for the target. bool shouldBuildLookupTablesForConstant(Constant *C) const; - /// \brief Return true if the input function which is cold at all call sites, - /// should use coldcc calling convention. - bool useColdCCForColdCall(Function &F) const; - unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const; unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args, @@ -996,7 +992,6 @@ public: virtual unsigned getJumpBufSize() = 0; virtual bool shouldBuildLookupTables() = 0; virtual bool shouldBuildLookupTablesForConstant(Constant *C) = 0; - virtual bool useColdCCForColdCall(Function &F) = 0; virtual unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) = 0; virtual unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args, @@ -1242,10 +1237,6 @@ public: bool shouldBuildLookupTablesForConstant(Constant *C) override { return Impl.shouldBuildLookupTablesForConstant(C); } - bool useColdCCForColdCall(Function &F) override { - return Impl.useColdCCForColdCall(F); - } - unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) override { return Impl.getScalarizationOverhead(Ty, Insert, Extract); diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index 73e8aa22b3c..4c37402278e 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -284,8 +284,6 @@ public: bool shouldBuildLookupTables() { return true; } bool shouldBuildLookupTablesForConstant(Constant *C) { return true; } - bool useColdCCForColdCall(Function &F) { return false; } - unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) { return 0; } diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp index e046984ffc3..b744cae51ed 100644 --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -226,10 +226,6 @@ bool TargetTransformInfo::shouldBuildLookupTablesForConstant(Constant *C) const return TTIImpl->shouldBuildLookupTablesForConstant(C); } -bool TargetTransformInfo::useColdCCForColdCall(Function &F) const { - return TTIImpl->useColdCCForColdCall(F); -} - unsigned TargetTransformInfo:: getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const { return TTIImpl->getScalarizationOverhead(Ty, Insert, Extract); diff --git a/llvm/lib/Target/PowerPC/PPCCallingConv.td b/llvm/lib/Target/PowerPC/PPCCallingConv.td index d7d2cad1e5f..a4f4c8688cc 100644 --- a/llvm/lib/Target/PowerPC/PPCCallingConv.td +++ b/llvm/lib/Target/PowerPC/PPCCallingConv.td @@ -45,29 +45,6 @@ def RetCC_PPC64_AnyReg : CallingConv<[ CCCustom<"CC_PPC_AnyReg_Error"> ]>; -// Return-value convention for PowerPC coldcc. -def RetCC_PPC_Cold : CallingConv<[ - // Use the same return registers as RetCC_PPC, but limited to only - // one return value. The remaining return values will be saved to - // the stack. - CCIfType<[i32, i1], CCIfSubtarget<"isPPC64()", CCPromoteToType<i64>>>, - CCIfType<[i1], CCIfNotSubtarget<"isPPC64()", CCPromoteToType<i32>>>, - - CCIfType<[i32], CCAssignToReg<[R3]>>, - CCIfType<[i64], CCAssignToReg<[X3]>>, - CCIfType<[i128], CCAssignToReg<[X3]>>, - - CCIfType<[f32], CCAssignToReg<[F1]>>, - CCIfType<[f64], CCAssignToReg<[F1]>>, - - CCIfType<[v4f64, v4f32, v4i1], - CCIfSubtarget<"hasQPX()", CCAssignToReg<[QF1]>>>, - - CCIfType<[v16i8, v8i16, v4i32, v2i64, v1i128, v4f32, v2f64], - CCIfSubtarget<"hasAltivec()", - CCAssignToReg<[V2]>>> -]>; - // Return-value convention for PowerPC def RetCC_PPC : CallingConv<[ CCIfCC<"CallingConv::AnyReg", CCDelegateTo<RetCC_PPC64_AnyReg>>, @@ -294,36 +271,6 @@ def CSR_SVR464_R2_Altivec_ViaCopy : CalleeSavedRegs<(add CSR_SVR464_R2_Altivec)> def CSR_NoRegs : CalleeSavedRegs<(add)>; -// coldcc calling convection marks most registers as non-volatile. -// Do not include r1 since the stack pointer is never considered a CSR. -// Do not include r2, since it is the TOC register and is added depending -// on wether or not the function uses the TOC and is a non-leaf. -// Do not include r0,r11,r13 as they are optional in functional linkage -// and value may be altered by inter-library calls. -// Do not include r12 as it is used as a scratch register. -// Do not include return registers r3, f1, v2. -def CSR_SVR32_ColdCC : CalleeSavedRegs<(add (sequence "R%u", 4, 10), - (sequence "R%u", 14, 31), - F0, (sequence "F%u", 2, 31), - (sequence "CR%u", 0, 7))>; - -def CSR_SVR32_ColdCC_Altivec : CalleeSavedRegs<(add CSR_SVR32_ColdCC, - (sequence "V%u", 0, 1), - (sequence "V%u", 3, 31))>; - -def CSR_SVR64_ColdCC : CalleeSavedRegs<(add (sequence "X%u", 4, 10), - (sequence "X%u", 14, 31), - F0, (sequence "F%u", 2, 31), - (sequence "CR%u", 0, 7))>; - -def CSR_SVR64_ColdCC_R2: CalleeSavedRegs<(add CSR_SVR64_ColdCC, X2)>; - -def CSR_SVR64_ColdCC_Altivec : CalleeSavedRegs<(add CSR_SVR64_ColdCC, - (sequence "V%u", 0, 1), - (sequence "V%u", 3, 31))>; - -def CSR_SVR64_ColdCC_R2_Altivec : CalleeSavedRegs<(add CSR_SVR64_ColdCC_Altivec, X2)>; - def CSR_64_AllRegs: CalleeSavedRegs<(add X0, (sequence "X%u", 3, 10), (sequence "X%u", 14, 31), (sequence "F%u", 0, 31), diff --git a/llvm/lib/Target/PowerPC/PPCFastISel.cpp b/llvm/lib/Target/PowerPC/PPCFastISel.cpp index 03083908d84..402e29cdff7 100644 --- a/llvm/lib/Target/PowerPC/PPCFastISel.cpp +++ b/llvm/lib/Target/PowerPC/PPCFastISel.cpp @@ -206,8 +206,6 @@ CCAssignFn *PPCFastISel::usePPC32CCs(unsigned Flag) { return CC_PPC32_SVR4_ByVal; else if (Flag == 3) return CC_PPC32_SVR4_VarArg; - else if (Flag == 4) - return RetCC_PPC_Cold; else return RetCC_PPC; } diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp index bdda9d13ad8..7902da20a01 100644 --- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -1950,14 +1950,7 @@ PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, bool IsCRField = PPC::CR2 <= Reg && Reg <= PPC::CR4; // Add the callee-saved register as live-in; it's killed at the spill. - // Do not do this for callee-saved registers that are live-in to the - // function because they will already be marked live-in and this will be - // adding it for a second time. It is an error to add the same register - // to the set more than once. - const MachineRegisterInfo &MRI = MF->getRegInfo(); - bool IsLiveIn = MRI.isLiveIn(Reg); - if (!IsLiveIn) - MBB.addLiveIn(Reg); + MBB.addLiveIn(Reg); if (CRSpilled && IsCRField) { CRMIB.addReg(Reg, RegState::ImplicitKill); @@ -1987,10 +1980,7 @@ PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, } } else { const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); - // Use !IsLiveIn for the kill flag. - // We do not want to kill registers that are live in this function - // before their use because they will become undefined registers. - TII.storeRegToStackSlot(MBB, MI, Reg, !IsLiveIn, + TII.storeRegToStackSlot(MBB, MI, Reg, true, CSI[i].getFrameIdx(), RC, TRI); } } diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 34f7fc99f58..3c09ab8d755 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -4939,11 +4939,7 @@ SDValue PPCTargetLowering::LowerCallResult( SmallVector<CCValAssign, 16> RVLocs; CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, *DAG.getContext()); - - CCRetInfo.AnalyzeCallResult( - Ins, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold) - ? RetCC_PPC_Cold - : RetCC_PPC); + CCRetInfo.AnalyzeCallResult(Ins, RetCC_PPC); // Copy all of the result registers out of their specified physreg. for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) { @@ -5163,7 +5159,6 @@ SDValue PPCTargetLowering::LowerCall_32SVR4( // of the 32-bit SVR4 ABI stack frame layout. assert((CallConv == CallingConv::C || - CallConv == CallingConv::Cold || CallConv == CallingConv::Fast) && "Unknown calling convention!"); unsigned PtrByteSize = 4; @@ -6425,10 +6420,7 @@ PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv, LLVMContext &Context) const { SmallVector<CCValAssign, 16> RVLocs; CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context); - return CCInfo.CheckReturn( - Outs, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold) - ? RetCC_PPC_Cold - : RetCC_PPC); + return CCInfo.CheckReturn(Outs, RetCC_PPC); } SDValue @@ -6440,10 +6432,7 @@ PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, SmallVector<CCValAssign, 16> RVLocs; CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, *DAG.getContext()); - CCInfo.AnalyzeReturn(Outs, - (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold) - ? RetCC_PPC_Cold - : RetCC_PPC); + CCInfo.AnalyzeReturn(Outs, RetCC_PPC); SDValue Flag; SmallVector<SDValue, 4> RetOps(1, Chain); diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp index a938bb98ce1..6b62a82ef7b 100644 --- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -144,17 +144,6 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { // On PPC64, we might need to save r2 (but only if it is not reserved). bool SaveR2 = MF->getRegInfo().isAllocatable(PPC::X2); - if (MF->getFunction().getCallingConv() == CallingConv::Cold) { - return TM.isPPC64() - ? (Subtarget.hasAltivec() - ? (SaveR2 ? CSR_SVR64_ColdCC_R2_Altivec_SaveList - : CSR_SVR64_ColdCC_Altivec_SaveList) - : (SaveR2 ? CSR_SVR64_ColdCC_R2_SaveList - : CSR_SVR64_ColdCC_SaveList)) - : (Subtarget.hasAltivec() ? CSR_SVR32_ColdCC_Altivec_SaveList - : CSR_SVR32_ColdCC_SaveList); - } - return TM.isPPC64() ? (Subtarget.hasAltivec() ? (SaveR2 ? CSR_SVR464_R2_Altivec_SaveList @@ -207,13 +196,6 @@ PPCRegisterInfo::getCallPreservedMask(const MachineFunction &MF, : (Subtarget.hasAltivec() ? CSR_Darwin32_Altivec_RegMask : CSR_Darwin32_RegMask); - if (CC == CallingConv::Cold) { - return TM.isPPC64() ? (Subtarget.hasAltivec() ? CSR_SVR64_ColdCC_Altivec_RegMask - : CSR_SVR64_ColdCC_RegMask) - : (Subtarget.hasAltivec() ? CSR_SVR32_ColdCC_Altivec_RegMask - : CSR_SVR32_ColdCC_RegMask); - } - return TM.isPPC64() ? (Subtarget.hasAltivec() ? CSR_SVR464_Altivec_RegMask : CSR_SVR464_RegMask) : (Subtarget.hasAltivec() ? CSR_SVR432_Altivec_RegMask diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp index 226c75f704f..aa4073f7ea0 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -27,11 +27,6 @@ static cl::opt<unsigned> CacheLineSize("ppc-loop-prefetch-cache-line", cl::Hidden, cl::init(64), cl::desc("The loop prefetch cache line size")); -static cl::opt<bool> -EnablePPCColdCC("ppc-enable-coldcc", cl::Hidden, cl::init(false), - cl::desc("Enable using coldcc calling conv for cold " - "internal functions")); - //===----------------------------------------------------------------------===// // // PPC cost model. @@ -220,14 +215,6 @@ void PPCTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, BaseT::getUnrollingPreferences(L, SE, UP); } -// This function returns true to allow using coldcc calling convention. -// Returning true results in coldcc being used for functions which are cold at -// all call sites when the callers of the functions are not calling any other -// non coldcc functions. -bool PPCTTIImpl::useColdCCForColdCall(Function &F) { - return EnablePPCColdCC; -} - bool PPCTTIImpl::enableAggressiveInterleaving(bool LoopHasReductions) { // On the A2, always unroll aggressively. For QPX unaligned loads, we depend // on combining the loads generated for consecutive accesses, and failure to diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h index 2ee2b3eb808..b42dae4a025 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h @@ -61,7 +61,7 @@ public: /// \name Vector TTI Implementations /// @{ - bool useColdCCForColdCall(Function &F); + bool enableAggressiveInterleaving(bool LoopHasReductions); const TTI::MemCmpExpansionOptions *enableMemCmpExpansion( bool IsZeroCmp) const; diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp index 73f351b7c16..65dcd281009 100644 --- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp +++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp @@ -22,11 +22,9 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/Twine.h" #include "llvm/ADT/iterator_range.h" -#include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/TargetLibraryInfo.h" -#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" @@ -57,7 +55,6 @@ #include "llvm/Pass.h" #include "llvm/Support/AtomicOrdering.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" @@ -91,21 +88,6 @@ STATISTIC(NumNestRemoved , "Number of nest attributes removed"); STATISTIC(NumAliasesResolved, "Number of global aliases resolved"); STATISTIC(NumAliasesRemoved, "Number of global aliases eliminated"); STATISTIC(NumCXXDtorsRemoved, "Number of global C++ destructors removed"); -STATISTIC(NumInternalFunc, "Number of internal functions"); -STATISTIC(NumColdCC, "Number of functions marked coldcc"); - -static cl::opt<bool> - EnableColdCCStressTest("enable-coldcc-stress-test", - cl::desc("Enable stress test of coldcc by adding " - "calling conv to all internal functions."), - cl::init(false), cl::Hidden); - -static cl::opt<int> ColdCCRelFreq( - "coldcc-rel-freq", cl::Hidden, cl::init(2), cl::ZeroOrMore, - cl::desc( - "Maximum block frequency, expressed as a percentage of caller's " - "entry frequency, for a call site to be considered cold for enabling" - "coldcc")); /// Is this global variable possibly used by a leak checker as a root? If so, /// we might not really want to eliminate the stores to it. @@ -2115,114 +2097,20 @@ static void RemoveNestAttribute(Function *F) { /// idea here is that we don't want to mess with the convention if the user /// explicitly requested something with performance implications like coldcc, /// GHC, or anyregcc. -static bool hasChangeableCC(Function *F) { +static bool isProfitableToMakeFastCC(Function *F) { CallingConv::ID CC = F->getCallingConv(); // FIXME: Is it worth transforming x86_stdcallcc and x86_fastcallcc? return CC == CallingConv::C || CC == CallingConv::X86_ThisCall; } -/// Return true if the block containing the call site has a BlockFrequency of -/// less than ColdCCRelFreq% of the entry block. -static bool isColdCallSite(CallSite CS, BlockFrequencyInfo &CallerBFI) { - const BranchProbability ColdProb(ColdCCRelFreq, 100); - auto CallSiteBB = CS.getInstruction()->getParent(); - auto CallSiteFreq = CallerBFI.getBlockFreq(CallSiteBB); - auto CallerEntryFreq = - CallerBFI.getBlockFreq(&(CS.getCaller()->getEntryBlock())); - return CallSiteFreq < CallerEntryFreq * ColdProb; -} - -// This function checks if the input function F is cold at all call sites. It -// also looks each call site's containing function, returning false if the -// caller function contains other non cold calls. The input vector AllCallsCold -// contains a list of functions that only have call sites in cold blocks. -static bool -isValidCandidateForColdCC(Function &F, - function_ref<BlockFrequencyInfo &(Function &)> GetBFI, - const std::vector<Function *> &AllCallsCold) { - - if (F.user_empty()) - return false; - - for (User *U : F.users()) { - if (isa<BlockAddress>(U)) - continue; - - CallSite CS(cast<Instruction>(U)); - Function *CallerFunc = CS.getInstruction()->getParent()->getParent(); - BlockFrequencyInfo &CallerBFI = GetBFI(*CallerFunc); - if (!isColdCallSite(CS, CallerBFI)) - return false; - auto It = std::find(AllCallsCold.begin(), AllCallsCold.end(), CallerFunc); - if (It == AllCallsCold.end()) - return false; - } - return true; -} - -static void changeCallSitesToColdCC(Function *F) { - for (User *U : F->users()) { - if (isa<BlockAddress>(U)) - continue; - CallSite CS(cast<Instruction>(U)); - CS.setCallingConv(CallingConv::Cold); - } -} - -// This function iterates over all the call instructions in the input Function -// and checks that all call sites are in cold blocks and are allowed to use the -// coldcc calling convention. -static bool -hasOnlyColdCalls(Function &F, - function_ref<BlockFrequencyInfo &(Function &)> GetBFI) { - for (BasicBlock &BB : F) { - for (Instruction &I : BB) { - if (CallInst *CI = dyn_cast<CallInst>(&I)) { - CallSite CS(cast<Instruction>(CI)); - // Skip over isline asm instructions since they aren't function calls. - if (CI->isInlineAsm()) - continue; - Function *CalledFn = CI->getCalledFunction(); - if (!CalledFn) - return false; - if (!CalledFn->hasLocalLinkage()) - return false; - // Skip over instrinsics since they won't remain as function calls. - if (CalledFn->getIntrinsicID() != Intrinsic::not_intrinsic) - continue; - // Check if it's valid to use coldcc calling convention. - if (!hasChangeableCC(CalledFn) || CalledFn->isVarArg() || - CalledFn->hasAddressTaken()) - return false; - BlockFrequencyInfo &CallerBFI = GetBFI(F); - if (!isColdCallSite(CS, CallerBFI)) - return false; - } - } - } - return true; -} - static bool OptimizeFunctions(Module &M, TargetLibraryInfo *TLI, - function_ref<TargetTransformInfo &(Function &)> GetTTI, - function_ref<BlockFrequencyInfo &(Function &)> GetBFI, function_ref<DominatorTree &(Function &)> LookupDomTree, SmallSet<const Comdat *, 8> &NotDiscardableComdats) { - bool Changed = false; - - std::vector<Function *> AllCallsCold; - for (Module::iterator FI = M.begin(), E = M.end(); FI != E;) { - Function *F = &*FI++; - if (hasOnlyColdCalls(*F, GetBFI)) - AllCallsCold.push_back(F); - } - // Optimize functions. for (Module::iterator FI = M.begin(), E = M.end(); FI != E; ) { Function *F = &*FI++; - // Functions without names cannot be referenced outside this module. if (!F->hasName() && !F->isDeclaration() && !F->hasLocalLinkage()) F->setLinkage(GlobalValue::InternalLinkage); @@ -2254,25 +2142,7 @@ OptimizeFunctions(Module &M, TargetLibraryInfo *TLI, if (!F->hasLocalLinkage()) continue; - - if (hasChangeableCC(F) && !F->isVarArg() && !F->hasAddressTaken()) { - NumInternalFunc++; - TargetTransformInfo &TTI = GetTTI(*F); - // Change the calling convention to coldcc if either stress testing is - // enabled or the target would like to use coldcc on functions which are - // cold at all call sites and the callers contain no other non coldcc - // calls. - if (EnableColdCCStressTest || - (isValidCandidateForColdCC(*F, GetBFI, AllCallsCold) && - TTI.useColdCCForColdCall(*F))) { - F->setCallingConv(CallingConv::Cold); - changeCallSitesToColdCC(F); - Changed = true; - NumColdCC++; - } - } - - if (hasChangeableCC(F) && !F->isVarArg() && + if (isProfitableToMakeFastCC(F) && !F->isVarArg() && !F->hasAddressTaken()) { // If this function has a calling convention worth changing, is not a // varargs function, and is only called directly, promote it to use the @@ -2750,8 +2620,6 @@ static bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn) { static bool optimizeGlobalsInModule( Module &M, const DataLayout &DL, TargetLibraryInfo *TLI, - function_ref<TargetTransformInfo &(Function &)> GetTTI, - function_ref<BlockFrequencyInfo &(Function &)> GetBFI, function_ref<DominatorTree &(Function &)> LookupDomTree) { SmallSet<const Comdat *, 8> NotDiscardableComdats; bool Changed = false; @@ -2774,8 +2642,8 @@ static bool optimizeGlobalsInModule( NotDiscardableComdats.insert(C); // Delete functions that are trivially dead, ccc -> fastcc - LocalChange |= OptimizeFunctions(M, TLI, GetTTI, GetBFI, LookupDomTree, - NotDiscardableComdats); + LocalChange |= + OptimizeFunctions(M, TLI, LookupDomTree, NotDiscardableComdats); // Optimize global_ctors list. LocalChange |= optimizeGlobalCtorsList(M, [&](Function *F) { @@ -2812,15 +2680,7 @@ PreservedAnalyses GlobalOptPass::run(Module &M, ModuleAnalysisManager &AM) { auto LookupDomTree = [&FAM](Function &F) -> DominatorTree &{ return FAM.getResult<DominatorTreeAnalysis>(F); }; - auto GetTTI = [&FAM](Function &F) -> TargetTransformInfo & { - return FAM.getResult<TargetIRAnalysis>(F); - }; - - auto GetBFI = [&FAM](Function &F) -> BlockFrequencyInfo & { - return FAM.getResult<BlockFrequencyAnalysis>(F); - }; - - if (!optimizeGlobalsInModule(M, DL, &TLI, GetTTI, GetBFI, LookupDomTree)) + if (!optimizeGlobalsInModule(M, DL, &TLI, LookupDomTree)) return PreservedAnalyses::all(); return PreservedAnalyses::none(); } @@ -2843,21 +2703,11 @@ struct GlobalOptLegacyPass : public ModulePass { auto LookupDomTree = [this](Function &F) -> DominatorTree & { return this->getAnalysis<DominatorTreeWrapperPass>(F).getDomTree(); }; - auto GetTTI = [this](Function &F) -> TargetTransformInfo & { - return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); - }; - - auto GetBFI = [this](Function &F) -> BlockFrequencyInfo & { - return this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI(); - }; - - return optimizeGlobalsInModule(M, DL, TLI, GetTTI, GetBFI, LookupDomTree); + return optimizeGlobalsInModule(M, DL, TLI, LookupDomTree); } void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<TargetLibraryInfoWrapperPass>(); - AU.addRequired<TargetTransformInfoWrapperPass>(); - AU.addRequired<BlockFrequencyInfoWrapperPass>(); AU.addRequired<DominatorTreeWrapperPass>(); } }; @@ -2869,8 +2719,6 @@ char GlobalOptLegacyPass::ID = 0; INITIALIZE_PASS_BEGIN(GlobalOptLegacyPass, "globalopt", "Global Variable Optimizer", false, false) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_END(GlobalOptLegacyPass, "globalopt", "Global Variable Optimizer", false, false) diff --git a/llvm/test/CodeGen/PowerPC/coldcc.ll b/llvm/test/CodeGen/PowerPC/coldcc.ll deleted file mode 100644 index 056e944321f..00000000000 --- a/llvm/test/CodeGen/PowerPC/coldcc.ll +++ /dev/null @@ -1,46 +0,0 @@ -; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s -check-prefix=COLDCC - -define signext i32 @caller(i32 signext %a, i32 signext %b, i32 signext %cold) { -entry: - %0 = tail call i32 asm "add $0, $1, $2", "=r,r,r,~{r14},~{r15},~{r16},~{r17},~{r18},~{r19},~{r20},~{r21},~{r22},~{r23},~{r24},~{r25},~{r26},~{r27},~{r28},~{r29},~{r30},~{r31}"(i32 %a, i32 %b) - %mul = mul nsw i32 %0, %cold - %tobool = icmp eq i32 %cold, 0 - br i1 %tobool, label %if.end, label %if.then - -if.then: ; preds = %entry - %mul1 = mul nsw i32 %mul, %cold - %mul2 = mul nsw i32 %b, %a - %call = tail call coldcc signext i32 @callee(i32 signext %a, i32 signext %b) - %add = add i32 %mul2, %a - %add3 = add i32 %add, %mul - %add4 = add i32 %add3, %mul1 - %add5 = add i32 %add4, %call - br label %if.end - -if.end: ; preds = %entry, %if.then - %f.0 = phi i32 [ %add5, %if.then ], [ %0, %entry ] - ret i32 %f.0 -} - -define internal coldcc signext i32 @callee(i32 signext %a, i32 signext %b) local_unnamed_addr #0 { -entry: -; COLDCC: @callee -; COLDCC: std 6, -8(1) -; COLDCC: std 7, -16(1) -; COLDCC: std 8, -24(1) -; COLDCC: std 9, -32(1) -; COLDCC: std 10, -40(1) -; COLDCC: ld 9, -32(1) -; COLDCC: ld 8, -24(1) -; COLDCC: ld 7, -16(1) -; COLDCC: ld 10, -40(1) -; COLDCC: ld 6, -8(1) - %0 = tail call i32 asm "add $0, $1, $2", "=r,r,r,~{r6},~{r7},~{r8},~{r9},~{r10}"(i32 %a, i32 %b) - %mul = mul nsw i32 %a, 3 - %1 = mul i32 %b, -5 - %add = add i32 %1, %mul - %sub = add i32 %add, %0 - ret i32 %sub -} - -attributes #0 = { noinline } diff --git a/llvm/test/CodeGen/PowerPC/coldcc2.ll b/llvm/test/CodeGen/PowerPC/coldcc2.ll deleted file mode 100644 index 315198fca85..00000000000 --- a/llvm/test/CodeGen/PowerPC/coldcc2.ll +++ /dev/null @@ -1,42 +0,0 @@ -; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s -check-prefix=COLDCC - -%struct.MyStruct = type { i32, i32, i32, i32 } - -@caller.s = internal unnamed_addr global %struct.MyStruct zeroinitializer, align 8 - -define signext i32 @caller(i32 signext %a, i32 signext %b, i32 signext %cold) { -entry: -; COLDCC: bl callee -; COLDCC: ld 4, 40(1) -; COLDCC: ld 5, 32(1) - %call = tail call coldcc { i64, i64 } @callee(i32 signext %a, i32 signext %b) - %0 = extractvalue { i64, i64 } %call, 0 - %1 = extractvalue { i64, i64 } %call, 1 - store i64 %0, i64* bitcast (%struct.MyStruct* @caller.s to i64*), align 8 - store i64 %1, i64* bitcast (i32* getelementptr inbounds (%struct.MyStruct, %struct.MyStruct* @caller.s, i64 0, i32 2) to i64*), align 8 - %2 = lshr i64 %1, 32 - %3 = trunc i64 %2 to i32 - %sub = sub nsw i32 0, %3 - ret i32 %sub -} - -define internal coldcc { i64, i64 } @callee(i32 signext %a, i32 signext %b) { -entry: -; COLDCC: std {{[0-9]+}}, 0(3) -; COLDCC: std {{[0-9]+}}, 8(3) - %0 = tail call i32 asm "add $0, $1, $2", "=r,r,r,~{r6},~{r7},~{r8},~{r9},~{r10}"(i32 %a, i32 %b) - %mul = mul nsw i32 %a, 3 - %1 = mul i32 %b, -5 - %add = add i32 %1, %mul - %sub = add i32 %add, %0 - %mul5 = mul nsw i32 %b, %a - %add6 = add nsw i32 %sub, %mul5 - %retval.sroa.0.0.insert.ext = zext i32 %0 to i64 - %retval.sroa.3.8.insert.ext = zext i32 %sub to i64 - %retval.sroa.3.12.insert.ext = zext i32 %add6 to i64 - %retval.sroa.3.12.insert.shift = shl nuw i64 %retval.sroa.3.12.insert.ext, 32 - %retval.sroa.3.12.insert.insert = or i64 %retval.sroa.3.12.insert.shift, %retval.sroa.3.8.insert.ext - %.fca.0.insert = insertvalue { i64, i64 } undef, i64 %retval.sroa.0.0.insert.ext, 0 - %.fca.1.insert = insertvalue { i64, i64 } %.fca.0.insert, i64 %retval.sroa.3.12.insert.insert, 1 - ret { i64, i64 } %.fca.1.insert -} diff --git a/llvm/test/Other/pass-pipelines.ll b/llvm/test/Other/pass-pipelines.ll index dddf1338a4d..9e5176eddaa 100644 --- a/llvm/test/Other/pass-pipelines.ll +++ b/llvm/test/Other/pass-pipelines.ll @@ -93,7 +93,7 @@ ; FIXME: There really shouldn't be another pass manager, especially one that ; just builds the domtree. It doesn't even run the verifier. ; CHECK-O2: Pass Arguments: -; CHECK-O2: FunctionPass Manager +; CHECK-O2-NEXT: FunctionPass Manager ; CHECK-O2-NEXT: Dominator Tree Construction define void @foo() { diff --git a/llvm/test/Transforms/GlobalOpt/PowerPC/coldcc_coldsites.ll b/llvm/test/Transforms/GlobalOpt/PowerPC/coldcc_coldsites.ll deleted file mode 100644 index 8fedf834f40..00000000000 --- a/llvm/test/Transforms/GlobalOpt/PowerPC/coldcc_coldsites.ll +++ /dev/null @@ -1,81 +0,0 @@ -; RUN: opt -globalopt -mtriple=powerpc64le-unknown-linux-gnu -ppc-enable-coldcc -S < %s | FileCheck %s -check-prefix=COLDCC -; RUN: opt -globalopt -S < %s | FileCheck %s -check-prefix=CHECK - -define signext i32 @caller(i32 signext %a, i32 signext %b, i32 signext %lim, i32 signext %i) local_unnamed_addr #0 !prof !30 { -entry: -; COLDCC: call coldcc signext i32 @callee -; CHECK: call fastcc signext i32 @callee - %add = add nsw i32 %b, %a - %sub = add nsw i32 %lim, -1 - %cmp = icmp eq i32 %sub, %i - br i1 %cmp, label %if.then, label %if.end, !prof !31 - -if.then: ; preds = %entry - %call = tail call signext i32 @callee(i32 signext %a, i32 signext %b) - br label %if.end - -if.end: ; preds = %if.then, %entry - %f.0 = phi i32 [ %call, %if.then ], [ %add, %entry ] - ret i32 %f.0 -} - -define internal signext i32 @callee(i32 signext %a, i32 signext %b) unnamed_addr #0 { -entry: - %0 = tail call i32 asm "add $0, $1, $2", "=r,r,r,~{r6},~{r7},~{r8},~{r9}"(i32 %a, i32 %b) #1, !srcloc !32 - %mul = mul nsw i32 %a, 3 - %mul1 = shl i32 %0, 1 - %add = add nsw i32 %mul1, %mul - ret i32 %add -} - -define signext i32 @main() local_unnamed_addr #0 !prof !33 { -entry: - br label %for.body - -for.cond.cleanup: ; preds = %for.body - %add.lcssa = phi i32 [ %add, %for.body ] - ret i32 %add.lcssa - -for.body: ; preds = %for.body, %entry - %i.011 = phi i32 [ 0, %entry ], [ %inc, %for.body ] - %ret.010 = phi i32 [ 0, %entry ], [ %add, %for.body ] - %call = tail call signext i32 @caller(i32 signext 4, i32 signext 5, i32 signext 10000000, i32 signext %i.011) - %add = add nsw i32 %call, %ret.010 - %inc = add nuw nsw i32 %i.011, 1 - %exitcond = icmp eq i32 %inc, 10000000 - br i1 %exitcond, label %for.cond.cleanup, label %for.body, !prof !34 -} -attributes #0 = { noinline } - -!0 = !{i32 1, !"ProfileSummary", !1} -!1 = !{!2, !3, !4, !5, !6, !7, !8, !9} -!2 = !{!"ProfileFormat", !"InstrProf"} -!3 = !{!"TotalCount", i64 20000003} -!4 = !{!"MaxCount", i64 10000000} -!5 = !{!"MaxInternalCount", i64 10000000} -!6 = !{!"MaxFunctionCount", i64 10000000} -!7 = !{!"NumCounts", i64 5} -!8 = !{!"NumFunctions", i64 3} -!9 = !{!"DetailedSummary", !10} -!10 = !{!11, !12, !13, !14, !15, !16, !16, !17, !17, !18, !19, !20, !21, !22, !23, !24, !25, !26} -!11 = !{i32 10000, i64 10000000, i32 2} -!12 = !{i32 100000, i64 10000000, i32 2} -!13 = !{i32 200000, i64 10000000, i32 2} -!14 = !{i32 300000, i64 10000000, i32 2} -!15 = !{i32 400000, i64 10000000, i32 2} -!16 = !{i32 500000, i64 10000000, i32 2} -!17 = !{i32 600000, i64 10000000, i32 2} -!18 = !{i32 700000, i64 10000000, i32 2} -!19 = !{i32 800000, i64 10000000, i32 2} -!20 = !{i32 900000, i64 10000000, i32 2} -!21 = !{i32 950000, i64 10000000, i32 2} -!22 = !{i32 990000, i64 10000000, i32 2} -!23 = !{i32 999000, i64 10000000, i32 2} -!24 = !{i32 999900, i64 10000000, i32 2} -!25 = !{i32 999990, i64 10000000, i32 2} -!26 = !{i32 999999, i64 10000000, i32 2} -!30 = !{!"function_entry_count", i64 10000000} -!31 = !{!"branch_weights", i32 2, i32 10000000} -!32 = !{i32 59} -!33 = !{!"function_entry_count", i64 1} -!34 = !{!"branch_weights", i32 2, i32 10000001} diff --git a/llvm/test/Transforms/GlobalOpt/PowerPC/lit.local.cfg b/llvm/test/Transforms/GlobalOpt/PowerPC/lit.local.cfg deleted file mode 100644 index 5d33887ff0a..00000000000 --- a/llvm/test/Transforms/GlobalOpt/PowerPC/lit.local.cfg +++ /dev/null @@ -1,3 +0,0 @@ -if not 'PowerPC' in config.root.targets: - config.unsupported = True - diff --git a/llvm/test/Transforms/GlobalOpt/coldcc_stress_test.ll b/llvm/test/Transforms/GlobalOpt/coldcc_stress_test.ll deleted file mode 100644 index 80c9366af6f..00000000000 --- a/llvm/test/Transforms/GlobalOpt/coldcc_stress_test.ll +++ /dev/null @@ -1,48 +0,0 @@ -; RUN: opt < %s -globalopt -S -enable-coldcc-stress-test -mtriple=powerpc64le-unknown-linux-gnu | FileCheck %s -check-prefix=COLDCC -; RUN: opt < %s -globalopt -S | FileCheck %s -check-prefix=CHECK - -define internal i32 @callee_default(i32* %m) { -; COLDCC-LABEL: define internal coldcc i32 @callee_default -; CHECK-LABEL: define internal fastcc i32 @callee_default - %v = load i32, i32* %m - ret i32 %v -} - -define internal fastcc i32 @callee_fastcc(i32* %m) { -; COLDCC-LABEL: define internal fastcc i32 @callee_fastcc -; CHECK-LABEL: define internal fastcc i32 @callee_fastcc - %v = load i32, i32* %m - ret i32 %v -} - -define internal coldcc i32 @callee_coldcc(i32* %m) { -; COLDCC-LABEL: define internal coldcc i32 @callee_coldcc -; CHECK-LABEL: define internal coldcc i32 @callee_coldcc - %v = load i32, i32* %m - ret i32 %v -} - -define i32 @callee(i32* %m) { - %v = load i32, i32* %m - ret i32 %v -} - -define void @caller() { - %m = alloca i32 - call i32 @callee_default(i32* %m) - call fastcc i32 @callee_fastcc(i32* %m) - call coldcc i32 @callee_coldcc(i32* %m) - call i32 @callee(i32* %m) - ret void -} - -; COLDCC-LABEL: define void @caller() -; COLDCC: call coldcc i32 @callee_default -; COLDCC: call fastcc i32 @callee_fastcc -; COLDCC: call coldcc i32 @callee_coldcc -; COLDCC: call i32 @callee -; CHECK-LABEL: define void @caller() -; CHECK: call fastcc i32 @callee_default -; CHECK: call fastcc i32 @callee_fastcc -; CHECK: call coldcc i32 @callee_coldcc -; CHECK: call i32 @callee |