diff options
author | Hiroshi Yamauchi <yamauchi@google.com> | 2019-11-11 10:59:36 -0800 |
---|---|---|
committer | Hiroshi Yamauchi <yamauchi@google.com> | 2019-11-21 14:16:00 -0800 |
commit | 52e377497ddc3aa7178d4fd4c0cda096df4d8a72 (patch) | |
tree | 86b84d9ff3012c4c5984b748c99650babd8aabcb /llvm/lib | |
parent | 844d97f650a2d716e63e3be903c32a82f2f817b1 (diff) | |
download | bcm5719-llvm-52e377497ddc3aa7178d4fd4c0cda096df4d8a72.tar.gz bcm5719-llvm-52e377497ddc3aa7178d4fd4c0cda096df4d8a72.zip |
[PGO][PGSO] DAG.shouldOptForSize part.
Summary:
(Split of off D67120)
SelectionDAG::shouldOptForSize changes for profile guided size optimization.
Reviewers: davidxl
Subscribers: hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D70095
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 4 | ||||
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 27 | ||||
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 8 | ||||
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 3 | ||||
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp | 8 | ||||
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64InstrInfo.td | 6 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 4 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 14 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrInfo.td | 8 |
9 files changed, 48 insertions, 34 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 94e98cd8065..fb7ddf5b233 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -217,7 +217,7 @@ namespace { DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL) : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes), OptLevel(OL), AA(AA) { - ForCodeSize = DAG.getMachineFunction().getFunction().hasOptSize(); + ForCodeSize = DAG.shouldOptForSize(); MaximumLegalStoreInBits = 0; // We use the minimum store size here, since that's all we can guarantee @@ -12885,7 +12885,7 @@ SDValue DAGCombiner::visitFPOW(SDNode *N) { // Assume that libcalls are the smallest code. // TODO: This restriction should probably be lifted for vectors. - if (DAG.getMachineFunction().getFunction().hasOptSize()) + if (ForCodeSize) return SDValue(); // pow(X, 0.25) --> sqrt(sqrt(X)) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 7cfd8ade954..f1b88d80f43 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -24,6 +24,8 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -63,6 +65,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" +#include "llvm/Transforms/Utils/SizeOpts.h" #include <algorithm> #include <cassert> #include <cstdint> @@ -1005,7 +1008,9 @@ SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL) void SelectionDAG::init(MachineFunction &NewMF, OptimizationRemarkEmitter &NewORE, Pass *PassPtr, const TargetLibraryInfo *LibraryInfo, - LegacyDivergenceAnalysis * Divergence) { + LegacyDivergenceAnalysis * Divergence, + ProfileSummaryInfo *PSIin, + BlockFrequencyInfo *BFIin) { MF = &NewMF; SDAGISelPass = PassPtr; ORE = &NewORE; @@ -1014,6 +1019,8 @@ void SelectionDAG::init(MachineFunction &NewMF, LibInfo = LibraryInfo; Context = &MF->getFunction().getContext(); DA = Divergence; + PSI = PSIin; + BFI = BFIin; } SelectionDAG::~SelectionDAG() { @@ -1023,6 +1030,11 @@ SelectionDAG::~SelectionDAG() { delete DbgInfo; } +bool SelectionDAG::shouldOptForSize() const { + return MF->getFunction().hasOptSize() || + llvm::shouldOptimizeForSize(FLI->MBB->getBasicBlock(), PSI, BFI); +} + void SelectionDAG::allnodes_clear() { assert(&*AllNodes.begin() == &EntryNode); AllNodes.remove(AllNodes.begin()); @@ -1427,7 +1439,7 @@ SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT, assert((TargetFlags == 0 || isTarget) && "Cannot set target flags on target-independent globals"); if (Alignment == 0) - Alignment = MF->getFunction().hasOptSize() + Alignment = shouldOptForSize() ? getDataLayout().getABITypeAlignment(C->getType()) : getDataLayout().getPrefTypeAlignment(C->getType()); unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool; @@ -5733,12 +5745,13 @@ static bool isMemSrcFromConstant(SDValue Src, ConstantDataArraySlice &Slice) { SrcDelta + G->getOffset()); } -static bool shouldLowerMemFuncForSize(const MachineFunction &MF) { +static bool shouldLowerMemFuncForSize(const MachineFunction &MF, + SelectionDAG &DAG) { // On Darwin, -Os means optimize for size without hurting performance, so // only really optimize for size when -Oz (MinSize) is used. if (MF.getTarget().getTargetTriple().isOSDarwin()) return MF.getFunction().hasMinSize(); - return MF.getFunction().hasOptSize(); + return DAG.shouldOptForSize(); } static void chainLoadsAndStoresForMemcpy(SelectionDAG &DAG, const SDLoc &dl, @@ -5788,7 +5801,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, bool DstAlignCanChange = false; MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo &MFI = MF.getFrameInfo(); - bool OptSize = shouldLowerMemFuncForSize(MF); + bool OptSize = shouldLowerMemFuncForSize(MF, DAG); FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst); if (FI && !MFI.isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; @@ -5971,7 +5984,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, bool DstAlignCanChange = false; MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo &MFI = MF.getFrameInfo(); - bool OptSize = shouldLowerMemFuncForSize(MF); + bool OptSize = shouldLowerMemFuncForSize(MF, DAG); FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst); if (FI && !MFI.isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; @@ -6077,7 +6090,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl, bool DstAlignCanChange = false; MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo &MFI = MF.getFrameInfo(); - bool OptSize = shouldLowerMemFuncForSize(MF); + bool OptSize = shouldLowerMemFuncForSize(MF, DAG); FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst); if (FI && !MFI.isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 4fa907a4dcf..1ed0dc2c979 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -28,10 +28,12 @@ #include "llvm/ADT/Twine.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BranchProbabilityInfo.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/MemoryLocation.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/VectorUtils.h" @@ -5360,8 +5362,8 @@ static SDValue ExpandPowI(const SDLoc &DL, SDValue LHS, SDValue RHS, if (Val == 0) return DAG.getConstantFP(1.0, DL, LHS.getValueType()); - const Function &F = DAG.getMachineFunction().getFunction(); - if (!F.hasOptSize() || + bool OptForSize = DAG.shouldOptForSize(); + if (!OptForSize || // If optimizing for size, don't insert too many multiplies. // This inserts up to 5 multiplies. countPopulation(Val) + Log2_32(Val) < 7) { @@ -10441,7 +10443,7 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { return; } - SL->findJumpTables(Clusters, &SI, DefaultMBB, nullptr, nullptr); + SL->findJumpTables(Clusters, &SI, DefaultMBB, DAG.getPSI(), DAG.getBFI()); SL->findBitTestClusters(Clusters, &SI); LLVM_DEBUG({ diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 2757e1cf015..c6a2bfd31fe 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -444,7 +444,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { SplitCriticalSideEffectEdges(const_cast<Function &>(Fn), DT, LI); CurDAG->init(*MF, *ORE, this, LibInfo, - getAnalysisIfAvailable<LegacyDivergenceAnalysis>()); + getAnalysisIfAvailable<LegacyDivergenceAnalysis>(), + nullptr, nullptr); FuncInfo->set(Fn, *MF, CurDAG); SwiftError->setFunction(*MF); diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index 4d4f76e8e91..ae6c1429e56 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -39,20 +39,16 @@ class AArch64DAGToDAGISel : public SelectionDAGISel { /// make the right decision when generating code for different targets. const AArch64Subtarget *Subtarget; - bool ForCodeSize; - public: explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm, CodeGenOpt::Level OptLevel) - : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr), - ForCodeSize(false) {} + : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr) {} StringRef getPassName() const override { return "AArch64 Instruction Selection"; } bool runOnMachineFunction(MachineFunction &MF) override { - ForCodeSize = MF.getFunction().hasOptSize(); Subtarget = &MF.getSubtarget<AArch64Subtarget>(); return SelectionDAGISel::runOnMachineFunction(MF); } @@ -399,7 +395,7 @@ static bool isWorthFoldingSHL(SDValue V) { bool AArch64DAGToDAGISel::isWorthFolding(SDValue V) const { // Trivial if we are optimizing for code size or if there is only // one use of the value. - if (ForCodeSize || V.hasOneUse()) + if (CurDAG->shouldOptForSize() || V.hasOneUse()) return true; // If a subtarget has a fastpath LSL we can fold a logical shift into // the addressing mode and save a cycle. diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 7ae5b5dd6a8..80cf31ff3d5 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -518,10 +518,10 @@ def AArch64uunpklo : SDNode<"AArch64ISD::UUNPKLO", SDT_AArch64unpk>; // the Function object through the <Target>Subtarget and objections were raised // to that (see post-commit review comments for r301750). let RecomputePerFunction = 1 in { - def ForCodeSize : Predicate<"MF->getFunction().hasOptSize()">; - def NotForCodeSize : Predicate<"!MF->getFunction().hasOptSize()">; + def ForCodeSize : Predicate<"shouldOptForSize(MF)">; + def NotForCodeSize : Predicate<"!shouldOptForSize(MF)">; // Avoid generating STRQro if it is slow, unless we're optimizing for code size. - def UseSTRQro : Predicate<"!Subtarget->isSTRQroSlow() || MF->getFunction().hasOptSize()">; + def UseSTRQro : Predicate<"!Subtarget->isSTRQroSlow() || shouldOptForSize(MF)">; def UseBTI : Predicate<[{ MF->getFunction().hasFnAttribute("branch-target-enforcement") }]>; def NotUseBTI : Predicate<[{ !MF->getFunction().hasFnAttribute("branch-target-enforcement") }]>; diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index a91b23a3ce5..56508fc8840 100644 --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -335,7 +335,7 @@ namespace { // Do not want to hoist if we're not optimizing for size. // TODO: We'd like to remove this restriction. // See the comment in X86InstrInfo.td for more info. - if (!OptForSize) + if (!CurDAG->shouldOptForSize()) return false; // Walk all the users of the immediate. @@ -3019,7 +3019,7 @@ bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) { LLVM_FALLTHROUGH; case X86ISD::ADD: // Try to match inc/dec. - if (!Subtarget->slowIncDec() || OptForSize) { + if (!Subtarget->slowIncDec() || CurDAG->shouldOptForSize()) { bool IsOne = isOneConstant(StoredVal.getOperand(1)); bool IsNegOne = isAllOnesConstant(StoredVal.getOperand(1)); // ADD/SUB with 1/-1 and carry flag isn't used can use inc/dec. diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 2f9a714f92a..07d77485254 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -25,7 +25,9 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSwitch.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/EHPersonalities.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/CodeGen/IntrinsicLowering.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -8397,7 +8399,7 @@ static SDValue lowerBuildVectorAsBroadcast(BuildVectorSDNode *BVOp, // TODO: If multiple splats are generated to load the same constant, // it may be detrimental to overall size. There needs to be a way to detect // that condition to know if this is truly a size win. - bool OptForSize = DAG.getMachineFunction().getFunction().hasOptSize(); + bool OptForSize = DAG.shouldOptForSize(); // Handle broadcasting a single constant scalar from the constant pool // into a vector. @@ -11174,7 +11176,7 @@ static SDValue lowerShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1, case MVT::v32i16: case MVT::v64i8: { // Attempt to lower to a bitmask if we can. Only if not optimizing for size. - bool OptForSize = DAG.getMachineFunction().getFunction().hasOptSize(); + bool OptForSize = DAG.shouldOptForSize(); if (!OptForSize) { if (SDValue Masked = lowerShuffleAsBitMask(DL, VT, V1, V2, Mask, Zeroable, Subtarget, DAG)) @@ -18315,7 +18317,7 @@ static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget, "Unexpected funnel shift type!"); // Expand slow SHLD/SHRD cases if we are not optimizing for size. - bool OptForSize = DAG.getMachineFunction().getFunction().hasOptSize(); + bool OptForSize = DAG.shouldOptForSize(); if (!OptForSize && Subtarget.isSHLDSlow()) return SDValue(); @@ -18547,7 +18549,7 @@ SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, /// implementation, and likely shuffle complexity of the alternate sequence. static bool shouldUseHorizontalOp(bool IsSingleSource, SelectionDAG &DAG, const X86Subtarget &Subtarget) { - bool IsOptimizingSize = DAG.getMachineFunction().getFunction().hasOptSize(); + bool IsOptimizingSize = DAG.shouldOptForSize(); bool HasFastHOps = Subtarget.hasFastHorizontalOps(); return !IsSingleSource || IsOptimizingSize || HasFastHOps; } @@ -20473,7 +20475,7 @@ static SDValue LowerAndToBT(SDValue And, ISD::CondCode CC, } else { // Use BT if the immediate can't be encoded in a TEST instruction or we // are optimizing for size and the immedaite won't fit in a byte. - bool OptForSize = DAG.getMachineFunction().getFunction().hasOptSize(); + bool OptForSize = DAG.shouldOptForSize(); if ((!isUInt<32>(AndRHSVal) || (OptForSize && !isUInt<8>(AndRHSVal))) && isPowerOf2_64(AndRHSVal)) { Src = AndLHS; @@ -39645,7 +39647,7 @@ static SDValue combineOrShiftToFunnelShift(SDNode *N, SelectionDAG &DAG, return SDValue(); // fold (or (x << c) | (y >> (64 - c))) ==> (shld64 x, y, c) - bool OptForSize = DAG.getMachineFunction().getFunction().hasOptSize(); + bool OptForSize = DAG.shouldOptForSize(); unsigned Bits = VT.getScalarSizeInBits(); // SHLD/SHRD instructions have lower register pressure, but on some diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td index e452145f3b6..3ab45ba833f 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.td +++ b/llvm/lib/Target/X86/X86InstrInfo.td @@ -983,12 +983,12 @@ def IsNotPIC : Predicate<"!TM.isPositionIndependent()">; // the Function object through the <Target>Subtarget and objections were raised // to that (see post-commit review comments for r301750). let RecomputePerFunction = 1 in { - def OptForSize : Predicate<"MF->getFunction().hasOptSize()">; + def OptForSize : Predicate<"shouldOptForSize(MF)">; def OptForMinSize : Predicate<"MF->getFunction().hasMinSize()">; - def OptForSpeed : Predicate<"!MF->getFunction().hasOptSize()">; + def OptForSpeed : Predicate<"!shouldOptForSize(MF)">; def UseIncDec : Predicate<"!Subtarget->slowIncDec() || " - "MF->getFunction().hasOptSize()">; - def NoSSE41_Or_OptForSize : Predicate<"MF->getFunction().hasOptSize() || " + "shouldOptForSize(MF)">; + def NoSSE41_Or_OptForSize : Predicate<"shouldOptForSize(MF) || " "!Subtarget->hasSSE41()">; } |