author | Jessica Paquette <jpaquette@apple.com> | 2017-07-28 03:21:58 +0000
---|---|---
committer | Jessica Paquette <jpaquette@apple.com> | 2017-07-28 03:21:58 +0000
commit | 809d708b8af56391c448b72b49eedae650b98e83 (patch) |
tree | 74a16171e1e8d74853593d8496b715689c33e1a2 /llvm/lib |
parent | 75a001ba784f5de87f5b8be731b08b873b5e8551 (diff) |
download | bcm5719-llvm-809d708b8af56391c448b72b49eedae650b98e83.tar.gz bcm5719-llvm-809d708b8af56391c448b72b49eedae650b98e83.zip |
[MachineOutliner] NFC: Split up getOutliningBenefit
This is some more cleanup in preparation for some actual
functional changes. This splits getOutliningBenefit into
two cost functions: getOutliningCallOverhead and
getOutliningFrameOverhead. These functions return the
number of instructions that would be required to call
a specific function and the number of instructions
that would be required to construct a frame for a
specific function. The actual outlining benefit logic
is moved into the outliner, which calls these functions.
The goal of refactoring getOutliningBenefit is to:
- Get us closer to getting rid of the IsTailCall flag
- Further split up "target-specific" things and
"general algorithm" things
llvm-svn: 309356
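
To make the refactored cost model concrete, here is a minimal, self-contained sketch of the benefit calculation the outliner now performs with the two overheads. The `CandidateCosts` struct and `outliningBenefit` helper are illustrative names, not part of the actual LLVM API; the real code in `findCandidates` sums the call overhead while walking suffix-tree leaves rather than a plain vector.

```cpp
// Hypothetical sketch of the cost model described above. The quantities
// mirror what the outliner now sums from the two new target hooks,
// getOutliningCallOverhead() and getOutliningFrameOverhead().
#include <cstddef>
#include <numeric>
#include <vector>

struct CandidateCosts {
  std::vector<std::size_t> CallOverheads; // one entry per occurrence
  std::size_t FrameOverhead; // instructions needed to set up the frame
  std::size_t SequenceLen;   // instructions in the outlined sequence
};

// Benefit = (cost of keeping every copy inline) minus (cost of one outlined
// copy plus a call at each occurrence); zero when outlining does not pay off.
std::size_t outliningBenefit(const CandidateCosts &C) {
  std::size_t CallOverhead = std::accumulate(
      C.CallOverheads.begin(), C.CallOverheads.end(), std::size_t(0));
  std::size_t OutliningCost = CallOverhead + C.FrameOverhead + C.SequenceLen;
  std::size_t NotOutliningCost = C.SequenceLen * C.CallOverheads.size();
  return NotOutliningCost > OutliningCost ? NotOutliningCost - OutliningCost
                                          : 0;
}
```

With the AArch64 overheads introduced in this patch (3 instructions per non-tail call for the LR save/restore plus the call, and 1 frame instruction for the return), a 5-instruction sequence occurring 3 times costs 15 instructions either way and is skipped, while a 4th occurrence makes outlining worth 20 - 18 = 2 instructions.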
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/CodeGen/MachineOutliner.cpp | 83
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 452
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64InstrInfo.h | 6
-rw-r--r-- | llvm/lib/Target/X86/X86InstrInfo.cpp | 31
-rw-r--r-- | llvm/lib/Target/X86/X86InstrInfo.h | 8
5 files changed, 312 insertions, 268 deletions
diff --git a/llvm/lib/CodeGen/MachineOutliner.cpp b/llvm/lib/CodeGen/MachineOutliner.cpp index ff334a3a310..8df57a27e8a 100644 --- a/llvm/lib/CodeGen/MachineOutliner.cpp +++ b/llvm/lib/CodeGen/MachineOutliner.cpp @@ -114,7 +114,7 @@ struct OutlinedFunction { /// This is initialized after we go through and create the actual function. MachineFunction *MF = nullptr; - /// A number assigned to this function which appears at the end of its name. + /// A numbefr assigned to this function which appears at the end of its name. size_t Name; /// The number of candidates for this OutlinedFunction. @@ -813,11 +813,13 @@ struct MachineOutliner : public ModulePass { /// /// \param[in,out] CandidateList A list of outlining candidates. /// \param[in,out] FunctionList A list of functions to be outlined. + /// \param Mapper Contains instruction mapping info for outlining. /// \param MaxCandidateLen The length of the longest candidate. /// \param TII TargetInstrInfo for the module. void pruneOverlaps(std::vector<Candidate> &CandidateList, std::vector<OutlinedFunction> &FunctionList, - unsigned MaxCandidateLen, const TargetInstrInfo &TII); + InstructionMapper &Mapper, unsigned MaxCandidateLen, + const TargetInstrInfo &TII); /// Construct a suffix tree on the instructions in \p M and outline repeated /// strings from that tree. @@ -859,23 +861,40 @@ MachineOutliner::findCandidates(SuffixTree &ST, const TargetInstrInfo &TII, if (Parent.OccurrenceCount < 2 || Parent.isRoot() || !Parent.IsInTree) continue; - // How many instructions would outlining this string save? + // Figure out if this candidate is beneficial. size_t StringLen = Leaf->ConcatLen - Leaf->size(); - unsigned EndVal = ST.Str[Leaf->SuffixIdx + StringLen - 1]; - - // Determine if this is going to be tail called. - // FIXME: The target should decide this. The outlining pass shouldn't care - // about things like tail calling. It should be representation-agnostic. - MachineInstr *LastInstr = Mapper.IntegerInstructionMap[EndVal]; - assert(LastInstr && "Last instruction in sequence was unmapped!"); - bool IsTailCall = LastInstr->isTerminator(); - unsigned Benefit = - TII.getOutliningBenefit(StringLen, Parent.OccurrenceCount, IsTailCall); - - // If it's not beneficial, skip it. - if (Benefit < 1) + size_t CallOverhead = 0; + size_t FrameOverhead = 0; + size_t SequenceOverhead = StringLen; + + // Figure out the call overhead for each instance of the sequence. + for (auto &ChildPair : Parent.Children) { + SuffixTreeNode *M = ChildPair.second; + + if (M && M->IsInTree && M->isLeaf()) { + // Each sequence is over [StartIt, EndIt]. + MachineBasicBlock::iterator StartIt = Mapper.InstrList[M->SuffixIdx]; + MachineBasicBlock::iterator EndIt = + Mapper.InstrList[M->SuffixIdx + StringLen - 1]; + CallOverhead += TII.getOutliningCallOverhead(StartIt, EndIt); + } + } + + // Figure out how many instructions it'll take to construct an outlined + // function frame for this sequence. 
+ MachineBasicBlock::iterator StartIt = Mapper.InstrList[Leaf->SuffixIdx]; + MachineBasicBlock::iterator EndIt = + Mapper.InstrList[Leaf->SuffixIdx + StringLen - 1]; + FrameOverhead = TII.getOutliningFrameOverhead(StartIt, EndIt); + + size_t OutliningCost = CallOverhead + FrameOverhead + SequenceOverhead; + size_t NotOutliningCost = SequenceOverhead * Parent.OccurrenceCount; + + if (NotOutliningCost <= OutliningCost) continue; + size_t Benefit = NotOutliningCost - OutliningCost; + if (StringLen > MaxLen) MaxLen = StringLen; @@ -910,6 +929,7 @@ MachineOutliner::findCandidates(SuffixTree &ST, const TargetInstrInfo &TII, void MachineOutliner::pruneOverlaps(std::vector<Candidate> &CandidateList, std::vector<OutlinedFunction> &FunctionList, + InstructionMapper &Mapper, unsigned MaxCandidateLen, const TargetInstrInfo &TII) { // TODO: Experiment with interval trees or other interval-checking structures @@ -993,8 +1013,18 @@ void MachineOutliner::pruneOverlaps(std::vector<Candidate> &CandidateList, assert(F2.OccurrenceCount > 0 && "Can't remove OutlinedFunction with no occurrences!"); F2.OccurrenceCount--; - F2.Benefit = TII.getOutliningBenefit(F2.Sequence.size(), - F2.OccurrenceCount, F2.IsTailCall); + + // Remove the call overhead from the removed sequence. + MachineBasicBlock::iterator StartIt = Mapper.InstrList[C2.StartIdx]; + MachineBasicBlock::iterator EndIt = + Mapper.InstrList[C2.StartIdx + C2.Len - 1]; + F2.Benefit += TII.getOutliningCallOverhead(StartIt, EndIt); + // Add back one instance of the sequence. + + if (F2.Sequence.size() > F2.Benefit) + F2.Benefit = 0; + else + F2.Benefit -= F2.Sequence.size(); C2.InCandidateList = false; @@ -1009,8 +1039,19 @@ void MachineOutliner::pruneOverlaps(std::vector<Candidate> &CandidateList, assert(F1.OccurrenceCount > 0 && "Can't remove OutlinedFunction with no occurrences!"); F1.OccurrenceCount--; - F1.Benefit = TII.getOutliningBenefit(F1.Sequence.size(), - F1.OccurrenceCount, F1.IsTailCall); + + // Remove the call overhead from the removed sequence. + MachineBasicBlock::iterator StartIt = Mapper.InstrList[C1.StartIdx]; + MachineBasicBlock::iterator EndIt = + Mapper.InstrList[C1.StartIdx + C1.Len - 1]; + F2.Benefit += TII.getOutliningCallOverhead(StartIt, EndIt); + + // Add back one instance of the sequence. + if (F1.Sequence.size() > F1.Benefit) + F1.Benefit = 0; + else + F1.Benefit -= F1.Sequence.size(); + C1.InCandidateList = false; DEBUG(dbgs() << "- Removed C1. \n"; @@ -1206,7 +1247,7 @@ bool MachineOutliner::runOnModule(Module &M) { buildCandidateList(CandidateList, FunctionList, ST, Mapper, *TII); // Remove candidates that overlap with other candidates. - pruneOverlaps(CandidateList, FunctionList, MaxCandidateLen, *TII); + pruneOverlaps(CandidateList, FunctionList, Mapper, MaxCandidateLen, *TII); // Outline each of the candidates and return true if something was outlined. 
return outline(M, CandidateList, FunctionList, Mapper); diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index be39fb22b70..9afd05f99e9 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -52,17 +52,17 @@ using namespace llvm; #define GET_INSTRINFO_CTOR_DTOR #include "AArch64GenInstrInfo.inc" -static cl::opt<unsigned> -TBZDisplacementBits("aarch64-tbz-offset-bits", cl::Hidden, cl::init(14), - cl::desc("Restrict range of TB[N]Z instructions (DEBUG)")); +static cl::opt<unsigned> TBZDisplacementBits( + "aarch64-tbz-offset-bits", cl::Hidden, cl::init(14), + cl::desc("Restrict range of TB[N]Z instructions (DEBUG)")); -static cl::opt<unsigned> -CBZDisplacementBits("aarch64-cbz-offset-bits", cl::Hidden, cl::init(19), - cl::desc("Restrict range of CB[N]Z instructions (DEBUG)")); +static cl::opt<unsigned> CBZDisplacementBits( + "aarch64-cbz-offset-bits", cl::Hidden, cl::init(19), + cl::desc("Restrict range of CB[N]Z instructions (DEBUG)")); static cl::opt<unsigned> -BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19), - cl::desc("Restrict range of Bcc instructions (DEBUG)")); + BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19), + cl::desc("Restrict range of Bcc instructions (DEBUG)")); AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI) : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP), @@ -172,8 +172,8 @@ bool AArch64InstrInfo::isBranchOffsetInRange(unsigned BranchOp, return isIntN(Bits, BrOffset / 4); } -MachineBasicBlock *AArch64InstrInfo::getBranchDestBlock( - const MachineInstr &MI) const { +MachineBasicBlock * +AArch64InstrInfo::getBranchDestBlock(const MachineInstr &MI) const { switch (MI.getOpcode()) { default: llvm_unreachable("unexpected opcode!"); @@ -374,12 +374,9 @@ void AArch64InstrInfo::instantiateCondBranch( } } -unsigned AArch64InstrInfo::insertBranch(MachineBasicBlock &MBB, - MachineBasicBlock *TBB, - MachineBasicBlock *FBB, - ArrayRef<MachineOperand> Cond, - const DebugLoc &DL, - int *BytesAdded) const { +unsigned AArch64InstrInfo::insertBranch( + MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, + ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const { // Shouldn't be a fall through. assert(TBB && "insertBranch must not be told to insert a fallthrough"); @@ -485,10 +482,11 @@ static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg, return Opc; } -bool AArch64InstrInfo::canInsertSelect( - const MachineBasicBlock &MBB, ArrayRef<MachineOperand> Cond, - unsigned TrueReg, unsigned FalseReg, int &CondCycles, int &TrueCycles, - int &FalseCycles) const { +bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB, + ArrayRef<MachineOperand> Cond, + unsigned TrueReg, unsigned FalseReg, + int &CondCycles, int &TrueCycles, + int &FalseCycles) const { // Check register classes. const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); const TargetRegisterClass *RC = @@ -656,8 +654,10 @@ void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB, MRI.constrainRegClass(FalseReg, RC); // Insert the csel. - BuildMI(MBB, I, DL, get(Opc), DstReg).addReg(TrueReg).addReg(FalseReg).addImm( - CC); + BuildMI(MBB, I, DL, get(Opc), DstReg) + .addReg(TrueReg) + .addReg(FalseReg) + .addImm(CC); } /// Returns true if a MOVi32imm or MOVi64imm can be expanded to an ORRxx. 
@@ -1078,11 +1078,7 @@ static unsigned convertToNonFlagSettingOpc(const MachineInstr &MI) { } } -enum AccessKind { - AK_Write = 0x01, - AK_Read = 0x10, - AK_All = 0x11 -}; +enum AccessKind { AK_Write = 0x01, AK_Read = 0x10, AK_All = 0x11 }; /// True when condition flags are accessed (either by writing or reading) /// on the instruction trace starting at From and ending at To. @@ -1111,21 +1107,24 @@ static bool areCFlagsAccessedBetweenInstrs( for (--To; To != From; --To) { const MachineInstr &Instr = *To; - if ( ((AccessToCheck & AK_Write) && Instr.modifiesRegister(AArch64::NZCV, TRI)) || - ((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI))) + if (((AccessToCheck & AK_Write) && + Instr.modifiesRegister(AArch64::NZCV, TRI)) || + ((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI))) return true; } return false; } /// Try to optimize a compare instruction. A compare instruction is an -/// instruction which produces AArch64::NZCV. It can be truly compare instruction +/// instruction which produces AArch64::NZCV. It can be truly compare +/// instruction /// when there are no uses of its destination register. /// /// The following steps are tried in order: /// 1. Convert CmpInstr into an unconditional version. /// 2. Remove CmpInstr if above there is an instruction producing a needed -/// condition code or an instruction which can be converted into such an instruction. +/// condition code or an instruction which can be converted into such an +/// instruction. /// Only comparison with zero is supported. bool AArch64InstrInfo::optimizeCompareInstr( MachineInstr &CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask, @@ -1187,20 +1186,34 @@ static unsigned sForm(MachineInstr &Instr) { case AArch64::SUBSXri: return Instr.getOpcode(); - case AArch64::ADDWrr: return AArch64::ADDSWrr; - case AArch64::ADDWri: return AArch64::ADDSWri; - case AArch64::ADDXrr: return AArch64::ADDSXrr; - case AArch64::ADDXri: return AArch64::ADDSXri; - case AArch64::ADCWr: return AArch64::ADCSWr; - case AArch64::ADCXr: return AArch64::ADCSXr; - case AArch64::SUBWrr: return AArch64::SUBSWrr; - case AArch64::SUBWri: return AArch64::SUBSWri; - case AArch64::SUBXrr: return AArch64::SUBSXrr; - case AArch64::SUBXri: return AArch64::SUBSXri; - case AArch64::SBCWr: return AArch64::SBCSWr; - case AArch64::SBCXr: return AArch64::SBCSXr; - case AArch64::ANDWri: return AArch64::ANDSWri; - case AArch64::ANDXri: return AArch64::ANDSXri; + case AArch64::ADDWrr: + return AArch64::ADDSWrr; + case AArch64::ADDWri: + return AArch64::ADDSWri; + case AArch64::ADDXrr: + return AArch64::ADDSXrr; + case AArch64::ADDXri: + return AArch64::ADDSXri; + case AArch64::ADCWr: + return AArch64::ADCSWr; + case AArch64::ADCXr: + return AArch64::ADCSXr; + case AArch64::SUBWrr: + return AArch64::SUBSWrr; + case AArch64::SUBWri: + return AArch64::SUBSWri; + case AArch64::SUBXrr: + return AArch64::SUBSXrr; + case AArch64::SUBXri: + return AArch64::SUBSXri; + case AArch64::SBCWr: + return AArch64::SBCSWr; + case AArch64::SBCXr: + return AArch64::SBCSXr; + case AArch64::ANDWri: + return AArch64::ANDSWri; + case AArch64::ANDXri: + return AArch64::ANDSXri; } } @@ -1222,7 +1235,7 @@ struct UsedNZCV { UsedNZCV() = default; - UsedNZCV& operator |=(const UsedNZCV& UsedFlags) { + UsedNZCV &operator|=(const UsedNZCV &UsedFlags) { this->N |= UsedFlags.N; this->Z |= UsedFlags.Z; this->C |= UsedFlags.C; @@ -1238,29 +1251,29 @@ struct UsedNZCV { /// codes or we don't optimize CmpInstr in the presence of such instructions. 
static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr) { switch (Instr.getOpcode()) { - default: - return AArch64CC::Invalid; + default: + return AArch64CC::Invalid; - case AArch64::Bcc: { - int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV); - assert(Idx >= 2); - return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 2).getImm()); - } + case AArch64::Bcc: { + int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV); + assert(Idx >= 2); + return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 2).getImm()); + } - case AArch64::CSINVWr: - case AArch64::CSINVXr: - case AArch64::CSINCWr: - case AArch64::CSINCXr: - case AArch64::CSELWr: - case AArch64::CSELXr: - case AArch64::CSNEGWr: - case AArch64::CSNEGXr: - case AArch64::FCSELSrrr: - case AArch64::FCSELDrrr: { - int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV); - assert(Idx >= 1); - return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 1).getImm()); - } + case AArch64::CSINVWr: + case AArch64::CSINVXr: + case AArch64::CSINCWr: + case AArch64::CSINCXr: + case AArch64::CSELWr: + case AArch64::CSELXr: + case AArch64::CSNEGWr: + case AArch64::CSNEGXr: + case AArch64::FCSELSrrr: + case AArch64::FCSELDrrr: { + int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV); + assert(Idx >= 1); + return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 1).getImm()); + } } } @@ -1268,42 +1281,42 @@ static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC) { assert(CC != AArch64CC::Invalid); UsedNZCV UsedFlags; switch (CC) { - default: - break; + default: + break; - case AArch64CC::EQ: // Z set - case AArch64CC::NE: // Z clear - UsedFlags.Z = true; - break; + case AArch64CC::EQ: // Z set + case AArch64CC::NE: // Z clear + UsedFlags.Z = true; + break; - case AArch64CC::HI: // Z clear and C set - case AArch64CC::LS: // Z set or C clear - UsedFlags.Z = true; - LLVM_FALLTHROUGH; - case AArch64CC::HS: // C set - case AArch64CC::LO: // C clear - UsedFlags.C = true; - break; + case AArch64CC::HI: // Z clear and C set + case AArch64CC::LS: // Z set or C clear + UsedFlags.Z = true; + LLVM_FALLTHROUGH; + case AArch64CC::HS: // C set + case AArch64CC::LO: // C clear + UsedFlags.C = true; + break; - case AArch64CC::MI: // N set - case AArch64CC::PL: // N clear - UsedFlags.N = true; - break; + case AArch64CC::MI: // N set + case AArch64CC::PL: // N clear + UsedFlags.N = true; + break; - case AArch64CC::VS: // V set - case AArch64CC::VC: // V clear - UsedFlags.V = true; - break; + case AArch64CC::VS: // V set + case AArch64CC::VC: // V clear + UsedFlags.V = true; + break; - case AArch64CC::GT: // Z clear, N and V the same - case AArch64CC::LE: // Z set, N and V differ - UsedFlags.Z = true; - LLVM_FALLTHROUGH; - case AArch64CC::GE: // N and V the same - case AArch64CC::LT: // N and V differ - UsedFlags.N = true; - UsedFlags.V = true; - break; + case AArch64CC::GT: // Z clear, N and V the same + case AArch64CC::LE: // Z set, N and V differ + UsedFlags.Z = true; + LLVM_FALLTHROUGH; + case AArch64CC::GE: // N and V the same + case AArch64CC::LT: // N and V differ + UsedFlags.N = true; + UsedFlags.V = true; + break; } return UsedFlags; } @@ -1328,7 +1341,7 @@ static bool isSUBSRegImm(unsigned Opcode) { /// nor uses of flags between MI and CmpInstr. 
/// - and C/V flags are not used after CmpInstr static bool canInstrSubstituteCmpInstr(MachineInstr *MI, MachineInstr *CmpInstr, - const TargetRegisterInfo *TRI) { + const TargetRegisterInfo *TRI) { assert(MI); assert(sForm(*MI) != AArch64::INSTRUCTION_LIST_END); assert(CmpInstr); @@ -1350,7 +1363,8 @@ static bool canInstrSubstituteCmpInstr(MachineInstr *MI, MachineInstr *CmpInstr, return false; UsedNZCV NZCVUsedAfterCmp; - for (auto I = std::next(CmpInstr->getIterator()), E = CmpInstr->getParent()->instr_end(); + for (auto I = std::next(CmpInstr->getIterator()), + E = CmpInstr->getParent()->instr_end(); I != E; ++I) { const MachineInstr &Instr = *I; if (Instr.readsRegister(AArch64::NZCV, TRI)) { @@ -1363,7 +1377,7 @@ static bool canInstrSubstituteCmpInstr(MachineInstr *MI, MachineInstr *CmpInstr, if (Instr.modifiesRegister(AArch64::NZCV, TRI)) break; } - + return !NZCVUsedAfterCmp.C && !NZCVUsedAfterCmp.V; } @@ -1421,16 +1435,20 @@ bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const { .addMemOperand(*MI.memoperands_begin()); } else if (TM.getCodeModel() == CodeModel::Large) { BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg) - .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC).addImm(0); + .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC) + .addImm(0); BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg) .addReg(Reg, RegState::Kill) - .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC).addImm(16); + .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC) + .addImm(16); BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg) .addReg(Reg, RegState::Kill) - .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC).addImm(32); + .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC) + .addImm(32); BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg) .addReg(Reg, RegState::Kill) - .addGlobalAddress(GV, 0, AArch64II::MO_G3).addImm(48); + .addGlobalAddress(GV, 0, AArch64II::MO_G3) + .addImm(48); BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg) .addReg(Reg, RegState::Kill) .addImm(0) @@ -1812,7 +1830,7 @@ bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth( } else return false; - // Get the scaling factor for the instruction and set the width for the + // Get the scaling factor for the instruction and set the width for the // instruction. unsigned Scale = 0; int64_t Dummy1, Dummy2; @@ -1835,10 +1853,10 @@ bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth( return true; } -MachineOperand& +MachineOperand & AArch64InstrInfo::getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const { assert(LdSt.mayLoadOrStore() && "Expected a memory operation."); - MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands()-1); + MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands() - 1); assert(OfsOp.isImm() && "Offset operand wasn't immediate."); return OfsOp; } @@ -1847,7 +1865,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale, unsigned &Width, int64_t &MinOffset, int64_t &MaxOffset) const { switch (Opcode) { - // Not a memory operation or something we want to handle. + // Not a memory operation or something we want to handle. 
default: Scale = Width = 0; MinOffset = MaxOffset = 0; @@ -2102,12 +2120,13 @@ static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg, return ((DestReg - SrcReg) & 0x1f) < NumRegs; } -void AArch64InstrInfo::copyPhysRegTuple( - MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, - unsigned DestReg, unsigned SrcReg, bool KillSrc, unsigned Opcode, - ArrayRef<unsigned> Indices) const { - assert(Subtarget.hasNEON() && - "Unexpected register copy without NEON"); +void AArch64InstrInfo::copyPhysRegTuple(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + const DebugLoc &DL, unsigned DestReg, + unsigned SrcReg, bool KillSrc, + unsigned Opcode, + ArrayRef<unsigned> Indices) const { + assert(Subtarget.hasNEON() && "Unexpected register copy without NEON"); const TargetRegisterInfo *TRI = &getRegisterInfo(); uint16_t DestEncoding = TRI->getEncodingValue(DestReg); uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg); @@ -2160,8 +2179,9 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); } } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroing()) { - BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg).addImm(0).addImm( - AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); + BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg) + .addImm(0) + .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); } else { if (Subtarget.hasZeroCycleRegMove()) { // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move. @@ -2196,8 +2216,9 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, .addImm(0) .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroing()) { - BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg).addImm(0).addImm( - AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); + BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg) + .addImm(0) + .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); } else { // Otherwise, expand to ORR XZR. BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg) @@ -2210,8 +2231,8 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, // Copy a DDDD register quad by copying the individual sub-registers. if (AArch64::DDDDRegClass.contains(DestReg) && AArch64::DDDDRegClass.contains(SrcReg)) { - static const unsigned Indices[] = { AArch64::dsub0, AArch64::dsub1, - AArch64::dsub2, AArch64::dsub3 }; + static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1, + AArch64::dsub2, AArch64::dsub3}; copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8, Indices); return; @@ -2220,8 +2241,8 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, // Copy a DDD register triple by copying the individual sub-registers. if (AArch64::DDDRegClass.contains(DestReg) && AArch64::DDDRegClass.contains(SrcReg)) { - static const unsigned Indices[] = { AArch64::dsub0, AArch64::dsub1, - AArch64::dsub2 }; + static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1, + AArch64::dsub2}; copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8, Indices); return; @@ -2230,7 +2251,7 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, // Copy a DD register pair by copying the individual sub-registers. 
if (AArch64::DDRegClass.contains(DestReg) && AArch64::DDRegClass.contains(SrcReg)) { - static const unsigned Indices[] = { AArch64::dsub0, AArch64::dsub1 }; + static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1}; copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8, Indices); return; @@ -2239,8 +2260,8 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, // Copy a QQQQ register quad by copying the individual sub-registers. if (AArch64::QQQQRegClass.contains(DestReg) && AArch64::QQQQRegClass.contains(SrcReg)) { - static const unsigned Indices[] = { AArch64::qsub0, AArch64::qsub1, - AArch64::qsub2, AArch64::qsub3 }; + static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1, + AArch64::qsub2, AArch64::qsub3}; copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8, Indices); return; @@ -2249,8 +2270,8 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, // Copy a QQQ register triple by copying the individual sub-registers. if (AArch64::QQQRegClass.contains(DestReg) && AArch64::QQQRegClass.contains(SrcReg)) { - static const unsigned Indices[] = { AArch64::qsub0, AArch64::qsub1, - AArch64::qsub2 }; + static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1, + AArch64::qsub2}; copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8, Indices); return; @@ -2259,7 +2280,7 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, // Copy a QQ register pair by copying the individual sub-registers. if (AArch64::QQRegClass.contains(DestReg) && AArch64::QQRegClass.contains(SrcReg)) { - static const unsigned Indices[] = { AArch64::qsub0, AArch64::qsub1 }; + static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1}; copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8, Indices); return; @@ -2267,28 +2288,28 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, if (AArch64::FPR128RegClass.contains(DestReg) && AArch64::FPR128RegClass.contains(SrcReg)) { - if(Subtarget.hasNEON()) { + if (Subtarget.hasNEON()) { BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg) .addReg(SrcReg) .addReg(SrcReg, getKillRegState(KillSrc)); } else { BuildMI(MBB, I, DL, get(AArch64::STRQpre)) - .addReg(AArch64::SP, RegState::Define) - .addReg(SrcReg, getKillRegState(KillSrc)) - .addReg(AArch64::SP) - .addImm(-16); + .addReg(AArch64::SP, RegState::Define) + .addReg(SrcReg, getKillRegState(KillSrc)) + .addReg(AArch64::SP) + .addImm(-16); BuildMI(MBB, I, DL, get(AArch64::LDRQpre)) - .addReg(AArch64::SP, RegState::Define) - .addReg(DestReg, RegState::Define) - .addReg(AArch64::SP) - .addImm(16); + .addReg(AArch64::SP, RegState::Define) + .addReg(DestReg, RegState::Define) + .addReg(AArch64::SP) + .addImm(16); } return; } if (AArch64::FPR64RegClass.contains(DestReg) && AArch64::FPR64RegClass.contains(SrcReg)) { - if(Subtarget.hasNEON()) { + if (Subtarget.hasNEON()) { DestReg = RI.getMatchingSuperReg(DestReg, AArch64::dsub, &AArch64::FPR128RegClass); SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::dsub, @@ -2305,7 +2326,7 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, if (AArch64::FPR32RegClass.contains(DestReg) && AArch64::FPR32RegClass.contains(SrcReg)) { - if(Subtarget.hasNEON()) { + if (Subtarget.hasNEON()) { DestReg = RI.getMatchingSuperReg(DestReg, AArch64::ssub, &AArch64::FPR128RegClass); SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::ssub, @@ -2322,7 +2343,7 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, if (AArch64::FPR16RegClass.contains(DestReg) && 
AArch64::FPR16RegClass.contains(SrcReg)) { - if(Subtarget.hasNEON()) { + if (Subtarget.hasNEON()) { DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub, &AArch64::FPR128RegClass); SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub, @@ -2343,7 +2364,7 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, if (AArch64::FPR8RegClass.contains(DestReg) && AArch64::FPR8RegClass.contains(SrcReg)) { - if(Subtarget.hasNEON()) { + if (Subtarget.hasNEON()) { DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub, &AArch64::FPR128RegClass); SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub, @@ -2392,17 +2413,17 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, if (DestReg == AArch64::NZCV) { assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy"); BuildMI(MBB, I, DL, get(AArch64::MSR)) - .addImm(AArch64SysReg::NZCV) - .addReg(SrcReg, getKillRegState(KillSrc)) - .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define); + .addImm(AArch64SysReg::NZCV) + .addReg(SrcReg, getKillRegState(KillSrc)) + .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define); return; } if (SrcReg == AArch64::NZCV) { assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy"); BuildMI(MBB, I, DL, get(AArch64::MRS), DestReg) - .addImm(AArch64SysReg::NZCV) - .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc)); + .addImm(AArch64SysReg::NZCV) + .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc)); return; } @@ -2458,45 +2479,39 @@ void AArch64InstrInfo::storeRegToStackSlot( if (AArch64::FPR128RegClass.hasSubClassEq(RC)) Opc = AArch64::STRQui; else if (AArch64::DDRegClass.hasSubClassEq(RC)) { - assert(Subtarget.hasNEON() && - "Unexpected register store without NEON"); + assert(Subtarget.hasNEON() && "Unexpected register store without NEON"); Opc = AArch64::ST1Twov1d; Offset = false; } break; case 24: if (AArch64::DDDRegClass.hasSubClassEq(RC)) { - assert(Subtarget.hasNEON() && - "Unexpected register store without NEON"); + assert(Subtarget.hasNEON() && "Unexpected register store without NEON"); Opc = AArch64::ST1Threev1d; Offset = false; } break; case 32: if (AArch64::DDDDRegClass.hasSubClassEq(RC)) { - assert(Subtarget.hasNEON() && - "Unexpected register store without NEON"); + assert(Subtarget.hasNEON() && "Unexpected register store without NEON"); Opc = AArch64::ST1Fourv1d; Offset = false; } else if (AArch64::QQRegClass.hasSubClassEq(RC)) { - assert(Subtarget.hasNEON() && - "Unexpected register store without NEON"); + assert(Subtarget.hasNEON() && "Unexpected register store without NEON"); Opc = AArch64::ST1Twov2d; Offset = false; } break; case 48: if (AArch64::QQQRegClass.hasSubClassEq(RC)) { - assert(Subtarget.hasNEON() && - "Unexpected register store without NEON"); + assert(Subtarget.hasNEON() && "Unexpected register store without NEON"); Opc = AArch64::ST1Threev2d; Offset = false; } break; case 64: if (AArch64::QQQQRegClass.hasSubClassEq(RC)) { - assert(Subtarget.hasNEON() && - "Unexpected register store without NEON"); + assert(Subtarget.hasNEON() && "Unexpected register store without NEON"); Opc = AArch64::ST1Fourv2d; Offset = false; } @@ -2505,8 +2520,8 @@ void AArch64InstrInfo::storeRegToStackSlot( assert(Opc && "Unknown register class"); const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc)) - .addReg(SrcReg, getKillRegState(isKill)) - .addFrameIndex(FI); + .addReg(SrcReg, getKillRegState(isKill)) + .addFrameIndex(FI); if (Offset) MI.addImm(0); @@ -2562,45 +2577,39 @@ void 
AArch64InstrInfo::loadRegFromStackSlot( if (AArch64::FPR128RegClass.hasSubClassEq(RC)) Opc = AArch64::LDRQui; else if (AArch64::DDRegClass.hasSubClassEq(RC)) { - assert(Subtarget.hasNEON() && - "Unexpected register load without NEON"); + assert(Subtarget.hasNEON() && "Unexpected register load without NEON"); Opc = AArch64::LD1Twov1d; Offset = false; } break; case 24: if (AArch64::DDDRegClass.hasSubClassEq(RC)) { - assert(Subtarget.hasNEON() && - "Unexpected register load without NEON"); + assert(Subtarget.hasNEON() && "Unexpected register load without NEON"); Opc = AArch64::LD1Threev1d; Offset = false; } break; case 32: if (AArch64::DDDDRegClass.hasSubClassEq(RC)) { - assert(Subtarget.hasNEON() && - "Unexpected register load without NEON"); + assert(Subtarget.hasNEON() && "Unexpected register load without NEON"); Opc = AArch64::LD1Fourv1d; Offset = false; } else if (AArch64::QQRegClass.hasSubClassEq(RC)) { - assert(Subtarget.hasNEON() && - "Unexpected register load without NEON"); + assert(Subtarget.hasNEON() && "Unexpected register load without NEON"); Opc = AArch64::LD1Twov2d; Offset = false; } break; case 48: if (AArch64::QQQRegClass.hasSubClassEq(RC)) { - assert(Subtarget.hasNEON() && - "Unexpected register load without NEON"); + assert(Subtarget.hasNEON() && "Unexpected register load without NEON"); Opc = AArch64::LD1Threev2d; Offset = false; } break; case 64: if (AArch64::QQQQRegClass.hasSubClassEq(RC)) { - assert(Subtarget.hasNEON() && - "Unexpected register load without NEON"); + assert(Subtarget.hasNEON() && "Unexpected register load without NEON"); Opc = AArch64::LD1Fourv2d; Offset = false; } @@ -2609,8 +2618,8 @@ void AArch64InstrInfo::loadRegFromStackSlot( assert(Opc && "Unknown register class"); const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc)) - .addReg(DestReg, getDefRegState(true)) - .addFrameIndex(FI); + .addReg(DestReg, getDefRegState(true)) + .addFrameIndex(FI); if (Offset) MI.addImm(0); MI.addMemOperand(MMO); @@ -2755,7 +2764,7 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl( if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) { assert(TRI.getRegSizeInBits(*getRegClass(DstReg)) == - TRI.getRegSizeInBits(*getRegClass(SrcReg)) && + TRI.getRegSizeInBits(*getRegClass(SrcReg)) && "Mismatched register size in non subreg COPY"); if (IsSpill) storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex, @@ -3138,10 +3147,7 @@ void AArch64InstrInfo::getNoop(MCInst &NopInst) const { } // AArch64 supports MachineCombiner. -bool AArch64InstrInfo::useMachineCombiner() const { - - return true; -} +bool AArch64InstrInfo::useMachineCombiner() const { return true; } // True when Opc sets flag static bool isCombineInstrSettingFlag(unsigned Opc) { @@ -3275,7 +3281,8 @@ static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO, // 1. Other data types (integer, vectors) // 2. Other math / logic operations (xor, or) // 3. Other forms of the same operation (intrinsics and other variants) -bool AArch64InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst) const { +bool AArch64InstrInfo::isAssociativeAndCommutative( + const MachineInstr &Inst) const { switch (Inst.getOpcode()) { case AArch64::FADDDrr: case AArch64::FADDSrr: @@ -3595,8 +3602,8 @@ static bool getFMAPatterns(MachineInstr &Root, /// Return true when a code sequence can improve throughput. It /// should be called only for instructions in loops. 
/// \param Pattern - combiner pattern -bool -AArch64InstrInfo::isThroughputPattern(MachineCombinerPattern Pattern) const { +bool AArch64InstrInfo::isThroughputPattern( + MachineCombinerPattern Pattern) const { switch (Pattern) { default: break; @@ -3747,8 +3754,8 @@ genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI, static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs, - unsigned IdxMulOpd, unsigned MaddOpc, - unsigned VR, const TargetRegisterClass *RC) { + unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR, + const TargetRegisterClass *RC) { assert(IdxMulOpd == 1 || IdxMulOpd == 2); MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg()); @@ -3767,11 +3774,11 @@ static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI, if (TargetRegisterInfo::isVirtualRegister(VR)) MRI.constrainRegClass(VR, RC); - MachineInstrBuilder MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), - ResultReg) - .addReg(SrcReg0, getKillRegState(Src0IsKill)) - .addReg(SrcReg1, getKillRegState(Src1IsKill)) - .addReg(VR); + MachineInstrBuilder MIB = + BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg) + .addReg(SrcReg0, getKillRegState(Src0IsKill)) + .addReg(SrcReg1, getKillRegState(Src1IsKill)) + .addReg(VR); // Insert the MADD InsInstrs.push_back(MIB); return MUL; @@ -4401,12 +4408,9 @@ AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const { using namespace AArch64II; static const std::pair<unsigned, const char *> TargetFlags[] = { - {MO_PAGE, "aarch64-page"}, - {MO_PAGEOFF, "aarch64-pageoff"}, - {MO_G3, "aarch64-g3"}, - {MO_G2, "aarch64-g2"}, - {MO_G1, "aarch64-g1"}, - {MO_G0, "aarch64-g0"}, + {MO_PAGE, "aarch64-page"}, {MO_PAGEOFF, "aarch64-pageoff"}, + {MO_G3, "aarch64-g3"}, {MO_G2, "aarch64-g2"}, + {MO_G1, "aarch64-g1"}, {MO_G0, "aarch64-g0"}, {MO_HI12, "aarch64-hi12"}}; return makeArrayRef(TargetFlags); } @@ -4416,9 +4420,7 @@ AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const { using namespace AArch64II; static const std::pair<unsigned, const char *> TargetFlags[] = { - {MO_GOT, "aarch64-got"}, - {MO_NC, "aarch64-nc"}, - {MO_TLS, "aarch64-tls"}}; + {MO_GOT, "aarch64-got"}, {MO_NC, "aarch64-nc"}, {MO_TLS, "aarch64-tls"}}; return makeArrayRef(TargetFlags); } @@ -4430,26 +4432,27 @@ AArch64InstrInfo::getSerializableMachineMemOperandTargetFlags() const { return makeArrayRef(TargetFlags); } -unsigned AArch64InstrInfo::getOutliningBenefit(size_t SequenceSize, - size_t Occurrences, - bool CanBeTailCall) const { - unsigned NotOutlinedSize = SequenceSize * Occurrences; - unsigned OutlinedSize; - - // Is this candidate something we can outline as a tail call? - if (CanBeTailCall) { - // If yes, then we just outline the sequence and replace each of its - // occurrences with a branch instruction. - OutlinedSize = SequenceSize + Occurrences; - } else { - // If no, then we outline the sequence (SequenceSize), add a return (+1), - // and replace each occurrence with a save/restore to LR and a call - // (3 * Occurrences) - OutlinedSize = (SequenceSize + 1) + (3 * Occurrences); - } +size_t AArch64InstrInfo::getOutliningCallOverhead( + MachineBasicBlock::iterator &StartIt, + MachineBasicBlock::iterator &EndIt) const { + // Is this a tail-call? + if (EndIt->isTerminator()) + return 1; // Yes, so we don't need to save/restore LR. - // Return the number of instructions saved by outlining this sequence. 
- return NotOutlinedSize > OutlinedSize ? NotOutlinedSize - OutlinedSize : 0; + // No, so save + restore LR. + return 3; +} + +size_t AArch64InstrInfo::getOutliningFrameOverhead( + MachineBasicBlock::iterator &StartIt, + MachineBasicBlock::iterator &EndIt) const { + + // Is this a tail-call? + if (EndIt->isTerminator()) + return 0; // Yes, so we already have a return. + + // No, so we have to add a return to the end. + return 1; } bool AArch64InstrInfo::isFunctionSafeToOutlineFrom(MachineFunction &MF) const { @@ -4475,7 +4478,7 @@ AArch64InstrInfo::getOutliningType(MachineInstr &MI) const { // Is this the end of a function? if (MI.getParent()->succ_empty()) - return MachineOutlinerInstrType::Legal; + return MachineOutlinerInstrType::Legal; // It's not, so don't outline it. return MachineOutlinerInstrType::Illegal; @@ -4494,7 +4497,7 @@ AArch64InstrInfo::getOutliningType(MachineInstr &MI) const { // Don't outline anything that uses the link register. if (MI.modifiesRegister(AArch64::LR, &RI) || MI.readsRegister(AArch64::LR, &RI)) - return MachineOutlinerInstrType::Illegal; + return MachineOutlinerInstrType::Illegal; // Does this use the stack? if (MI.modifiesRegister(AArch64::SP, &RI) || @@ -4502,13 +4505,13 @@ AArch64InstrInfo::getOutliningType(MachineInstr &MI) const { // Is it a memory operation? if (MI.mayLoadOrStore()) { - unsigned Base; // Filled with the base regiser of MI. + unsigned Base; // Filled with the base regiser of MI. int64_t Offset; // Filled with the offset of MI. unsigned DummyWidth; // Does it allow us to offset the base register and is the base SP? if (!getMemOpBaseRegImmOfsWidth(MI, Base, Offset, DummyWidth, &RI) || - Base != AArch64::SP) + Base != AArch64::SP) return MachineOutlinerInstrType::Illegal; // Find the minimum/maximum offset for this instruction and check if @@ -4522,7 +4525,7 @@ AArch64InstrInfo::getOutliningType(MachineInstr &MI) const { // This is tricky to test with IR tests, but when the outliner is moved // to a MIR test, it really ought to be checked. if (Offset + 16 < MinOffset || Offset + 16 > MaxOffset) - return MachineOutlinerInstrType::Illegal; + return MachineOutlinerInstrType::Illegal; // It's in range, so we can outline it. return MachineOutlinerInstrType::Legal; @@ -4558,7 +4561,7 @@ void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const { // We've pushed the return address to the stack, so add 16 to the offset. // This is safe, since we already checked if it would overflow when we // checked if this instruction was legal to outline. 
- int64_t NewImm = (Offset + 16)/Scale; + int64_t NewImm = (Offset + 16) / Scale; StackOffsetOperand.setImm(NewImm); } } @@ -4624,4 +4627,3 @@ MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall( return It; } - diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h index 64f9743ab94..076a32f911f 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h @@ -299,8 +299,10 @@ public: getSerializableMachineMemOperandTargetFlags() const override; bool isFunctionSafeToOutlineFrom(MachineFunction &MF) const override; - unsigned getOutliningBenefit(size_t SequenceSize, size_t Occurrences, - bool CanBeTailCall) const override; + size_t getOutliningCallOverhead(MachineBasicBlock::iterator &StartIt, + MachineBasicBlock::iterator &EndIt) const override; + size_t getOutliningFrameOverhead(MachineBasicBlock::iterator &StartIt, + MachineBasicBlock::iterator &EndIt) const override; AArch64GenInstrInfo::MachineOutlinerInstrType getOutliningType(MachineInstr &MI) const override; void insertOutlinerEpilogue(MachineBasicBlock &MBB, diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index f01025545e7..8eb1536790d 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -10537,25 +10537,22 @@ char LDTLSCleanup::ID = 0; FunctionPass* llvm::createCleanupLocalDynamicTLSPass() { return new LDTLSCleanup(); } -unsigned X86InstrInfo::getOutliningBenefit(size_t SequenceSize, - size_t Occurrences, - bool CanBeTailCall) const { - unsigned NotOutlinedSize = SequenceSize * Occurrences; - unsigned OutlinedSize; +size_t X86InstrInfo::getOutliningCallOverhead( +MachineBasicBlock::iterator &StartIt, +MachineBasicBlock::iterator &EndIt) const { + // We just have to emit a call, so return 1. + return 1; +} - // Is it a tail call? - if (CanBeTailCall) { - // If yes, we don't have to include a return instruction-- it's already in - // our sequence. So we have one occurrence of the sequence + #Occurrences - // calls. - OutlinedSize = SequenceSize + Occurrences; - } else { - // If not, add one for the return instruction. - OutlinedSize = (SequenceSize + 1) + Occurrences; - } +size_t X86InstrInfo::getOutliningFrameOverhead( +MachineBasicBlock::iterator &StartIt, +MachineBasicBlock::iterator &EndIt) const { + // Is this a tail-call? + if (EndIt->isTerminator()) + return 0; // Yes, so we already have a return. - // Return the number of instructions saved by outlining this sequence. - return NotOutlinedSize > OutlinedSize ? NotOutlinedSize - OutlinedSize : 0; + // No, so we have to add a return to the end. 
+ return 1; } bool X86InstrInfo::isFunctionSafeToOutlineFrom(MachineFunction &MF) const { diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h index e64876073cc..38caf04f7f8 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.h +++ b/llvm/lib/Target/X86/X86InstrInfo.h @@ -566,9 +566,11 @@ public: ArrayRef<std::pair<unsigned, const char *>> getSerializableDirectMachineOperandTargetFlags() const override; - unsigned getOutliningBenefit(size_t SequenceSize, - size_t Occurrences, - bool CanBeTailCall) const override; + size_t getOutliningCallOverhead(MachineBasicBlock::iterator &StartIt, + MachineBasicBlock::iterator &EndIt) const override; + + size_t getOutliningFrameOverhead(MachineBasicBlock::iterator &StartIt, + MachineBasicBlock::iterator &EndIt) const override; bool isFunctionSafeToOutlineFrom(MachineFunction &MF) const override; |