diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2019-04-03 20:53:20 +0000 |
---|---|---|
committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2019-04-03 20:53:20 +0000 |
commit | 396653f8a1fb5db910599e29557a47cfd85ca119 (patch) | |
tree | 716d00da0da7375e59933aa073e833320c385016 /llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp | |
parent | 060bf99f493ea5515fa09dec8dc949dc6d57db09 (diff) | |
download | bcm5719-llvm-396653f8a1fb5db910599e29557a47cfd85ca119.tar.gz bcm5719-llvm-396653f8a1fb5db910599e29557a47cfd85ca119.zip |
AMDGPU: Split block for si_end_cf
Relying on no spill or other code being inserted before this was
precarious. It relied on code diligently checking isBasicBlockPrologue
which is likely to be forgotten.
Ideally this could be done earlier, but that doesn't work because of
phis: no other instruction can be placed before them, so we have to
accept the position being incorrect while the function is in SSA form.
This avoids regressions in the fast register allocator rewrite from
inverting the direction.
llvm-svn: 357634
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp | 127 |
1 file changed, 112 insertions, 15 deletions
diff --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp index a399f7715a2..02b47e869bf 100644 --- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp +++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp @@ -55,6 +55,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" @@ -79,8 +80,11 @@ class SILowerControlFlow : public MachineFunctionPass { private: const SIRegisterInfo *TRI = nullptr; const SIInstrInfo *TII = nullptr; - LiveIntervals *LIS = nullptr; MachineRegisterInfo *MRI = nullptr; + LiveIntervals *LIS = nullptr; + MachineDominatorTree *DT = nullptr; + MachineLoopInfo *MLI = nullptr; + void emitIf(MachineInstr &MI); void emitElse(MachineInstr &MI); @@ -111,7 +115,7 @@ public: AU.addPreservedID(LiveVariablesID); AU.addPreservedID(MachineLoopInfoID); AU.addPreservedID(MachineDominatorsID); - AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); } }; @@ -388,23 +392,99 @@ void SILowerControlFlow::emitLoop(MachineInstr &MI) { MI.eraseFromParent(); } +// Insert \p Inst (which modifies exec) at \p InsPt in \p MBB, such that \p MBB +// is split as necessary to keep the exec modification in its own block. +static MachineBasicBlock *insertInstWithExecFallthrough(MachineBasicBlock &MBB, + MachineInstr &MI, + MachineInstr *NewMI, + MachineDominatorTree *DT, + LiveIntervals *LIS, + MachineLoopInfo *MLI) { + assert(NewMI->isTerminator()); + + MachineBasicBlock::iterator InsPt = MI.getIterator(); + if (std::next(MI.getIterator()) == MBB.end()) { + // Don't bother with a new block. 
+ MBB.insert(InsPt, NewMI); + if (LIS) + LIS->ReplaceMachineInstrInMaps(MI, *NewMI); + MI.eraseFromParent(); + return &MBB; + } + + MachineFunction *MF = MBB.getParent(); + MachineBasicBlock *SplitMBB + = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); + + MF->insert(++MachineFunction::iterator(MBB), SplitMBB); + + // FIXME: This is working around a MachineDominatorTree API defect. + // + // If a previous pass split a critical edge, it may not have been applied to + // the DomTree yet. applySplitCriticalEdges is lazily applied, and inspects + // the CFG of the given block. Make sure to call a dominator tree method that + // will flush this cache before touching the successors of the block. + MachineDomTreeNode *NodeMBB = nullptr; + if (DT) + NodeMBB = DT->getNode(&MBB); + + // Move everything to the new block, except the end_cf pseudo. + SplitMBB->splice(SplitMBB->begin(), &MBB, MBB.begin(), MBB.end()); + + SplitMBB->transferSuccessorsAndUpdatePHIs(&MBB); + MBB.addSuccessor(SplitMBB, BranchProbability::getOne()); + + MBB.insert(MBB.end(), NewMI); + + if (DT) { + std::vector<MachineDomTreeNode *> Children = NodeMBB->getChildren(); + DT->addNewBlock(SplitMBB, &MBB); + + // Reparent all of the children to the new block body. + auto *SplitNode = DT->getNode(SplitMBB); + for (auto *Child : Children) + DT->changeImmediateDominator(Child, SplitNode); + } + + if (MLI) { + if (MachineLoop *Loop = MLI->getLoopFor(&MBB)) + Loop->addBasicBlockToLoop(SplitMBB, MLI->getBase()); + } + + if (LIS) { + LIS->insertMBBInMaps(SplitMBB); + LIS->ReplaceMachineInstrInMaps(MI, *NewMI); + } + + // All live-ins are forwarded. 
+ for (auto &LiveIn : MBB.liveins()) + SplitMBB->addLiveIn(LiveIn); + + MI.eraseFromParent(); + return SplitMBB; +} + void SILowerControlFlow::emitEndCf(MachineInstr &MI) { MachineBasicBlock &MBB = *MI.getParent(); const DebugLoc &DL = MI.getDebugLoc(); MachineBasicBlock::iterator InsPt = MBB.begin(); - MachineInstr *NewMI = - BuildMI(MBB, InsPt, DL, TII->get(AMDGPU::S_OR_B64), AMDGPU::EXEC) - .addReg(AMDGPU::EXEC) - .add(MI.getOperand(0)); + // First, move the instruction. It's unnecessarily difficult to update + // LiveIntervals when there's a change in control flow, so move the + // instruction before changing the blocks. + MBB.splice(InsPt, &MBB, MI.getIterator()); if (LIS) - LIS->ReplaceMachineInstrInMaps(MI, *NewMI); + LIS->handleMove(MI); - MI.eraseFromParent(); + MachineFunction *MF = MBB.getParent(); - if (LIS) - LIS->handleMove(*NewMI); + // Create instruction without inserting it yet. + MachineInstr *NewMI + = BuildMI(*MF, DL, TII->get(AMDGPU::S_OR_B64_term), AMDGPU::EXEC) + .addReg(AMDGPU::EXEC) + .add(MI.getOperand(0)); + insertInstWithExecFallthrough(MBB, MI, NewMI, DT, LIS, MLI); } // Returns replace operands for a logical operation, either single result @@ -470,17 +550,20 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) { // This doesn't actually need LiveIntervals, but we can preserve them. 
LIS = getAnalysisIfAvailable<LiveIntervals>(); + DT = getAnalysisIfAvailable<MachineDominatorTree>(); + MLI = getAnalysisIfAvailable<MachineLoopInfo>(); + MRI = &MF.getRegInfo(); MachineFunction::iterator NextBB; for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE; BI = NextBB) { NextBB = std::next(BI); - MachineBasicBlock &MBB = *BI; + MachineBasicBlock *MBB = &*BI; MachineBasicBlock::iterator I, Next, Last; - for (I = MBB.begin(), Last = MBB.end(); I != MBB.end(); I = Next) { + for (I = MBB->begin(), Last = MBB->end(); I != MBB->end(); I = Next) { Next = std::next(I); MachineInstr &MI = *I; @@ -501,10 +584,24 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) { emitLoop(MI); break; - case AMDGPU::SI_END_CF: + case AMDGPU::SI_END_CF: { + MachineInstr *NextMI = nullptr; + + if (Next != MBB->end()) + NextMI = &*Next; + emitEndCf(MI); - break; + if (NextMI) { + MBB = NextMI->getParent(); + Next = NextMI->getIterator(); + Last = MBB->end(); + } + + NextBB = std::next(MBB->getIterator()); + BE = MF.end(); + break; + } case AMDGPU::S_AND_B64: case AMDGPU::S_OR_B64: // Cleanup bit manipulations on exec mask @@ -518,7 +615,7 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) { } // Replay newly inserted code to combine masks - Next = (Last == MBB.end()) ? MBB.begin() : Last; + Next = (Last == MBB->end()) ? MBB->begin() : Last; } } |