diff options
| author | Mark Searles <m.c.searles@gmail.com> | 2019-04-27 00:51:18 +0000 |
|---|---|---|
| committer | Mark Searles <m.c.searles@gmail.com> | 2019-04-27 00:51:18 +0000 |
| commit | 76c5b62988ffa2cd64facc48074e5cb355c8a5f4 (patch) | |
| tree | 13bedbc36ab4e1c144ee5a1d41be84eef300fe33 /llvm/lib/Target/AMDGPU | |
| parent | c1008e4d3dabfe62120549740decfe6637c2ae55 (diff) | |
| download | bcm5719-llvm-76c5b62988ffa2cd64facc48074e5cb355c8a5f4.tar.gz bcm5719-llvm-76c5b62988ffa2cd64facc48074e5cb355c8a5f4.zip | |
Revert "AMDGPU: Split block for si_end_cf"
This reverts commit 7a6ef3004655dd86d722199c471ae78c28e31bb4.
We discovered some internal test failures, so reverting for now.
Differential Revision: https://reviews.llvm.org/D61213
llvm-svn: 359363
Diffstat (limited to 'llvm/lib/Target/AMDGPU')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 7 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstructions.td | 1 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp | 127 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp | 6 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp | 4 |
5 files changed, 17 insertions, 128 deletions
```diff
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index d9280edb6ee..0983da17d87 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -1214,12 +1214,6 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
     MI.setDesc(get(AMDGPU::S_XOR_B64));
     break;
 
-  case AMDGPU::S_OR_B64_term:
-    // This is only a terminator to get the correct spill code placement during
-    // register allocation.
-    MI.setDesc(get(AMDGPU::S_OR_B64));
-    break;
-
   case AMDGPU::S_ANDN2_B64_term:
     // This is only a terminator to get the correct spill code placement during
     // register allocation.
@@ -1704,7 +1698,6 @@ bool SIInstrInfo::analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
     case AMDGPU::SI_MASK_BRANCH:
     case AMDGPU::S_MOV_B64_term:
     case AMDGPU::S_XOR_B64_term:
-    case AMDGPU::S_OR_B64_term:
     case AMDGPU::S_ANDN2_B64_term:
       break;
     case AMDGPU::SI_IF:
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index b66287c369b..e6c8fb68e1e 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -189,7 +189,6 @@ class WrapTerminatorInst<SOP_Pseudo base_inst> : SPseudoInstSI<
 }
 
 def S_MOV_B64_term : WrapTerminatorInst<S_MOV_B64>;
-def S_OR_B64_term : WrapTerminatorInst<S_OR_B64>;
 def S_XOR_B64_term : WrapTerminatorInst<S_XOR_B64>;
 def S_ANDN2_B64_term : WrapTerminatorInst<S_ANDN2_B64>;
 
diff --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
index 02b47e869bf..a399f7715a2 100644
--- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
@@ -55,7 +55,6 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/CodeGen/LiveIntervals.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstr.h"
@@ -80,11 +79,8 @@ class SILowerControlFlow : public MachineFunctionPass {
 private:
   const SIRegisterInfo *TRI = nullptr;
   const SIInstrInfo *TII = nullptr;
-  MachineRegisterInfo *MRI = nullptr;
   LiveIntervals *LIS = nullptr;
-  MachineDominatorTree *DT = nullptr;
-  MachineLoopInfo *MLI = nullptr;
-
+  MachineRegisterInfo *MRI = nullptr;
 
   void emitIf(MachineInstr &MI);
   void emitElse(MachineInstr &MI);
@@ -115,7 +111,7 @@ public:
     AU.addPreservedID(LiveVariablesID);
     AU.addPreservedID(MachineLoopInfoID);
     AU.addPreservedID(MachineDominatorsID);
-
+    AU.setPreservesCFG();
     MachineFunctionPass::getAnalysisUsage(AU);
   }
 };
@@ -392,99 +388,23 @@ void SILowerControlFlow::emitLoop(MachineInstr &MI) {
   MI.eraseFromParent();
 }
 
-// Insert \p Inst (which modifies exec) at \p InsPt in \p MBB, such that \p MBB
-// is split as necessary to keep the exec modification in its own block.
-static MachineBasicBlock *insertInstWithExecFallthrough(MachineBasicBlock &MBB,
-                                                        MachineInstr &MI,
-                                                        MachineInstr *NewMI,
-                                                        MachineDominatorTree *DT,
-                                                        LiveIntervals *LIS,
-                                                        MachineLoopInfo *MLI) {
-  assert(NewMI->isTerminator());
-
-  MachineBasicBlock::iterator InsPt = MI.getIterator();
-  if (std::next(MI.getIterator()) == MBB.end()) {
-    // Don't bother with a new block.
-    MBB.insert(InsPt, NewMI);
-    if (LIS)
-      LIS->ReplaceMachineInstrInMaps(MI, *NewMI);
-    MI.eraseFromParent();
-    return &MBB;
-  }
-
-  MachineFunction *MF = MBB.getParent();
-  MachineBasicBlock *SplitMBB
-    = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
-
-  MF->insert(++MachineFunction::iterator(MBB), SplitMBB);
-
-  // FIXME: This is working around a MachineDominatorTree API defect.
-  //
-  // If a previous pass split a critical edge, it may not have been applied to
-  // the DomTree yet. applySplitCriticalEdges is lazily applied, and inspects
-  // the CFG of the given block. Make sure to call a dominator tree method that
-  // will flush this cache before touching the successors of the block.
-  MachineDomTreeNode *NodeMBB = nullptr;
-  if (DT)
-    NodeMBB = DT->getNode(&MBB);
-
-  // Move everything to the new block, except the end_cf pseudo.
-  SplitMBB->splice(SplitMBB->begin(), &MBB, MBB.begin(), MBB.end());
-
-  SplitMBB->transferSuccessorsAndUpdatePHIs(&MBB);
-  MBB.addSuccessor(SplitMBB, BranchProbability::getOne());
-
-  MBB.insert(MBB.end(), NewMI);
-
-  if (DT) {
-    std::vector<MachineDomTreeNode *> Children = NodeMBB->getChildren();
-    DT->addNewBlock(SplitMBB, &MBB);
-
-    // Reparent all of the children to the new block body.
-    auto *SplitNode = DT->getNode(SplitMBB);
-    for (auto *Child : Children)
-      DT->changeImmediateDominator(Child, SplitNode);
-  }
-
-  if (MLI) {
-    if (MachineLoop *Loop = MLI->getLoopFor(&MBB))
-      Loop->addBasicBlockToLoop(SplitMBB, MLI->getBase());
-  }
-
-  if (LIS) {
-    LIS->insertMBBInMaps(SplitMBB);
-    LIS->ReplaceMachineInstrInMaps(MI, *NewMI);
-  }
-
-  // All live-ins are forwarded.
-  for (auto &LiveIn : MBB.liveins())
-    SplitMBB->addLiveIn(LiveIn);
-
-  MI.eraseFromParent();
-  return SplitMBB;
-}
-
 void SILowerControlFlow::emitEndCf(MachineInstr &MI) {
   MachineBasicBlock &MBB = *MI.getParent();
   const DebugLoc &DL = MI.getDebugLoc();
 
   MachineBasicBlock::iterator InsPt = MBB.begin();
+  MachineInstr *NewMI =
+      BuildMI(MBB, InsPt, DL, TII->get(AMDGPU::S_OR_B64), AMDGPU::EXEC)
+          .addReg(AMDGPU::EXEC)
+          .add(MI.getOperand(0));
 
-  // First, move the instruction. It's unnecessarily difficult to update
-  // LiveIntervals when there's a change in control flow, so move the
-  // instruction before changing the blocks.
-  MBB.splice(InsPt, &MBB, MI.getIterator());
   if (LIS)
-    LIS->handleMove(MI);
+    LIS->ReplaceMachineInstrInMaps(MI, *NewMI);
 
-  MachineFunction *MF = MBB.getParent();
+  MI.eraseFromParent();
 
-  // Create instruction without inserting it yet.
-  MachineInstr *NewMI
-    = BuildMI(*MF, DL, TII->get(AMDGPU::S_OR_B64_term), AMDGPU::EXEC)
-    .addReg(AMDGPU::EXEC)
-    .add(MI.getOperand(0));
-  insertInstWithExecFallthrough(MBB, MI, NewMI, DT, LIS, MLI);
+  if (LIS)
+    LIS->handleMove(*NewMI);
 }
 
 // Returns replace operands for a logical operation, either single result
@@ -550,20 +470,17 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
 
   // This doesn't actually need LiveIntervals, but we can preserve them.
   LIS = getAnalysisIfAvailable<LiveIntervals>();
-  DT = getAnalysisIfAvailable<MachineDominatorTree>();
-  MLI = getAnalysisIfAvailable<MachineLoopInfo>();
-
   MRI = &MF.getRegInfo();
 
   MachineFunction::iterator NextBB;
   for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
        BI != BE; BI = NextBB) {
     NextBB = std::next(BI);
-    MachineBasicBlock *MBB = &*BI;
+    MachineBasicBlock &MBB = *BI;
 
     MachineBasicBlock::iterator I, Next, Last;
 
-    for (I = MBB->begin(), Last = MBB->end(); I != MBB->end(); I = Next) {
+    for (I = MBB.begin(), Last = MBB.end(); I != MBB.end(); I = Next) {
       Next = std::next(I);
       MachineInstr &MI = *I;
 
@@ -584,24 +501,10 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
         emitLoop(MI);
         break;
 
-      case AMDGPU::SI_END_CF: {
-        MachineInstr *NextMI = nullptr;
-
-        if (Next != MBB->end())
-          NextMI = &*Next;
-
+      case AMDGPU::SI_END_CF:
         emitEndCf(MI);
-
-        if (NextMI) {
-          MBB = NextMI->getParent();
-          Next = NextMI->getIterator();
-          Last = MBB->end();
-        }
-
-        NextBB = std::next(MBB->getIterator());
-        BE = MF.end();
         break;
-      }
+
       case AMDGPU::S_AND_B64:
       case AMDGPU::S_OR_B64:
         // Cleanup bit manipulations on exec mask
@@ -615,7 +518,7 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
       }
 
       // Replay newly inserted code to combine masks
-      Next = (Last == MBB->end()) ? MBB->begin() : Last;
+      Next = (Last == MBB.end()) ? MBB.begin() : Last;
     }
   }
 
diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
index 4ae64c5b990..c73066fe8d6 100644
--- a/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
+++ b/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
@@ -149,12 +149,6 @@ static bool removeTerminatorBit(const SIInstrInfo &TII, MachineInstr &MI) {
     MI.setDesc(TII.get(AMDGPU::S_XOR_B64));
     return true;
   }
-  case AMDGPU::S_OR_B64_term: {
-    // This is only a terminator to get the correct spill code placement during
-    // register allocation.
-    MI.setDesc(TII.get(AMDGPU::S_OR_B64));
-    return true;
-  }
   case AMDGPU::S_ANDN2_B64_term: {
     // This is only a terminator to get the correct spill code placement during
     // register allocation.
diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
index 6340615244c..b4a573a9dba 100644
--- a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
+++ b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
@@ -83,7 +83,7 @@ FunctionPass *llvm::createSIOptimizeExecMaskingPreRAPass() {
 }
 
 static bool isEndCF(const MachineInstr& MI, const SIRegisterInfo* TRI) {
-  return MI.getOpcode() == AMDGPU::S_OR_B64_term &&
+  return MI.getOpcode() == AMDGPU::S_OR_B64 &&
          MI.modifiesRegister(AMDGPU::EXEC, TRI);
 }
 
@@ -367,7 +367,7 @@ bool SIOptimizeExecMaskingPreRA::runOnMachineFunction(MachineFunction &MF) {
 
     // Try to collapse adjacent endifs.
     auto E = MBB.end();
-    auto Lead = MBB.getFirstTerminator();
+    auto Lead = skipDebugInstructionsForward(MBB.begin(), E);
     if (MBB.succ_size() != 1 || Lead == E || !isEndCF(*Lead, TRI))
       continue;
 
```

