diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-07-12 21:41:32 +0000 |
---|---|---|
committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-07-12 21:41:32 +0000 |
commit | 786724a22ecff6afa9484714be8448429fdd021c (patch) | |
tree | c672bbed3539107738cf537e5aefc08937593127 /llvm/lib/Target/AMDGPU | |
parent | 8950ad12adfdf6f13426171643a0b56e91dd7fd1 (diff) | |
download | bcm5719-llvm-786724a22ecff6afa9484714be8448429fdd021c.tar.gz bcm5719-llvm-786724a22ecff6afa9484714be8448429fdd021c.zip |
AMDGPU: Follow up to r275203
I meant to squash this into it.
llvm-svn: 275220
Diffstat (limited to 'llvm/lib/Target/AMDGPU')
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 63 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.h | 3 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstructions.td | 12 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp | 51 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp | 5 |
5 files changed, 101 insertions, 33 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index d98fedbacb0..72175ea581b 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -1070,9 +1070,64 @@ unsigned SITargetLowering::getRegisterByName(const char* RegName, EVT VT, + StringRef(RegName) + "\".")); } -MachineBasicBlock * -SITargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, - MachineBasicBlock *BB) const { +// If kill is not the last instruction, split the block so kill is always a +// proper terminator. +MachineBasicBlock *SITargetLowering::splitKillBlock(MachineInstr &MI, + MachineBasicBlock *BB) const { + const SIInstrInfo *TII = getSubtarget()->getInstrInfo(); + + MachineBasicBlock::iterator SplitPoint(&MI); + ++SplitPoint; + + if (SplitPoint == BB->end()) { + // Don't bother with a new block. + MI.setDesc(TII->get(AMDGPU::SI_KILL_TERMINATOR)); + return BB; + } + + MachineFunction *MF = BB->getParent(); + MachineBasicBlock *SplitBB + = MF->CreateMachineBasicBlock(BB->getBasicBlock()); + + SmallSet<unsigned, 8> SplitDefRegs; + for (auto I = SplitPoint, E = BB->end(); I != E; ++I) { + for (MachineOperand &Def : I->defs()) + SplitDefRegs.insert(Def.getReg()); + } + + // Fix the block phi references to point to the new block for the defs in the + // second piece of the block. + for (MachineBasicBlock *Succ : BB->successors()) { + for (MachineInstr &MI : *Succ) { + if (!MI.isPHI()) + break; + + for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) { + unsigned IncomingReg = MI.getOperand(I).getReg(); + MachineOperand &FromBB = MI.getOperand(I + 1); + if (BB == FromBB.getMBB()) { + if (SplitDefRegs.count(IncomingReg)) + FromBB.setMBB(SplitBB); + + break; + } + } + } + } + + MF->insert(++MachineFunction::iterator(BB), SplitBB); + SplitBB->splice(SplitBB->begin(), BB, SplitPoint, BB->end()); + + + SplitBB->transferSuccessors(BB); + BB->addSuccessor(SplitBB); + + MI.setDesc(TII->get(AMDGPU::SI_KILL_TERMINATOR)); + return SplitBB; +} + +MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter( + MachineInstr &MI, MachineBasicBlock *BB) const { switch (MI.getOpcode()) { case AMDGPU::SI_INIT_M0: { const SIInstrInfo *TII = getSubtarget()->getInstrInfo(); @@ -1096,6 +1151,8 @@ SITargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, MI.eraseFromParent(); return BB; } + case AMDGPU::SI_KILL: + return splitKillBlock(MI, BB); default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB); } diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h index 6833e15e4fd..8e055eea58c 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.h +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h @@ -123,6 +123,9 @@ public: unsigned getRegisterByName(const char* RegName, EVT VT, SelectionDAG &DAG) const override; + MachineBasicBlock *splitKillBlock(MachineInstr &MI, + MachineBasicBlock *BB) const; + MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override; diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index 7cf5faa216d..858505bea3b 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -1989,8 +1989,16 @@ def SI_END_CF : PseudoInstSI < let Uses = [EXEC], Defs = [EXEC,VCC] in { def SI_KILL : PseudoInstSI < (outs), (ins VSrc_32:$src), - [(int_AMDGPU_kill f32:$src)] ->; + [(int_AMDGPU_kill f32:$src)]> { + let isConvergent = 1; + let usesCustomInserter = 1; +} + +def SI_KILL_TERMINATOR : PseudoInstSI < + (outs), (ins VSrc_32:$src)> { + let isTerminator = 1; +} + } // End Uses = [EXEC], Defs = [EXEC,VCC] } // End mayLoad = 1, mayStore = 1, hasSideEffects = 1 diff --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp index f989b5b9bb0..adb0919231c 100644 --- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp +++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp @@ -76,7 +76,7 @@ private: bool shouldSkip(MachineBasicBlock *From, MachineBasicBlock *To); void Skip(MachineInstr &From, MachineOperand &To); - bool skipIfDead(MachineInstr &MI); + bool skipIfDead(MachineInstr &MI, MachineBasicBlock &NextBB); void If(MachineInstr &MI); void Else(MachineInstr &MI, bool ExecModified); @@ -89,6 +89,9 @@ private: void Kill(MachineInstr &MI); void Branch(MachineInstr &MI); + MachineBasicBlock *insertSkipBlock(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const; + std::pair<MachineBasicBlock *, MachineBasicBlock *> splitBlock(MachineBasicBlock &MBB, MachineBasicBlock::iterator I); @@ -205,27 +208,22 @@ void SILowerControlFlow::Skip(MachineInstr &From, MachineOperand &To) { .addOperand(To); } -bool SILowerControlFlow::skipIfDead(MachineInstr &MI) { +bool SILowerControlFlow::skipIfDead(MachineInstr &MI, MachineBasicBlock &NextBB) { MachineBasicBlock &MBB = *MI.getParent(); + MachineFunction *MF = MBB.getParent(); - if (MBB.getParent()->getFunction()->getCallingConv() != CallingConv::AMDGPU_PS || + if (MF->getFunction()->getCallingConv() != CallingConv::AMDGPU_PS || !shouldSkip(&MBB, &MBB.getParent()->back())) return false; - LivePhysRegs RemainderLiveRegs(TRI); - RemainderLiveRegs.addLiveOuts(MBB); - - MachineBasicBlock *SkipBB; - MachineBasicBlock *RemainderBB; - std::tie(SkipBB, RemainderBB) = splitBlock(MBB, MI.getIterator()); + MachineBasicBlock *SkipBB = insertSkipBlock(MBB, MI.getIterator()); + SkipBB->addSuccessor(&NextBB); const DebugLoc &DL = MI.getDebugLoc(); // If the exec mask is non-zero, skip the next two instructions BuildMI(&MBB, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ)) - .addMBB(RemainderBB); - - MBB.addSuccessor(RemainderBB); + .addMBB(&NextBB); MachineBasicBlock::iterator Insert = SkipBB->begin(); @@ -244,15 +242,6 @@ bool SILowerControlFlow::skipIfDead(MachineInstr &MI) { // ... and terminate wavefront. BuildMI(*SkipBB, Insert, DL, TII->get(AMDGPU::S_ENDPGM)); - for (const MachineInstr &Inst : reverse(*RemainderBB)) - RemainderLiveRegs.stepBackward(Inst); - - const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); - for (unsigned Reg : RemainderLiveRegs) { - if (MRI.isAllocatable(Reg)) - RemainderBB->addLiveIn(Reg); - } - return true; } @@ -495,6 +484,20 @@ void SILowerControlFlow::emitLoadM0FromVGPRLoop(MachineBasicBlock &LoopBB, .addMBB(&LoopBB); } +MachineBasicBlock *SILowerControlFlow::insertSkipBlock( + MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { + MachineFunction *MF = MBB.getParent(); + + MachineBasicBlock *SkipBB = MF->CreateMachineBasicBlock(); + MachineFunction::iterator MBBI(MBB); + ++MBBI; + + MF->insert(MBBI, SkipBB); + MBB.addSuccessor(SkipBB); + + return SkipBB; +} + std::pair<MachineBasicBlock *, MachineBasicBlock *> SILowerControlFlow::splitBlock(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) { @@ -745,7 +748,7 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) { if (--Depth == 0 && HaveKill) { HaveKill = false; - if (skipIfDead(MI)) { + if (skipIfDead(MI, *NextBB)) { NextBB = std::next(BI); BE = MF.end(); Next = MBB.end(); @@ -754,9 +757,9 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) { EndCf(MI); break; - case AMDGPU::SI_KILL: + case AMDGPU::SI_KILL_TERMINATOR: if (Depth == 0) { - if (skipIfDead(MI)) { + if (skipIfDead(MI, *NextBB)) { NextBB = std::next(BI); BE = MF.end(); Next = MBB.end(); diff --git a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp index 100fb2896dd..dafc772ea4f 100644 --- a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp +++ b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp @@ -223,7 +223,7 @@ void SIWholeQuadMode::propagateInstruction(const MachineInstr &MI, // Control flow-type instructions that are followed by WQM computations // must themselves be in WQM. if ((II.OutNeeds & StateWQM) && !(II.Needs & StateWQM) && - (MI.isBranch() || MI.isTerminator() || MI.getOpcode() == AMDGPU::SI_KILL)) { + (MI.isBranch() || MI.isTerminator())) { Instructions[&MI].Needs = StateWQM; II.Needs = StateWQM; } @@ -444,9 +444,6 @@ void SIWholeQuadMode::processBlock(MachineBasicBlock &MBB, unsigned LiveMaskReg, State = Needs; } - - if (MI.getOpcode() == AMDGPU::SI_KILL) - WQMFromExec = false; } if ((BI.OutNeeds & StateWQM) && State != StateWQM) { |