diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-07-12 19:01:23 +0000 |
---|---|---|
committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-07-12 19:01:23 +0000 |
commit | 657f871a4e7e58781fa36fe9371483283c11b100 (patch) | |
tree | d4d1a45f60ce674e6614efdbbb630aaf25c393c7 /llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp | |
parent | b9f8e292902b3fda8643f0cb06b575abccf83b46 (diff) | |
download | bcm5719-llvm-657f871a4e7e58781fa36fe9371483283c11b100.tar.gz bcm5719-llvm-657f871a4e7e58781fa36fe9371483283c11b100.zip |
AMDGPU: Fix verifier error with kill intrinsic
Don't create a terminator in the middle of the block.
We should probably get rid of this intrinsic.
llvm-svn: 275203
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp | 187 |
1 files changed, 122 insertions, 65 deletions
diff --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp index 3d6fc9eeba7..f989b5b9bb0 100644 --- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp +++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp @@ -76,7 +76,7 @@ private: bool shouldSkip(MachineBasicBlock *From, MachineBasicBlock *To); void Skip(MachineInstr &From, MachineOperand &To); - void SkipIfDead(MachineInstr &MI); + bool skipIfDead(MachineInstr &MI); void If(MachineInstr &MI); void Else(MachineInstr &MI, bool ExecModified); @@ -89,12 +89,16 @@ private: void Kill(MachineInstr &MI); void Branch(MachineInstr &MI); - void splitBlockLiveIns(const MachineBasicBlock &MBB, - const MachineInstr &MI, - MachineBasicBlock &LoopBB, - MachineBasicBlock &RemainderBB, - unsigned SaveReg, - const MachineOperand &IdxReg); + std::pair<MachineBasicBlock *, MachineBasicBlock *> + splitBlock(MachineBasicBlock &MBB, MachineBasicBlock::iterator I); + + void splitLoadM0BlockLiveIns(LivePhysRegs &RemainderLiveRegs, + const MachineRegisterInfo &MRI, + const MachineInstr &MI, + MachineBasicBlock &LoopBB, + MachineBasicBlock &RemainderBB, + unsigned SaveReg, + const MachineOperand &IdxReg); void emitLoadM0FromVGPRLoop(MachineBasicBlock &LoopBB, DebugLoc DL, MachineInstr *MovRel, @@ -171,7 +175,19 @@ bool SILowerControlFlow::shouldSkip(MachineBasicBlock *From, I->getOpcode() == AMDGPU::S_CBRANCH_VCCZ) return true; - if (++NumInstr >= SkipThreshold) + if (I->isInlineAsm()) { + const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo(); + const char *AsmStr = I->getOperand(0).getSymbolName(); + + // inlineasm length estimate is number of bytes assuming the longest + // instruction. + uint64_t MaxAsmSize = TII->getInlineAsmLength(AsmStr, *MAI); + NumInstr += MaxAsmSize / MAI->getMaxInstLength(); + } else { + ++NumInstr; + } + + if (NumInstr >= SkipThreshold) return true; } } @@ -189,36 +205,55 @@ void SILowerControlFlow::Skip(MachineInstr &From, MachineOperand &To) { .addOperand(To); } -void SILowerControlFlow::SkipIfDead(MachineInstr &MI) { - +bool SILowerControlFlow::skipIfDead(MachineInstr &MI) { MachineBasicBlock &MBB = *MI.getParent(); - DebugLoc DL = MI.getDebugLoc(); if (MBB.getParent()->getFunction()->getCallingConv() != CallingConv::AMDGPU_PS || !shouldSkip(&MBB, &MBB.getParent()->back())) - return; + return false; - MachineBasicBlock::iterator Insert = &MI; - ++Insert; + LivePhysRegs RemainderLiveRegs(TRI); + RemainderLiveRegs.addLiveOuts(MBB); + + MachineBasicBlock *SkipBB; + MachineBasicBlock *RemainderBB; + std::tie(SkipBB, RemainderBB) = splitBlock(MBB, MI.getIterator()); + + const DebugLoc &DL = MI.getDebugLoc(); // If the exec mask is non-zero, skip the next two instructions - BuildMI(MBB, Insert, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ)) - .addImm(3); + BuildMI(&MBB, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ)) + .addMBB(RemainderBB); + + MBB.addSuccessor(RemainderBB); + + MachineBasicBlock::iterator Insert = SkipBB->begin(); // Exec mask is zero: Export to NULL target... - BuildMI(MBB, Insert, DL, TII->get(AMDGPU::EXP)) - .addImm(0) - .addImm(0x09) // V_008DFC_SQ_EXP_NULL - .addImm(0) - .addImm(1) - .addImm(1) - .addReg(AMDGPU::VGPR0) - .addReg(AMDGPU::VGPR0) - .addReg(AMDGPU::VGPR0) - .addReg(AMDGPU::VGPR0); - - // ... and terminate wavefront - BuildMI(MBB, Insert, DL, TII->get(AMDGPU::S_ENDPGM)); + BuildMI(*SkipBB, Insert, DL, TII->get(AMDGPU::EXP)) + .addImm(0) + .addImm(0x09) // V_008DFC_SQ_EXP_NULL + .addImm(0) + .addImm(1) + .addImm(1) + .addReg(AMDGPU::VGPR0, RegState::Undef) + .addReg(AMDGPU::VGPR0, RegState::Undef) + .addReg(AMDGPU::VGPR0, RegState::Undef) + .addReg(AMDGPU::VGPR0, RegState::Undef); + + // ... and terminate wavefront. + BuildMI(*SkipBB, Insert, DL, TII->get(AMDGPU::S_ENDPGM)); + + for (const MachineInstr &Inst : reverse(*RemainderBB)) + RemainderLiveRegs.stepBackward(Inst); + + const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); + for (unsigned Reg : RemainderLiveRegs) { + if (MRI.isAllocatable(Reg)) + RemainderBB->addLiveIn(Reg); + } + + return true; } void SILowerControlFlow::If(MachineInstr &MI) { @@ -386,20 +421,13 @@ void SILowerControlFlow::Kill(MachineInstr &MI) { } // All currently live registers must remain so in the remainder block. -void SILowerControlFlow::splitBlockLiveIns(const MachineBasicBlock &MBB, - const MachineInstr &MI, - MachineBasicBlock &LoopBB, - MachineBasicBlock &RemainderBB, - unsigned SaveReg, - const MachineOperand &IdxReg) { - LivePhysRegs RemainderLiveRegs(TRI); - - RemainderLiveRegs.addLiveOuts(MBB); - for (MachineBasicBlock::const_reverse_iterator I = MBB.rbegin(), E(&MI); - I != E; ++I) { - RemainderLiveRegs.stepBackward(*I); - } - +void SILowerControlFlow::splitLoadM0BlockLiveIns(LivePhysRegs &RemainderLiveRegs, + const MachineRegisterInfo &MRI, + const MachineInstr &MI, + MachineBasicBlock &LoopBB, + MachineBasicBlock &RemainderBB, + unsigned SaveReg, + const MachineOperand &IdxReg) { // Add reg defined in loop body. RemainderLiveRegs.addReg(SaveReg); @@ -410,13 +438,11 @@ void SILowerControlFlow::splitBlockLiveIns(const MachineBasicBlock &MBB, } } - const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); for (unsigned Reg : RemainderLiveRegs) { if (MRI.isAllocatable(Reg)) RemainderBB.addLiveIn(Reg); } - const MachineOperand *Src = TII->getNamedOperand(MI, AMDGPU::OpName::src); if (!Src->isUndef()) LoopBB.addLiveIn(Src->getReg()); @@ -469,6 +495,30 @@ void SILowerControlFlow::emitLoadM0FromVGPRLoop(MachineBasicBlock &LoopBB, .addMBB(&LoopBB); } +std::pair<MachineBasicBlock *, MachineBasicBlock *> +SILowerControlFlow::splitBlock(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) { + MachineFunction *MF = MBB.getParent(); + + // To insert the loop we need to split the block. Move everything after this + // point to a new block, and insert a new empty block between the two. + MachineBasicBlock *LoopBB = MF->CreateMachineBasicBlock(); + MachineBasicBlock *RemainderBB = MF->CreateMachineBasicBlock(); + MachineFunction::iterator MBBI(MBB); + ++MBBI; + + MF->insert(MBBI, LoopBB); + MF->insert(MBBI, RemainderBB); + + // Move the rest of the block into a new block. + RemainderBB->transferSuccessors(&MBB); + RemainderBB->splice(RemainderBB->begin(), &MBB, I, MBB.end()); + + MBB.addSuccessor(LoopBB); + + return std::make_pair(LoopBB, RemainderBB); +} + // Returns true if a new block was inserted. bool SILowerControlFlow::loadM0(MachineInstr &MI, MachineInstr *MovRel, int Offset) { MachineBasicBlock &MBB = *MI.getParent(); @@ -492,7 +542,6 @@ bool SILowerControlFlow::loadM0(MachineInstr &MI, MachineInstr *MovRel, int Offs return false; } - MachineFunction &MF = *MBB.getParent(); MachineOperand *SaveOp = TII->getNamedOperand(MI, AMDGPU::OpName::sdst); SaveOp->setIsDead(false); unsigned Save = SaveOp->getReg(); @@ -505,25 +554,24 @@ bool SILowerControlFlow::loadM0(MachineInstr &MI, MachineInstr *MovRel, int Offs BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B64), Save) .addReg(AMDGPU::EXEC); - // To insert the loop we need to split the block. Move everything after this - // point to a new block, and insert a new empty block between the two. - MachineBasicBlock *LoopBB = MF.CreateMachineBasicBlock(); - MachineBasicBlock *RemainderBB = MF.CreateMachineBasicBlock(); - MachineFunction::iterator MBBI(MBB); - ++MBBI; + LivePhysRegs RemainderLiveRegs(TRI); - MF.insert(MBBI, LoopBB); - MF.insert(MBBI, RemainderBB); + RemainderLiveRegs.addLiveOuts(MBB); - LoopBB->addSuccessor(LoopBB); - LoopBB->addSuccessor(RemainderBB); + MachineBasicBlock *LoopBB; + MachineBasicBlock *RemainderBB; - splitBlockLiveIns(MBB, MI, *LoopBB, *RemainderBB, Save, *Idx); + std::tie(LoopBB, RemainderBB) = splitBlock(MBB, I); - // Move the rest of the block into a new block. - RemainderBB->transferSuccessors(&MBB); - RemainderBB->splice(RemainderBB->begin(), &MBB, I, MBB.end()); - MBB.addSuccessor(LoopBB); + for (const MachineInstr &Inst : reverse(*RemainderBB)) + RemainderLiveRegs.stepBackward(Inst); + + MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); + LoopBB->addSuccessor(RemainderBB); + LoopBB->addSuccessor(LoopBB); + + splitLoadM0BlockLiveIns(RemainderLiveRegs, MRI, MI, *LoopBB, + *RemainderBB, Save, *Idx); emitLoadM0FromVGPRLoop(*LoopBB, DL, MovRel, *Idx, Offset); @@ -695,16 +743,25 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) { case AMDGPU::SI_END_CF: if (--Depth == 0 && HaveKill) { - SkipIfDead(MI); HaveKill = false; + + if (skipIfDead(MI)) { + NextBB = std::next(BI); + BE = MF.end(); + Next = MBB.end(); + } } EndCf(MI); break; case AMDGPU::SI_KILL: - if (Depth == 0) - SkipIfDead(MI); - else + if (Depth == 0) { + if (skipIfDead(MI)) { + NextBB = std::next(BI); + BE = MF.end(); + Next = MBB.end(); + } + } else HaveKill = true; Kill(MI); break; |