author    Matt Arsenault <Matthew.Arsenault@amd.com>    2016-07-12 19:01:23 +0000
committer Matt Arsenault <Matthew.Arsenault@amd.com>    2016-07-12 19:01:23 +0000
commit    657f871a4e7e58781fa36fe9371483283c11b100 (patch)
tree      d4d1a45f60ce674e6614efdbbb630aaf25c393c7 /llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
parent    b9f8e292902b3fda8643f0cb06b575abccf83b46 (diff)
AMDGPU: Fix verifier error with kill intrinsic
Don't create a terminator in the middle of the block. We should probably get
rid of this intrinsic.

llvm-svn: 275203
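The core of the fix is the new splitBlock() helper in the diff below: rather than emitting S_CBRANCH_EXECNZ with an immediate skip count and dropping the EXP/S_ENDPGM terminators into the middle of the block (which the machine verifier rejects), the kill lowering now splits the block and branches over a dedicated skip block. A minimal sketch of that splitting pattern, paraphrasing the helper added by this patch (the function and block names here are illustrative, and the live-in bookkeeping the patch also performs is omitted):

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include <utility>
using namespace llvm;

// Split MBB at I: everything from I onward moves to a new "remainder" block,
// and an empty block is inserted between the two for the new code.
static std::pair<MachineBasicBlock *, MachineBasicBlock *>
splitBlockAt(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) {
  MachineFunction *MF = MBB.getParent();

  MachineBasicBlock *NewBB = MF->CreateMachineBasicBlock();
  MachineBasicBlock *RemainderBB = MF->CreateMachineBasicBlock();

  // Insert both new blocks immediately after MBB, NewBB first.
  MachineFunction::iterator MBBI(MBB);
  ++MBBI;
  MF->insert(MBBI, NewBB);
  MF->insert(MBBI, RemainderBB);

  // The remainder block inherits MBB's successors and its trailing
  // instructions; MBB then continues into NewBB.
  RemainderBB->transferSuccessors(&MBB);
  RemainderBB->splice(RemainderBB->begin(), &MBB, I, MBB.end());
  MBB.addSuccessor(NewBB);

  return std::make_pair(NewBB, RemainderBB);
}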
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp')
-rw-r--r--  llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp | 187
1 file changed, 122 insertions(+), 65 deletions(-)
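Splitting the block invalidates the remainder block's live-in information, so the patch also recomputes it: the live-out set of the original block is captured with LivePhysRegs::addLiveOuts() before the successors are transferred, then stepped backwards through the remainder block, and every allocatable register still live is added as a live-in. A minimal sketch of that bookkeeping, adapted from the hunks below (the helper name is illustrative; LiveRegs must already be seeded with the pre-split live-outs of the original block):

#include "llvm/ADT/STLExtras.h"            // llvm::reverse
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
using namespace llvm;

// Turn a live-out snapshot of the original block into live-ins for the
// remainder block produced by the split.
static void addRemainderLiveIns(LivePhysRegs &LiveRegs,
                                MachineBasicBlock &RemainderBB,
                                const MachineRegisterInfo &MRI) {
  // Walk the remainder block bottom-up: defs leave the set, uses re-enter it,
  // so afterwards the set holds exactly what is live on entry to RemainderBB.
  for (const MachineInstr &Inst : reverse(RemainderBB))
    LiveRegs.stepBackward(Inst);

  for (unsigned Reg : LiveRegs)
    if (MRI.isAllocatable(Reg))
      RemainderBB.addLiveIn(Reg);
}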
diff --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
index 3d6fc9eeba7..f989b5b9bb0 100644
--- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
@@ -76,7 +76,7 @@ private:
bool shouldSkip(MachineBasicBlock *From, MachineBasicBlock *To);
void Skip(MachineInstr &From, MachineOperand &To);
- void SkipIfDead(MachineInstr &MI);
+ bool skipIfDead(MachineInstr &MI);
void If(MachineInstr &MI);
void Else(MachineInstr &MI, bool ExecModified);
@@ -89,12 +89,16 @@ private:
void Kill(MachineInstr &MI);
void Branch(MachineInstr &MI);
- void splitBlockLiveIns(const MachineBasicBlock &MBB,
- const MachineInstr &MI,
- MachineBasicBlock &LoopBB,
- MachineBasicBlock &RemainderBB,
- unsigned SaveReg,
- const MachineOperand &IdxReg);
+ std::pair<MachineBasicBlock *, MachineBasicBlock *>
+ splitBlock(MachineBasicBlock &MBB, MachineBasicBlock::iterator I);
+
+ void splitLoadM0BlockLiveIns(LivePhysRegs &RemainderLiveRegs,
+ const MachineRegisterInfo &MRI,
+ const MachineInstr &MI,
+ MachineBasicBlock &LoopBB,
+ MachineBasicBlock &RemainderBB,
+ unsigned SaveReg,
+ const MachineOperand &IdxReg);
void emitLoadM0FromVGPRLoop(MachineBasicBlock &LoopBB, DebugLoc DL,
MachineInstr *MovRel,
@@ -171,7 +175,19 @@ bool SILowerControlFlow::shouldSkip(MachineBasicBlock *From,
I->getOpcode() == AMDGPU::S_CBRANCH_VCCZ)
return true;
- if (++NumInstr >= SkipThreshold)
+ if (I->isInlineAsm()) {
+ const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
+ const char *AsmStr = I->getOperand(0).getSymbolName();
+
+ // inlineasm length estimate is number of bytes assuming the longest
+ // instruction.
+ uint64_t MaxAsmSize = TII->getInlineAsmLength(AsmStr, *MAI);
+ NumInstr += MaxAsmSize / MAI->getMaxInstLength();
+ } else {
+ ++NumInstr;
+ }
+
+ if (NumInstr >= SkipThreshold)
return true;
}
}
@@ -189,36 +205,55 @@ void SILowerControlFlow::Skip(MachineInstr &From, MachineOperand &To) {
.addOperand(To);
}
-void SILowerControlFlow::SkipIfDead(MachineInstr &MI) {
-
+bool SILowerControlFlow::skipIfDead(MachineInstr &MI) {
MachineBasicBlock &MBB = *MI.getParent();
- DebugLoc DL = MI.getDebugLoc();
if (MBB.getParent()->getFunction()->getCallingConv() != CallingConv::AMDGPU_PS ||
!shouldSkip(&MBB, &MBB.getParent()->back()))
- return;
+ return false;
- MachineBasicBlock::iterator Insert = &MI;
- ++Insert;
+ LivePhysRegs RemainderLiveRegs(TRI);
+ RemainderLiveRegs.addLiveOuts(MBB);
+
+ MachineBasicBlock *SkipBB;
+ MachineBasicBlock *RemainderBB;
+ std::tie(SkipBB, RemainderBB) = splitBlock(MBB, MI.getIterator());
+
+ const DebugLoc &DL = MI.getDebugLoc();
// If the exec mask is non-zero, skip the next two instructions
- BuildMI(MBB, Insert, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
- .addImm(3);
+ BuildMI(&MBB, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
+ .addMBB(RemainderBB);
+
+ MBB.addSuccessor(RemainderBB);
+
+ MachineBasicBlock::iterator Insert = SkipBB->begin();
// Exec mask is zero: Export to NULL target...
- BuildMI(MBB, Insert, DL, TII->get(AMDGPU::EXP))
- .addImm(0)
- .addImm(0x09) // V_008DFC_SQ_EXP_NULL
- .addImm(0)
- .addImm(1)
- .addImm(1)
- .addReg(AMDGPU::VGPR0)
- .addReg(AMDGPU::VGPR0)
- .addReg(AMDGPU::VGPR0)
- .addReg(AMDGPU::VGPR0);
-
- // ... and terminate wavefront
- BuildMI(MBB, Insert, DL, TII->get(AMDGPU::S_ENDPGM));
+ BuildMI(*SkipBB, Insert, DL, TII->get(AMDGPU::EXP))
+ .addImm(0)
+ .addImm(0x09) // V_008DFC_SQ_EXP_NULL
+ .addImm(0)
+ .addImm(1)
+ .addImm(1)
+ .addReg(AMDGPU::VGPR0, RegState::Undef)
+ .addReg(AMDGPU::VGPR0, RegState::Undef)
+ .addReg(AMDGPU::VGPR0, RegState::Undef)
+ .addReg(AMDGPU::VGPR0, RegState::Undef);
+
+ // ... and terminate wavefront.
+ BuildMI(*SkipBB, Insert, DL, TII->get(AMDGPU::S_ENDPGM));
+
+ for (const MachineInstr &Inst : reverse(*RemainderBB))
+ RemainderLiveRegs.stepBackward(Inst);
+
+ const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ for (unsigned Reg : RemainderLiveRegs) {
+ if (MRI.isAllocatable(Reg))
+ RemainderBB->addLiveIn(Reg);
+ }
+
+ return true;
}
void SILowerControlFlow::If(MachineInstr &MI) {
@@ -386,20 +421,13 @@ void SILowerControlFlow::Kill(MachineInstr &MI) {
}
// All currently live registers must remain so in the remainder block.
-void SILowerControlFlow::splitBlockLiveIns(const MachineBasicBlock &MBB,
- const MachineInstr &MI,
- MachineBasicBlock &LoopBB,
- MachineBasicBlock &RemainderBB,
- unsigned SaveReg,
- const MachineOperand &IdxReg) {
- LivePhysRegs RemainderLiveRegs(TRI);
-
- RemainderLiveRegs.addLiveOuts(MBB);
- for (MachineBasicBlock::const_reverse_iterator I = MBB.rbegin(), E(&MI);
- I != E; ++I) {
- RemainderLiveRegs.stepBackward(*I);
- }
-
+void SILowerControlFlow::splitLoadM0BlockLiveIns(LivePhysRegs &RemainderLiveRegs,
+ const MachineRegisterInfo &MRI,
+ const MachineInstr &MI,
+ MachineBasicBlock &LoopBB,
+ MachineBasicBlock &RemainderBB,
+ unsigned SaveReg,
+ const MachineOperand &IdxReg) {
// Add reg defined in loop body.
RemainderLiveRegs.addReg(SaveReg);
@@ -410,13 +438,11 @@ void SILowerControlFlow::splitBlockLiveIns(const MachineBasicBlock &MBB,
}
}
- const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
for (unsigned Reg : RemainderLiveRegs) {
if (MRI.isAllocatable(Reg))
RemainderBB.addLiveIn(Reg);
}
-
const MachineOperand *Src = TII->getNamedOperand(MI, AMDGPU::OpName::src);
if (!Src->isUndef())
LoopBB.addLiveIn(Src->getReg());
@@ -469,6 +495,30 @@ void SILowerControlFlow::emitLoadM0FromVGPRLoop(MachineBasicBlock &LoopBB,
.addMBB(&LoopBB);
}
+std::pair<MachineBasicBlock *, MachineBasicBlock *>
+SILowerControlFlow::splitBlock(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) {
+ MachineFunction *MF = MBB.getParent();
+
+ // To insert the loop we need to split the block. Move everything after this
+ // point to a new block, and insert a new empty block between the two.
+ MachineBasicBlock *LoopBB = MF->CreateMachineBasicBlock();
+ MachineBasicBlock *RemainderBB = MF->CreateMachineBasicBlock();
+ MachineFunction::iterator MBBI(MBB);
+ ++MBBI;
+
+ MF->insert(MBBI, LoopBB);
+ MF->insert(MBBI, RemainderBB);
+
+ // Move the rest of the block into a new block.
+ RemainderBB->transferSuccessors(&MBB);
+ RemainderBB->splice(RemainderBB->begin(), &MBB, I, MBB.end());
+
+ MBB.addSuccessor(LoopBB);
+
+ return std::make_pair(LoopBB, RemainderBB);
+}
+
// Returns true if a new block was inserted.
bool SILowerControlFlow::loadM0(MachineInstr &MI, MachineInstr *MovRel, int Offset) {
MachineBasicBlock &MBB = *MI.getParent();
@@ -492,7 +542,6 @@ bool SILowerControlFlow::loadM0(MachineInstr &MI, MachineInstr *MovRel, int Offs
return false;
}
- MachineFunction &MF = *MBB.getParent();
MachineOperand *SaveOp = TII->getNamedOperand(MI, AMDGPU::OpName::sdst);
SaveOp->setIsDead(false);
unsigned Save = SaveOp->getReg();
@@ -505,25 +554,24 @@ bool SILowerControlFlow::loadM0(MachineInstr &MI, MachineInstr *MovRel, int Offs
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B64), Save)
.addReg(AMDGPU::EXEC);
- // To insert the loop we need to split the block. Move everything after this
- // point to a new block, and insert a new empty block between the two.
- MachineBasicBlock *LoopBB = MF.CreateMachineBasicBlock();
- MachineBasicBlock *RemainderBB = MF.CreateMachineBasicBlock();
- MachineFunction::iterator MBBI(MBB);
- ++MBBI;
+ LivePhysRegs RemainderLiveRegs(TRI);
- MF.insert(MBBI, LoopBB);
- MF.insert(MBBI, RemainderBB);
+ RemainderLiveRegs.addLiveOuts(MBB);
- LoopBB->addSuccessor(LoopBB);
- LoopBB->addSuccessor(RemainderBB);
+ MachineBasicBlock *LoopBB;
+ MachineBasicBlock *RemainderBB;
- splitBlockLiveIns(MBB, MI, *LoopBB, *RemainderBB, Save, *Idx);
+ std::tie(LoopBB, RemainderBB) = splitBlock(MBB, I);
- // Move the rest of the block into a new block.
- RemainderBB->transferSuccessors(&MBB);
- RemainderBB->splice(RemainderBB->begin(), &MBB, I, MBB.end());
- MBB.addSuccessor(LoopBB);
+ for (const MachineInstr &Inst : reverse(*RemainderBB))
+ RemainderLiveRegs.stepBackward(Inst);
+
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ LoopBB->addSuccessor(RemainderBB);
+ LoopBB->addSuccessor(LoopBB);
+
+ splitLoadM0BlockLiveIns(RemainderLiveRegs, MRI, MI, *LoopBB,
+ *RemainderBB, Save, *Idx);
emitLoadM0FromVGPRLoop(*LoopBB, DL, MovRel, *Idx, Offset);
@@ -695,16 +743,25 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
case AMDGPU::SI_END_CF:
if (--Depth == 0 && HaveKill) {
- SkipIfDead(MI);
HaveKill = false;
+
+ if (skipIfDead(MI)) {
+ NextBB = std::next(BI);
+ BE = MF.end();
+ Next = MBB.end();
+ }
}
EndCf(MI);
break;
case AMDGPU::SI_KILL:
- if (Depth == 0)
- SkipIfDead(MI);
- else
+ if (Depth == 0) {
+ if (skipIfDead(MI)) {
+ NextBB = std::next(BI);
+ BE = MF.end();
+ Next = MBB.end();
+ }
+ } else
HaveKill = true;
Kill(MI);
break;