summary refs log tree commit diff stats
path: root/llvm/lib/Target/AMDGPU
diff options
context:
space:
mode:
author Matt Arsenault <Matthew.Arsenault@amd.com> 2016-07-12 21:41:32 +0000
committer Matt Arsenault <Matthew.Arsenault@amd.com> 2016-07-12 21:41:32 +0000
commit 786724a22ecff6afa9484714be8448429fdd021c (patch)
tree c672bbed3539107738cf537e5aefc08937593127 /llvm/lib/Target/AMDGPU
parent 8950ad12adfdf6f13426171643a0b56e91dd7fd1 (diff)
download bcm5719-llvm-786724a22ecff6afa9484714be8448429fdd021c.tar.gz
bcm5719-llvm-786724a22ecff6afa9484714be8448429fdd021c.zip
AMDGPU: Follow up to r275203
I meant to squash this into it. llvm-svn: 275220
Diffstat (limited to 'llvm/lib/Target/AMDGPU')
-rw-r--r-- llvm/lib/Target/AMDGPU/SIISelLowering.cpp 63
-rw-r--r-- llvm/lib/Target/AMDGPU/SIISelLowering.h 3
-rw-r--r-- llvm/lib/Target/AMDGPU/SIInstructions.td 12
-rw-r--r-- llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp 51
-rw-r--r-- llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp 5
5 files changed, 101 insertions, 33 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index d98fedbacb0..72175ea581b 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1070,9 +1070,64 @@ unsigned SITargetLowering::getRegisterByName(const char* RegName, EVT VT,
+ StringRef(RegName) + "\"."));
}
-MachineBasicBlock *
-SITargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
- MachineBasicBlock *BB) const {
+/// Make sure an SI_KILL is the last instruction of its block so that it can
+/// be treated as a proper terminator.
+///
+/// If \p MI is already the final instruction of \p BB, it is rewritten in
+/// place to SI_KILL_TERMINATOR and \p BB is returned unchanged. Otherwise all
+/// instructions after \p MI are moved into a new block that is inserted in the
+/// function layout directly after \p BB; the new block takes over every
+/// successor of \p BB, and \p BB is left with the new block as its only
+/// successor.
+///
+/// \param MI the kill pseudo instruction to turn into a terminator.
+/// \param BB the block currently containing \p MI.
+/// \returns the block holding the instructions that followed \p MI, which is
+/// \p BB itself when no split was required.
+MachineBasicBlock *SITargetLowering::splitKillBlock(MachineInstr &MI,
+                                                    MachineBasicBlock *BB) const {
+  const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
+
+  MachineBasicBlock::iterator SplitPoint(&MI);
+  ++SplitPoint;
+
+  if (SplitPoint == BB->end()) {
+    // Don't bother with a new block.
+    MI.setDesc(TII->get(AMDGPU::SI_KILL_TERMINATOR));
+    return BB;
+  }
+
+  MachineFunction *MF = BB->getParent();
+  MachineBasicBlock *SplitBB
+    = MF->CreateMachineBasicBlock(BB->getBasicBlock());
+
+  MF->insert(++MachineFunction::iterator(BB), SplitBB);
+  SplitBB->splice(SplitBB->begin(), BB, SplitPoint, BB->end());
+
+  // SplitBB inherits every outgoing edge of BB, so BB stops being a
+  // predecessor of those successors. Every phi there that named BB as the
+  // incoming block must therefore be retargeted to SplitBB, regardless of
+  // which half of the split defines the incoming value.
+  SplitBB->transferSuccessorsAndUpdatePHIs(BB);
+  BB->addSuccessor(SplitBB);
+
+  MI.setDesc(TII->get(AMDGPU::SI_KILL_TERMINATOR));
+  return SplitBB;
+}
+
+
+MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
+ MachineInstr &MI, MachineBasicBlock *BB) const {
switch (MI.getOpcode()) {
case AMDGPU::SI_INIT_M0: {
const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
@@ -1096,6 +1151,8 @@ SITargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MI.eraseFromParent();
return BB;
}
+ case AMDGPU::SI_KILL:
+ return splitKillBlock(MI, BB);
default:
return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
}
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index 6833e15e4fd..8e055eea58c 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -123,6 +123,9 @@ public:
unsigned getRegisterByName(const char* RegName, EVT VT,
SelectionDAG &DAG) const override;
+ MachineBasicBlock *splitKillBlock(MachineInstr &MI,
+ MachineBasicBlock *BB) const;
+
MachineBasicBlock *
EmitInstrWithCustomInserter(MachineInstr &MI,
MachineBasicBlock *BB) const override;
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 7cf5faa216d..858505bea3b 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -1989,8 +1989,16 @@ def SI_END_CF : PseudoInstSI <
let Uses = [EXEC], Defs = [EXEC,VCC] in {
def SI_KILL : PseudoInstSI <
(outs), (ins VSrc_32:$src),
- [(int_AMDGPU_kill f32:$src)]
->;
+ [(int_AMDGPU_kill f32:$src)]> {
+ let isConvergent = 1;
+ let usesCustomInserter = 1;
+}
+
+// Terminator form of SI_KILL: it takes the same $src operand but carries no
+// selection pattern and is marked isTerminator.
+// SITargetLowering::splitKillBlock switches an SI_KILL over to this opcode.
+def SI_KILL_TERMINATOR : PseudoInstSI <
+ (outs), (ins VSrc_32:$src)> {
+ let isTerminator = 1;
+}
+
} // End Uses = [EXEC], Defs = [EXEC,VCC]
} // End mayLoad = 1, mayStore = 1, hasSideEffects = 1
diff --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
index f989b5b9bb0..adb0919231c 100644
--- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
@@ -76,7 +76,7 @@ private:
bool shouldSkip(MachineBasicBlock *From, MachineBasicBlock *To);
void Skip(MachineInstr &From, MachineOperand &To);
- bool skipIfDead(MachineInstr &MI);
+ bool skipIfDead(MachineInstr &MI, MachineBasicBlock &NextBB);
void If(MachineInstr &MI);
void Else(MachineInstr &MI, bool ExecModified);
@@ -89,6 +89,9 @@ private:
void Kill(MachineInstr &MI);
void Branch(MachineInstr &MI);
+ MachineBasicBlock *insertSkipBlock(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
std::pair<MachineBasicBlock *, MachineBasicBlock *>
splitBlock(MachineBasicBlock &MBB, MachineBasicBlock::iterator I);
@@ -205,27 +208,22 @@ void SILowerControlFlow::Skip(MachineInstr &From, MachineOperand &To) {
.addOperand(To);
}
-bool SILowerControlFlow::skipIfDead(MachineInstr &MI) {
+bool SILowerControlFlow::skipIfDead(MachineInstr &MI, MachineBasicBlock &NextBB) {
MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction *MF = MBB.getParent();
- if (MBB.getParent()->getFunction()->getCallingConv() != CallingConv::AMDGPU_PS ||
+ if (MF->getFunction()->getCallingConv() != CallingConv::AMDGPU_PS ||
!shouldSkip(&MBB, &MBB.getParent()->back()))
return false;
- LivePhysRegs RemainderLiveRegs(TRI);
- RemainderLiveRegs.addLiveOuts(MBB);
-
- MachineBasicBlock *SkipBB;
- MachineBasicBlock *RemainderBB;
- std::tie(SkipBB, RemainderBB) = splitBlock(MBB, MI.getIterator());
+ MachineBasicBlock *SkipBB = insertSkipBlock(MBB, MI.getIterator());
+ SkipBB->addSuccessor(&NextBB);
const DebugLoc &DL = MI.getDebugLoc();
// If the exec mask is non-zero, skip the next two instructions
BuildMI(&MBB, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
- .addMBB(RemainderBB);
-
- MBB.addSuccessor(RemainderBB);
+ .addMBB(&NextBB);
MachineBasicBlock::iterator Insert = SkipBB->begin();
@@ -244,15 +242,6 @@ bool SILowerControlFlow::skipIfDead(MachineInstr &MI) {
// ... and terminate wavefront.
BuildMI(*SkipBB, Insert, DL, TII->get(AMDGPU::S_ENDPGM));
- for (const MachineInstr &Inst : reverse(*RemainderBB))
- RemainderLiveRegs.stepBackward(Inst);
-
- const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
- for (unsigned Reg : RemainderLiveRegs) {
- if (MRI.isAllocatable(Reg))
- RemainderBB->addLiveIn(Reg);
- }
-
return true;
}
@@ -495,6 +484,20 @@ void SILowerControlFlow::emitLoadM0FromVGPRLoop(MachineBasicBlock &LoopBB,
.addMBB(&LoopBB);
}
+/// Create an empty machine basic block, place it in the function layout
+/// immediately after \p MBB, and register it as a successor of \p MBB.
+///
+/// \param MBB the block the new block is placed behind.
+/// \param I part of the declared interface; not consulted by this function.
+/// \returns the newly created block.
+MachineBasicBlock *
+SILowerControlFlow::insertSkipBlock(MachineBasicBlock &MBB,
+                                    MachineBasicBlock::iterator I) const {
+  MachineFunction *ParentMF = MBB.getParent();
+  MachineBasicBlock *NewBB = ParentMF->CreateMachineBasicBlock();
+
+  // Insert at the layout position directly behind MBB.
+  ParentMF->insert(++MachineFunction::iterator(MBB), NewBB);
+
+  MBB.addSuccessor(NewBB);
+  return NewBB;
+}
+
+
std::pair<MachineBasicBlock *, MachineBasicBlock *>
SILowerControlFlow::splitBlock(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) {
@@ -745,7 +748,7 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
if (--Depth == 0 && HaveKill) {
HaveKill = false;
- if (skipIfDead(MI)) {
+ if (skipIfDead(MI, *NextBB)) {
NextBB = std::next(BI);
BE = MF.end();
Next = MBB.end();
@@ -754,9 +757,9 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
EndCf(MI);
break;
- case AMDGPU::SI_KILL:
+ case AMDGPU::SI_KILL_TERMINATOR:
if (Depth == 0) {
- if (skipIfDead(MI)) {
+ if (skipIfDead(MI, *NextBB)) {
NextBB = std::next(BI);
BE = MF.end();
Next = MBB.end();
diff --git a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
index 100fb2896dd..dafc772ea4f 100644
--- a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
+++ b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
@@ -223,7 +223,7 @@ void SIWholeQuadMode::propagateInstruction(const MachineInstr &MI,
// Control flow-type instructions that are followed by WQM computations
// must themselves be in WQM.
if ((II.OutNeeds & StateWQM) && !(II.Needs & StateWQM) &&
- (MI.isBranch() || MI.isTerminator() || MI.getOpcode() == AMDGPU::SI_KILL)) {
+ (MI.isBranch() || MI.isTerminator())) {
Instructions[&MI].Needs = StateWQM;
II.Needs = StateWQM;
}
@@ -444,9 +444,6 @@ void SIWholeQuadMode::processBlock(MachineBasicBlock &MBB, unsigned LiveMaskReg,
State = Needs;
}
-
- if (MI.getOpcode() == AMDGPU::SI_KILL)
- WQMFromExec = false;
}
if ((BI.OutNeeds & StateWQM) && State != StateWQM) {
OpenPOWER on IntegriCloud