summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--llvm/lib/Target/AMDGPU/SIInsertWaits.cpp55
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstrInfo.h8
2 files changed, 63 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp
index 85fa0a835e7..7df43eeb17e 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp
@@ -119,6 +119,18 @@ private:
/// \brief Insert S_NOP between an instruction writing M0 and S_SENDMSG.
void handleSendMsg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I);
+ /// \param DPP The DPP instruction
+ /// \param SearchI The iterator to start looking for hazards.
+ /// \param SearchMBB The basic block we are operating on.
+ /// \param WaitStates The number of wait states that need to be inserted
+ /// when a hazard is detected.
+ void insertDPPWaitStates(MachineBasicBlock::iterator DPP,
+ MachineBasicBlock::reverse_iterator SearchI,
+ MachineBasicBlock *SearchMBB,
+ unsigned WaitStates);
+
+ void insertDPPWaitStates(MachineBasicBlock::iterator DPP);
+
/// Return true if there are LGKM instructions that haven't been waited on
/// yet.
bool hasOutstandingLGKM() const;
@@ -480,6 +492,45 @@ void SIInsertWaits::handleSendMsg(MachineBasicBlock &MBB,
}
}
+void SIInsertWaits::insertDPPWaitStates(MachineBasicBlock::iterator DPP,
+ MachineBasicBlock::reverse_iterator SearchI,
+ MachineBasicBlock *SearchMBB,
+ unsigned WaitStates) {
+ // Scan backwards from SearchI through SearchMBB for an instruction that defines a register DPP reads, giving up once the WaitStates budget is used.
+ MachineBasicBlock::reverse_iterator E = SearchMBB->rend();
+ // Every instruction stepped over consumes one unit of the WaitStates budget.
+ for (; WaitStates > 0; --WaitStates, ++SearchI) {
+
+ // If we have reached the start of the block, we need to check predecessors.
+ if (SearchI == E) {
+ for (MachineBasicBlock *Pred : SearchMBB->predecessors()) {
+ // We only need to check fall-through blocks. Branch instructions
+ // give us enough wait states.
+ if (Pred->getFirstTerminator() == Pred->end()) { // Pred has no terminator instruction.
+ insertDPPWaitStates(DPP, Pred->rbegin(), Pred, WaitStates); // Continue from the end of Pred with the budget that is left.
+ break; // Only the first such predecessor is searched.
+ }
+ }
+ return; // Nothing left to scan in this block.
+ }
+ // Inspect each operand of the instruction currently under SearchI.
+ for (MachineOperand &Op : SearchI->operands()) {
+ if (!Op.isReg() || !Op.isDef()) // Only register definitions are of interest.
+ continue;
+ // Hazard: this instruction defines a register that DPP reads.
+ if (DPP->readsRegister(Op.getReg(), TRI)) {
+ TII->insertWaitStates(DPP, WaitStates); // Pass the remaining budget as the count; stop at the first hazard found.
+ return;
+ }
+ }
+ }
+}
+
+void SIInsertWaits::insertDPPWaitStates(MachineBasicBlock::iterator DPP) { // Convenience overload: starts the hazard search for the given DPP instruction.
+ MachineBasicBlock::reverse_iterator I(DPP); // Reverse iterator built from DPP; presumably designates the instruction just before DPP -- TODO confirm.
+ insertDPPWaitStates(DPP, I, DPP->getParent(), 2); // Search DPP's own block first, with a budget of 2 wait states.
+}
+
// FIXME: Insert waits listed in Table 4.2 "Required User-Inserted Wait States"
// around other non-memory instructions.
bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
@@ -546,6 +597,10 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
}
}
+ if (TII->isDPP(*I)) {
+ insertDPPWaitStates(I);
+ }
+
// Wait for everything before a barrier.
if (I->getOpcode() == AMDGPU::S_BARRIER)
Changes |= insertWait(MBB, I, LastIssued);
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index f5ff7f1b191..4b6ee47a5c1 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -301,6 +301,14 @@ public:
return get(Opcode).TSFlags & SIInstrFlags::VGPRSpill;
}
+ static bool isDPP(const MachineInstr &MI) { // Instruction-based query for the DPP flag.
+ return MI.getDesc().TSFlags & SIInstrFlags::DPP; // True when the SIInstrFlags::DPP bit is set in MI's descriptor TSFlags.
+ }
+
+ bool isDPP(uint16_t Opcode) const { // Opcode-based query for the DPP flag.
+ return get(Opcode).TSFlags & SIInstrFlags::DPP; // True when the SIInstrFlags::DPP bit is set in the TSFlags of get(Opcode).
+ }
+
bool isInlineConstant(const APInt &Imm) const;
bool isInlineConstant(const MachineOperand &MO, unsigned OpSize) const;
bool isLiteralConstant(const MachineOperand &MO, unsigned OpSize) const;
OpenPOWER on IntegriCloud