diff options
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInsertWaits.cpp | 55 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.h | 8 |
2 files changed, 63 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp index 85fa0a835e7..7df43eeb17e 100644 --- a/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp @@ -119,6 +119,18 @@ private: /// \brief Insert S_NOP between an instruction writing M0 and S_SENDMSG. void handleSendMsg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I); + /// \param DPP The DPP instruction being checked for hazards. + /// \param SearchI The iterator at which to start looking for hazards. + /// \param SearchMBB The basic block we are operating on. + /// \param WaitStates The number of wait states that need to be inserted + /// when a hazard is detected. + void insertDPPWaitStates(MachineBasicBlock::iterator DPP, + MachineBasicBlock::reverse_iterator SearchI, + MachineBasicBlock *SearchMBB, + unsigned WaitStates); + + void insertDPPWaitStates(MachineBasicBlock::iterator DPP); + /// Return true if there are LGKM instrucitons that haven't been waited on /// yet. bool hasOutstandingLGKM() const; @@ -480,6 +492,45 @@ void SIInsertWaits::handleSendMsg(MachineBasicBlock &MBB, } } +void SIInsertWaits::insertDPPWaitStates(MachineBasicBlock::iterator DPP, + MachineBasicBlock::reverse_iterator SearchI, + MachineBasicBlock *SearchMBB, + unsigned WaitStates) { + + MachineBasicBlock::reverse_iterator E = SearchMBB->rend(); + + for (; WaitStates > 0; --WaitStates, ++SearchI) { + + // If we have reached the start of the block, we need to check predecessors. + if (SearchI == E) { + for (MachineBasicBlock *Pred : SearchMBB->predecessors()) { + // We only need to check fall-through blocks. Branch instructions + // give us enough wait states. 
+ if (Pred->getFirstTerminator() == Pred->end()) { + insertDPPWaitStates(DPP, Pred->rbegin(), Pred, WaitStates); + break; + } + } + return; + } + + for (MachineOperand &Op : SearchI->operands()) { + if (!Op.isReg() || !Op.isDef()) + continue; + + if (DPP->readsRegister(Op.getReg(), TRI)) { + TII->insertWaitStates(DPP, WaitStates); + return; + } + } + } +} + +void SIInsertWaits::insertDPPWaitStates(MachineBasicBlock::iterator DPP) { + MachineBasicBlock::reverse_iterator I(DPP); + insertDPPWaitStates(DPP, I, DPP->getParent(), 2); +} + // FIXME: Insert waits listed in Table 4.2 "Required User-Inserted Wait States" // around other non-memory instructions. bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) { @@ -546,6 +597,10 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) { } } + if (TII->isDPP(*I)) { + insertDPPWaitStates(I); + } + // Wait for everything before a barrier. if (I->getOpcode() == AMDGPU::S_BARRIER) Changes |= insertWait(MBB, I, LastIssued); diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index f5ff7f1b191..4b6ee47a5c1 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -301,6 +301,14 @@ public: return get(Opcode).TSFlags & SIInstrFlags::VGPRSpill; } + static bool isDPP(const MachineInstr &MI) { + return MI.getDesc().TSFlags & SIInstrFlags::DPP; + } + + bool isDPP(uint16_t Opcode) const { + return get(Opcode).TSFlags & SIInstrFlags::DPP; + } + bool isInlineConstant(const APInt &Imm) const; bool isInlineConstant(const MachineOperand &MO, unsigned OpSize) const; bool isLiteralConstant(const MachineOperand &MO, unsigned OpSize) const; |