diff options
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 26 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h | 1 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInsertWaits.cpp | 55 |
3 files changed, 27 insertions, 55 deletions
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp index 725727979e9..58a9a286e30 100644 --- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp +++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp @@ -47,6 +47,9 @@ GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { if (SIInstrInfo::isVMEM(*MI) && checkVMEMHazards(MI) > 0) return NoopHazard; + if (SIInstrInfo::isDPP(*MI) && checkDPPHazards(MI) > 0) + return NoopHazard; + return NoHazard; } @@ -61,6 +64,9 @@ unsigned GCNHazardRecognizer::PreEmitNoops(MachineInstr *MI) { if (SIInstrInfo::isVMEM(*MI)) return std::max(0, checkVMEMHazards(MI)); + if (SIInstrInfo::isDPP(*MI)) + return std::max(0, checkDPPHazards(MI)); + return 0; } @@ -175,3 +181,23 @@ int GCNHazardRecognizer::checkVMEMHazards(MachineInstr* VMEM) { } return WaitStatesNeeded; } + +int GCNHazardRecognizer::checkDPPHazards(MachineInstr *DPP) { + const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>(); + const SIRegisterInfo *TRI = + static_cast<const SIRegisterInfo*>(ST.getRegisterInfo()); + + // Check for DPP VGPR read after VALU VGPR write. + int DppVgprWaitStates = 2; + int WaitStatesNeeded = 0; + + for (const MachineOperand &Use : DPP->uses()) { + if (!Use.isReg() || !TRI->isVGPR(MF.getRegInfo(), Use.getReg())) + continue; + int WaitStatesNeededForUse = + DppVgprWaitStates - getWaitStatesSinceDef(Use.getReg()); + WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse); + } + + return WaitStatesNeeded; +} diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h index e75c35032ff..4ab2480acf3 100644 --- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h +++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h @@ -40,6 +40,7 @@ class GCNHazardRecognizer final : public ScheduleHazardRecognizer { int checkSMRDHazards(MachineInstr *SMRD); int checkVMEMHazards(MachineInstr* VMEM); + int checkDPPHazards(MachineInstr *DPP); public: GCNHazardRecognizer(const MachineFunction &MF); // We can only issue one instruction per cycle. diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp index ead4c9ebf18..75adb2b767f 100644 --- a/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp @@ -127,18 +127,6 @@ private: /// \brief Insert S_NOP between an instruction writing M0 and S_SENDMSG. void handleSendMsg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I); - /// \param DPP The DPP instruction - /// \param SearchI The iterator to start look for hazards. - /// \param SearchMBB The basic block we are operating on. - /// \param WaitStates Then number of wait states that need to be inserted - /// When a hazard is detected. - void insertDPPWaitStates(MachineBasicBlock::iterator DPP, - MachineBasicBlock::reverse_iterator SearchI, - MachineBasicBlock *SearchMBB, - unsigned WaitStates); - - void insertDPPWaitStates(MachineBasicBlock::iterator DPP); - /// Return true if there are LGKM instrucitons that haven't been waited on /// yet. bool hasOutstandingLGKM() const; @@ -522,45 +510,6 @@ void SIInsertWaits::handleSendMsg(MachineBasicBlock &MBB, } } -void SIInsertWaits::insertDPPWaitStates(MachineBasicBlock::iterator DPP, - MachineBasicBlock::reverse_iterator SearchI, - MachineBasicBlock *SearchMBB, - unsigned WaitStates) { - - MachineBasicBlock::reverse_iterator E = SearchMBB->rend(); - - for (; WaitStates > 0; --WaitStates, ++SearchI) { - - // If we have reached the start of the block, we need to check predecessors. - if (SearchI == E) { - for (MachineBasicBlock *Pred : SearchMBB->predecessors()) { - // We only need to check fall-through blocks. Branch instructions - // give us enough wait states. - if (Pred->getFirstTerminator() == Pred->end()) { - insertDPPWaitStates(DPP, Pred->rbegin(), Pred, WaitStates); - break; - } - } - return; - } - - for (MachineOperand &Op : SearchI->operands()) { - if (!Op.isReg() || !Op.isDef()) - continue; - - if (DPP->readsRegister(Op.getReg(), TRI)) { - TII->insertWaitStates(*DPP->getParent(), DPP, WaitStates); - return; - } - } - } -} - -void SIInsertWaits::insertDPPWaitStates(MachineBasicBlock::iterator DPP) { - MachineBasicBlock::reverse_iterator I(DPP); - insertDPPWaitStates(DPP, I, DPP->getParent(), 2); -} - // FIXME: Insert waits listed in Table 4.2 "Required User-Inserted Wait States" // around other non-memory instructions. bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) { @@ -630,10 +579,6 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) { } } - if (TII->isDPP(*I)) { - insertDPPWaitStates(I); - } - // Record pre-existing, explicitly requested waits if (I->getOpcode() == AMDGPU::S_WAITCNT) { handleExistingWait(*I); |