diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h | 4 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/BUFInstructions.td | 1 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 127 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h | 3 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 8 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 3 |
6 files changed, 124 insertions, 22 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h index 7cfdb5cfb73..a0d69a999ed 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -556,6 +556,10 @@ public: return SGPRInitBug; } + bool has12DWordStoreHazard() const { + return getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS; + } + unsigned getKernArgSegmentSize(unsigned ExplictArgBytes) const; /// Return the maximum number of waves per SIMD for kernels using \p SGPRs SGPRs diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td index 3a9de0df22c..c8df1b3d2d1 100644 --- a/llvm/lib/Target/AMDGPU/BUFInstructions.td +++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td @@ -1164,6 +1164,7 @@ defm BUFFER_ATOMIC_OR_X2 : MUBUF_Real_Atomic_si <0x5a>; defm BUFFER_ATOMIC_XOR_X2 : MUBUF_Real_Atomic_si <0x5b>; defm BUFFER_ATOMIC_INC_X2 : MUBUF_Real_Atomic_si <0x5c>; defm BUFFER_ATOMIC_DEC_X2 : MUBUF_Real_Atomic_si <0x5d>; +// FIXME: Need to handle hazard for BUFFER_ATOMIC_FCMPSWAP_X2 on CI. 
//defm BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_Real_Atomic_si <0x5e">; // isn't on VI //defm BUFFER_ATOMIC_FMIN_X2 : MUBUF_Real_Atomic_si <0x5f>; // isn't on VI //defm BUFFER_ATOMIC_FMAX_X2 : MUBUF_Real_Atomic_si <0x60>; // isn't on VI diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp index 100ea7e9a2d..7086e86f6a8 100644 --- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp +++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp @@ -67,6 +67,9 @@ GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { if (SIInstrInfo::isVMEM(*MI) && checkVMEMHazards(MI) > 0) return NoopHazard; + if (SIInstrInfo::isVALU(*MI) && checkVALUHazards(MI) > 0) + return NoopHazard; + if (SIInstrInfo::isDPP(*MI) && checkDPPHazards(MI) > 0) return NoopHazard; @@ -90,14 +93,20 @@ unsigned GCNHazardRecognizer::PreEmitNoops(MachineInstr *MI) { if (SIInstrInfo::isSMRD(*MI)) return std::max(0, checkSMRDHazards(MI)); - if (SIInstrInfo::isVMEM(*MI)) - return std::max(0, checkVMEMHazards(MI)); + if (SIInstrInfo::isVALU(*MI)) { + int WaitStates = std::max(0, checkVALUHazards(MI)); - if (SIInstrInfo::isDPP(*MI)) - return std::max(0, checkDPPHazards(MI)); + if (SIInstrInfo::isVMEM(*MI)) + WaitStates = std::max(WaitStates, checkVMEMHazards(MI)); - if (isDivFMas(MI->getOpcode())) - return std::max(0, checkDivFMasHazards(MI)); + if (SIInstrInfo::isDPP(*MI)) + WaitStates = std::max(WaitStates, checkDPPHazards(MI)); + + if (isDivFMas(MI->getOpcode())) + WaitStates = std::max(WaitStates, checkDivFMasHazards(MI)); + + return WaitStates; + } if (isSGetReg(MI->getOpcode())) return std::max(0, checkGetRegHazards(MI)); @@ -149,32 +158,38 @@ void GCNHazardRecognizer::RecedeCycle() { // Helper Functions //===----------------------------------------------------------------------===// -int GCNHazardRecognizer::getWaitStatesSinceDef( - unsigned Reg, function_ref<bool(MachineInstr *)> IsHazardDef) { - const SIRegisterInfo *TRI = ST.getRegisterInfo(); +int 
GCNHazardRecognizer::getWaitStatesSince( + function_ref<bool(MachineInstr *)> IsHazard) { int WaitStates = -1; for (MachineInstr *MI : EmittedInstrs) { ++WaitStates; - if (!MI || !IsHazardDef(MI)) + if (!MI || !IsHazard(MI)) continue; - if (MI->modifiesRegister(Reg, TRI)) - return WaitStates; + return WaitStates; } return std::numeric_limits<int>::max(); } +int GCNHazardRecognizer::getWaitStatesSinceDef( + unsigned Reg, function_ref<bool(MachineInstr *)> IsHazardDef) { + const SIRegisterInfo *TRI = ST.getRegisterInfo(); + + auto IsHazardFn = [IsHazardDef, TRI, Reg] (MachineInstr *MI) { + return IsHazardDef(MI) && MI->modifiesRegister(Reg, TRI); + }; + + return getWaitStatesSince(IsHazardFn); +} + int GCNHazardRecognizer::getWaitStatesSinceSetReg( function_ref<bool(MachineInstr *)> IsHazard) { - int WaitStates = -1; - for (MachineInstr *MI : EmittedInstrs) { - ++WaitStates; - if (!MI || !isSSetReg(MI->getOpcode()) || !IsHazard(MI)) - continue; - return WaitStates; - } - return std::numeric_limits<int>::max(); + auto IsHazardFn = [IsHazard] (MachineInstr *MI) { + return isSSetReg(MI->getOpcode()) && IsHazard(MI); + }; + + return getWaitStatesSince(IsHazardFn); } //===----------------------------------------------------------------------===// @@ -350,3 +365,75 @@ int GCNHazardRecognizer::checkSetRegHazards(MachineInstr *SetRegInstr) { int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn); return SetRegWaitStates - WaitStatesNeeded; } + +int GCNHazardRecognizer::createsVALUHazard(const MachineInstr &MI) { + if (!MI.mayStore()) + return -1; + + const SIInstrInfo *TII = ST.getInstrInfo(); + unsigned Opcode = MI.getOpcode(); + const MCInstrDesc &Desc = MI.getDesc(); + + int VDataIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdata); + int VDataRCID = -1; + if (VDataIdx != -1) + VDataRCID = Desc.OpInfo[VDataIdx].RegClass; + + if (TII->isMUBUF(MI) || TII->isMTBUF(MI)) { + // For MUBUF/MTBUF instructions this hazard only exists if the + // instruction is not 
using a register in the soffset field. + const MachineOperand *SOffset = + TII->getNamedOperand(MI, AMDGPU::OpName::soffset); + // If we have no soffset operand, then assume this field has been + // hardcoded to zero. + if (AMDGPU::getRegBitWidth(VDataRCID) > 64 && + (!SOffset || !SOffset->isReg())) + return VDataIdx; + } + + // MIMG instructions create a hazard if they don't use a 256-bit T# and + // the store size is greater than 8 bytes and they have more than two bits + // of their dmask set. + // All our MIMG definitions use a 256-bit T#, so we can skip checking for them. + if (TII->isMIMG(MI)) { + int SRsrcIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::srsrc); + assert(SRsrcIdx != -1 && + AMDGPU::getRegBitWidth(Desc.OpInfo[SRsrcIdx].RegClass) == 256); + } + + if (TII->isFLAT(MI)) { + int DataIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::data); + if (AMDGPU::getRegBitWidth(Desc.OpInfo[DataIdx].RegClass) > 64) + return DataIdx; + } + + return -1; +} + +int GCNHazardRecognizer::checkVALUHazards(MachineInstr *VALU) { + // This checks for the hazard where VMEM instructions that store more than + // 8 bytes can have their store data overwritten by the next instruction. 
+ if (!ST.has12DWordStoreHazard()) + return 0; + + const SIRegisterInfo *TRI = ST.getRegisterInfo(); + const MachineRegisterInfo &MRI = VALU->getParent()->getParent()->getRegInfo(); + + const int VALUWaitStates = 1; + int WaitStatesNeeded = 0; + + for (const MachineOperand &Def : VALU->defs()) { + if (!TRI->isVGPR(MRI, Def.getReg())) + continue; + unsigned Reg = Def.getReg(); + auto IsHazardFn = [this, Reg, TRI] (MachineInstr *MI) { + int DataIdx = createsVALUHazard(*MI); + return DataIdx >= 0 && + TRI->regsOverlap(MI->getOperand(DataIdx).getReg(), Reg); + }; + int WaitStatesNeededForDef = + VALUWaitStates - getWaitStatesSince(IsHazardFn); + WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForDef); + } + return WaitStatesNeeded; +} diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h index 58831adc37f..f0882d05ffe 100644 --- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h +++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h @@ -35,6 +35,7 @@ class GCNHazardRecognizer final : public ScheduleHazardRecognizer { const MachineFunction &MF; const SISubtarget &ST; + int getWaitStatesSince(function_ref<bool(MachineInstr *)> IsHazard); int getWaitStatesSinceDef(unsigned Reg, function_ref<bool(MachineInstr *)> IsHazardDef = [](MachineInstr *) { return true; }); @@ -47,6 +48,8 @@ class GCNHazardRecognizer final : public ScheduleHazardRecognizer { int checkDivFMasHazards(MachineInstr *DivFMas); int checkGetRegHazards(MachineInstr *GetRegInstr); int checkSetRegHazards(MachineInstr *SetRegInstr); + int createsVALUHazard(const MachineInstr &MI); + int checkVALUHazards(MachineInstr *VALU); public: GCNHazardRecognizer(const MachineFunction &MF); // We can only issue one instruction per cycle. 
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 04cf7b83c17..fb27675b0f2 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -352,8 +352,8 @@ bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) { // Avoid using MCRegisterClass::getSize, since that function will go away // (move from MC* level to Target* level). Return size in bits. -unsigned getRegBitWidth(const MCRegisterClass &RC) { - switch (RC.getID()) { +unsigned getRegBitWidth(unsigned RCID) { + switch (RCID) { case AMDGPU::SGPR_32RegClassID: case AMDGPU::VGPR_32RegClassID: case AMDGPU::VS_32RegClassID: @@ -382,6 +382,10 @@ unsigned getRegBitWidth(const MCRegisterClass &RC) { } } +unsigned getRegBitWidth(const MCRegisterClass &RC) { + return getRegBitWidth(RC.getID()); +} + unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc, unsigned OpNo) { unsigned RCID = Desc.OpInfo[OpNo].RegClass; diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index 28e480b6f2a..484f681ab7d 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -158,6 +158,9 @@ bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo); bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo); /// \brief Get the size in bits of a register from the register class \p RC. +unsigned getRegBitWidth(unsigned RCID); + +/// \brief Get the size in bits of a register from the register class \p RC. unsigned getRegBitWidth(const MCRegisterClass &RC); /// \brief Get size of register operand |

