Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp |  11
-rw-r--r--  llvm/lib/Target/AMDGPU/CMakeLists.txt          |   1
-rw-r--r--  llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 182
-rw-r--r--  llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h   |  59
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInstrInfo.cpp         |  38
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInstrInfo.h           |  21
-rw-r--r--  llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp      |  27
-rw-r--r--  llvm/lib/Target/AMDGPU/SIRegisterInfo.h        |   2
-rw-r--r--  llvm/lib/Target/AMDGPU/SISchedule.td           |   1
9 files changed, 324 insertions, 18 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 13abe7f1343..342afffea6d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -384,6 +384,17 @@ void GCNPassConfig::addPreSched2() {
 }
 
 void GCNPassConfig::addPreEmitPass() {
+
+  // The hazard recognizer that runs as part of the post-RA scheduler does not
+  // guarantee that it can handle all hazards correctly. This is because if
+  // there are multiple scheduling regions in a basic block, the regions are
+  // scheduled bottom up, so when we begin to schedule a region we don't know
+  // what instructions were emitted directly before it.
+  //
+  // Here we add a stand-alone hazard recognizer pass which can handle all
+  // cases.
+  addPass(&PostRAHazardRecognizerID);
+
   addPass(createSIInsertWaitsPass(), false);
   addPass(createSIShrinkInstructionsPass());
   addPass(createSILowerControlFlowPass(), false);
diff --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt
index ef68e952fa8..d09791f06e4 100644
--- a/llvm/lib/Target/AMDGPU/CMakeLists.txt
+++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt
@@ -47,6 +47,7 @@ add_llvm_target(AMDGPUCodeGen
   AMDGPUInstrInfo.cpp
   AMDGPUPromoteAlloca.cpp
   AMDGPURegisterInfo.cpp
+  GCNHazardRecognizer.cpp
   R600ClauseMergePass.cpp
   R600ControlFlowFinalizer.cpp
   R600EmitClauseMarkers.cpp
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
new file mode 100644
index 00000000000..4c830bc65a2
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -0,0 +1,182 @@
+//===-- GCNHazardRecognizers.cpp - GCN Hazard Recognizer Impls ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements hazard recognizers for scheduling on GCN processors.
+//
+//===----------------------------------------------------------------------===//
+
+#include "GCNHazardRecognizer.h"
+#include "AMDGPUSubtarget.h"
+#include "SIInstrInfo.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Hazard Recognizer Implementation
+//===----------------------------------------------------------------------===//
+
+GCNHazardRecognizer::GCNHazardRecognizer(const MachineFunction &MF) :
+  CurrCycleInstr(nullptr),
+  MF(MF) {
+  MaxLookAhead = 5;
+}
+
+void GCNHazardRecognizer::EmitInstruction(SUnit *SU) {
+  EmitInstruction(SU->getInstr());
+}
+
+void GCNHazardRecognizer::EmitInstruction(MachineInstr *MI) {
+  CurrCycleInstr = MI;
+}
+
+ScheduleHazardRecognizer::HazardType
+GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
+  const SIInstrInfo *TII =
+      static_cast<const SIInstrInfo*>(MF.getSubtarget().getInstrInfo());
+  MachineInstr *MI = SU->getInstr();
+
+  if (TII->isSMRD(*MI) && checkSMRDHazards(MI) > 0)
+    return NoopHazard;
+
+  if (TII->isVMEM(*MI) && checkVMEMHazards(MI) > 0)
+    return NoopHazard;
+
+  return NoHazard;
+}
+
+unsigned GCNHazardRecognizer::PreEmitNoops(SUnit *SU) {
+  return PreEmitNoops(SU->getInstr());
+}
+
+unsigned GCNHazardRecognizer::PreEmitNoops(MachineInstr *MI) {
+  const SIInstrInfo *TII =
+      static_cast<const SIInstrInfo*>(MF.getSubtarget().getInstrInfo());
+
+  if (TII->isSMRD(*MI))
+    return std::max(0, checkSMRDHazards(MI));
+
+  if (TII->isVMEM(*MI))
+    return std::max(0, checkVMEMHazards(MI));
+
+  return 0;
+}
+
+void GCNHazardRecognizer::EmitNoop() {
+  EmittedInstrs.push_front(nullptr);
+}
+
+void GCNHazardRecognizer::AdvanceCycle() {
+
+  // When the scheduler detects a stall, it will call AdvanceCycle() without
+  // emitting any instructions.
+  if (!CurrCycleInstr)
+    return;
+
+  const SIInstrInfo *TII =
+      static_cast<const SIInstrInfo*>(MF.getSubtarget().getInstrInfo());
+  unsigned NumWaitStates = TII->getNumWaitStates(*CurrCycleInstr);
+
+  // Keep track of emitted instructions.
+  EmittedInstrs.push_front(CurrCycleInstr);
+
+  // Add a nullptr for each additional wait state after the first. Make sure
+  // not to add more than getMaxLookAhead() items to the list, since we
+  // truncate the list to that size right after this loop.
+  for (unsigned i = 1, e = std::min(NumWaitStates, getMaxLookAhead());
+       i < e; ++i) {
+    EmittedInstrs.push_front(nullptr);
+  }
+
+  // getMaxLookAhead() is the largest number of wait states we will ever need
+  // to insert, so there is no point in keeping track of more than that many
+  // wait states.
+  EmittedInstrs.resize(getMaxLookAhead());
+
+  CurrCycleInstr = nullptr;
+}
+
+void GCNHazardRecognizer::RecedeCycle() {
+  llvm_unreachable("hazard recognizer does not support bottom-up scheduling.");
+}
+
+//===----------------------------------------------------------------------===//
+// Helper Functions
+//===----------------------------------------------------------------------===//
+
+int GCNHazardRecognizer::getWaitStatesSinceDef(unsigned Reg,
+    std::function<bool(MachineInstr*)> IsHazardDef) {
+  const TargetRegisterInfo *TRI =
+      MF.getSubtarget<AMDGPUSubtarget>().getRegisterInfo();
+
+  int WaitStates = -1;
+  for (MachineInstr *MI : EmittedInstrs) {
+    ++WaitStates;
+    if (!MI || !IsHazardDef(MI))
+      continue;
+    if (MI->modifiesRegister(Reg, TRI))
+      return WaitStates;
+  }
+  return std::numeric_limits<int>::max();
+}
+
+//===----------------------------------------------------------------------===//
+// No-op Hazard Detection
+//===----------------------------------------------------------------------===//
+
+int GCNHazardRecognizer::checkSMRDHazards(MachineInstr *SMRD) {
+  const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
+  const SIInstrInfo *TII = static_cast<const SIInstrInfo*>(ST.getInstrInfo());
+
+  // This SMRD hazard only affects SI.
+  if (ST.getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS)
+    return 0;
+
+  // A read of an SGPR by an SMRD instruction requires 4 wait states when the
+  // SGPR was written by a VALU instruction.
+  int SmrdSgprWaitStates = 4;
+  int WaitStatesNeeded = 0;
+  auto IsHazardDefFn = [TII] (MachineInstr *MI) { return TII->isVALU(*MI); };
+
+  for (const MachineOperand &Use : SMRD->uses()) {
+    if (!Use.isReg())
+      continue;
+    int WaitStatesNeededForUse =
+        SmrdSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardDefFn);
+    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
+  }
+  return WaitStatesNeeded;
+}
+
+int GCNHazardRecognizer::checkVMEMHazards(MachineInstr* VMEM) {
+  const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
+  const SIInstrInfo *TII = static_cast<const SIInstrInfo*>(ST.getInstrInfo());
+
+  if (ST.getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS)
+    return 0;
+
+  const SIRegisterInfo &TRI = TII->getRegisterInfo();
+
+  // A read of an SGPR by a VMEM instruction requires 5 wait states when the
+  // SGPR was written by a VALU instruction.
+  int VmemSgprWaitStates = 5;
+  int WaitStatesNeeded = 0;
+  auto IsHazardDefFn = [TII] (MachineInstr *MI) { return TII->isVALU(*MI); };
+
+  for (const MachineOperand &Use : VMEM->uses()) {
+    if (!Use.isReg() || TRI.isVGPR(MF.getRegInfo(), Use.getReg()))
+      continue;
+
+    int WaitStatesNeededForUse =
+        VmemSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardDefFn);
+    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
+  }
+  return WaitStatesNeeded;
+}
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
new file mode 100644
index 00000000000..e75c35032ff
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
@@ -0,0 +1,59 @@
+//===-- GCNHazardRecognizers.h - GCN Hazard Recognizers ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines hazard recognizers for scheduling on GCN processors.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPUHAZARDRECOGNIZERS_H
+#define LLVM_LIB_TARGET_AMDGPUHAZARDRECOGNIZERS_H
+
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include <functional>
+#include <list>
+
+namespace llvm {
+
+class MachineFunction;
+class MachineInstr;
+class ScheduleDAG;
+class SIInstrInfo;
+
+class GCNHazardRecognizer final : public ScheduleHazardRecognizer {
+
+  // This variable stores the instruction that has been emitted this cycle.
+  // It will be added to EmittedInstrs when AdvanceCycle() or RecedeCycle()
+  // is called.
+  MachineInstr *CurrCycleInstr;
+  std::list<MachineInstr*> EmittedInstrs;
+  const MachineFunction &MF;
+
+  int getWaitStatesSinceDef(unsigned Reg,
+                            std::function<bool(MachineInstr*)> IsHazardDef =
+                                [](MachineInstr*) { return true; });
+
+  int checkSMRDHazards(MachineInstr *SMRD);
+  int checkVMEMHazards(MachineInstr* VMEM);
+public:
+  GCNHazardRecognizer(const MachineFunction &MF);
+  // We can only issue one instruction per cycle.
+  bool atIssueLimit() const override { return true; }
+  void EmitInstruction(SUnit *SU) override;
+  void EmitInstruction(MachineInstr *MI) override;
+  HazardType getHazardType(SUnit *SU, int Stalls) override;
+  void EmitNoop() override;
+  unsigned PreEmitNoops(SUnit *SU) override;
+  unsigned PreEmitNoops(MachineInstr *) override;
+  void AdvanceCycle() override;
+  void RecedeCycle() override;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPUHAZARDRECOGNIZERS_H
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 2ab4f78a49b..eb17ffedff6 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -15,11 +15,13 @@
 
 #include "SIInstrInfo.h"
 #include "AMDGPUTargetMachine.h"
+#include "GCNHazardRecognizer.h"
 #include "SIDefines.h"
 #include "SIMachineFunctionInfo.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
 #include "llvm/IR/Function.h"
 #include "llvm/CodeGen/RegisterScavenging.h"
 #include "llvm/MC/MCInstrDesc.h"
@@ -816,6 +818,20 @@ void SIInstrInfo::insertWaitStates(MachineBasicBlock &MBB,
   }
 }
 
+void SIInstrInfo::insertNoop(MachineBasicBlock &MBB,
+                             MachineBasicBlock::iterator MI) const {
+  insertWaitStates(MBB, MI, 1);
+}
+
+unsigned SIInstrInfo::getNumWaitStates(const MachineInstr &MI) const {
+  switch (MI.getOpcode()) {
+  default: return 1; // FIXME: Do wait states equal cycles?
+
+  case AMDGPU::S_NOP:
+    return MI.getOperand(0).getImm() + 1;
+  }
+}
+
 bool SIInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
   MachineBasicBlock &MBB = *MI->getParent();
   DebugLoc DL = MBB.findDebugLoc(MI);
@@ -1188,8 +1204,11 @@ bool SIInstrInfo::checkInstOffsetsDoNotOverlap(MachineInstr *MIa,
 
   if (getMemOpBaseRegImmOfs(MIa, BaseReg0, Offset0, &RI) &&
       getMemOpBaseRegImmOfs(MIb, BaseReg1, Offset1, &RI)) {
-    assert(MIa->hasOneMemOperand() && MIb->hasOneMemOperand() &&
-           "read2 / write2 not expected here yet");
+
+    if (!MIa->hasOneMemOperand() || !MIb->hasOneMemOperand()) {
+      // FIXME: Handle ds_read2 / ds_write2.
+      return false;
+    }
     unsigned Width0 = (*MIa->memoperands_begin())->getSize();
     unsigned Width1 = (*MIb->memoperands_begin())->getSize();
     if (BaseReg0 == BaseReg1 &&
@@ -2964,3 +2983,18 @@ SIInstrInfo::getSerializableTargetIndices() const {
     {AMDGPU::TI_SCRATCH_RSRC_DWORD3, "amdgpu-scratch-rsrc-dword3"}};
   return makeArrayRef(TargetIndices);
 }
+
+/// This is used by the post-RA scheduler (PostRASchedulerList.cpp). The
+/// post-RA version of misched uses CreateTargetMIHazardRecognizer.
+ScheduleHazardRecognizer *
+SIInstrInfo::CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
+                                                const ScheduleDAG *DAG) const {
+  return new GCNHazardRecognizer(DAG->MF);
+}
+
+/// This is the hazard recognizer used at -O0 by the PostRAHazardRecognizer
+/// pass.
+ScheduleHazardRecognizer *
+SIInstrInfo::CreateTargetPostRAHazardRecognizer(const MachineFunction &MF) const {
+  return new GCNHazardRecognizer(MF);
+}
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index a5cd2e18ef4..2121ae1e201 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -169,6 +169,14 @@ public:
     return get(Opcode).TSFlags & SIInstrFlags::VALU;
   }
 
+  static bool isVMEM(const MachineInstr &MI) {
+    return isMUBUF(MI) || isMTBUF(MI) || isMIMG(MI);
+  }
+
+  bool isVMEM(uint16_t Opcode) const {
+    return isMUBUF(Opcode) || isMTBUF(Opcode) || isMIMG(Opcode);
+  }
+
   static bool isSOP1(const MachineInstr &MI) {
     return MI.getDesc().TSFlags & SIInstrFlags::SOP1;
   }
@@ -440,6 +448,12 @@ public:
   void insertWaitStates(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
                         int Count) const;
 
+  void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const;
+
+  /// \brief Return the number of wait states that result from executing this
+  /// instruction.
+  unsigned getNumWaitStates(const MachineInstr &MI) const;
+
   /// \brief Returns the operand named \p Op. If \p MI does not have an
   /// operand named \c Op, this function returns nullptr.
   LLVM_READONLY
@@ -472,6 +486,13 @@ public:
   ArrayRef<std::pair<int, const char *>>
   getSerializableTargetIndices() const override;
 
+  ScheduleHazardRecognizer *
+  CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
+                                     const ScheduleDAG *DAG) const override;
+
+  ScheduleHazardRecognizer *
+  CreateTargetPostRAHazardRecognizer(const MachineFunction &MF) const override;
+
 };
 
 namespace AMDGPU {
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 4bc89ea2773..d0ba8e68507 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -596,22 +596,6 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
       }
     }
 
-    // TODO: only do this when it is needed
-    switch (MF->getSubtarget<AMDGPUSubtarget>().getGeneration()) {
-    case AMDGPUSubtarget::SOUTHERN_ISLANDS:
-      // "VALU writes SGPR" -> "SMRD reads that SGPR" needs 4 wait states
-      // ("S_NOP 3") on SI
-      TII->insertWaitStates(*MBB, MI, 4);
-      break;
-    case AMDGPUSubtarget::SEA_ISLANDS:
-      break;
-    default: // VOLCANIC_ISLANDS and later
-      // "VALU writes SGPR -> VMEM reads that SGPR" needs 5 wait states
-      // ("S_NOP 4") on VI and later. This also applies to VALUs which write
-      // VCC, but we're unlikely to see VMEM use VCC.
-      TII->insertWaitStates(*MBB, MI, 5);
-    }
-
     MI->eraseFromParent();
     break;
   }
@@ -991,3 +975,14 @@ unsigned SIRegisterInfo::getNumSGPRsAllowed(AMDGPUSubtarget::Generation gen,
     }
   }
 }
+
+bool SIRegisterInfo::isVGPR(const MachineRegisterInfo &MRI,
+                            unsigned Reg) const {
+  const TargetRegisterClass *RC;
+  if (TargetRegisterInfo::isVirtualRegister(Reg))
+    RC = MRI.getRegClass(Reg);
+  else
+    RC = getPhysRegClass(Reg);
+
+  return hasVGPRs(RC);
+}
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
index f64103115c3..e43b2c15642 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -188,6 +188,8 @@ public:
   unsigned getSGPR32PressureSet() const { return SGPR32SetID; };
   unsigned getVGPR32PressureSet() const { return VGPR32SetID; };
 
+  bool isVGPR(const MachineRegisterInfo &MRI, unsigned Reg) const;
+
 private:
   void buildScratchLoadStore(MachineBasicBlock::iterator MI,
                              unsigned LoadStoreOp, unsigned Value,
diff --git a/llvm/lib/Target/AMDGPU/SISchedule.td b/llvm/lib/Target/AMDGPU/SISchedule.td
index 4a46eb45c25..26f73c4ad51 100644
--- a/llvm/lib/Target/AMDGPU/SISchedule.td
+++ b/llvm/lib/Target/AMDGPU/SISchedule.td
@@ -42,6 +42,7 @@ def Write64Bit : SchedWrite;
 class SISchedMachineModel : SchedMachineModel {
   let CompleteModel = 0;
   let IssueWidth = 1;
+  let PostRAScheduler = 1;
 }
 
 def SIFullSpeedModel : SISchedMachineModel;
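
What the patch does, in brief: instead of unconditionally inserting S_NOPs in eliminateFrameIndex(), the target now detects hazards on demand. The recognizer keeps a bounded history of issued instructions (EmittedInstrs, capped at MaxLookAhead = 5), records one empty slot per extra wait state, and before issuing an SMRD or VMEM instruction measures how many wait states separate it from the most recent VALU write of each register it reads; PreEmitNoops() returns the deficit. Below is a minimal standalone C++ model of that bookkeeping; the names (Insn, advance, waitStatesSinceDef) and the register numbering are illustrative only, not LLVM API.

// Standalone sketch of the recognizer's wait-state bookkeeping.
#include <algorithm>
#include <deque>
#include <iostream>
#include <limits>

struct Insn {
  bool IsVALU;         // VALU instructions are the hazardous defs here.
  int DefReg;          // Register written, or -1 if none.
  unsigned WaitStates; // Wait states the instruction itself covers (>= 1).
};

constexpr unsigned MaxLookAhead = 5; // largest wait-state gap ever needed

// front() is the most recent issue slot; nullptr marks an elapsed wait state.
std::deque<const Insn *> Emitted;

// Mirrors AdvanceCycle(): record the instruction, pad with one empty slot per
// additional wait state, and cap the history at MaxLookAhead entries.
void advance(const Insn &I) {
  Emitted.push_front(&I);
  for (unsigned i = 1, e = std::min(I.WaitStates, MaxLookAhead); i < e; ++i)
    Emitted.push_front(nullptr);
  Emitted.resize(MaxLookAhead); // pads with nullptr if the history is short
}

// Mirrors getWaitStatesSinceDef(): wait states back to the nearest VALU def
// of Reg, or "infinity" if no such def is inside the window.
int waitStatesSinceDef(int Reg) {
  int WS = -1;
  for (const Insn *I : Emitted) {
    ++WS;
    if (I && I->IsVALU && I->DefReg == Reg)
      return WS;
  }
  return std::numeric_limits<int>::max();
}

int main() {
  Insn ValuDef{true, 7, 1}; // a VALU instruction writes SGPR "7"
  advance(ValuDef);

  // On SI, an SMRD read of SGPR 7 needs 4 wait states after that write.
  // None have elapsed yet, so 4 no-ops (an s_nop 3) must be inserted.
  int Needed = std::max(0, 4 - waitStatesSinceDef(7));
  std::cout << "no-ops needed: " << Needed << '\n'; // prints 4
}

On VI and later the identical computation runs with a 5-wait-state requirement for VMEM reads of VALU-written SGPRs, which is what checkVMEMHazards() implements above.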