diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp | 59 |
1 files changed, 45 insertions, 14 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp index 768fca054af..dd133d37eb7 100644 --- a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp +++ b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp @@ -101,11 +101,12 @@ private: DenseMap<const MachineInstr *, InstrInfo> Instructions; DenseMap<const MachineBasicBlock *, BlockInfo> Blocks; SmallVector<const MachineInstr *, 2> ExecExports; + SmallVector<MachineInstr *, 1> LiveMaskQueries; - char scanInstructions(const MachineFunction &MF, std::vector<WorkItem>& Worklist); + char scanInstructions(MachineFunction &MF, std::vector<WorkItem>& Worklist); void propagateInstruction(const MachineInstr &MI, std::vector<WorkItem>& Worklist); void propagateBlock(const MachineBasicBlock &MBB, std::vector<WorkItem>& Worklist); - char analyzeFunction(const MachineFunction &MF); + char analyzeFunction(MachineFunction &MF); void toExact(MachineBasicBlock &MBB, MachineBasicBlock::iterator Before, unsigned SaveWQM, unsigned LiveMaskReg); @@ -113,6 +114,8 @@ private: unsigned SavedWQM); void processBlock(MachineBasicBlock &MBB, unsigned LiveMaskReg, bool isEntry); + void lowerLiveMaskQueries(unsigned LiveMaskReg); + public: static char ID; @@ -148,15 +151,15 @@ FunctionPass *llvm::createSIWholeQuadModePass() { // Scan instructions to determine which ones require an Exact execmask and // which ones seed WQM requirements. -char SIWholeQuadMode::scanInstructions(const MachineFunction &MF, +char SIWholeQuadMode::scanInstructions(MachineFunction &MF, std::vector<WorkItem> &Worklist) { char GlobalFlags = 0; for (auto BI = MF.begin(), BE = MF.end(); BI != BE; ++BI) { - const MachineBasicBlock &MBB = *BI; + MachineBasicBlock &MBB = *BI; for (auto II = MBB.begin(), IE = MBB.end(); II != IE; ++II) { - const MachineInstr &MI = *II; + MachineInstr &MI = *II; unsigned Opcode = MI.getOpcode(); char Flags; @@ -167,8 +170,13 @@ char SIWholeQuadMode::scanInstructions(const MachineFunction &MF, Flags = StateExact; } else { // Handle export instructions with the exec mask valid flag set - if (Opcode == AMDGPU::EXP && MI.getOperand(4).getImm() != 0) - ExecExports.push_back(&MI); + if (Opcode == AMDGPU::EXP) { + if (MI.getOperand(4).getImm() != 0) + ExecExports.push_back(&MI); + } else if (Opcode == AMDGPU::SI_PS_LIVE) { + LiveMaskQueries.push_back(&MI); + } + continue; } @@ -290,7 +298,7 @@ void SIWholeQuadMode::propagateBlock(const MachineBasicBlock &MBB, } } -char SIWholeQuadMode::analyzeFunction(const MachineFunction &MF) { +char SIWholeQuadMode::analyzeFunction(MachineFunction &MF) { std::vector<WorkItem> Worklist; char GlobalFlags = scanInstructions(MF, Worklist); @@ -424,6 +432,16 @@ void SIWholeQuadMode::processBlock(MachineBasicBlock &MBB, unsigned LiveMaskReg, } } +void SIWholeQuadMode::lowerLiveMaskQueries(unsigned LiveMaskReg) { + for (MachineInstr *MI : LiveMaskQueries) { + DebugLoc DL = MI->getDebugLoc(); + unsigned Dest = MI->getOperand(0).getReg(); + BuildMI(*MI->getParent(), MI, DL, TII->get(AMDGPU::COPY), Dest) + .addReg(LiveMaskReg); + MI->eraseFromParent(); + } +} + bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) { if (MF.getFunction()->getCallingConv() != CallingConv::AMDGPU_PS) return false; @@ -431,30 +449,43 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) { Instructions.clear(); Blocks.clear(); ExecExports.clear(); + LiveMaskQueries.clear(); TII = static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo()); TRI = static_cast<const SIRegisterInfo *>(MF.getSubtarget().getRegisterInfo()); MRI = &MF.getRegInfo(); char GlobalFlags = analyzeFunction(MF); - if (!(GlobalFlags & StateWQM)) - return false; + if (!(GlobalFlags & StateWQM)) { + lowerLiveMaskQueries(AMDGPU::EXEC); + return !LiveMaskQueries.empty(); + } + // Store a copy of the original live mask when required MachineBasicBlock &Entry = MF.front(); MachineInstr *EntryMI = Entry.getFirstNonPHI(); + unsigned LiveMaskReg = 0; + + if (GlobalFlags & StateExact || !LiveMaskQueries.empty()) { + LiveMaskReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass); + BuildMI(Entry, EntryMI, DebugLoc(), TII->get(AMDGPU::COPY), LiveMaskReg) + .addReg(AMDGPU::EXEC); + } if (GlobalFlags == StateWQM) { // For a shader that needs only WQM, we can just set it once. BuildMI(Entry, EntryMI, DebugLoc(), TII->get(AMDGPU::S_WQM_B64), AMDGPU::EXEC).addReg(AMDGPU::EXEC); + + lowerLiveMaskQueries(LiveMaskReg); + // EntryMI may become invalid here return true; } - // Handle the general case - unsigned LiveMaskReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass); - BuildMI(Entry, EntryMI, DebugLoc(), TII->get(AMDGPU::COPY), LiveMaskReg) - .addReg(AMDGPU::EXEC); + lowerLiveMaskQueries(LiveMaskReg); + EntryMI = nullptr; + // Handle the general case for (const auto &BII : Blocks) processBlock(const_cast<MachineBasicBlock &>(*BII.first), LiveMaskReg, BII.first == &*MF.begin()); |