diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp | 221 |
1 files changed, 221 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp b/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp new file mode 100644 index 00000000000..f9bfe96f65c --- /dev/null +++ b/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp @@ -0,0 +1,221 @@ +//===- SIPreAllocateWWMRegs.cpp - WWM Register Pre-allocation -------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// Pass to pre-allocated WWM registers +// +//===----------------------------------------------------------------------===// + +#include "AMDGPU.h" +#include "AMDGPUSubtarget.h" +#include "SIInstrInfo.h" +#include "SIRegisterInfo.h" +#include "SIMachineFunctionInfo.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/CodeGen/VirtRegMap.h" +#include "llvm/CodeGen/LiveInterval.h" +#include "llvm/CodeGen/LiveIntervals.h" +#include "llvm/CodeGen/LiveRegMatrix.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/RegisterClassInfo.h" + +using namespace llvm; + +#define DEBUG_TYPE "si-pre-allocate-wwm-regs" + +namespace { + +class SIPreAllocateWWMRegs : public MachineFunctionPass { +private: + const SIInstrInfo *TII; + const SIRegisterInfo *TRI; + MachineRegisterInfo *MRI; + LiveIntervals *LIS; + LiveRegMatrix *Matrix; + VirtRegMap *VRM; + RegisterClassInfo RegClassInfo; + + std::vector<unsigned> RegsToRewrite; + +public: + static char ID; + + SIPreAllocateWWMRegs() : MachineFunctionPass(ID) { + initializeSIPreAllocateWWMRegsPass(*PassRegistry::getPassRegistry()); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<LiveIntervals>(); + AU.addPreserved<LiveIntervals>(); + AU.addRequired<VirtRegMap>(); + AU.addRequired<LiveRegMatrix>(); + AU.addPreserved<SlotIndexes>(); + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + +private: + bool processDef(MachineOperand &MO); + void rewriteRegs(MachineFunction &MF); +}; + +} // End anonymous namespace. + +INITIALIZE_PASS_BEGIN(SIPreAllocateWWMRegs, DEBUG_TYPE, + "SI Pre-allocate WWM Registers", false, false) +INITIALIZE_PASS_DEPENDENCY(LiveIntervals) +INITIALIZE_PASS_DEPENDENCY(VirtRegMap) +INITIALIZE_PASS_DEPENDENCY(LiveRegMatrix) +INITIALIZE_PASS_END(SIPreAllocateWWMRegs, DEBUG_TYPE, + "SI Pre-allocate WWM Registers", false, false) + +char SIPreAllocateWWMRegs::ID = 0; + +char &llvm::SIPreAllocateWWMRegsID = SIPreAllocateWWMRegs::ID; + +FunctionPass *llvm::createSIPreAllocateWWMRegsPass() { + return new SIPreAllocateWWMRegs(); +} + +bool SIPreAllocateWWMRegs::processDef(MachineOperand &MO) { + if (!MO.isReg()) + return false; + + unsigned Reg = MO.getReg(); + + if (!TRI->isVGPR(*MRI, Reg)) + return false; + + if (TRI->isPhysicalRegister(Reg)) + return false; + + if (VRM->hasPhys(Reg)) + return false; + + LiveInterval &LI = LIS->getInterval(Reg); + + for (unsigned PhysReg : RegClassInfo.getOrder(MRI->getRegClass(Reg))) { + if (!MRI->isPhysRegUsed(PhysReg) && + Matrix->checkInterference(LI, PhysReg) == LiveRegMatrix::IK_Free) { + Matrix->assign(LI, PhysReg); + assert(PhysReg != 0); + RegsToRewrite.push_back(Reg); + return true; + } + } + + llvm_unreachable("physreg not found for WWM expression"); + return false; +} + +void SIPreAllocateWWMRegs::rewriteRegs(MachineFunction &MF) { + for (MachineBasicBlock &MBB : MF) { + for (MachineInstr &MI : MBB) { + for (MachineOperand &MO : MI.operands()) { + if (!MO.isReg()) + continue; + + const unsigned VirtReg = MO.getReg(); + if (TRI->isPhysicalRegister(VirtReg)) + continue; + + if (!VRM->hasPhys(VirtReg)) + continue; + + unsigned PhysReg = VRM->getPhys(VirtReg); + const unsigned SubReg = MO.getSubReg(); + if (SubReg != 0) { + PhysReg = TRI->getSubReg(PhysReg, SubReg); + MO.setSubReg(0); + } + + MO.setReg(PhysReg); + MO.setIsRenamable(false); + } + } + } + + SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); + + for (unsigned Reg : RegsToRewrite) { + LIS->removeInterval(Reg); + + const unsigned PhysReg = VRM->getPhys(Reg); + assert(PhysReg != 0); + MFI->ReserveWWMRegister(PhysReg); + } + + RegsToRewrite.clear(); + + // Update the set of reserved registers to include WWM ones. + MRI->freezeReservedRegs(MF); +} + +bool SIPreAllocateWWMRegs::runOnMachineFunction(MachineFunction &MF) { + LLVM_DEBUG(dbgs() << "SIPreAllocateWWMRegs: function " << MF.getName() << "\n"); + + const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); + + TII = ST.getInstrInfo(); + TRI = &TII->getRegisterInfo(); + MRI = &MF.getRegInfo(); + + LIS = &getAnalysis<LiveIntervals>(); + Matrix = &getAnalysis<LiveRegMatrix>(); + VRM = &getAnalysis<VirtRegMap>(); + + RegClassInfo.runOnMachineFunction(MF); + + bool RegsAssigned = false; + + // We use a reverse post-order traversal of the control-flow graph to + // guarantee that we visit definitions in dominance order. Since WWM + // expressions are guaranteed to never involve phi nodes, and we can only + // escape WWM through the special WWM instruction, this means that this is a + // perfect elimination order, so we can never do any better. + ReversePostOrderTraversal<MachineFunction*> RPOT(&MF); + + for (MachineBasicBlock *MBB : RPOT) { + bool InWWM = false; + for (MachineInstr &MI : *MBB) { + if (MI.getOpcode() == AMDGPU::V_SET_INACTIVE_B32 || + MI.getOpcode() == AMDGPU::V_SET_INACTIVE_B64) + RegsAssigned |= processDef(MI.getOperand(0)); + + if (MI.getOpcode() == AMDGPU::ENTER_WWM) { + LLVM_DEBUG(dbgs() << "entering WWM region: " << MI << "\n"); + InWWM = true; + continue; + } + + if (MI.getOpcode() == AMDGPU::EXIT_WWM) { + LLVM_DEBUG(dbgs() << "exiting WWM region: " << MI << "\n"); + InWWM = false; + } + + if (!InWWM) + continue; + + LLVM_DEBUG(dbgs() << "processing " << MI << "\n"); + + for (MachineOperand &DefOpnd : MI.defs()) { + RegsAssigned |= processDef(DefOpnd); + } + } + } + + if (!RegsAssigned) + return false; + + rewriteRegs(MF); + return true; +} |