summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp')
-rw-r--r--llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp221
1 files changed, 221 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp b/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp
new file mode 100644
index 00000000000..f9bfe96f65c
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp
@@ -0,0 +1,221 @@
+//===- SIPreAllocateWWMRegs.cpp - WWM Register Pre-allocation -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Pass to pre-allocated WWM registers
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
+#include "SIInstrInfo.h"
+#include "SIRegisterInfo.h"
+#include "SIMachineFunctionInfo.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/LiveRegMatrix.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "si-pre-allocate-wwm-regs"
+
+namespace {
+
+class SIPreAllocateWWMRegs : public MachineFunctionPass {
+private:
+ const SIInstrInfo *TII;
+ const SIRegisterInfo *TRI;
+ MachineRegisterInfo *MRI;
+ LiveIntervals *LIS;
+ LiveRegMatrix *Matrix;
+ VirtRegMap *VRM;
+ RegisterClassInfo RegClassInfo;
+
+ std::vector<unsigned> RegsToRewrite;
+
+public:
+ static char ID;
+
+ SIPreAllocateWWMRegs() : MachineFunctionPass(ID) {
+ initializeSIPreAllocateWWMRegsPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<LiveIntervals>();
+ AU.addPreserved<LiveIntervals>();
+ AU.addRequired<VirtRegMap>();
+ AU.addRequired<LiveRegMatrix>();
+ AU.addPreserved<SlotIndexes>();
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+private:
+ bool processDef(MachineOperand &MO);
+ void rewriteRegs(MachineFunction &MF);
+};
+
+} // End anonymous namespace.
+
+INITIALIZE_PASS_BEGIN(SIPreAllocateWWMRegs, DEBUG_TYPE,
+ "SI Pre-allocate WWM Registers", false, false)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
+INITIALIZE_PASS_DEPENDENCY(LiveRegMatrix)
+INITIALIZE_PASS_END(SIPreAllocateWWMRegs, DEBUG_TYPE,
+ "SI Pre-allocate WWM Registers", false, false)
+
+char SIPreAllocateWWMRegs::ID = 0;
+
+char &llvm::SIPreAllocateWWMRegsID = SIPreAllocateWWMRegs::ID;
+
+FunctionPass *llvm::createSIPreAllocateWWMRegsPass() {
+ return new SIPreAllocateWWMRegs();
+}
+
+bool SIPreAllocateWWMRegs::processDef(MachineOperand &MO) {
+ if (!MO.isReg())
+ return false;
+
+ unsigned Reg = MO.getReg();
+
+ if (!TRI->isVGPR(*MRI, Reg))
+ return false;
+
+ if (TRI->isPhysicalRegister(Reg))
+ return false;
+
+ if (VRM->hasPhys(Reg))
+ return false;
+
+ LiveInterval &LI = LIS->getInterval(Reg);
+
+ for (unsigned PhysReg : RegClassInfo.getOrder(MRI->getRegClass(Reg))) {
+ if (!MRI->isPhysRegUsed(PhysReg) &&
+ Matrix->checkInterference(LI, PhysReg) == LiveRegMatrix::IK_Free) {
+ Matrix->assign(LI, PhysReg);
+ assert(PhysReg != 0);
+ RegsToRewrite.push_back(Reg);
+ return true;
+ }
+ }
+
+ llvm_unreachable("physreg not found for WWM expression");
+ return false;
+}
+
+void SIPreAllocateWWMRegs::rewriteRegs(MachineFunction &MF) {
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : MBB) {
+ for (MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg())
+ continue;
+
+ const unsigned VirtReg = MO.getReg();
+ if (TRI->isPhysicalRegister(VirtReg))
+ continue;
+
+ if (!VRM->hasPhys(VirtReg))
+ continue;
+
+ unsigned PhysReg = VRM->getPhys(VirtReg);
+ const unsigned SubReg = MO.getSubReg();
+ if (SubReg != 0) {
+ PhysReg = TRI->getSubReg(PhysReg, SubReg);
+ MO.setSubReg(0);
+ }
+
+ MO.setReg(PhysReg);
+ MO.setIsRenamable(false);
+ }
+ }
+ }
+
+ SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+
+ for (unsigned Reg : RegsToRewrite) {
+ LIS->removeInterval(Reg);
+
+ const unsigned PhysReg = VRM->getPhys(Reg);
+ assert(PhysReg != 0);
+ MFI->ReserveWWMRegister(PhysReg);
+ }
+
+ RegsToRewrite.clear();
+
+ // Update the set of reserved registers to include WWM ones.
+ MRI->freezeReservedRegs(MF);
+}
+
+bool SIPreAllocateWWMRegs::runOnMachineFunction(MachineFunction &MF) {
+ LLVM_DEBUG(dbgs() << "SIPreAllocateWWMRegs: function " << MF.getName() << "\n");
+
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+
+ TII = ST.getInstrInfo();
+ TRI = &TII->getRegisterInfo();
+ MRI = &MF.getRegInfo();
+
+ LIS = &getAnalysis<LiveIntervals>();
+ Matrix = &getAnalysis<LiveRegMatrix>();
+ VRM = &getAnalysis<VirtRegMap>();
+
+ RegClassInfo.runOnMachineFunction(MF);
+
+ bool RegsAssigned = false;
+
+ // We use a reverse post-order traversal of the control-flow graph to
+ // guarantee that we visit definitions in dominance order. Since WWM
+ // expressions are guaranteed to never involve phi nodes, and we can only
+ // escape WWM through the special WWM instruction, this means that this is a
+ // perfect elimination order, so we can never do any better.
+ ReversePostOrderTraversal<MachineFunction*> RPOT(&MF);
+
+ for (MachineBasicBlock *MBB : RPOT) {
+ bool InWWM = false;
+ for (MachineInstr &MI : *MBB) {
+ if (MI.getOpcode() == AMDGPU::V_SET_INACTIVE_B32 ||
+ MI.getOpcode() == AMDGPU::V_SET_INACTIVE_B64)
+ RegsAssigned |= processDef(MI.getOperand(0));
+
+ if (MI.getOpcode() == AMDGPU::ENTER_WWM) {
+ LLVM_DEBUG(dbgs() << "entering WWM region: " << MI << "\n");
+ InWWM = true;
+ continue;
+ }
+
+ if (MI.getOpcode() == AMDGPU::EXIT_WWM) {
+ LLVM_DEBUG(dbgs() << "exiting WWM region: " << MI << "\n");
+ InWWM = false;
+ }
+
+ if (!InWWM)
+ continue;
+
+ LLVM_DEBUG(dbgs() << "processing " << MI << "\n");
+
+ for (MachineOperand &DefOpnd : MI.defs()) {
+ RegsAssigned |= processDef(DefOpnd);
+ }
+ }
+ }
+
+ if (!RegsAssigned)
+ return false;
+
+ rewriteRegs(MF);
+ return true;
+}
OpenPOWER on IntegriCloud