//===-- SIFixWWMLiveness.cpp - Fix WWM live intervals ---------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // /// \file /// Computations in WWM can overwrite values in inactive channels for /// variables that the register allocator thinks are dead. This pass adds fake /// uses of those variables to their def(s) to make sure that they aren't /// overwritten. /// /// As an example, consider this snippet: /// %vgpr0 = V_MOV_B32_e32 0.0 /// if (...) { /// %vgpr1 = ... /// %vgpr2 = WWM killed %vgpr1 /// ... = killed %vgpr2 /// %vgpr0 = V_MOV_B32_e32 1.0 /// } /// ... = %vgpr0 /// /// The live intervals of %vgpr0 don't overlap with those of %vgpr1. Normally, /// we can safely allocate %vgpr0 and %vgpr1 in the same register, since /// writing %vgpr1 would only write to channels that would be clobbered by the /// second write to %vgpr0 anyways. But if %vgpr1 is written with WWM enabled, /// it would clobber even the inactive channels for which the if-condition is /// false, for which %vgpr0 is supposed to be 0. This pass adds an implicit use /// of %vgpr0 to its def to make sure they aren't allocated to the /// same register. /// /// In general, we need to figure out what registers might have their inactive /// channels which are eventually used accidentally clobbered by a WWM /// instruction. We do that by spotting three separate cases of registers: /// /// 1. A "then phi": the value resulting from phi elimination of a phi node at /// the end of an if..endif. If there is WWM code in the "then", then we /// make the def at the end of the "then" branch a partial def by adding an /// implicit use of the register. /// /// 2. A "loop exit register": a value written inside a loop but used outside the /// loop, where there is WWM code inside the loop (the case in the example /// above). We add an implicit_def of the register in the loop pre-header, /// and make the original def a partial def by adding an implicit use of the /// register. /// /// 3. A "loop exit phi": the value resulting from phi elimination of a phi node /// in a loop header. If there is WWM code inside the loop, then we make all /// defs inside the loop partial defs by adding an implicit use of the /// register on each one. /// /// Note that we do not need to consider an if..else..endif phi. We only need to /// consider non-uniform control flow, and control flow structurization would /// have transformed a non-uniform if..else..endif into two if..endifs. /// /// The analysis to detect these cases relies on a property of the MIR /// arising from this pass running straight after PHIElimination and before any /// coalescing: that any virtual register with more than one definition must be /// the new register added to lower a phi node by PHIElimination. /// /// FIXME: We should detect whether a register in one of the above categories is /// already live at the WWM code before deciding to add the implicit uses to /// synthesize its liveness. /// /// FIXME: I believe this whole scheme may be flawed due to the possibility of /// the register allocator doing live interval splitting. /// //===----------------------------------------------------------------------===// #include "AMDGPU.h" #include "AMDGPUSubtarget.h" #include "SIInstrInfo.h" #include "SIRegisterInfo.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SparseBitVector.h" #include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetRegisterInfo.h" using namespace llvm; #define DEBUG_TYPE "si-fix-wwm-liveness" namespace { class SIFixWWMLiveness : public MachineFunctionPass { private: MachineDominatorTree *DomTree; MachineLoopInfo *LoopInfo; LiveIntervals *LIS = nullptr; const SIInstrInfo *TII; const SIRegisterInfo *TRI; MachineRegisterInfo *MRI; std::vector WWMs; std::vector ThenDefs; std::vector> LoopExitDefs; std::vector> LoopPhiDefs; public: static char ID; SIFixWWMLiveness() : MachineFunctionPass(ID) { initializeSIFixWWMLivenessPass(*PassRegistry::getPassRegistry()); } bool runOnMachineFunction(MachineFunction &MF) override; StringRef getPassName() const override { return "SI Fix WWM Liveness"; } void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequiredID(MachineDominatorsID); AU.addRequiredID(MachineLoopInfoID); // Should preserve the same set that TwoAddressInstructions does. AU.addPreserved(); AU.addPreserved(); AU.addPreservedID(LiveVariablesID); AU.addPreservedID(MachineLoopInfoID); AU.addPreservedID(MachineDominatorsID); AU.setPreservesCFG(); MachineFunctionPass::getAnalysisUsage(AU); } private: void processDef(MachineOperand &DefOpnd); bool processThenDef(MachineOperand *DefOpnd); bool processLoopExitDef(MachineOperand *DefOpnd, MachineLoop *Loop); bool processLoopPhiDef(MachineOperand *DefOpnd, MachineLoop *Loop); }; } // End anonymous namespace. INITIALIZE_PASS_BEGIN(SIFixWWMLiveness, DEBUG_TYPE, "SI fix WWM liveness", false, false) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) INITIALIZE_PASS_END(SIFixWWMLiveness, DEBUG_TYPE, "SI fix WWM liveness", false, false) char SIFixWWMLiveness::ID = 0; char &llvm::SIFixWWMLivenessID = SIFixWWMLiveness::ID; FunctionPass *llvm::createSIFixWWMLivenessPass() { return new SIFixWWMLiveness(); } bool SIFixWWMLiveness::runOnMachineFunction(MachineFunction &MF) { LLVM_DEBUG(dbgs() << "SIFixWWMLiveness: function " << MF.getName() << "\n"); bool Modified = false; // This doesn't actually need LiveIntervals, but we can preserve them. LIS = getAnalysisIfAvailable(); const GCNSubtarget &ST = MF.getSubtarget(); TII = ST.getInstrInfo(); TRI = &TII->getRegisterInfo(); MRI = &MF.getRegInfo(); DomTree = &getAnalysis(); LoopInfo = &getAnalysis(); // Scan the function to find the WWM sections and the candidate registers for // having liveness modified. for (MachineBasicBlock &MBB : MF) { for (MachineInstr &MI : MBB) { if (MI.getOpcode() == AMDGPU::EXIT_WWM) WWMs.push_back(&MI); else { for (MachineOperand &DefOpnd : MI.defs()) { if (DefOpnd.isReg()) { unsigned Reg = DefOpnd.getReg(); if (TRI->isVGPR(*MRI, Reg)) processDef(DefOpnd); } } } } } if (!WWMs.empty()) { // Synthesize liveness over WWM sections as required. for (auto ThenDef : ThenDefs) Modified |= processThenDef(ThenDef); for (auto LoopExitDef : LoopExitDefs) Modified |= processLoopExitDef(LoopExitDef.first, LoopExitDef.second); for (auto LoopPhiDef : LoopPhiDefs) Modified |= processLoopPhiDef(LoopPhiDef.first, LoopPhiDef.second); } WWMs.clear(); ThenDefs.clear(); LoopExitDefs.clear(); LoopPhiDefs.clear(); return Modified; } // During the function scan, process an operand that defines a VGPR. // This categorizes the register and puts it in the appropriate list for later // use when processing a WWM section. void SIFixWWMLiveness::processDef(MachineOperand &DefOpnd) { unsigned Reg = DefOpnd.getReg(); // Get all the defining instructions. For convenience, make Defs[0] the def // we are on now. SmallVector Defs; Defs.push_back(DefOpnd.getParent()); for (auto &MI : MRI->def_instructions(Reg)) { if (&MI != DefOpnd.getParent()) Defs.push_back(&MI); } // Check whether this def dominates all the others. If not, ignore this def. // Either it is going to be processed when the scan encounters its other def // that dominates all defs, or there is no def that dominates all others. // The latter case is an eliminated phi from an if..else..endif or similar, // which must be for uniform control flow so can be ignored. // Because this pass runs shortly after PHIElimination, we assume that any // multi-def register is a lowered phi, and thus has each def in a separate // basic block. for (unsigned I = 1; I != Defs.size(); ++I) { if (!DomTree->dominates(Defs[0]->getParent(), Defs[I]->getParent())) return; } // Check for the case of an if..endif lowered phi: It has two defs, one // dominates the other, and there is a single use in a successor of the // dominant def. // Later we will spot any WWM code inside // the "then" clause and turn the second def into a partial def so its // liveness goes through the WWM code in the "then" clause. if (Defs.size() == 2) { auto DomDefBlock = Defs[0]->getParent(); if (DomDefBlock->succ_size() == 2 && MRI->hasOneUse(Reg)) { auto UseBlock = MRI->use_begin(Reg)->getParent()->getParent(); for (auto Succ : DomDefBlock->successors()) { if (Succ == UseBlock) { LLVM_DEBUG(dbgs() << printReg(Reg, TRI) << " is a then phi reg\n"); ThenDefs.push_back(&DefOpnd); return; } } } } // Check for the case of a non-lowered-phi register (single def) that exits // a loop, that is, it has a use that is outside a loop that the def is // inside. We find the outermost loop that the def is inside but a use is // outside. Later we will spot any WWM code inside that loop and then make // the def a partial def so its liveness goes round the loop and through the // WWM code. if (Defs.size() == 1) { auto Loop = LoopInfo->getLoopFor(Defs[0]->getParent()); if (!Loop) return; bool IsLoopExit = false; for (auto &Use : MRI->use_instructions(Reg)) { auto UseBlock = Use.getParent(); if (Loop->contains(UseBlock)) continue; IsLoopExit = true; while (auto Parent = Loop->getParentLoop()) { if (Parent->contains(UseBlock)) break; Loop = Parent; } } if (!IsLoopExit) return; LLVM_DEBUG(dbgs() << printReg(Reg, TRI) << " is a loop exit reg with loop header at " << "bb." << Loop->getHeader()->getNumber() << "\n"); LoopExitDefs.push_back(std::pair( &DefOpnd, Loop)); return; } // Check for the case of a lowered single-preheader-loop phi, that is, a // multi-def register where the dominating def is in the loop pre-header and // all other defs are in backedges. Later we will spot any WWM code inside // that loop and then make the backedge defs partial defs so the liveness // goes through the WWM code. // Note that we are ignoring multi-preheader loops on the basis that the // structurizer does not allow that for non-uniform loops. // There must be a single use in the loop header. if (!MRI->hasOneUse(Reg)) return; auto UseBlock = MRI->use_begin(Reg)->getParent()->getParent(); auto Loop = LoopInfo->getLoopFor(UseBlock); if (!Loop || Loop->getHeader() != UseBlock || Loop->contains(Defs[0]->getParent())) { LLVM_DEBUG(dbgs() << printReg(Reg, TRI) << " is multi-def but single use not in loop header\n"); return; } for (unsigned I = 1; I != Defs.size(); ++I) { if (!Loop->contains(Defs[I]->getParent())) return; } LLVM_DEBUG(dbgs() << printReg(Reg, TRI) << " is a loop phi reg with loop header at " << "bb." << Loop->getHeader()->getNumber() << "\n"); LoopPhiDefs.push_back( std::pair(&DefOpnd, Loop)); } // Process a then phi def: It has two defs, one dominates the other, and there // is a single use in a successor of the dominant def. Here we spot any WWM // code inside the "then" clause and turn the second def into a partial def so // its liveness goes through the WWM code in the "then" clause. bool SIFixWWMLiveness::processThenDef(MachineOperand *DefOpnd) { LLVM_DEBUG(dbgs() << "Processing then def: " << *DefOpnd->getParent()); if (DefOpnd->getParent()->getOpcode() == TargetOpcode::IMPLICIT_DEF) { // Ignore if dominating def is undef. LLVM_DEBUG(dbgs() << " ignoring as dominating def is undef\n"); return false; } unsigned Reg = DefOpnd->getReg(); // Get the use block, which is the endif block. auto UseBlock = MRI->use_instr_begin(Reg)->getParent(); // Check whether there is WWM code inside the then branch. The WWM code must // be dominated by the if but not dominated by the endif. bool ContainsWWM = false; for (auto WWM : WWMs) { if (DomTree->dominates(DefOpnd->getParent()->getParent(), WWM->getParent()) && !DomTree->dominates(UseBlock, WWM->getParent())) { LLVM_DEBUG(dbgs() << " contains WWM: " << *WWM); ContainsWWM = true; break; } } if (!ContainsWWM) return false; // Get the other def. MachineInstr *OtherDef = nullptr; for (auto &MI : MRI->def_instructions(Reg)) { if (&MI != DefOpnd->getParent()) OtherDef = &MI; } // Make it a partial def. OtherDef->addOperand(MachineOperand::CreateReg(Reg, false, /*isImp=*/true)); LLVM_DEBUG(dbgs() << *OtherDef); return true; } // Process a loop exit def, that is, a register with a single use in a loop // that has a use outside the loop. Here we spot any WWM code inside that loop // and then make the def a partial def so its liveness goes round the loop and // through the WWM code. bool SIFixWWMLiveness::processLoopExitDef(MachineOperand *DefOpnd, MachineLoop *Loop) { LLVM_DEBUG(dbgs() << "Processing loop exit def: " << *DefOpnd->getParent()); // Check whether there is WWM code inside the loop. bool ContainsWWM = false; for (auto WWM : WWMs) { if (Loop->contains(WWM->getParent())) { LLVM_DEBUG(dbgs() << " contains WWM: " << *WWM); ContainsWWM = true; break; } } if (!ContainsWWM) return false; unsigned Reg = DefOpnd->getReg(); // Add a new implicit_def in loop preheader(s). for (auto Pred : Loop->getHeader()->predecessors()) { if (!Loop->contains(Pred)) { auto ImplicitDef = BuildMI(*Pred, Pred->getFirstTerminator(), DebugLoc(), TII->get(TargetOpcode::IMPLICIT_DEF), Reg); LLVM_DEBUG(dbgs() << *ImplicitDef); (void)ImplicitDef; } } // Make the original def partial. DefOpnd->getParent()->addOperand(MachineOperand::CreateReg( Reg, false, /*isImp=*/true)); LLVM_DEBUG(dbgs() << *DefOpnd->getParent()); return true; } // Process a loop phi def, that is, a multi-def register where the dominating // def is in the loop pre-header and all other defs are in backedges. Here we // spot any WWM code inside that loop and then make the backedge defs partial // defs so the liveness goes through the WWM code. bool SIFixWWMLiveness::processLoopPhiDef(MachineOperand *DefOpnd, MachineLoop *Loop) { LLVM_DEBUG(dbgs() << "Processing loop phi def: " << *DefOpnd->getParent()); // Check whether there is WWM code inside the loop. bool ContainsWWM = false; for (auto WWM : WWMs) { if (Loop->contains(WWM->getParent())) { LLVM_DEBUG(dbgs() << " contains WWM: " << *WWM); ContainsWWM = true; break; } } if (!ContainsWWM) return false; unsigned Reg = DefOpnd->getReg(); // Remove kill mark from uses. for (auto &Use : MRI->use_operands(Reg)) Use.setIsKill(false); // Make all defs except the dominating one partial defs. SmallVector Defs; for (auto &Def : MRI->def_instructions(Reg)) Defs.push_back(&Def); for (auto Def : Defs) { if (DefOpnd->getParent() == Def) continue; Def->addOperand(MachineOperand::CreateReg(Reg, false, /*isImp=*/true)); LLVM_DEBUG(dbgs() << *Def); } return true; }