diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIFixWWMLiveness.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIFixWWMLiveness.cpp | 417 |
1 files changed, 0 insertions, 417 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIFixWWMLiveness.cpp b/llvm/lib/Target/AMDGPU/SIFixWWMLiveness.cpp deleted file mode 100644 index 01c76327aab..00000000000 --- a/llvm/lib/Target/AMDGPU/SIFixWWMLiveness.cpp +++ /dev/null @@ -1,417 +0,0 @@ -//===-- SIFixWWMLiveness.cpp - Fix WWM live intervals ---------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -/// \file -/// Computations in WWM can overwrite values in inactive channels for -/// variables that the register allocator thinks are dead. This pass adds fake -/// uses of those variables to their def(s) to make sure that they aren't -/// overwritten. -/// -/// As an example, consider this snippet: -/// %vgpr0 = V_MOV_B32_e32 0.0 -/// if (...) { -/// %vgpr1 = ... -/// %vgpr2 = WWM killed %vgpr1 -/// ... = killed %vgpr2 -/// %vgpr0 = V_MOV_B32_e32 1.0 -/// } -/// ... = %vgpr0 -/// -/// The live intervals of %vgpr0 don't overlap with those of %vgpr1. Normally, -/// we can safely allocate %vgpr0 and %vgpr1 in the same register, since -/// writing %vgpr1 would only write to channels that would be clobbered by the -/// second write to %vgpr0 anyways. But if %vgpr1 is written with WWM enabled, -/// it would clobber even the inactive channels for which the if-condition is -/// false, for which %vgpr0 is supposed to be 0. This pass adds an implicit use -/// of %vgpr0 to its def to make sure they aren't allocated to the -/// same register. -/// -/// In general, we need to figure out what registers might have their inactive -/// channels which are eventually used accidentally clobbered by a WWM -/// instruction. We do that by spotting three separate cases of registers: -/// -/// 1. A "then phi": the value resulting from phi elimination of a phi node at -/// the end of an if..endif. If there is WWM code in the "then", then we -/// make the def at the end of the "then" branch a partial def by adding an -/// implicit use of the register. -/// -/// 2. A "loop exit register": a value written inside a loop but used outside the -/// loop, where there is WWM code inside the loop (the case in the example -/// above). We add an implicit_def of the register in the loop pre-header, -/// and make the original def a partial def by adding an implicit use of the -/// register. -/// -/// 3. A "loop exit phi": the value resulting from phi elimination of a phi node -/// in a loop header. If there is WWM code inside the loop, then we make all -/// defs inside the loop partial defs by adding an implicit use of the -/// register on each one. -/// -/// Note that we do not need to consider an if..else..endif phi. We only need to -/// consider non-uniform control flow, and control flow structurization would -/// have transformed a non-uniform if..else..endif into two if..endifs. -/// -/// The analysis to detect these cases relies on a property of the MIR -/// arising from this pass running straight after PHIElimination and before any -/// coalescing: that any virtual register with more than one definition must be -/// the new register added to lower a phi node by PHIElimination. -/// -/// FIXME: We should detect whether a register in one of the above categories is -/// already live at the WWM code before deciding to add the implicit uses to -/// synthesize its liveness. -/// -/// FIXME: I believe this whole scheme may be flawed due to the possibility of -/// the register allocator doing live interval splitting. -/// -//===----------------------------------------------------------------------===// - -#include "AMDGPU.h" -#include "AMDGPUSubtarget.h" -#include "SIInstrInfo.h" -#include "SIRegisterInfo.h" -#include "MCTargetDesc/AMDGPUMCTargetDesc.h" -#include "llvm/ADT/DepthFirstIterator.h" -#include "llvm/ADT/SparseBitVector.h" -#include "llvm/CodeGen/LiveIntervals.h" -#include "llvm/CodeGen/MachineDominators.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineLoopInfo.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/TargetRegisterInfo.h" - -using namespace llvm; - -#define DEBUG_TYPE "si-fix-wwm-liveness" - -namespace { - -class SIFixWWMLiveness : public MachineFunctionPass { -private: - MachineDominatorTree *DomTree; - MachineLoopInfo *LoopInfo; - LiveIntervals *LIS = nullptr; - const SIInstrInfo *TII; - const SIRegisterInfo *TRI; - MachineRegisterInfo *MRI; - - std::vector<MachineInstr *> WWMs; - std::vector<MachineOperand *> ThenDefs; - std::vector<std::pair<MachineOperand *, MachineLoop *>> LoopExitDefs; - std::vector<std::pair<MachineOperand *, MachineLoop *>> LoopPhiDefs; - -public: - static char ID; - - SIFixWWMLiveness() : MachineFunctionPass(ID) { - initializeSIFixWWMLivenessPass(*PassRegistry::getPassRegistry()); - } - - bool runOnMachineFunction(MachineFunction &MF) override; - - StringRef getPassName() const override { return "SI Fix WWM Liveness"; } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequiredID(MachineDominatorsID); - AU.addRequiredID(MachineLoopInfoID); - // Should preserve the same set that TwoAddressInstructions does. - AU.addPreserved<SlotIndexes>(); - AU.addPreserved<LiveIntervals>(); - AU.addPreservedID(LiveVariablesID); - AU.addPreservedID(MachineLoopInfoID); - AU.addPreservedID(MachineDominatorsID); - AU.setPreservesCFG(); - MachineFunctionPass::getAnalysisUsage(AU); - } - -private: - void processDef(MachineOperand &DefOpnd); - bool processThenDef(MachineOperand *DefOpnd); - bool processLoopExitDef(MachineOperand *DefOpnd, MachineLoop *Loop); - bool processLoopPhiDef(MachineOperand *DefOpnd, MachineLoop *Loop); -}; - -} // End anonymous namespace. - -INITIALIZE_PASS_BEGIN(SIFixWWMLiveness, DEBUG_TYPE, - "SI fix WWM liveness", false, false) -INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) -INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) -INITIALIZE_PASS_END(SIFixWWMLiveness, DEBUG_TYPE, - "SI fix WWM liveness", false, false) - -char SIFixWWMLiveness::ID = 0; - -char &llvm::SIFixWWMLivenessID = SIFixWWMLiveness::ID; - -FunctionPass *llvm::createSIFixWWMLivenessPass() { - return new SIFixWWMLiveness(); -} - -bool SIFixWWMLiveness::runOnMachineFunction(MachineFunction &MF) { - LLVM_DEBUG(dbgs() << "SIFixWWMLiveness: function " << MF.getName() << "\n"); - bool Modified = false; - - // This doesn't actually need LiveIntervals, but we can preserve them. - LIS = getAnalysisIfAvailable<LiveIntervals>(); - - const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); - - TII = ST.getInstrInfo(); - TRI = &TII->getRegisterInfo(); - MRI = &MF.getRegInfo(); - - DomTree = &getAnalysis<MachineDominatorTree>(); - LoopInfo = &getAnalysis<MachineLoopInfo>(); - - // Scan the function to find the WWM sections and the candidate registers for - // having liveness modified. - for (MachineBasicBlock &MBB : MF) { - for (MachineInstr &MI : MBB) { - if (MI.getOpcode() == AMDGPU::EXIT_WWM) - WWMs.push_back(&MI); - else { - for (MachineOperand &DefOpnd : MI.defs()) { - if (DefOpnd.isReg()) { - unsigned Reg = DefOpnd.getReg(); - if (TRI->isVGPR(*MRI, Reg)) - processDef(DefOpnd); - } - } - } - } - } - if (!WWMs.empty()) { - // Synthesize liveness over WWM sections as required. - for (auto ThenDef : ThenDefs) - Modified |= processThenDef(ThenDef); - for (auto LoopExitDef : LoopExitDefs) - Modified |= processLoopExitDef(LoopExitDef.first, LoopExitDef.second); - for (auto LoopPhiDef : LoopPhiDefs) - Modified |= processLoopPhiDef(LoopPhiDef.first, LoopPhiDef.second); - } - - WWMs.clear(); - ThenDefs.clear(); - LoopExitDefs.clear(); - LoopPhiDefs.clear(); - - return Modified; -} - -// During the function scan, process an operand that defines a VGPR. -// This categorizes the register and puts it in the appropriate list for later -// use when processing a WWM section. -void SIFixWWMLiveness::processDef(MachineOperand &DefOpnd) { - unsigned Reg = DefOpnd.getReg(); - // Get all the defining instructions. For convenience, make Defs[0] the def - // we are on now. - SmallVector<const MachineInstr *, 4> Defs; - Defs.push_back(DefOpnd.getParent()); - for (auto &MI : MRI->def_instructions(Reg)) { - if (&MI != DefOpnd.getParent()) - Defs.push_back(&MI); - } - // Check whether this def dominates all the others. If not, ignore this def. - // Either it is going to be processed when the scan encounters its other def - // that dominates all defs, or there is no def that dominates all others. - // The latter case is an eliminated phi from an if..else..endif or similar, - // which must be for uniform control flow so can be ignored. - // Because this pass runs shortly after PHIElimination, we assume that any - // multi-def register is a lowered phi, and thus has each def in a separate - // basic block. - for (unsigned I = 1; I != Defs.size(); ++I) { - if (!DomTree->dominates(Defs[0]->getParent(), Defs[I]->getParent())) - return; - } - // Check for the case of an if..endif lowered phi: It has two defs, one - // dominates the other, and there is a single use in a successor of the - // dominant def. - // Later we will spot any WWM code inside - // the "then" clause and turn the second def into a partial def so its - // liveness goes through the WWM code in the "then" clause. - if (Defs.size() == 2) { - auto DomDefBlock = Defs[0]->getParent(); - if (DomDefBlock->succ_size() == 2 && MRI->hasOneUse(Reg)) { - auto UseBlock = MRI->use_begin(Reg)->getParent()->getParent(); - for (auto Succ : DomDefBlock->successors()) { - if (Succ == UseBlock) { - LLVM_DEBUG(dbgs() << printReg(Reg, TRI) << " is a then phi reg\n"); - ThenDefs.push_back(&DefOpnd); - return; - } - } - } - } - // Check for the case of a non-lowered-phi register (single def) that exits - // a loop, that is, it has a use that is outside a loop that the def is - // inside. We find the outermost loop that the def is inside but a use is - // outside. Later we will spot any WWM code inside that loop and then make - // the def a partial def so its liveness goes round the loop and through the - // WWM code. - if (Defs.size() == 1) { - auto Loop = LoopInfo->getLoopFor(Defs[0]->getParent()); - if (!Loop) - return; - bool IsLoopExit = false; - for (auto &Use : MRI->use_instructions(Reg)) { - auto UseBlock = Use.getParent(); - if (Loop->contains(UseBlock)) - continue; - IsLoopExit = true; - while (auto Parent = Loop->getParentLoop()) { - if (Parent->contains(UseBlock)) - break; - Loop = Parent; - } - } - if (!IsLoopExit) - return; - LLVM_DEBUG(dbgs() << printReg(Reg, TRI) - << " is a loop exit reg with loop header at " - << "bb." << Loop->getHeader()->getNumber() << "\n"); - LoopExitDefs.push_back(std::pair<MachineOperand *, MachineLoop *>( - &DefOpnd, Loop)); - return; - } - // Check for the case of a lowered single-preheader-loop phi, that is, a - // multi-def register where the dominating def is in the loop pre-header and - // all other defs are in backedges. Later we will spot any WWM code inside - // that loop and then make the backedge defs partial defs so the liveness - // goes through the WWM code. - // Note that we are ignoring multi-preheader loops on the basis that the - // structurizer does not allow that for non-uniform loops. - // There must be a single use in the loop header. - if (!MRI->hasOneUse(Reg)) - return; - auto UseBlock = MRI->use_begin(Reg)->getParent()->getParent(); - auto Loop = LoopInfo->getLoopFor(UseBlock); - if (!Loop || Loop->getHeader() != UseBlock - || Loop->contains(Defs[0]->getParent())) { - LLVM_DEBUG(dbgs() << printReg(Reg, TRI) - << " is multi-def but single use not in loop header\n"); - return; - } - for (unsigned I = 1; I != Defs.size(); ++I) { - if (!Loop->contains(Defs[I]->getParent())) - return; - } - LLVM_DEBUG(dbgs() << printReg(Reg, TRI) - << " is a loop phi reg with loop header at " - << "bb." << Loop->getHeader()->getNumber() << "\n"); - LoopPhiDefs.push_back( - std::pair<MachineOperand *, MachineLoop *>(&DefOpnd, Loop)); -} - -// Process a then phi def: It has two defs, one dominates the other, and there -// is a single use in a successor of the dominant def. Here we spot any WWM -// code inside the "then" clause and turn the second def into a partial def so -// its liveness goes through the WWM code in the "then" clause. -bool SIFixWWMLiveness::processThenDef(MachineOperand *DefOpnd) { - LLVM_DEBUG(dbgs() << "Processing then def: " << *DefOpnd->getParent()); - if (DefOpnd->getParent()->getOpcode() == TargetOpcode::IMPLICIT_DEF) { - // Ignore if dominating def is undef. - LLVM_DEBUG(dbgs() << " ignoring as dominating def is undef\n"); - return false; - } - unsigned Reg = DefOpnd->getReg(); - // Get the use block, which is the endif block. - auto UseBlock = MRI->use_instr_begin(Reg)->getParent(); - // Check whether there is WWM code inside the then branch. The WWM code must - // be dominated by the if but not dominated by the endif. - bool ContainsWWM = false; - for (auto WWM : WWMs) { - if (DomTree->dominates(DefOpnd->getParent()->getParent(), WWM->getParent()) - && !DomTree->dominates(UseBlock, WWM->getParent())) { - LLVM_DEBUG(dbgs() << " contains WWM: " << *WWM); - ContainsWWM = true; - break; - } - } - if (!ContainsWWM) - return false; - // Get the other def. - MachineInstr *OtherDef = nullptr; - for (auto &MI : MRI->def_instructions(Reg)) { - if (&MI != DefOpnd->getParent()) - OtherDef = &MI; - } - // Make it a partial def. - OtherDef->addOperand(MachineOperand::CreateReg(Reg, false, /*isImp=*/true)); - LLVM_DEBUG(dbgs() << *OtherDef); - return true; -} - -// Process a loop exit def, that is, a register with a single use in a loop -// that has a use outside the loop. Here we spot any WWM code inside that loop -// and then make the def a partial def so its liveness goes round the loop and -// through the WWM code. -bool SIFixWWMLiveness::processLoopExitDef(MachineOperand *DefOpnd, - MachineLoop *Loop) { - LLVM_DEBUG(dbgs() << "Processing loop exit def: " << *DefOpnd->getParent()); - // Check whether there is WWM code inside the loop. - bool ContainsWWM = false; - for (auto WWM : WWMs) { - if (Loop->contains(WWM->getParent())) { - LLVM_DEBUG(dbgs() << " contains WWM: " << *WWM); - ContainsWWM = true; - break; - } - } - if (!ContainsWWM) - return false; - unsigned Reg = DefOpnd->getReg(); - // Add a new implicit_def in loop preheader(s). - for (auto Pred : Loop->getHeader()->predecessors()) { - if (!Loop->contains(Pred)) { - auto ImplicitDef = BuildMI(*Pred, Pred->getFirstTerminator(), DebugLoc(), - TII->get(TargetOpcode::IMPLICIT_DEF), Reg); - LLVM_DEBUG(dbgs() << *ImplicitDef); - (void)ImplicitDef; - } - } - // Make the original def partial. - DefOpnd->getParent()->addOperand(MachineOperand::CreateReg( - Reg, false, /*isImp=*/true)); - LLVM_DEBUG(dbgs() << *DefOpnd->getParent()); - return true; -} - -// Process a loop phi def, that is, a multi-def register where the dominating -// def is in the loop pre-header and all other defs are in backedges. Here we -// spot any WWM code inside that loop and then make the backedge defs partial -// defs so the liveness goes through the WWM code. -bool SIFixWWMLiveness::processLoopPhiDef(MachineOperand *DefOpnd, - MachineLoop *Loop) { - LLVM_DEBUG(dbgs() << "Processing loop phi def: " << *DefOpnd->getParent()); - // Check whether there is WWM code inside the loop. - bool ContainsWWM = false; - for (auto WWM : WWMs) { - if (Loop->contains(WWM->getParent())) { - LLVM_DEBUG(dbgs() << " contains WWM: " << *WWM); - ContainsWWM = true; - break; - } - } - if (!ContainsWWM) - return false; - unsigned Reg = DefOpnd->getReg(); - // Remove kill mark from uses. - for (auto &Use : MRI->use_operands(Reg)) - Use.setIsKill(false); - // Make all defs except the dominating one partial defs. - SmallVector<MachineInstr *, 4> Defs; - for (auto &Def : MRI->def_instructions(Reg)) - Defs.push_back(&Def); - for (auto Def : Defs) { - if (DefOpnd->getParent() == Def) - continue; - Def->addOperand(MachineOperand::CreateReg(Reg, false, /*isImp=*/true)); - LLVM_DEBUG(dbgs() << *Def); - } - return true; -} - |