diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIFixWWMLiveness.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIFixWWMLiveness.cpp | 202 |
1 files changed, 202 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIFixWWMLiveness.cpp b/llvm/lib/Target/AMDGPU/SIFixWWMLiveness.cpp new file mode 100644 index 00000000000..8b985637b08 --- /dev/null +++ b/llvm/lib/Target/AMDGPU/SIFixWWMLiveness.cpp @@ -0,0 +1,202 @@ +//===-- SIFixWWMLiveness.cpp - Fix WWM live intervals ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// \brief Computations in WWM can overwrite values in inactive channels for +/// variables that the register allocator thinks are dead. This pass adds fake +/// uses of those variables to WWM instructions to make sure that they aren't +/// overwritten. +/// +/// As an example, consider this snippet: +/// %vgpr0 = V_MOV_B32_e32 0.0 +/// if (...) { +/// %vgpr1 = ... +/// %vgpr2 = WWM %vgpr1<kill> +/// ... = %vgpr2<kill> +/// %vgpr0 = V_MOV_B32_e32 1.0 +/// } +/// ... = %vgpr0 +/// +/// The live intervals of %vgpr0 don't overlap with those of %vgpr1. Normally, +/// we can safely allocate %vgpr0 and %vgpr1 in the same register, since +/// writing %vgpr1 would only write to channels that would be clobbered by the +/// second write to %vgpr0 anyways. But if %vgpr1 is written with WWM enabled, +/// it would clobber even the inactive channels for which the if-condition is +/// false, for which %vgpr0 is supposed to be 0. This pass adds an implicit use +/// of %vgpr0 to the WWM instruction to make sure they aren't allocated to the +/// same register. +/// +/// In general, we need to figure out what registers might have their inactive +/// channels which are eventually used accidentally clobbered by a WWM +/// instruction. We approximate this using two conditions: +/// +/// 1. A definition of the variable reaches the WWM instruction. +/// 2. The variable would be live at the WWM instruction if all its defs were +/// partial defs (i.e. considered as a use), ignoring normal uses. +/// +/// If a register matches both conditions, then we add an implicit use of it to +/// the WWM instruction. Condition #2 is the heart of the matter: every +/// definition is really a partial definition, since every VALU instruction is +/// implicitly predicated. We can usually ignore this, but WWM forces us not +/// to. Condition #1 prevents false positives if the variable is undefined at +/// the WWM instruction anyways. This is overly conservative in certain cases, +/// especially in uniform control flow, but this is a workaround anyways until +/// LLVM gains the notion of predicated uses and definitions of variables. +/// +//===----------------------------------------------------------------------===// + +#include "AMDGPU.h" +#include "AMDGPUSubtarget.h" +#include "SIInstrInfo.h" +#include "SIRegisterInfo.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/SparseBitVector.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Target/TargetRegisterInfo.h" + +using namespace llvm; + +#define DEBUG_TYPE "si-fix-wwm-liveness" + +namespace { + +class SIFixWWMLiveness : public MachineFunctionPass { +private: + LiveIntervals *LIS = nullptr; + const SIRegisterInfo *TRI; + MachineRegisterInfo *MRI; + +public: + static char ID; + + SIFixWWMLiveness() : MachineFunctionPass(ID) { + initializeSIFixWWMLivenessPass(*PassRegistry::getPassRegistry()); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + + bool runOnWWMInstruction(MachineInstr &MI); + + void addDefs(const MachineInstr &MI, SparseBitVector<> &set); + + StringRef getPassName() const override { return "SI Fix WWM Liveness"; } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + // Should preserve the same set that TwoAddressInstructions does. + AU.addPreserved<SlotIndexes>(); + AU.addPreserved<LiveIntervals>(); + AU.addPreservedID(LiveVariablesID); + AU.addPreservedID(MachineLoopInfoID); + AU.addPreservedID(MachineDominatorsID); + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } +}; + +} // End anonymous namespace. + +INITIALIZE_PASS(SIFixWWMLiveness, DEBUG_TYPE, + "SI fix WWM liveness", false, false) + +char SIFixWWMLiveness::ID = 0; + +char &llvm::SIFixWWMLivenessID = SIFixWWMLiveness::ID; + +FunctionPass *llvm::createSIFixWWMLivenessPass() { + return new SIFixWWMLiveness(); +} + +void SIFixWWMLiveness::addDefs(const MachineInstr &MI, SparseBitVector<> &Regs) +{ + for (const MachineOperand &Op : MI.defs()) { + if (Op.isReg()) { + unsigned Reg = Op.getReg(); + if (TRI->isVGPR(*MRI, Reg)) + Regs.set(Reg); + } + } +} + +bool SIFixWWMLiveness::runOnWWMInstruction(MachineInstr &WWM) { + MachineBasicBlock *MBB = WWM.getParent(); + + // Compute the registers that are live out of MI by figuring out which defs + // are reachable from MI. + SparseBitVector<> LiveOut; + + for (auto II = MachineBasicBlock::iterator(WWM), IE = + MBB->end(); II != IE; ++II) { + addDefs(*II, LiveOut); + } + + for (df_iterator<MachineBasicBlock *> I = ++df_begin(MBB), + E = df_end(MBB); + I != E; ++I) { + for (const MachineInstr &MI : **I) { + addDefs(MI, LiveOut); + } + } + + // Compute the registers that reach MI. + SparseBitVector<> Reachable; + + for (auto II = ++MachineBasicBlock::reverse_iterator(WWM), IE = + MBB->rend(); II != IE; ++II) { + addDefs(*II, Reachable); + } + + for (idf_iterator<MachineBasicBlock *> I = ++idf_begin(MBB), + E = idf_end(MBB); + I != E; ++I) { + for (const MachineInstr &MI : **I) { + addDefs(MI, Reachable); + } + } + + // find the intersection, and add implicit uses. + LiveOut &= Reachable; + + bool Modified = false; + for (unsigned Reg : LiveOut) { + WWM.addOperand(MachineOperand::CreateReg(Reg, false, /*isImp=*/true)); + if (LIS) { + // FIXME: is there a better way to update the live interval? + LIS->removeInterval(Reg); + LIS->createAndComputeVirtRegInterval(Reg); + } + Modified = true; + } + + return Modified; +} + +bool SIFixWWMLiveness::runOnMachineFunction(MachineFunction &MF) { + bool Modified = false; + + // This doesn't actually need LiveIntervals, but we can preserve them. + LIS = getAnalysisIfAvailable<LiveIntervals>(); + + const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); + const SIInstrInfo *TII = ST.getInstrInfo(); + + TRI = &TII->getRegisterInfo(); + MRI = &MF.getRegInfo(); + + for (MachineBasicBlock &MBB : MF) { + for (MachineInstr &MI : MBB) { + if (MI.getOpcode() == AMDGPU::EXIT_WWM) { + Modified |= runOnWWMInstruction(MI); + } + } + } + + return Modified; +} |