summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/AMDGPU/SIFixWWMLiveness.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIFixWWMLiveness.cpp')
-rw-r--r--llvm/lib/Target/AMDGPU/SIFixWWMLiveness.cpp202
1 files changed, 202 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIFixWWMLiveness.cpp b/llvm/lib/Target/AMDGPU/SIFixWWMLiveness.cpp
new file mode 100644
index 00000000000..8b985637b08
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/SIFixWWMLiveness.cpp
@@ -0,0 +1,202 @@
+//===-- SIFixWWMLiveness.cpp - Fix WWM live intervals ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Computations in WWM can overwrite values in inactive channels for
+/// variables that the register allocator thinks are dead. This pass adds fake
+/// uses of those variables to WWM instructions to make sure that they aren't
+/// overwritten.
+///
+/// As an example, consider this snippet:
+/// %vgpr0 = V_MOV_B32_e32 0.0
+/// if (...) {
+/// %vgpr1 = ...
+/// %vgpr2 = WWM %vgpr1<kill>
+/// ... = %vgpr2<kill>
+/// %vgpr0 = V_MOV_B32_e32 1.0
+/// }
+/// ... = %vgpr0
+///
+/// The live intervals of %vgpr0 don't overlap with those of %vgpr1. Normally,
+/// we can safely allocate %vgpr0 and %vgpr1 in the same register, since
+/// writing %vgpr1 would only write to channels that would be clobbered by the
+/// second write to %vgpr0 anyways. But if %vgpr1 is written with WWM enabled,
+/// it would clobber even the inactive channels for which the if-condition is
+/// false, for which %vgpr0 is supposed to be 0. This pass adds an implicit use
+/// of %vgpr0 to the WWM instruction to make sure they aren't allocated to the
+/// same register.
+///
+/// In general, we need to figure out which registers have inactive channels
+/// that are eventually used but might be accidentally clobbered by a WWM
+/// instruction. We approximate this using two conditions:
+///
+/// 1. A definition of the variable reaches the WWM instruction.
+/// 2. The variable would be live at the WWM instruction if all its defs were
+/// partial defs (i.e. considered as a use), ignoring normal uses.
+///
+/// If a register matches both conditions, then we add an implicit use of it to
+/// the WWM instruction. Condition #2 is the heart of the matter: every
+/// definition is really a partial definition, since every VALU instruction is
+/// implicitly predicated. We can usually ignore this, but WWM forces us not
+/// to. Condition #1 prevents false positives if the variable is undefined at
+/// the WWM instruction anyways. This is overly conservative in certain cases,
+/// especially in uniform control flow, but this is a workaround anyways until
+/// LLVM gains the notion of predicated uses and definitions of variables.
+///
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
+#include "SIInstrInfo.h"
+#include "SIRegisterInfo.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SparseBitVector.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "si-fix-wwm-liveness"
+
+namespace {
+
+/// Machine function pass that adds implicit VGPR uses to EXIT_WWM
+/// instructions, so that registers whose inactive channels must survive the
+/// WWM code are not allocated on top of registers written under WWM.
+class SIFixWWMLiveness : public MachineFunctionPass {
+private:
+  // All three pointers are reassigned at the top of runOnMachineFunction.
+  // They start out null so that no member is ever read uninitialized.
+  LiveIntervals *LIS = nullptr; // Stays null when the analysis is unavailable.
+  const SIRegisterInfo *TRI = nullptr;
+  MachineRegisterInfo *MRI = nullptr;
+
+public:
+  static char ID;
+
+  SIFixWWMLiveness() : MachineFunctionPass(ID) {
+    initializeSIFixWWMLivenessPass(*PassRegistry::getPassRegistry());
+  }
+
+  /// Walk every instruction of \p MF and process each EXIT_WWM found.
+  /// \returns true if any instruction was modified.
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  /// Add an implicit use to \p WWM for every VGPR that has a def collected
+  /// both from the code following \p WWM and from the code preceding it.
+  /// \returns true if at least one operand was added.
+  bool runOnWWMInstruction(MachineInstr &WWM);
+
+  /// Set the bit in \p Regs for each VGPR among the operands of MI.defs().
+  void addDefs(const MachineInstr &MI, SparseBitVector<> &Regs);
+
+  StringRef getPassName() const override { return "SI Fix WWM Liveness"; }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    // Should preserve the same set that TwoAddressInstructions does.
+    AU.addPreserved<SlotIndexes>();
+    AU.addPreserved<LiveIntervals>();
+    AU.addPreservedID(LiveVariablesID);
+    AU.addPreservedID(MachineLoopInfoID);
+    AU.addPreservedID(MachineDominatorsID);
+    AU.setPreservesCFG();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+};
+
+} // End anonymous namespace.
+
+INITIALIZE_PASS(SIFixWWMLiveness, DEBUG_TYPE,
+ "SI fix WWM liveness", false, false) // NOTE(review): presumably provides initializeSIFixWWMLivenessPass() - confirm
+
+char SIFixWWMLiveness::ID = 0; // Definition of the static member handed to MachineFunctionPass(ID).
+
+char &llvm::SIFixWWMLivenessID = SIFixWWMLiveness::ID; // Reference to the same ID object, in namespace llvm.
+
+/// Factory function: builds a new SIFixWWMLiveness instance on the heap and
+/// hands it back through the FunctionPass base pointer.
+FunctionPass *llvm::createSIFixWWMLivenessPass() {
+  FunctionPass *Pass = new SIFixWWMLiveness();
+  return Pass;
+}
+
+/// Record in \p Regs every VGPR found among the operands of \p MI.defs().
+/// Operands that are not registers, and registers that TRI does not classify
+/// as VGPRs, are ignored.
+void SIFixWWMLiveness::addDefs(const MachineInstr &MI,
+                               SparseBitVector<> &Regs) {
+  for (const MachineOperand &Def : MI.defs()) {
+    if (!Def.isReg())
+      continue;
+
+    const unsigned DefReg = Def.getReg();
+    if (!TRI->isVGPR(*MRI, DefReg))
+      continue;
+
+    Regs.set(DefReg);
+  }
+}
+
+/// Add implicit uses to \p WWM for the VGPRs that must stay live across it.
+///
+/// A register qualifies when addDefs finds a def of it in both of these sets:
+///  - "after": \p WWM itself and the rest of its block, plus every other block
+///    visited by a forward depth-first walk starting at that block;
+///  - "before": the instructions preceding \p WWM in its block, plus every
+///    other block visited by an inverse depth-first walk from that block.
+///
+/// If LiveIntervals is available, the interval of each affected virtual
+/// register is recomputed so the analysis stays valid.
+///
+/// \returns true if at least one implicit use was added.
+bool SIFixWWMLiveness::runOnWWMInstruction(MachineInstr &WWM) {
+  MachineBasicBlock *MBB = WWM.getParent();
+
+  // Compute the registers that are live out of WWM by figuring out which defs
+  // are reachable from WWM.
+  SparseBitVector<> LiveOut;
+
+  for (auto II = MachineBasicBlock::iterator(WWM), IE = MBB->end(); II != IE;
+       ++II) {
+    addDefs(*II, LiveOut);
+  }
+
+  // The walk begins at MBB itself; the pre-increment skips it because only the
+  // tail of MBB (handled above) belongs in this set.
+  for (df_iterator<MachineBasicBlock *> I = ++df_begin(MBB),
+                                        E = df_end(MBB);
+       I != E; ++I) {
+    for (const MachineInstr &MI : **I) {
+      addDefs(MI, LiveOut);
+    }
+  }
+
+  // Compute the registers whose defs reach WWM.
+  SparseBitVector<> Reachable;
+
+  // Start one past WWM in reverse order, i.e. at the instruction before it.
+  for (auto II = ++MachineBasicBlock::reverse_iterator(WWM),
+            IE = MBB->rend();
+       II != IE; ++II) {
+    addDefs(*II, Reachable);
+  }
+
+  // Same skipping of MBB as above, this time walking predecessors.
+  for (idf_iterator<MachineBasicBlock *> I = ++idf_begin(MBB),
+                                         E = idf_end(MBB);
+       I != E; ++I) {
+    for (const MachineInstr &MI : **I) {
+      addDefs(MI, Reachable);
+    }
+  }
+
+  // Find the intersection, and add implicit uses.
+  LiveOut &= Reachable;
+
+  bool Modified = false;
+  for (unsigned Reg : LiveOut) {
+    WWM.addOperand(MachineOperand::CreateReg(Reg, false, /*isImp=*/true));
+    // addDefs does not filter out physical registers, but the interval calls
+    // below are only valid for virtual registers, so physical ones are skipped
+    // here (their register-unit liveness is not refreshed by this pass).
+    if (LIS && TargetRegisterInfo::isVirtualRegister(Reg)) {
+      // FIXME: is there a better way to update the live interval?
+      LIS->removeInterval(Reg);
+      LIS->createAndComputeVirtRegInterval(Reg);
+    }
+    Modified = true;
+  }
+
+  return Modified;
+}
+
+/// Pass entry point: caches the per-function helpers, then hands every
+/// EXIT_WWM instruction in \p MF to runOnWWMInstruction.
+/// \returns true if any of those calls reported a modification.
+bool SIFixWWMLiveness::runOnMachineFunction(MachineFunction &MF) {
+  // LiveIntervals is not required by the pass itself; it is fetched only so
+  // that it can be kept up to date when it happens to be available.
+  LIS = getAnalysisIfAvailable<LiveIntervals>();
+
+  const SISubtarget &Subtarget = MF.getSubtarget<SISubtarget>();
+  const SIInstrInfo *InstrInfo = Subtarget.getInstrInfo();
+  TRI = &InstrInfo->getRegisterInfo();
+  MRI = &MF.getRegInfo();
+
+  bool Changed = false;
+  for (MachineBasicBlock &Block : MF) {
+    for (MachineInstr &Inst : Block) {
+      if (Inst.getOpcode() != AMDGPU::EXIT_WWM)
+        continue;
+      if (runOnWWMInstruction(Inst))
+        Changed = true;
+    }
+  }
+
+  return Changed;
+}
OpenPOWER on IntegriCloud