summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/include/llvm/CodeGen/Passes.h3
-rw-r--r--llvm/include/llvm/InitializePasses.h1
-rw-r--r--llvm/lib/CodeGen/CMakeLists.txt1
-rw-r--r--llvm/lib/CodeGen/CodeGen.cpp1
-rw-r--r--llvm/lib/CodeGen/DetectDeadLanes.cpp530
-rw-r--r--llvm/lib/CodeGen/Passes.cpp2
-rw-r--r--llvm/test/CodeGen/AMDGPU/detect-dead-lanes.mir408
7 files changed, 946 insertions, 0 deletions
diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h
index 589c0500a41..9b41bcbb3ac 100644
--- a/llvm/include/llvm/CodeGen/Passes.h
+++ b/llvm/include/llvm/CodeGen/Passes.h
@@ -470,6 +470,9 @@ namespace llvm {
/// DeadMachineInstructionElim - This pass removes dead machine instructions.
extern char &DeadMachineInstructionElimID;
+ /// This pass adds dead/undef flags after analyzing subregister lanes.
+ extern char &DetectDeadLanesID;
+
/// FastRegisterAllocation Pass - This pass register allocates as fast as
/// possible. It is best suited for debug code where live ranges are short.
///
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
index 353a7f6487e..c267111c838 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -110,6 +110,7 @@ void initializeDeadInstEliminationPass(PassRegistry&);
void initializeDeadMachineInstructionElimPass(PassRegistry&);
void initializeDelinearizationPass(PassRegistry &);
void initializeDependenceAnalysisPass(PassRegistry&);
+void initializeDetectDeadLanesPass(PassRegistry&);
void initializeDivergenceAnalysisPass(PassRegistry&);
void initializeDomOnlyPrinterPass(PassRegistry&);
void initializeDomOnlyViewerPass(PassRegistry&);
diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt
index 0717c3beefe..2f99802d148 100644
--- a/llvm/lib/CodeGen/CMakeLists.txt
+++ b/llvm/lib/CodeGen/CMakeLists.txt
@@ -12,6 +12,7 @@ add_llvm_library(LLVMCodeGen
CodeGenPrepare.cpp
CriticalAntiDepBreaker.cpp
DeadMachineInstructionElim.cpp
+ DetectDeadLanes.cpp
DFAPacketizer.cpp
DwarfEHPrepare.cpp
EarlyIfConversion.cpp
diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp
index 7ab69d7c326..d604fcfff57 100644
--- a/llvm/lib/CodeGen/CodeGen.cpp
+++ b/llvm/lib/CodeGen/CodeGen.cpp
@@ -24,6 +24,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeBranchFolderPassPass(Registry);
initializeCodeGenPreparePass(Registry);
initializeDeadMachineInstructionElimPass(Registry);
+ initializeDetectDeadLanesPass(Registry);
initializeDwarfEHPreparePass(Registry);
initializeEarlyIfConverterPass(Registry);
initializeExpandISelPseudosPass(Registry);
diff --git a/llvm/lib/CodeGen/DetectDeadLanes.cpp b/llvm/lib/CodeGen/DetectDeadLanes.cpp
new file mode 100644
index 00000000000..2fb7b01f62c
--- /dev/null
+++ b/llvm/lib/CodeGen/DetectDeadLanes.cpp
@@ -0,0 +1,530 @@
+//===- DetectDeadLanes.cpp - SubRegister Lane Usage Analysis --*- C++ -*---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Analysis that tracks defined/used subregister lanes across COPY instructions
+/// and instructions that get lowered to a COPY (PHI, REG_SEQUENCE,
+/// INSERT_SUBREG, EXTRACT_SUBREG).
+/// The information is used to detect dead definitions and the usage of
+/// (completely) undefined values and mark the operands as such.
+/// This pass is necessary because the dead/undef status is not obvious anymore
+/// when subregisters are involved.
+///
+/// Example:
+/// %vreg0 = some definition
+/// %vreg1 = IMPLICIT_DEF
+/// %vreg2 = REG_SEQUENCE %vreg0, sub0, %vreg1, sub1
+/// %vreg3 = EXTRACT_SUBREG %vreg2, sub1
+/// = use %vreg3
+/// The %vreg0 definition is dead and %vreg3 contains an undefined value.
+//
+//===----------------------------------------------------------------------===//
+
+#include <deque>
+#include <vector>
+
+#include "llvm/ADT/BitVector.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/PassRegistry.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "detect-dead-lanes"
+
+namespace {
+
+/// Contains a bitmask of which lanes of a given virtual register are
+/// defined and which ones are actually used.
+struct VRegInfo {
+ LaneBitmask UsedLanes;
+ LaneBitmask DefinedLanes;
+};
+
+class DetectDeadLanes : public MachineFunctionPass {
+public:
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ static char ID;
+ DetectDeadLanes() : MachineFunctionPass(ID) {}
+
+ const char *getPassName() const override { return "Detect Dead Lanes"; }
+
+private:
+ /// Add used lane bits on the register used by operand \p MO. This translates
+ /// the bitmask based on the operands subregister, and puts the register into
+ /// the worklist if any new bits were added.
+ void addUsedLanesOnOperand(const MachineOperand &MO, LaneBitmask UsedLanes);
+
+ /// Given a bitmask \p UsedLanes for the used lanes on a def output of a
+ /// COPY-like instruction determine the lanes used on the use operands
+ /// and call addUsedLanesOnOperand() for them.
+ void transferUsedLanesStep(const MachineOperand &Def, LaneBitmask UsedLanes);
+
+  /// Given a use register operand \p Use and a mask of defined lanes, check
+ /// if the operand belongs to a lowerToCopies() instruction, transfer the
+ /// mask to the def and put the instruction into the worklist.
+ void transferDefinedLanesStep(const MachineOperand &Use,
+ LaneBitmask DefinedLanes);
+
+ /// Given a mask \p DefinedLanes of lanes defined at operand \p OpNum
+ /// of COPY-like instruction, determine which lanes are defined at the output
+ /// operand \p Def.
+ LaneBitmask transferDefinedLanes(const MachineOperand &Def, unsigned OpNum,
+ LaneBitmask DefinedLanes);
+
+ LaneBitmask determineInitialDefinedLanes(unsigned Reg);
+ LaneBitmask determineInitialUsedLanes(unsigned Reg);
+
+ const MachineRegisterInfo *MRI;
+ const TargetRegisterInfo *TRI;
+
+ void PutInWorklist(unsigned RegIdx) {
+ if (WorklistMembers.test(RegIdx))
+ return;
+ WorklistMembers.set(RegIdx);
+ Worklist.push_back(RegIdx);
+ }
+
+ VRegInfo *VRegInfos;
+ /// Worklist containing virtreg indexes.
+ std::deque<unsigned> Worklist;
+ BitVector WorklistMembers;
+ /// This bitvector is set for each vreg index where the vreg is defined
+ /// by an instruction where lowersToCopies()==true.
+ BitVector DefinedByCopy;
+};
+
+} // end anonymous namespace
+
+char DetectDeadLanes::ID = 0;
+char &llvm::DetectDeadLanesID = DetectDeadLanes::ID;
+
+INITIALIZE_PASS(DetectDeadLanes, "detect-dead-lanes", "Detect Dead Lanes",
+ false, false);
+
+/// Returns true if \p MI will get lowered to a series of COPY instructions.
+/// We call this a COPY-like instruction.
+static bool lowersToCopies(const MachineInstr &MI) {
+ // Note: We could support instructions with MCInstrDesc::isRegSequenceLike(),
+ // isExtractSubRegLike(), isInsertSubregLike() in the future even though they
+ // are not lowered to a COPY.
+ switch (MI.getOpcode()) {
+ case TargetOpcode::COPY:
+ case TargetOpcode::PHI:
+ case TargetOpcode::INSERT_SUBREG:
+ case TargetOpcode::REG_SEQUENCE:
+ case TargetOpcode::EXTRACT_SUBREG:
+ return true;
+ }
+ return false;
+}
+
+static bool isCrossCopy(const MachineRegisterInfo &MRI,
+ const MachineInstr &MI,
+ const TargetRegisterClass *DstRC,
+ const MachineOperand &MO) {
+ assert(lowersToCopies(MI));
+ unsigned SrcReg = MO.getReg();
+ const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg);
+ if (DstRC == SrcRC)
+ return false;
+
+ unsigned SrcSubIdx = MO.getSubReg();
+
+ const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
+ unsigned DstSubIdx = 0;
+ switch (MI.getOpcode()) {
+ case TargetOpcode::INSERT_SUBREG:
+ if (MI.getOperandNo(&MO) == 2)
+ DstSubIdx = MI.getOperand(3).getImm();
+ break;
+ case TargetOpcode::REG_SEQUENCE: {
+ unsigned OpNum = MI.getOperandNo(&MO);
+ DstSubIdx = MI.getOperand(OpNum+1).getImm();
+ break;
+ }
+ case TargetOpcode::EXTRACT_SUBREG: {
+ unsigned SubReg = MI.getOperand(2).getImm();
+ SrcSubIdx = TRI.composeSubRegIndices(SubReg, SrcSubIdx);
+ }
+ }
+
+ unsigned PreA, PreB; // Unused.
+ if (SrcSubIdx && DstSubIdx)
+ return !TRI.getCommonSuperRegClass(SrcRC, SrcSubIdx, DstRC, DstSubIdx, PreA,
+ PreB);
+ if (SrcSubIdx)
+ return !TRI.getMatchingSuperRegClass(SrcRC, DstRC, SrcSubIdx);
+ if (DstSubIdx)
+ return !TRI.getMatchingSuperRegClass(DstRC, SrcRC, DstSubIdx);
+ return !TRI.getCommonSubClass(SrcRC, DstRC);
+}
+
+void DetectDeadLanes::addUsedLanesOnOperand(const MachineOperand &MO,
+ LaneBitmask UsedLanes) {
+ if (!MO.readsReg())
+ return;
+ unsigned MOReg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(MOReg))
+ return;
+
+ unsigned MOSubReg = MO.getSubReg();
+ if (MOSubReg != 0)
+ UsedLanes = TRI->composeSubRegIndexLaneMask(MOSubReg, UsedLanes);
+ UsedLanes &= MRI->getMaxLaneMaskForVReg(MOReg);
+
+ unsigned MORegIdx = TargetRegisterInfo::virtReg2Index(MOReg);
+ VRegInfo &MORegInfo = VRegInfos[MORegIdx];
+ LaneBitmask PrevUsedLanes = MORegInfo.UsedLanes;
+ // Any change at all?
+ if ((UsedLanes & ~PrevUsedLanes) == 0)
+ return;
+
+ // Set UsedLanes and remember instruction for further propagation.
+ MORegInfo.UsedLanes = PrevUsedLanes | UsedLanes;
+ if (DefinedByCopy.test(MORegIdx))
+ PutInWorklist(MORegIdx);
+}
+
+void DetectDeadLanes::transferUsedLanesStep(const MachineOperand &Def,
+ LaneBitmask UsedLanes) {
+ const MachineInstr &MI = *Def.getParent();
+ switch (MI.getOpcode()) {
+ case TargetOpcode::COPY:
+ case TargetOpcode::PHI:
+ for (const MachineOperand &MO : MI.uses()) {
+ if (MO.isReg() && MO.isUse())
+ addUsedLanesOnOperand(MO, UsedLanes);
+ }
+ break;
+ case TargetOpcode::REG_SEQUENCE: {
+ // Note: This loop makes the conservative assumption that subregister
+ // indices do not overlap or that we do not know how the overlap is
+ // resolved when lowering to copies.
+ for (unsigned I = 1, N = MI.getNumOperands(); I < N; I += 2) {
+ const MachineOperand &MO = MI.getOperand(I);
+ unsigned SubIdx = MI.getOperand(I + 1).getImm();
+ LaneBitmask MOUsedLanes =
+ TRI->reverseComposeSubRegIndexLaneMask(SubIdx, UsedLanes);
+
+ addUsedLanesOnOperand(MO, MOUsedLanes);
+ }
+ break;
+ }
+ case TargetOpcode::INSERT_SUBREG: {
+ const MachineOperand &MO2 = MI.getOperand(2);
+ unsigned SubIdx = MI.getOperand(3).getImm();
+ LaneBitmask MO2UsedLanes =
+ TRI->reverseComposeSubRegIndexLaneMask(SubIdx, UsedLanes);
+ addUsedLanesOnOperand(MO2, MO2UsedLanes);
+
+ const MachineOperand &MO1 = MI.getOperand(1);
+ unsigned DefReg = Def.getReg();
+ const TargetRegisterClass *RC = MRI->getRegClass(DefReg);
+ LaneBitmask MO1UsedLanes;
+ if (RC->CoveredBySubRegs)
+ MO1UsedLanes = UsedLanes & ~TRI->getSubRegIndexLaneMask(SubIdx);
+ else
+ MO1UsedLanes = RC->LaneMask;
+ addUsedLanesOnOperand(MO1, MO1UsedLanes);
+ break;
+ }
+ case TargetOpcode::EXTRACT_SUBREG: {
+ const MachineOperand &MO = MI.getOperand(1);
+ unsigned SubIdx = MI.getOperand(2).getImm();
+ LaneBitmask MOUsedLanes =
+ TRI->composeSubRegIndexLaneMask(SubIdx, UsedLanes);
+ addUsedLanesOnOperand(MO, MOUsedLanes);
+ break;
+ }
+ default:
+ llvm_unreachable("function must be called with COPY-like instruction");
+ }
+}
+
+void DetectDeadLanes::transferDefinedLanesStep(const MachineOperand &Use,
+ LaneBitmask DefinedLanes) {
+ if (!Use.readsReg())
+ return;
+ // Check whether the operand writes a vreg and is part of a COPY-like
+ // instruction.
+ const MachineInstr &MI = *Use.getParent();
+ if (MI.getDesc().getNumDefs() != 1)
+ return;
+ // FIXME: PATCHPOINT instructions announce a Def that does not always exist,
+ // they really need to be modeled differently!
+ if (MI.getOpcode() == TargetOpcode::PATCHPOINT)
+ return;
+ const MachineOperand &Def = *MI.defs().begin();
+ unsigned DefReg = Def.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(DefReg))
+ return;
+ unsigned DefRegIdx = TargetRegisterInfo::virtReg2Index(DefReg);
+ if (!DefinedByCopy.test(DefRegIdx))
+ return;
+
+ unsigned OpNum = MI.getOperandNo(&Use);
+ DefinedLanes =
+ TRI->reverseComposeSubRegIndexLaneMask(Use.getSubReg(), DefinedLanes);
+ DefinedLanes = transferDefinedLanes(Def, OpNum, DefinedLanes);
+
+ VRegInfo &RegInfo = VRegInfos[DefRegIdx];
+ LaneBitmask PrevDefinedLanes = RegInfo.DefinedLanes;
+ // Any change at all?
+ if ((DefinedLanes & ~PrevDefinedLanes) == 0)
+ return;
+
+ RegInfo.DefinedLanes = PrevDefinedLanes | DefinedLanes;
+ PutInWorklist(DefRegIdx);
+}
+
+LaneBitmask DetectDeadLanes::transferDefinedLanes(const MachineOperand &Def,
+ unsigned OpNum,
+ LaneBitmask DefinedLanes) {
+ const MachineInstr &MI = *Def.getParent();
+ // Translate DefinedLanes if necessary.
+ switch (MI.getOpcode()) {
+ case TargetOpcode::REG_SEQUENCE: {
+ unsigned SubIdx = MI.getOperand(OpNum + 1).getImm();
+ DefinedLanes = TRI->composeSubRegIndexLaneMask(SubIdx, DefinedLanes);
+ DefinedLanes &= TRI->getSubRegIndexLaneMask(SubIdx);
+ break;
+ }
+ case TargetOpcode::INSERT_SUBREG: {
+ unsigned SubIdx = MI.getOperand(3).getImm();
+ if (OpNum == 2) {
+ DefinedLanes = TRI->composeSubRegIndexLaneMask(SubIdx, DefinedLanes);
+ DefinedLanes &= TRI->getSubRegIndexLaneMask(SubIdx);
+ } else {
+ assert(OpNum == 1 && "INSERT_SUBREG must have two operands");
+ // Ignore lanes defined by operand 2.
+ DefinedLanes &= ~TRI->getSubRegIndexLaneMask(SubIdx);
+ }
+ break;
+ }
+ case TargetOpcode::EXTRACT_SUBREG: {
+ unsigned SubIdx = MI.getOperand(2).getImm();
+ assert(OpNum == 1 && "EXTRACT_SUBREG must have one register operand only");
+ DefinedLanes = TRI->reverseComposeSubRegIndexLaneMask(SubIdx, DefinedLanes);
+ break;
+ }
+ case TargetOpcode::COPY:
+ case TargetOpcode::PHI:
+ break;
+ default:
+ llvm_unreachable("function must be called with COPY-like instruction");
+ }
+
+ unsigned SubIdx = Def.getSubReg();
+ DefinedLanes = TRI->composeSubRegIndexLaneMask(SubIdx, DefinedLanes);
+ DefinedLanes &= MRI->getMaxLaneMaskForVReg(Def.getReg());
+ return DefinedLanes;
+}
+
+LaneBitmask DetectDeadLanes::determineInitialDefinedLanes(unsigned Reg) {
+ // Live-In or unused registers have no definition but are considered fully
+ // defined.
+ if (!MRI->hasOneDef(Reg))
+ return ~0u;
+
+ const MachineOperand &Def = *MRI->def_begin(Reg);
+ const MachineInstr &DefMI = *Def.getParent();
+ if (lowersToCopies(DefMI)) {
+    // Start optimistically with no used or defined lanes for copy
+ // instructions. The following dataflow analysis will add more bits.
+ unsigned RegIdx = TargetRegisterInfo::virtReg2Index(Reg);
+ DefinedByCopy.set(RegIdx);
+ PutInWorklist(RegIdx);
+
+ if (Def.isDead())
+ return 0;
+
+ // COPY/PHI can copy across unrelated register classes (example: float/int)
+ // with incompatible subregister structure. Do not include these in the
+ // dataflow analysis since we cannot transfer lanemasks in a meaningful way.
+ const TargetRegisterClass *DefRC = MRI->getRegClass(Reg);
+
+ // Determine initially DefinedLanes.
+ LaneBitmask DefinedLanes = 0;
+ for (const MachineOperand &MO : DefMI.uses()) {
+ if (!MO.isReg() || !MO.readsReg())
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (!MOReg)
+ continue;
+
+ LaneBitmask MODefinedLanes;
+ if (TargetRegisterInfo::isPhysicalRegister(MOReg)) {
+ MODefinedLanes = ~0u;
+ } else if (isCrossCopy(*MRI, DefMI, DefRC, MO)) {
+ MODefinedLanes = ~0u;
+ } else {
+ assert(TargetRegisterInfo::isVirtualRegister(MOReg));
+ if (MRI->hasOneDef(MOReg)) {
+ const MachineOperand &MODef = *MRI->def_begin(MOReg);
+ const MachineInstr &MODefMI = *MODef.getParent();
+ // Bits from copy-like operations will be added later.
+ if (lowersToCopies(MODefMI) || MODefMI.isImplicitDef())
+ continue;
+ }
+ unsigned MOSubReg = MO.getSubReg();
+ MODefinedLanes = MRI->getMaxLaneMaskForVReg(MOReg);
+ MODefinedLanes = TRI->reverseComposeSubRegIndexLaneMask(
+ MOSubReg, MODefinedLanes);
+ }
+
+ unsigned OpNum = DefMI.getOperandNo(&MO);
+ DefinedLanes |= transferDefinedLanes(Def, OpNum, MODefinedLanes);
+ }
+ return DefinedLanes;
+ }
+ if (DefMI.isImplicitDef() || Def.isDead())
+ return 0;
+
+ unsigned SubReg = Def.getSubReg();
+ return SubReg != 0 ? TRI->getSubRegIndexLaneMask(SubReg)
+ : MRI->getMaxLaneMaskForVReg(Reg);
+}
+
+LaneBitmask DetectDeadLanes::determineInitialUsedLanes(unsigned Reg) {
+ LaneBitmask UsedLanes = 0;
+ for (const MachineOperand &MO : MRI->use_nodbg_operands(Reg)) {
+ if (!MO.readsReg())
+ continue;
+
+ const MachineInstr &UseMI = *MO.getParent();
+ if (UseMI.isKill())
+ continue;
+
+ unsigned SubReg = MO.getSubReg();
+ if (lowersToCopies(UseMI)) {
+ assert(UseMI.getDesc().getNumDefs() == 1);
+ const MachineOperand &Def = *UseMI.defs().begin();
+ unsigned DefReg = Def.getReg();
+ // The used lanes of COPY-like instruction operands are determined by the
+ // following dataflow analysis.
+ if (TargetRegisterInfo::isVirtualRegister(DefReg)) {
+ // But ignore copies across incompatible register classes.
+ bool CrossCopy = false;
+ if (lowersToCopies(UseMI)) {
+ const TargetRegisterClass *DstRC = MRI->getRegClass(DefReg);
+ CrossCopy = isCrossCopy(*MRI, UseMI, DstRC, MO);
+ }
+
+ if (!CrossCopy)
+ continue;
+ }
+ }
+
+ // Shortcut: All lanes are used.
+ if (SubReg == 0)
+ return MRI->getMaxLaneMaskForVReg(Reg);
+
+ UsedLanes |= TRI->getSubRegIndexLaneMask(SubReg);
+ }
+ return UsedLanes;
+}
+
+bool DetectDeadLanes::runOnMachineFunction(MachineFunction &MF) {
+ // Don't bother if we won't track subregister liveness later. This pass is
+ // required for correctness if subregister liveness is enabled because the
+ // register coalescer cannot deal with hidden dead defs. However without
+ // subregister liveness enabled, the expected benefits of this pass are small
+  // so we save the compile time.
+ if (!MF.getSubtarget().enableSubRegLiveness()) {
+ DEBUG(dbgs() << "Skipping Detect dead lanes pass\n");
+ return false;
+ }
+
+ MRI = &MF.getRegInfo();
+ TRI = MRI->getTargetRegisterInfo();
+
+ unsigned NumVirtRegs = MRI->getNumVirtRegs();
+ VRegInfos = new VRegInfo[NumVirtRegs];
+ WorklistMembers.resize(NumVirtRegs);
+ DefinedByCopy.resize(NumVirtRegs);
+
+ // First pass: Populate defs/uses of vregs with initial values
+ for (unsigned RegIdx = 0; RegIdx < NumVirtRegs; ++RegIdx) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(RegIdx);
+
+ // Determine used/defined lanes and add copy instructions to worklist.
+ VRegInfo &Info = VRegInfos[RegIdx];
+ Info.DefinedLanes = determineInitialDefinedLanes(Reg);
+ Info.UsedLanes = determineInitialUsedLanes(Reg);
+ }
+
+ // Iterate as long as defined lanes/used lanes keep changing.
+ while (!Worklist.empty()) {
+ unsigned RegIdx = Worklist.front();
+ Worklist.pop_front();
+ WorklistMembers.reset(RegIdx);
+ VRegInfo &Info = VRegInfos[RegIdx];
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(RegIdx);
+
+ // Transfer UsedLanes to operands of DefMI (backwards dataflow).
+ MachineOperand &Def = *MRI->def_begin(Reg);
+ transferUsedLanesStep(Def, Info.UsedLanes);
+ // Transfer DefinedLanes to users of Reg (forward dataflow).
+ for (const MachineOperand &MO : MRI->use_nodbg_operands(Reg))
+ transferDefinedLanesStep(MO, Info.DefinedLanes);
+ }
+
+ DEBUG(
+ dbgs() << "Defined/Used lanes:\n";
+ for (unsigned RegIdx = 0; RegIdx < NumVirtRegs; ++RegIdx) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(RegIdx);
+ const VRegInfo &Info = VRegInfos[RegIdx];
+ dbgs() << PrintReg(Reg, nullptr)
+ << " Used: " << PrintLaneMask(Info.UsedLanes)
+ << " Def: " << PrintLaneMask(Info.DefinedLanes) << '\n';
+ }
+ dbgs() << "\n";
+ );
+
+ // Mark operands as dead/unused.
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : MBB) {
+ for (MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+ unsigned SubReg = MO.getSubReg();
+ LaneBitmask Mask = TRI->getSubRegIndexLaneMask(SubReg);
+ unsigned RegIdx = TargetRegisterInfo::virtReg2Index(Reg);
+ const VRegInfo &RegInfo = VRegInfos[RegIdx];
+ if (RegInfo.UsedLanes == 0 && MO.isDef() && !MO.isDead()) {
+ DEBUG(dbgs() << "Marking operand '" << MO << "' as dead in " << MI);
+ MO.setIsDead();
+ }
+ if (((RegInfo.UsedLanes & Mask) == 0 ||
+ (RegInfo.DefinedLanes & Mask) == 0) && MO.readsReg()) {
+ DEBUG(dbgs() << "Marking operand '" << MO << "' as undef in " << MI);
+ MO.setIsUndef();
+ }
+ }
+ }
+ }
+
+ DefinedByCopy.clear();
+ WorklistMembers.clear();
+ delete[] VRegInfos;
+ return true;
+}
diff --git a/llvm/lib/CodeGen/Passes.cpp b/llvm/lib/CodeGen/Passes.cpp
index 94b42089060..27b1ce87d0a 100644
--- a/llvm/lib/CodeGen/Passes.cpp
+++ b/llvm/lib/CodeGen/Passes.cpp
@@ -736,6 +736,8 @@ void TargetPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) {
/// optimized register allocation, including coalescing, machine instruction
/// scheduling, and register allocation itself.
void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
+ addPass(&DetectDeadLanesID, false);
+
addPass(&ProcessImplicitDefsID, false);
// LiveVariables currently requires pure SSA form.
diff --git a/llvm/test/CodeGen/AMDGPU/detect-dead-lanes.mir b/llvm/test/CodeGen/AMDGPU/detect-dead-lanes.mir
new file mode 100644
index 00000000000..d04b3f13e3f
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/detect-dead-lanes.mir
@@ -0,0 +1,408 @@
+# RUN: llc -march=amdgcn -run-pass detect-dead-lanes -o /dev/null %s 2>&1 | FileCheck %s
+--- |
+ define void @test0() { ret void }
+ define void @test1() { ret void }
+ define void @test2() { ret void }
+ define void @test3() { ret void }
+ define void @test4() { ret void }
+ define void @loop0() { ret void }
+ define void @loop1() { ret void }
+ define void @loop2() { ret void }
+...
+---
+# Combined use/def transfer check, the basics.
+# CHECK-LABEL: name: test0
+# CHECK: S_NOP 0, implicit-def %0
+# CHECK: S_NOP 0, implicit-def %1
+# CHECK: S_NOP 0, implicit-def dead %2
+# CHECK: %3 = REG_SEQUENCE %0, {{[0-9]+}}, %1, {{[0-9]+}}, undef %2, {{[0-9]+}}
+# CHECK: S_NOP 0, implicit %3:sub0
+# CHECK: S_NOP 0, implicit %3:sub1
+# CHECK: S_NOP 0, implicit undef %3:sub2
+# CHECK: %4 = COPY %3:sub0_sub1
+# CHECK: %5 = COPY %3:sub2_sub3
+# CHECK: S_NOP 0, implicit %4:sub0
+# CHECK: S_NOP 0, implicit %4:sub1
+# CHECK: S_NOP 0, implicit undef %5:sub0
+name: test0
+isSSA: true
+registers:
+ - { id: 0, class: sreg_32 }
+ - { id: 1, class: sreg_32 }
+ - { id: 2, class: sreg_32 }
+ - { id: 3, class: sreg_128 }
+ - { id: 4, class: sreg_64 }
+ - { id: 5, class: sreg_64 }
+body: |
+ bb.0:
+ S_NOP 0, implicit-def %0
+ S_NOP 0, implicit-def %1
+ S_NOP 0, implicit-def %2
+ %3 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub3
+ S_NOP 0, implicit %3:sub0
+ S_NOP 0, implicit %3:sub1
+ S_NOP 0, implicit %3:sub2
+ %4 = COPY %3:sub0_sub1
+ %5 = COPY %3:sub2_sub3
+ S_NOP 0, implicit %4:sub0
+ S_NOP 0, implicit %4:sub1
+ S_NOP 0, implicit %5:sub0
+...
+---
+# Check defined lanes transfer; Includes checking for some special cases like
+# undef operands or IMPLICIT_DEF definitions.
+# CHECK-LABEL: name: test1
+# CHECK: %0 = REG_SEQUENCE %sgpr0, {{[0-9]+}}, %sgpr0, {{[0-9]+}}
+# CHECK: %1 = INSERT_SUBREG %0, %sgpr1, {{[0-9]+}}
+# CHECK: %2 = INSERT_SUBREG %0:sub2_sub3, %sgpr42, {{[0-9]+}}
+# CHECK: S_NOP 0, implicit %1:sub0
+# CHECK: S_NOP 0, implicit undef %1:sub1
+# CHECK: S_NOP 0, implicit %1:sub2
+# CHECK: S_NOP 0, implicit %1:sub3
+# CHECK: S_NOP 0, implicit %2:sub0
+# CHECK: S_NOP 0, implicit undef %2:sub1
+
+# CHECK: %3 = IMPLICIT_DEF
+# CHECK: %4 = INSERT_SUBREG %0, undef %3, {{[0-9]+}}
+# CHECK: S_NOP 0, implicit undef %4:sub0
+# CHECK: S_NOP 0, implicit undef %4:sub1
+# CHECK: S_NOP 0, implicit %4:sub2
+# CHECK: S_NOP 0, implicit undef %4:sub3
+
+# CHECK: %5 = EXTRACT_SUBREG %0, {{[0-9]+}}
+# CHECK: %6 = EXTRACT_SUBREG %5, {{[0-9]+}}
+# CHECK: %7 = EXTRACT_SUBREG %5, {{[0-9]+}}
+# CHECK: S_NOP 0, implicit %5
+# CHECK: S_NOP 0, implicit %6
+# CHECK: S_NOP 0, implicit undef %7
+
+# CHECK: %8 = IMPLICIT_DEF
+# CHECK: %9 = EXTRACT_SUBREG undef %8, {{[0-9]+}}
+# CHECK: S_NOP 0, implicit undef %9
+
+# CHECK: %10 = EXTRACT_SUBREG undef %0, {{[0-9]+}}
+# CHECK: S_NOP 0, implicit undef %10
+name: test1
+isSSA: true
+registers:
+ - { id: 0, class: sreg_128 }
+ - { id: 1, class: sreg_128 }
+ - { id: 2, class: sreg_64 }
+ - { id: 3, class: sreg_32 }
+ - { id: 4, class: sreg_128 }
+ - { id: 5, class: sreg_64 }
+ - { id: 6, class: sreg_32 }
+ - { id: 7, class: sreg_32 }
+ - { id: 8, class: sreg_64 }
+ - { id: 9, class: sreg_32 }
+ - { id: 10, class: sreg_128 }
+body: |
+ bb.0:
+ %0 = REG_SEQUENCE %sgpr0, %subreg.sub0, %sgpr0, %subreg.sub2
+ %1 = INSERT_SUBREG %0, %sgpr1, %subreg.sub3
+ %2 = INSERT_SUBREG %0:sub2_sub3, %sgpr42, %subreg.sub0
+ S_NOP 0, implicit %1:sub0
+ S_NOP 0, implicit %1:sub1
+ S_NOP 0, implicit %1:sub2
+ S_NOP 0, implicit %1:sub3
+ S_NOP 0, implicit %2:sub0
+ S_NOP 0, implicit %2:sub1
+
+ %3 = IMPLICIT_DEF
+ %4 = INSERT_SUBREG %0, %3, %subreg.sub0
+ S_NOP 0, implicit %4:sub0
+ S_NOP 0, implicit %4:sub1
+ S_NOP 0, implicit %4:sub2
+ S_NOP 0, implicit %4:sub3
+
+ %5 = EXTRACT_SUBREG %0, %subreg.sub0_sub1
+ %6 = EXTRACT_SUBREG %5, %subreg.sub0
+ %7 = EXTRACT_SUBREG %5, %subreg.sub1
+ S_NOP 0, implicit %5
+ S_NOP 0, implicit %6
+ S_NOP 0, implicit %7
+
+ %8 = IMPLICIT_DEF
+ %9 = EXTRACT_SUBREG %8, %subreg.sub1
+ S_NOP 0, implicit %9
+
+ %10 = EXTRACT_SUBREG undef %0, %subreg.sub2_sub3
+ S_NOP 0, implicit %10
+...
+---
+# Check used lanes transfer; Includes checking for some special cases like
+# undef operands.
+# CHECK-LABEL: name: test2
+# CHECK: S_NOP 0, implicit-def dead %0
+# CHECK: S_NOP 0, implicit-def %1
+# CHECK: S_NOP 0, implicit-def %2
+# CHECK: %3 = REG_SEQUENCE undef %0, {{[0-9]+}}, %1, {{[0-9]+}}, %2, {{[0-9]+}}
+# CHECK: S_NOP 0, implicit %3:sub1
+# CHECK: S_NOP 0, implicit %3:sub3
+
+# CHECK: S_NOP 0, implicit-def %4
+# CHECK: S_NOP 0, implicit-def dead %5
+# CHECK: %6 = REG_SEQUENCE %4, {{[0-9]+}}, undef %5, {{[0-9]+}}
+# CHECK: S_NOP 0, implicit %6
+
+# CHECK: S_NOP 0, implicit-def dead %7
+# CHECK: S_NOP 0, implicit-def %8
+# CHECK: %9 = INSERT_SUBREG undef %7, %8, {{[0-9]+}}
+# CHECK: S_NOP 0, implicit %9:sub2
+
+# CHECK: S_NOP 0, implicit-def %10
+# CHECK: S_NOP 0, implicit-def dead %11
+# CHECK: %12 = INSERT_SUBREG %10, undef %11, {{[0-9]+}}
+# CHECK: S_NOP 0, implicit %12:sub3
+
+# CHECK: S_NOP 0, implicit-def %13
+# CHECK: S_NOP 0, implicit-def dead %14
+# CHECK: %15 = REG_SEQUENCE %13, {{[0-9]+}}, undef %14, {{[0-9]+}}
+# CHECK: %16 = EXTRACT_SUBREG %15, {{[0-9]+}}
+# CHECK: S_NOP 0, implicit %16:sub1
+
+name: test2
+isSSA: true
+registers:
+ - { id: 0, class: sreg_32 }
+ - { id: 1, class: sreg_32 }
+ - { id: 2, class: sreg_64 }
+ - { id: 3, class: sreg_128 }
+ - { id: 4, class: sreg_32 }
+ - { id: 5, class: sreg_32 }
+ - { id: 6, class: sreg_64 }
+ - { id: 7, class: sreg_128 }
+ - { id: 8, class: sreg_64 }
+ - { id: 9, class: sreg_128 }
+ - { id: 10, class: sreg_128 }
+ - { id: 11, class: sreg_64 }
+ - { id: 12, class: sreg_128 }
+ - { id: 13, class: sreg_64 }
+ - { id: 14, class: sreg_64 }
+ - { id: 15, class: sreg_128 }
+ - { id: 16, class: sreg_64 }
+body: |
+ bb.0:
+ S_NOP 0, implicit-def %0
+ S_NOP 0, implicit-def %1
+ S_NOP 0, implicit-def %2
+ %3 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2_sub3
+ S_NOP 0, implicit %3:sub1
+ S_NOP 0, implicit %3:sub3
+
+ S_NOP 0, implicit-def %4
+ S_NOP 0, implicit-def %5
+ %6 = REG_SEQUENCE %4, %subreg.sub0, undef %5, %subreg.sub1
+ S_NOP 0, implicit %6
+
+ S_NOP 0, implicit-def %7
+ S_NOP 0, implicit-def %8
+ %9 = INSERT_SUBREG %7, %8, %subreg.sub2_sub3
+ S_NOP 0, implicit %9:sub2
+
+ S_NOP 0, implicit-def %10
+ S_NOP 0, implicit-def %11
+ %12 = INSERT_SUBREG %10, %11, %subreg.sub0_sub1
+ S_NOP 0, implicit %12:sub3
+
+ S_NOP 0, implicit-def %13
+ S_NOP 0, implicit-def %14
+ %15 = REG_SEQUENCE %13, %subreg.sub0_sub1, %14, %subreg.sub2_sub3
+ %16 = EXTRACT_SUBREG %15, %subreg.sub0_sub1
+ S_NOP 0, implicit %16:sub1
+...
+---
+# Check that copies to physregs use all lanes, copies from physregs define all
+# lanes. So we should not get a dead/undef flag here.
+# CHECK-LABEL: name: test3
+# CHECK: S_NOP 0, implicit-def %0
+# CHECK: %vcc = COPY %0
+# CHECK: %1 = COPY %vcc
+# CHECK: S_NOP 0, implicit %1
+name: test3
+isSSA: true
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: sreg_64 }
+ - { id: 1, class: sreg_64 }
+body: |
+ bb.0:
+ S_NOP 0, implicit-def %0
+ %vcc = COPY %0
+
+ %1 = COPY %vcc
+ S_NOP 0, implicit %1
+...
+---
+# Check that implicit-def/kill do not count as def/uses.
+# CHECK-LABEL: name: test4
+# CHECK: S_NOP 0, implicit-def dead %0
+# CHECK: KILL undef %0
+# CHECK: %1 = IMPLICIT_DEF
+# CHECK: S_NOP 0, implicit undef %1
+name: test4
+isSSA: true
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: sreg_64 }
+ - { id: 1, class: sreg_64 }
+body: |
+ bb.0:
+ S_NOP 0, implicit-def %0
+ KILL %0
+
+ %1 = IMPLICIT_DEF
+ S_NOP 0, implicit %1
+...
+---
+# Check "optimistic" dataflow fixpoint in phi-loops.
+# CHECK-LABEL: name: loop0
+# CHECK: bb.0:
+# CHECK: S_NOP 0, implicit-def %0
+# CHECK: S_NOP 0, implicit-def dead %1
+# CHECK: S_NOP 0, implicit-def dead %2
+# CHECK: %3 = REG_SEQUENCE %0, {{[0-9]+}}, undef %1, {{[0-9]+}}, undef %2, {{[0-9]+}}
+
+# CHECK: bb.1:
+# CHECK: %4 = PHI %3, %bb.0, %5, %bb.1
+
+# CHECK: bb.2:
+# CHECK: S_NOP 0, implicit %4:sub0
+# CHECK: S_NOP 0, implicit undef %4:sub3
+name: loop0
+isSSA: true
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: sreg_32 }
+ - { id: 1, class: sreg_32 }
+ - { id: 2, class: sreg_32 }
+ - { id: 3, class: sreg_128 }
+ - { id: 4, class: sreg_128 }
+ - { id: 5, class: sreg_128 }
+body: |
+ bb.0:
+ successors: %bb.1
+ S_NOP 0, implicit-def %0
+ S_NOP 0, implicit-def %1
+ S_NOP 0, implicit-def %2
+ %3 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2
+ S_BRANCH %bb.1
+
+ bb.1:
+ successors: %bb.1, %bb.2
+ %4 = PHI %3, %bb.0, %5, %bb.1
+
+    ; let's shuffle some lanes around for fun...
+ %5 = REG_SEQUENCE %4:sub0, %subreg.sub0, %4:sub2, %subreg.sub1, %4:sub1, %subreg.sub2, %4:sub3, %subreg.sub3
+
+ S_CBRANCH_VCCNZ %bb.1, implicit undef %vcc
+ S_BRANCH %bb.2
+
+ bb.2:
+ S_NOP 0, implicit %4:sub0
+ S_NOP 0, implicit %4:sub3
+...
+---
+# Check a loop that needs to be traversed multiple times to reach the fixpoint
+# for the used lanes. The example reads sub3 lane at the end, however with each
+# loop iteration we should get 1 more lane marked as we cycle the sublanes
+# along. Sublanes sub0, sub1 and sub3 are rotated in the loop so only sub2
+# should be dead.
+# CHECK-LABEL: name: loop1
+# CHECK: bb.0:
+# CHECK: S_NOP 0, implicit-def %0
+# CHECK: S_NOP 0, implicit-def %1
+# CHECK: S_NOP 0, implicit-def dead %2
+# CHECK: S_NOP 0, implicit-def %3
+# CHECK: %4 = REG_SEQUENCE %0, {{[0-9]+}}, %1, {{[0-9]+}}, undef %2, {{[0-9]+}}, %3, {{[0-9]+}}
+
+# CHECK: bb.1:
+# CHECK: %5 = PHI %4, %bb.0, %6, %bb.1
+
+# CHECK: %6 = REG_SEQUENCE %5:sub1, {{[0-9]+}}, %5:sub3, {{[0-9]+}}, undef %5:sub2, {{[0-9]+}}, %5:sub0, {{[0-9]+}}
+
+# CHECK: bb.2:
+# CHECK: S_NOP 0, implicit %6:sub3
+name: loop1
+isSSA: true
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: sreg_32 }
+ - { id: 1, class: sreg_32 }
+ - { id: 2, class: sreg_32 }
+ - { id: 3, class: sreg_32 }
+ - { id: 4, class: sreg_128 }
+ - { id: 5, class: sreg_128 }
+ - { id: 6, class: sreg_128 }
+body: |
+ bb.0:
+ successors: %bb.1
+ S_NOP 0, implicit-def %0
+ S_NOP 0, implicit-def %1
+ S_NOP 0, implicit-def dead %2
+ S_NOP 0, implicit-def %3
+ %4 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2, %3, %subreg.sub3
+ S_BRANCH %bb.1
+
+ bb.1:
+ successors: %bb.1, %bb.2
+ %5 = PHI %4, %bb.0, %6, %bb.1
+
+ ; rotate lanes, but skip sub2 lane...
+ %6 = REG_SEQUENCE %5:sub1, %subreg.sub0, %5:sub3, %subreg.sub1, %5:sub2, %subreg.sub2, %5:sub0, %subreg.sub3
+
+ S_CBRANCH_VCCNZ %bb.1, implicit undef %vcc
+ S_BRANCH %bb.2
+
+ bb.2:
+ S_NOP 0, implicit %6:sub3
+...
+---
+# Similar to loop1 test, but check for fixpoint of defined lanes.
+# Lanes are rotated between sub0, sub2, sub3 so only sub1 should be dead/undef.
+# CHECK-LABEL: name: loop2
+# CHECK: bb.0:
+# CHECK: S_NOP 0, implicit-def %0
+# CHECK: %1 = REG_SEQUENCE %0, {{[0-9]+}}
+
+# CHECK: bb.1:
+# CHECK: %2 = PHI %1, %bb.0, %3, %bb.1
+
+# CHECK: %3 = REG_SEQUENCE %2:sub3, {{[0-9]+}}, undef %2:sub1, {{[0-9]+}}, %2:sub0, {{[0-9]+}}, %2:sub2, {{[0-9]+}}
+
+# CHECK: bb.2:
+# CHECK: S_NOP 0, implicit %2:sub0
+# CHECK: S_NOP 0, implicit undef %2:sub1
+# CHECK: S_NOP 0, implicit %2:sub2
+# CHECK: S_NOP 0, implicit %2:sub3
+name: loop2
+isSSA: true
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: sreg_32 }
+ - { id: 1, class: sreg_128 }
+ - { id: 2, class: sreg_128 }
+ - { id: 3, class: sreg_128 }
+body: |
+ bb.0:
+ successors: %bb.1
+ S_NOP 0, implicit-def %0
+ %1 = REG_SEQUENCE %0, %subreg.sub0
+ S_BRANCH %bb.1
+
+ bb.1:
+ successors: %bb.1, %bb.2
+ %2 = PHI %1, %bb.0, %3, %bb.1
+
+ ; rotate subreg lanes, skipping sub1
+ %3 = REG_SEQUENCE %2:sub3, %subreg.sub0, %2:sub1, %subreg.sub1, %2:sub0, %subreg.sub2, %2:sub2, %subreg.sub3
+
+ S_CBRANCH_VCCNZ %bb.1, implicit undef %vcc
+ S_BRANCH %bb.2
+
+ bb.2:
+ S_NOP 0, implicit %2:sub0
+ S_NOP 0, implicit undef %2:sub1
+ S_NOP 0, implicit %2:sub2
+ S_NOP 0, implicit %2:sub3
+...
OpenPOWER on IntegriCloud