AMDGPU: Use set for tracked registers

The majority of the time spent in the pass is spent checking for register reads. Rather than searching all of the defined registers for uses in each instruction, use a set of defined registers and check the operands of the instruction. This process is still algorithmically not great, but with the additional trick of skipping the analysis for addresses with one use, this brings one slow testcase into a reasonable range. llvm-svn: 312206
author: Matt Arsenault <Matthew.Arsenault@amd.com> 2017-08-31 01:53:09 +0000
committer: Matt Arsenault <Matthew.Arsenault@amd.com> 2017-08-31 01:53:09 +0000
commit: 67e72dee795c477c9636a78498d24e76e9a8d7a4 (patch)
tree: 2f1062b5cbca2bf65e662ffae8c869183f5e2335 /llvm/lib
parent: 1aad27e17bc40273849802240439f6e7d49ac81d (diff)
download: bcm5719-llvm-67e72dee795c477c9636a78498d24e76e9a8d7a4.tar.gz
bcm5719-llvm-67e72dee795c477c9636a78498d24e76e9a8d7a4.zip
1 files changed, 23 insertions, 20 deletions
diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
index 1b2e5e6d0f7..8b3fdd87438 100644
--- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -141,36 +141,35 @@ static void moveInstsAfter(MachineBasicBlock::iterator I,
   }
 }
 
-static void addDefsToList(const MachineInstr &MI,
-                          SmallVectorImpl<const MachineOperand *> &Defs) {
-  for (const MachineOperand &Def : MI.defs()) {
-    Defs.push_back(&Def);
-  }
+static void addDefsToList(const MachineInstr &MI, DenseSet<unsigned> &Defs) {
+  // XXX: Should this be looking for implicit defs?
+  for (const MachineOperand &Def : MI.defs())
+    Defs.insert(Def.getReg());
 }
 
 static bool memAccessesCanBeReordered(MachineBasicBlock::iterator A,
                                       MachineBasicBlock::iterator B,
                                       const SIInstrInfo *TII,
                                       AliasAnalysis * AA) {
-  return (TII->areMemAccessesTriviallyDisjoint(*A, *B, AA) ||
-    // RAW or WAR - cannot reorder
-    // WAW - cannot reorder
-    // RAR - safe to reorder
-    !(A->mayStore() || B->mayStore()));
+  // RAW or WAR - cannot reorder
+  // WAW - cannot reorder
+  // RAR - safe to reorder
+  return !(A->mayStore() || B->mayStore()) ||
+    TII->areMemAccessesTriviallyDisjoint(*A, *B, AA);
 }
 
 // Add MI and its defs to the lists if MI reads one of the defs that are
 // already in the list. Returns true in that case.
 static bool
 addToListsIfDependent(MachineInstr &MI,
-                      SmallVectorImpl<const MachineOperand *> &Defs,
+                      DenseSet<unsigned> &Defs,
                       SmallVectorImpl<MachineInstr*> &Insts) {
-  for (const MachineOperand *Def : Defs) {
-    bool ReadDef = MI.readsVirtualRegister(Def->getReg());
-    // If ReadDef is true, then there is a use of Def between I
-    // and the instruction that I will potentially be merged with. We
-    // will need to move this instruction after the merged instructions.
-    if (ReadDef) {
+  for (MachineOperand &Use : MI.operands()) {
+    // If one of the defs is read, then there is a use of Def between I and the
+    // instruction that I will potentially be merged with. We will need to move
+    // this instruction after the merged instructions.
+
+    if (Use.isReg() && Use.readsReg() && Defs.count(Use.getReg())) {
       Insts.push_back(&MI);
       addDefsToList(MI, Defs);
       return true;
@@ -249,7 +248,8 @@ bool SILoadStoreOptimizer::offsetsCanBeCombined(CombineInfo &CI) {
 }
 
 bool SILoadStoreOptimizer::findMatchingDSInst(CombineInfo &CI) {
-  MachineBasicBlock::iterator E = CI.I->getParent()->end();
+  MachineBasicBlock *MBB = CI.I->getParent();
+  MachineBasicBlock::iterator E = MBB->end();
   MachineBasicBlock::iterator MBBI = CI.I;
 
   int AddrIdx = AMDGPU::getNamedOperandIdx(CI.I->getOpcode(),
@@ -258,12 +258,13 @@ bool SILoadStoreOptimizer::findMatchingDSInst(CombineInfo &CI) {
 
   // We only ever merge operations with the same base address register, so don't
   // bother scanning forward if there are no other uses.
-  if (MRI->hasOneNonDBGUse(AddrReg0.getReg()))
+  if (TargetRegisterInfo::isPhysicalRegister(AddrReg0.getReg()) ||
+      MRI->hasOneNonDBGUse(AddrReg0.getReg()))
     return false;
 
   ++MBBI;
 
-  SmallVector<const MachineOperand *, 8> DefsToMove;
+  DenseSet<unsigned> DefsToMove;
   addDefsToList(*CI.I, DefsToMove);
 
   for ( ; MBBI != E; ++MBBI) {
@@ -547,6 +548,8 @@ bool SILoadStoreOptimizer::runOnMachineFunction(MachineFunction &MF) {
   MRI = &MF.getRegInfo();
   AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
 
+  assert(MRI->isSSA() && "Must be run on SSA");
+
   DEBUG(dbgs() << "Running SILoadStoreOptimizer\n");
 
   bool Modified = false;
author	Matt Arsenault <Matthew.Arsenault@amd.com>	2017-08-31 01:53:09 +0000
committer	Matt Arsenault <Matthew.Arsenault@amd.com>	2017-08-31 01:53:09 +0000
commit	67e72dee795c477c9636a78498d24e76e9a8d7a4 (patch)
tree	2f1062b5cbca2bf65e662ffae8c869183f5e2335 /llvm/lib
parent	1aad27e17bc40273849802240439f6e7d49ac81d (diff)
download	bcm5719-llvm-67e72dee795c477c9636a78498d24e76e9a8d7a4.tar.gz bcm5719-llvm-67e72dee795c477c9636a78498d24e76e9a8d7a4.zip