diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2019-04-03 20:53:20 +0000 |
---|---|---|
committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2019-04-03 20:53:20 +0000 |
commit | 396653f8a1fb5db910599e29557a47cfd85ca119 (patch) | |
tree | 716d00da0da7375e59933aa073e833320c385016 /llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp | |
parent | 060bf99f493ea5515fa09dec8dc949dc6d57db09 (diff) | |
download | bcm5719-llvm-396653f8a1fb5db910599e29557a47cfd85ca119.tar.gz bcm5719-llvm-396653f8a1fb5db910599e29557a47cfd85ca119.zip |
AMDGPU: Split block for si_end_cf
Relying on no spill or other code being inserted before this was
precarious. It relied on code diligently checking isBasicBlockPrologue
which is likely to be forgotten.
Ideally this could be done earlier, but that doesn't work because of
phis: no other instruction can be placed before them, so we have to
accept the position being incorrect while the function is in SSA form.
This avoids regressions in the fast register allocator rewrite from
inverting the direction.
llvm-svn: 357634
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp | 127 |
1 file changed, 112 insertions, 15 deletions
diff --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp index a399f7715a2..02b47e869bf 100644 --- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp +++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp @@ -55,6 +55,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" @@ -79,8 +80,11 @@ class SILowerControlFlow : public MachineFunctionPass { private: const SIRegisterInfo *TRI = nullptr; const SIInstrInfo *TII = nullptr; - LiveIntervals *LIS = nullptr; MachineRegisterInfo *MRI = nullptr; + LiveIntervals *LIS = nullptr; + MachineDominatorTree *DT = nullptr; + MachineLoopInfo *MLI = nullptr; + void emitIf(MachineInstr &MI); void emitElse(MachineInstr &MI); @@ -111,7 +115,7 @@ public: AU.addPreservedID(LiveVariablesID); AU.addPreservedID(MachineLoopInfoID); AU.addPreservedID(MachineDominatorsID); - AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); } }; @@ -388,23 +392,99 @@ void SILowerControlFlow::emitLoop(MachineInstr &MI) { MI.eraseFromParent(); } +// Insert \p Inst (which modifies exec) at \p InsPt in \p MBB, such that \p MBB +// is split as necessary to keep the exec modification in its own block. +static MachineBasicBlock *insertInstWithExecFallthrough(MachineBasicBlock &MBB, + MachineInstr &MI, + MachineInstr *NewMI, + MachineDominatorTree *DT, + LiveIntervals *LIS, + MachineLoopInfo *MLI) { + assert(NewMI->isTerminator()); + + MachineBasicBlock::iterator InsPt = MI.getIterator(); + if (std::next(MI.getIterator()) == MBB.end()) { + // Don't bother with a new block. 
+ MBB.insert(InsPt, NewMI); + if (LIS) + LIS->ReplaceMachineInstrInMaps(MI, *NewMI); + MI.eraseFromParent(); + return &MBB; + } + + MachineFunction *MF = MBB.getParent(); + MachineBasicBlock *SplitMBB + = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); + + MF->insert(++MachineFunction::iterator(MBB), SplitMBB); + + // FIXME: This is working around a MachineDominatorTree API defect. + // + // If a previous pass split a critical edge, it may not have been applied to + // the DomTree yet. applySplitCriticalEdges is lazily applied, and inspects + // the CFG of the given block. Make sure to call a dominator tree method that + // will flush this cache before touching the successors of the block. + MachineDomTreeNode *NodeMBB = nullptr; + if (DT) + NodeMBB = DT->getNode(&MBB); + + // Move everything to the new block, except the end_cf pseudo. + SplitMBB->splice(SplitMBB->begin(), &MBB, MBB.begin(), MBB.end()); + + SplitMBB->transferSuccessorsAndUpdatePHIs(&MBB); + MBB.addSuccessor(SplitMBB, BranchProbability::getOne()); + + MBB.insert(MBB.end(), NewMI); + + if (DT) { + std::vector<MachineDomTreeNode *> Children = NodeMBB->getChildren(); + DT->addNewBlock(SplitMBB, &MBB); + + // Reparent all of the children to the new block body. + auto *SplitNode = DT->getNode(SplitMBB); + for (auto *Child : Children) + DT->changeImmediateDominator(Child, SplitNode); + } + + if (MLI) { + if (MachineLoop *Loop = MLI->getLoopFor(&MBB)) + Loop->addBasicBlockToLoop(SplitMBB, MLI->getBase()); + } + + if (LIS) { + LIS->insertMBBInMaps(SplitMBB); + LIS->ReplaceMachineInstrInMaps(MI, *NewMI); + } + + // All live-ins are forwarded. 
+ for (auto &LiveIn : MBB.liveins()) + SplitMBB->addLiveIn(LiveIn); + + MI.eraseFromParent(); + return SplitMBB; +} + void SILowerControlFlow::emitEndCf(MachineInstr &MI) { MachineBasicBlock &MBB = *MI.getParent(); const DebugLoc &DL = MI.getDebugLoc(); MachineBasicBlock::iterator InsPt = MBB.begin(); - MachineInstr *NewMI = - BuildMI(MBB, InsPt, DL, TII->get(AMDGPU::S_OR_B64), AMDGPU::EXEC) - .addReg(AMDGPU::EXEC) - .add(MI.getOperand(0)); + // First, move the instruction. It's unnecessarily difficult to update + // LiveIntervals when there's a change in control flow, so move the + // instruction before changing the blocks. + MBB.splice(InsPt, &MBB, MI.getIterator()); if (LIS) - LIS->ReplaceMachineInstrInMaps(MI, *NewMI); + LIS->handleMove(MI); - MI.eraseFromParent(); + MachineFunction *MF = MBB.getParent(); - if (LIS) - LIS->handleMove(*NewMI); + // Create instruction without inserting it yet. + MachineInstr *NewMI + = BuildMI(*MF, DL, TII->get(AMDGPU::S_OR_B64_term), AMDGPU::EXEC) + .addReg(AMDGPU::EXEC) + .add(MI.getOperand(0)); + insertInstWithExecFallthrough(MBB, MI, NewMI, DT, LIS, MLI); } // Returns replace operands for a logical operation, either single result @@ -470,17 +550,20 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) { // This doesn't actually need LiveIntervals, but we can preserve them. 
LIS = getAnalysisIfAvailable<LiveIntervals>(); + DT = getAnalysisIfAvailable<MachineDominatorTree>(); + MLI = getAnalysisIfAvailable<MachineLoopInfo>(); + MRI = &MF.getRegInfo(); MachineFunction::iterator NextBB; for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE; BI = NextBB) { NextBB = std::next(BI); - MachineBasicBlock &MBB = *BI; + MachineBasicBlock *MBB = &*BI; MachineBasicBlock::iterator I, Next, Last; - for (I = MBB.begin(), Last = MBB.end(); I != MBB.end(); I = Next) { + for (I = MBB->begin(), Last = MBB->end(); I != MBB->end(); I = Next) { Next = std::next(I); MachineInstr &MI = *I; @@ -501,10 +584,24 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) { emitLoop(MI); break; - case AMDGPU::SI_END_CF: + case AMDGPU::SI_END_CF: { + MachineInstr *NextMI = nullptr; + + if (Next != MBB->end()) + NextMI = &*Next; + emitEndCf(MI); - break; + if (NextMI) { + MBB = NextMI->getParent(); + Next = NextMI->getIterator(); + Last = MBB->end(); + } + + NextBB = std::next(MBB->getIterator()); + BE = MF.end(); + break; + } case AMDGPU::S_AND_B64: case AMDGPU::S_OR_B64: // Cleanup bit manipulations on exec mask @@ -518,7 +615,7 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) { } // Replay newly inserted code to combine masks - Next = (Last == MBB.end()) ? MBB.begin() : Last; + Next = (Last == MBB->end()) ? MBB->begin() : Last; } } |