1 files changed, 103 insertions, 66 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
index fb151b4ffdc..cc16d931dcf 100644
--- a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -302,6 +302,52 @@ static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI,
   return true;
 }
 
+static bool phiHasVGPROperands(const MachineInstr &PHI,
+                               const MachineRegisterInfo &MRI,
+                               const SIRegisterInfo *TRI,
+                               const SIInstrInfo *TII) {
+  for (unsigned i = 1; i < PHI.getNumOperands(); i += 2) {
+    unsigned Reg = PHI.getOperand(i).getReg();
+    if (TRI->hasVGPRs(MRI.getRegClass(Reg)))
+      return true;
+  }
+  return false;
+}
+
+static bool phiHasBreakDef(const MachineInstr &PHI,
+                           const MachineRegisterInfo &MRI,
+                           SmallSet<unsigned, 8> &Visited) {
+  for (unsigned i = 1; i < PHI.getNumOperands(); i += 2) {
+    unsigned Reg = PHI.getOperand(i).getReg();
+    if (Visited.count(Reg))
+      continue;
+
+    Visited.insert(Reg);
+
+    MachineInstr *DefInstr = MRI.getVRegDef(Reg);
+    switch (DefInstr->getOpcode()) {
+    default:
+      break;
+    case AMDGPU::SI_IF_BREAK:
+      return true;
+    case AMDGPU::PHI:
+      if (phiHasBreakDef(*DefInstr, MRI, Visited))
+        return true;
+    }
+  }
+  return false;
+}
+
+static bool hasTerminatorThatModifiesExec(const MachineBasicBlock &MBB,
+                                          const TargetRegisterInfo &TRI) {
+  for (MachineBasicBlock::const_iterator I = MBB.getFirstTerminator(),
+       E = MBB.end(); I != E; ++I) {
+    if (I->modifiesRegister(AMDGPU::EXEC, &TRI))
+      return true;
+  }
+  return false;
+}
+
 static bool isSafeToFoldImmIntoCopy(const MachineInstr *Copy,
                                     const MachineInstr *MoveImm,
                                     const SIInstrInfo *TII,
@@ -363,6 +409,12 @@ bool searchPredecessors(const MachineBasicBlock *MBB,
   return false;
 }
 
+static bool predsHasDivergentTerminator(MachineBasicBlock *MBB,
+                                        const TargetRegisterInfo *TRI) {
+  return searchPredecessors(MBB, nullptr, [TRI](MachineBasicBlock *MBB) {
+           return hasTerminatorThatModifiesExec(*MBB, *TRI); });
+}
+
 // Checks if there is potential path From instruction To instruction.
 // If CutOff is specified and it sits in between of that path we ignore
 // a higher portion of the path and report it is not reachable.
@@ -569,77 +621,62 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
         break;
       }
       case AMDGPU::PHI: {
-        unsigned hasVGPRUses = 0;
-        SetVector<const MachineInstr *> worklist;
-        worklist.insert(&MI);
-        while (!worklist.empty()) {
-          const MachineInstr *Instr = worklist.pop_back_val();
-          unsigned Reg = Instr->getOperand(0).getReg();
-          for (const auto &Use : MRI.use_operands(Reg)) {
-            const MachineInstr *UseMI = Use.getParent();
-            if (UseMI->isCopy() || UseMI->isRegSequence()) {
-              if (UseMI->isCopy() &&
-                  TRI->isPhysicalRegister(UseMI->getOperand(0).getReg()) &&
-                  !TRI->isSGPRReg(MRI, UseMI->getOperand(0).getReg())) {
-                hasVGPRUses++;
-              }
-              worklist.insert(UseMI);
-              continue;
-            }
-
-            if (UseMI->isPHI()) {
-              const TargetRegisterClass *UseRC = MRI.getRegClass(Use.getReg());
-              if (!TRI->isSGPRReg(MRI, Use.getReg()) &&
-                  UseRC != &AMDGPU::VReg_1RegClass)
-                hasVGPRUses++;
-              continue;
-            }
-
-            unsigned OpNo = UseMI->getOperandNo(&Use);
-            const MCInstrDesc &Desc = TII->get(UseMI->getOpcode());
-            if (!Desc.isPseudo() && Desc.OpInfo &&
-                OpNo < Desc.getNumOperands() &&
-                Desc.OpInfo[OpNo].RegClass != -1) {
-              const TargetRegisterClass *OpRC =
-                  TRI->getRegClass(Desc.OpInfo[OpNo].RegClass);
-              if (!TRI->isSGPRClass(OpRC) && OpRC != &AMDGPU::VS_32RegClass &&
-                  OpRC != &AMDGPU::VS_64RegClass) {
-                hasVGPRUses++;
-              }
-            }
-          }
-        }
-        bool hasVGPRInput = false;
-        for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
-          unsigned InputReg = MI.getOperand(i).getReg();
-          MachineInstr *Def = MRI.getVRegDef(InputReg);
-          if (TRI->isVGPR(MRI, InputReg)) {
-            if (Def->isCopy()) {
-              unsigned SrcReg = Def->getOperand(1).getReg();
-              const TargetRegisterClass *RC =
-                  TRI->isVirtualRegister(SrcReg) ? MRI.getRegClass(SrcReg)
-                                                 : TRI->getPhysRegClass(SrcReg);
-              if (TRI->isSGPRClass(RC))
-                continue;
-            }
-            hasVGPRInput = true;
-            break;
-          } else if (Def->isCopy() &&
-                     TRI->isVGPR(MRI, Def->getOperand(1).getReg())) {
-            hasVGPRInput = true;
+        unsigned Reg = MI.getOperand(0).getReg();
+        if (!TRI->isSGPRClass(MRI.getRegClass(Reg)))
+          break;
+
+        // We don't need to fix the PHI if the common dominator of the
+        // two incoming blocks terminates with a uniform branch.
+        bool HasVGPROperand = phiHasVGPROperands(MI, MRI, TRI, TII);
+        if (MI.getNumExplicitOperands() == 5 && !HasVGPROperand) {
+          MachineBasicBlock *MBB0 = MI.getOperand(2).getMBB();
+          MachineBasicBlock *MBB1 = MI.getOperand(4).getMBB();
+
+          if (!predsHasDivergentTerminator(MBB0, TRI) &&
+              !predsHasDivergentTerminator(MBB1, TRI)) {
+            LLVM_DEBUG(dbgs()
+                       << "Not fixing PHI for uniform branch: " << MI << '\n');
             break;
           }
         }
-        unsigned PHIRes = MI.getOperand(0).getReg();
-        const TargetRegisterClass *RC0 = MRI.getRegClass(PHIRes);
 
-        if ((!TRI->isVGPR(MRI, PHIRes) && RC0 != &AMDGPU::VReg_1RegClass) &&
-            (hasVGPRInput || hasVGPRUses > 1)) {
+        // If a PHI node defines an SGPR and any of its operands are VGPRs,
+        // then we need to move it to the VALU.
+        //
+        // Also, if a PHI node defines an SGPR and has all SGPR operands
+        // we must move it to the VALU, because the SGPR operands will
+        // all end up being assigned the same register, which means
+        // there is a potential for a conflict if different threads take
+        // different control flow paths.
+        //
+        // For Example:
+        //
+        // sgpr0 = def;
+        // ...
+        // sgpr1 = def;
+        // ...
+        // sgpr2 = PHI sgpr0, sgpr1
+        // use sgpr2;
+        //
+        // Will Become:
+        //
+        // sgpr2 = def;
+        // ...
+        // sgpr2 = def;
+        // ...
+        // use sgpr2
+        //
+        // The one exception to this rule is when one of the operands
+        // is defined by a SI_BREAK, SI_IF_BREAK, or SI_ELSE_BREAK
+        // instruction.  In this case, there we know the program will
+        // never enter the second block (the loop) without entering
+        // the first block (where the condition is computed), so there
+        // is no chance for values to be over-written.
+
+        SmallSet<unsigned, 8> Visited;
+        if (HasVGPROperand || !phiHasBreakDef(MI, MRI, Visited)) {
           LLVM_DEBUG(dbgs() << "Fixing PHI: " << MI);
-          TII->moveToVALU(MI);
-        } else {
-          LLVM_DEBUG(dbgs() << "Legalizing PHI: " << MI);
-          TII->legalizeOperands(MI, MDT);
+          TII->moveToVALU(MI, MDT);
         }
 
         break;