AMDGPU/SI: Fix visit order assumption in SIFixSGPRCopies

Summary: This pass was assuming that when a PHI instruction defined a register used by another PHI instruction that the defining insstruction would be legalized before the using instruction. This assumption was causing the pass to not legalize some PHI nodes within divergent flow-control. This fixes a bug that was uncovered by r285762. Reviewers: nhaehnle, arsenm Subscribers: kzhuravl, wdng, nhaehnle, yaxunl, tony-tye, llvm-commits Differential Revision: https://reviews.llvm.org/D26303 llvm-svn: 286676
author: Tom Stellard <thomas.stellard@amd.com> 2016-11-11 23:35:42 +0000
committer: Tom Stellard <thomas.stellard@amd.com> 2016-11-11 23:35:42 +0000
commit: 9fdbec870c0f3a0c7aa8185f577bf01e4abc5b14 (patch)
tree: ca7cb7c351d35ff890cd9c784b98ad5fd70be830
parent: d420224daca522e6a5796861ed9724ad9095a4ba (diff)
download: bcm5719-llvm-9fdbec870c0f3a0c7aa8185f577bf01e4abc5b14.tar.gz
bcm5719-llvm-9fdbec870c0f3a0c7aa8185f577bf01e4abc5b14.zip
3 files changed, 108 insertions, 24 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
index 288e5b1524f..5e822453f4c 100644
--- a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -234,6 +234,46 @@ static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI,
   return true;
 }
 
+static bool phiHasVGPROperands(const MachineInstr &PHI,
+                               const MachineRegisterInfo &MRI,
+                               const SIRegisterInfo *TRI,
+                               const SIInstrInfo *TII) {
+
+  for (unsigned i = 1; i < PHI.getNumOperands(); i += 2) {
+    unsigned Reg = PHI.getOperand(i).getReg();
+    if (TRI->hasVGPRs(MRI.getRegClass(Reg)))
+      return true;
+  }
+  return false;
+}
+static bool phiHasBreakDef(const MachineInstr &PHI,
+                           const MachineRegisterInfo &MRI,
+                           SmallSet<unsigned, 8> &Visited) {
+
+  for (unsigned i = 1; i < PHI.getNumOperands(); i += 2) {
+    unsigned Reg = PHI.getOperand(i).getReg();
+    if (Visited.count(Reg))
+      continue;
+
+    Visited.insert(Reg);
+
+    MachineInstr *DefInstr = MRI.getUniqueVRegDef(Reg);
+    assert(DefInstr);
+    switch (DefInstr->getOpcode()) {
+    default:
+      break;
+    case AMDGPU::SI_BREAK:
+    case AMDGPU::SI_IF_BREAK:
+    case AMDGPU::SI_ELSE_BREAK:
+      return true;
+    case AMDGPU::PHI:
+      if (phiHasBreakDef(*DefInstr, MRI, Visited))
+        return true;
+    }
+  }
+  return false;
+}
+
 bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
   const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
   MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -311,31 +351,11 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
         // the first block (where the condition is computed), so there
         // is no chance for values to be over-written.
 
-        bool HasBreakDef = false;
-        for (unsigned i = 1; i < MI.getNumOperands(); i+=2) {
-          unsigned Reg = MI.getOperand(i).getReg();
-          if (TRI->hasVGPRs(MRI.getRegClass(Reg))) {
-            TII->moveToVALU(MI);
-            break;
-          }
-          MachineInstr *DefInstr = MRI.getUniqueVRegDef(Reg);
-          assert(DefInstr);
-          switch(DefInstr->getOpcode()) {
-
-          case AMDGPU::SI_BREAK:
-          case AMDGPU::SI_IF_BREAK:
-          case AMDGPU::SI_ELSE_BREAK:
-          // If we see a PHI instruction that defines an SGPR, then that PHI
-          // instruction has already been considered and should have
-          // a *_BREAK as an operand.
-          case AMDGPU::PHI:
-            HasBreakDef = true;
-            break;
-          }
-        }
-
-        if (!SGPRBranch && !HasBreakDef)
+        SmallSet<unsigned, 8> Visited;
+        if (phiHasVGPROperands(MI, MRI, TRI, TII) ||
+            (!SGPRBranch && !phiHasBreakDef(MI, MRI, Visited))) {
           TII->moveToVALU(MI);
+        }
         break;
       }
       case AMDGPU::REG_SEQUENCE: {
diff --git a/llvm/test/CodeGen/AMDGPU/salu-to-valu.ll b/llvm/test/CodeGen/AMDGPU/salu-to-valu.ll
index fd73ff86af5..ff013060dce 100644
--- a/llvm/test/CodeGen/AMDGPU/salu-to-valu.ll
+++ b/llvm/test/CodeGen/AMDGPU/salu-to-valu.ll
@@ -457,5 +457,26 @@ bb7:                                              ; preds = %bb3
   ret void
 }
 
+; GCN-LABEL: {{^}}phi_visit_order:
+; GCN: v_add_i32_e32 v{{[0-9]+}}, vcc, 1, v{{[0-9]+}}
+define void @phi_visit_order() {
+bb:
+  br label %bb1
+
+bb1:
+  %tmp = phi i32 [ 0, %bb ], [ %tmp5, %bb4 ]
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  %cnd = icmp eq i32 %tid, 0
+  br i1 %cnd, label %bb4, label %bb2
+
+bb2:
+  %tmp3 = add nsw i32 %tmp, 1
+  br label %bb4
+
+bb4:
+  %tmp5 = phi i32 [ %tmp3, %bb2 ], [ %tmp, %bb1 ]
+  br label %bb1
+}
+
 attributes #0 = { nounwind readnone }
 attributes #1 = { nounwind }
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/si-fix-sgpr-copies.mir b/llvm/test/CodeGen/MIR/AMDGPU/si-fix-sgpr-copies.mir
new file mode 100644
index 00000000000..016a6e6fd06
--- /dev/null
+++ b/llvm/test/CodeGen/MIR/AMDGPU/si-fix-sgpr-copies.mir
@@ -0,0 +1,43 @@
+# RUN: llc -march=amdgcn -run-pass si-fix-sgpr-copies %s -o - | FileCheck %s -check-prefixes=GCN
+
+--- |
+  define void @phi_visit_order() { ret void }
+
+name: phi_visit_order
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: sreg_32 }
+  - { id: 1, class: sreg_64 }
+  - { id: 2, class: sreg_32 }
+  - { id: 7, class: vgpr_32 }
+  - { id: 8, class: sreg_32 }
+  - { id: 9, class: vgpr_32 }
+  - { id: 10, class: sreg_64 }
+  - { id: 11, class: sreg_32 }
+
+body: |
+  ; GCN-LABEL: name: phi_visit_order
+  ; GCN: V_ADD_I32
+  bb.0:
+    liveins: %vgpr0
+    successors: %bb.1
+    %7 = COPY %vgpr0
+    %8 = S_MOV_B32 0
+
+  bb.1:
+    successors: %bb.1, %bb.2
+    %0 = PHI %8, %bb.0, %0, %bb.1, %2, %bb.2
+    %9 = V_MOV_B32_e32 9, implicit %exec
+    %10 = V_CMP_EQ_U32_e64 %7, %9, implicit %exec
+    %1 = SI_IF %10, %bb.2, implicit-def %exec, implicit-def %scc, implicit %exec
+    S_BRANCH %bb.1
+
+  bb.2:
+    successors: %bb.1
+    SI_END_CF %1, implicit-def %exec, implicit-def %scc, implicit %exec
+    %11 = S_MOV_B32 1
+    %2 = S_ADD_I32 %0, %11, implicit-def %scc
+    S_BRANCH %bb.1
+
+...
+---
author	Tom Stellard <thomas.stellard@amd.com>	2016-11-11 23:35:42 +0000
committer	Tom Stellard <thomas.stellard@amd.com>	2016-11-11 23:35:42 +0000
commit	9fdbec870c0f3a0c7aa8185f577bf01e4abc5b14 (patch)
tree	ca7cb7c351d35ff890cd9c784b98ad5fd70be830
parent	d420224daca522e6a5796861ed9724ad9095a4ba (diff)
download	bcm5719-llvm-9fdbec870c0f3a0c7aa8185f577bf01e4abc5b14.tar.gz bcm5719-llvm-9fdbec870c0f3a0c7aa8185f577bf01e4abc5b14.zip