AMDGPU: Recompute scc liveness

The various scalar bit operations set SCC, so if one is erased or moved, SCC liveness needs to be recomputed. Not sure why the existing tests don't fail on this. llvm-svn: 312819
author: Matt Arsenault <Matthew.Arsenault@amd.com> 2017-09-08 18:51:26 +0000
committer: Matt Arsenault <Matthew.Arsenault@amd.com> 2017-09-08 18:51:26 +0000
commit: 2f4df7ec414baf9d312ec9e207a7f89b47ace007 (patch)
tree: 0451d3562ae9217af24c7f077e39795f59f656d3
parent: 747b0e2905dc8a2b25d5ae97f5008efcd831795b (diff)
download: bcm5719-llvm-2f4df7ec414baf9d312ec9e207a7f89b47ace007.tar.gz
bcm5719-llvm-2f4df7ec414baf9d312ec9e207a7f89b47ace007.zip
2 files changed, 67 insertions, 1 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
index 19a437f10f1..6b2668fe052 100644
--- a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
+++ b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
@@ -142,9 +142,10 @@ bool SIOptimizeExecMaskingPreRA::runOnMachineFunction(MachineFunction &MF) {
 
           DEBUG(dbgs() << "Removing no effect instruction: " << *I << '\n');
 
-          for (auto &Op : I->operands())
+          for (auto &Op : I->operands()) {
             if (Op.isReg())
               RecalcRegs.insert(Op.getReg());
+          }
 
           auto Next = std::next(I);
           LIS->RemoveMachineInstrFromMaps(*I);
@@ -193,6 +194,11 @@ bool SIOptimizeExecMaskingPreRA::runOnMachineFunction(MachineFunction &MF) {
 
     auto SaveExec = getOrExecSource(*Lead, *TII, MRI);
     unsigned SaveExecReg = getOrNonExecReg(*Lead, *TII);
+    for (auto &Op : Lead->operands()) {
+      if (Op.isReg())
+        RecalcRegs.insert(Op.getReg());
+    }
+
     LIS->RemoveMachineInstrFromMaps(*Lead);
     Lead->eraseFromParent();
     if (SaveExecReg) {
diff --git a/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll b/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll
index 0bb5124b785..f359ffc284a 100644
--- a/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll
+++ b/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll
@@ -202,8 +202,68 @@ bb.end:                                           ; preds = %bb.then, %bb
   ret void
 }
 
+; Make sure scc liveness is updated if s_or_b64 is removed
+; GCN-LABEL: {{^}}scc_liveness:
+
+; GCN: [[BB1_LOOP:BB[0-9]+_[0-9]+]]:
+; GCN: s_andn2_b64 exec, exec,
+; GCN-NEXT: s_cbranch_execnz [[BB1_LOOP]]
+
+; GCN: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen
+; GCN: s_and_b64 exec, exec, vcc
+
+; GCN-NOT: s_or_b64 exec, exec
+
+; GCN: s_or_b64 exec, exec, s{{\[[0-9]+:[0-9]+\]}}
+; GCN: s_andn2_b64
+; GCN-NEXT: s_cbranch_execnz
+
+; GCN: s_or_b64 exec, exec, s{{\[[0-9]+:[0-9]+\]}}
+; GCN: buffer_store_dword
+; GCN: buffer_store_dword
+; GCN: buffer_store_dword
+; GCN: buffer_store_dword
+; GCN: s_setpc_b64
+define void @scc_liveness(i32 %arg) local_unnamed_addr #0 {
+bb:
+  br label %bb1
+
+bb1:                                              ; preds = %Flow1, %bb1, %bb
+  %tmp = icmp slt i32 %arg, 519
+  br i1 %tmp, label %bb2, label %bb1
+
+bb2:                                              ; preds = %bb1
+  %tmp3 = icmp eq i32 %arg, 0
+  br i1 %tmp3, label %bb4, label %bb10
+
+bb4:                                              ; preds = %bb2
+  %tmp6 = load float, float* undef
+  %tmp7 = fcmp olt float %tmp6, 0.0
+  br i1 %tmp7, label %bb8, label %Flow
+
+bb8:                                              ; preds = %bb4
+  %tmp9 = insertelement <4 x float> undef, float 0.0, i32 1
+  br label %Flow
+
+Flow:                                             ; preds = %bb8, %bb4
+  %tmp8 = phi <4 x float> [ %tmp9, %bb8 ], [ zeroinitializer, %bb4 ]
+  br label %bb10
+
+bb10:                                             ; preds = %Flow, %bb2
+  %tmp11 = phi <4 x float> [ zeroinitializer, %bb2 ], [ %tmp8, %Flow ]
+  br i1 %tmp3, label %bb12, label %Flow1
+
+Flow1:                                            ; preds = %bb10
+  br label %bb1
+
+bb12:                                             ; preds = %bb10
+  store volatile <4 x float> %tmp11, <4 x float>* undef, align 16
+  ret void
+}
+
 declare i32 @llvm.amdgcn.workitem.id.x() #0
 declare void @llvm.amdgcn.s.barrier() #1
 
 attributes #0 = { nounwind readnone speculatable }
 attributes #1 = { nounwind convergent }
+attributes #2 = { nounwind }
author	Matt Arsenault <Matthew.Arsenault@amd.com>	2017-09-08 18:51:26 +0000
committer	Matt Arsenault <Matthew.Arsenault@amd.com>	2017-09-08 18:51:26 +0000
commit	2f4df7ec414baf9d312ec9e207a7f89b47ace007 (patch)
tree	0451d3562ae9217af24c7f077e39795f59f656d3
parent	747b0e2905dc8a2b25d5ae97f5008efcd831795b (diff)
download	bcm5719-llvm-2f4df7ec414baf9d312ec9e207a7f89b47ace007.tar.gz bcm5719-llvm-2f4df7ec414baf9d312ec9e207a7f89b47ace007.zip