[AMDGPU] SIFixSGPRCopies should not modify non-divergent PHI

Differential revision: https://reviews.llvm.org/D40556
llvm-svn: 319534
author: Alexander Timofeev <Alexander.Timofeev@amd.com> 2017-12-01 11:56:34 +0000
committer: Alexander Timofeev <Alexander.Timofeev@amd.com> 2017-12-01 11:56:34 +0000
commit: c1425c9d6b7685e9e075aa0ffa07ab44bc3833e6 (patch)
tree: f688d4e423bee8d1c8e6ca9a062edca33d70042e /llvm/test/CodeGen/AMDGPU
parent: 11ce6e6a83e42eb2d9871f76158ccf41238074aa (diff)
download: bcm5719-llvm-c1425c9d6b7685e9e075aa0ffa07ab44bc3833e6.tar.gz
bcm5719-llvm-c1425c9d6b7685e9e075aa0ffa07ab44bc3833e6.zip
4 files changed, 45 insertions, 9 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll b/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll
index 3eef06950a4..6f9c043f914 100644
--- a/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll
+++ b/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll
@@ -89,7 +89,7 @@ endif:
 }
 
 ; GCN-LABEL: {{^}}divergent_loop:
-; VGPR: workitem_private_segment_byte_size = 16{{$}}
+; VGPR: workitem_private_segment_byte_size = 12{{$}}
 
 ; GCN: {{^}}; BB#0:
 
@@ -123,10 +123,9 @@ endif:
 ; GCN: [[LOOP:BB[0-9]+_[0-9]+]]:
 ; GCN: buffer_load_dword v[[VAL_LOOP_RELOAD:[0-9]+]], off, s[0:3], s7 offset:4 ; 4-byte Folded Reload
 ; GCN: v_subrev_i32_e32 [[VAL_LOOP:v[0-9]+]], vcc, v{{[0-9]+}}, v[[VAL_LOOP_RELOAD]]
-; GCN: v_cmp_ne_u32_e32 vcc,
-; GCN: s_and_b64 vcc, exec, vcc
+; GCN: s_cmp_lg_u32 s{{[0-9]+}}, s{{[0-9]+}}
 ; GCN: buffer_store_dword [[VAL_LOOP]], off, s[0:3], s7 offset:[[VAL_SUB_OFFSET:[0-9]+]] ; 4-byte Folded Spill
-; GCN-NEXT: s_cbranch_vccnz [[LOOP]]
+; GCN-NEXT: s_cbranch_scc1 [[LOOP]]
 
 
 ; GCN: [[END]]:
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-PHI.ll b/llvm/test/CodeGen/AMDGPU/uniform-PHI.ll
new file mode 100644
index 00000000000..3cb86b39a65
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/uniform-PHI.ll
@@ -0,0 +1,39 @@
+; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+; GCN-LABEL: BB0_2
+; GCN-NOT: v_readfirstlane
+
+
+target triple = "amdgcn--amdhsa"
+define amdgpu_kernel void @uniform-PHI(i32 addrspace(1)* nocapture readonly %arg, i32 addrspace(1)* nocapture %arg1, i32 %arg2) {
+bb:
+  %tmp = sext i32 %arg2 to i64
+  %tmp3 = tail call i64 @_Z13get_global_idj(i32 0) #2
+  %tmp4 = icmp ugt i64 %tmp3, %tmp
+  %tmp5 = icmp sgt i32 %arg2, 0
+  %tmp6 = and i1 %tmp4, %tmp5
+  br i1 %tmp6, label %bb7, label %bb17
+
+bb7:                                              ; preds = %bb
+  br label %bb8
+
+bb8:                                              ; preds = %bb8, %bb7
+  %tmp9 = phi i32 [ %tmp15, %bb8 ], [ 0, %bb7 ]
+  %tmp10 = phi i32 [ %tmp14, %bb8 ], [ 0, %bb7 ]
+  %tmp11 = zext i32 %tmp9 to i64
+  %tmp12 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp11
+  %tmp13 = load i32, i32 addrspace(1)* %tmp12, align 4
+  %tmp14 = add nsw i32 %tmp13, %tmp10
+  %tmp15 = add nuw nsw i32 %tmp9, 1
+  %tmp16 = icmp eq i32 %tmp15, %arg2
+  br i1 %tmp16, label %bb17, label %bb8
+
+bb17:                                             ; preds = %bb8, %bb
+  %tmp18 = phi i32 [ 0, %bb ], [ %tmp14, %bb8 ]
+  store i32 %tmp18, i32 addrspace(1)* %arg1, align 4
+  ret void
+}
+
+declare i64 @_Z13get_global_idj(i32) local_unnamed_addr #1
+attributes #1 = { convergent nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="fiji" "target-features"="+16-bit-insts,+dpp,+fp64-fp16-denormals,+s-memrealtime,-fp32-denormals" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { convergent nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-loop-inside-nonuniform.ll b/llvm/test/CodeGen/AMDGPU/uniform-loop-inside-nonuniform.ll
index 82283f39792..1bbda66fddb 100644
--- a/llvm/test/CodeGen/AMDGPU/uniform-loop-inside-nonuniform.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-loop-inside-nonuniform.ll
@@ -6,11 +6,10 @@
 ; CHECK: v_cmp_ne_u32_e32 vcc, 0
 ; CHECK: s_and_saveexec_b64
 ; CHECK-NEXT: ; mask branch
-; CHECK-NEXT: s_cbranch_execz BB{{[0-9]+_[0-9]+}}
 ; CHECK-NEXT: BB{{[0-9]+_[0-9]+}}: ; %loop_body.preheader
 
 ; CHECK: [[LOOP_BODY_LABEL:BB[0-9]+_[0-9]+]]:
-; CHECK: s_cbranch_vccz [[LOOP_BODY_LABEL]]
+; CHECK: s_cbranch_scc0 [[LOOP_BODY_LABEL]]
 
 ; CHECK: s_endpgm
 define amdgpu_ps void @test1(<8 x i32> inreg %rsrc, <2 x i32> %addr.base, i32 %y, i32 %p) {
@@ -35,7 +34,6 @@ out:
 ; CHECK-LABEL: {{^}}test2:
 ; CHECK: s_and_saveexec_b64
 ; CHECK-NEXT: ; mask branch
-; CHECK-NEXT: s_cbranch_execz
 define amdgpu_kernel void @test2(i32 addrspace(1)* %out, i32 %a, i32 %b) {
 main_body:
   %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
diff --git a/llvm/test/CodeGen/AMDGPU/valu-i1.ll b/llvm/test/CodeGen/AMDGPU/valu-i1.ll
index 7162e818d49..3b0f003f52b 100644
--- a/llvm/test/CodeGen/AMDGPU/valu-i1.ll
+++ b/llvm/test/CodeGen/AMDGPU/valu-i1.ll
@@ -162,8 +162,8 @@ exit:
 ; SI: [[LABEL_LOOP:BB[0-9]+_[0-9]+]]:
 ; SI: buffer_load_dword
 ; SI-DAG: buffer_store_dword
-; SI-DAG: v_cmp_eq_u32_e32 vcc, 0x100
-; SI: s_cbranch_vccz [[LABEL_LOOP]]
+; SI-DAG: s_cmpk_eq_i32 s{{[0-9]+}}, 0x100
+; SI: s_cbranch_scc0 [[LABEL_LOOP]]
 ; SI: [[LABEL_EXIT]]:
 ; SI: s_endpgm
author	Alexander Timofeev <Alexander.Timofeev@amd.com>	2017-12-01 11:56:34 +0000
committer	Alexander Timofeev <Alexander.Timofeev@amd.com>	2017-12-01 11:56:34 +0000
commit	c1425c9d6b7685e9e075aa0ffa07ab44bc3833e6 (patch)
tree	f688d4e423bee8d1c8e6ca9a062edca33d70042e /llvm/test/CodeGen/AMDGPU
parent	11ce6e6a83e42eb2d9871f76158ccf41238074aa (diff)
download	bcm5719-llvm-c1425c9d6b7685e9e075aa0ffa07ab44bc3833e6.tar.gz bcm5719-llvm-c1425c9d6b7685e9e075aa0ffa07ab44bc3833e6.zip