summaryrefslogtreecommitdiffstats
path: root/llvm/test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll')
-rw-r--r--llvm/test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll53
1 files changed, 34 insertions, 19 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll b/llvm/test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll
index 80071e3407e..e7555a67033 100644
--- a/llvm/test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll
+++ b/llvm/test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll
@@ -1,28 +1,43 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=amdgcn -mcpu=tahiti -amdgpu-dce-in-ra=0 -o - %s | FileCheck %s
; Don't crash when the use of an undefined value is only detected by the
; register coalescer because it is hidden with subregister insert/extract.
target triple="amdgcn--"
-; CHECK-LABEL: foobar:
-; CHECK: s_load_dwordx2 s[4:5], s[0:1], 0x9
-; CHECK-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb
-; CHECK-NEXT: v_mbcnt_lo_u32_b32_e64
-; CHECK-NEXT: s_mov_b32 s2, -1
-; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; CHECK-NEXT: s_waitcnt lgkmcnt(0)
-; CHECK-NEXT: v_mov_b32_e32 v1, s5
-; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc
-
-; CHECK: BB0_1:
-; CHECK-NEXT: ; kill: def $vgpr0_vgpr1 killed $sgpr4_sgpr5 killed $exec
-; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
-
-; CHECK: BB0_2:
-; CHECK: s_or_b64 exec, exec, s[4:5]
-; CHECK-NEXT: s_mov_b32 s3, 0xf000
-; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], 0
-; CHECK-NEXT: s_endpgm
define amdgpu_kernel void @foobar(float %a0, float %a1, float addrspace(1)* %out) nounwind {
+; CHECK-LABEL: foobar:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
+; CHECK-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb
+; CHECK-NEXT: v_mbcnt_lo_u32_b32_e64 v0, -1, 0
+; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; CHECK-NEXT: s_mov_b32 s2, -1
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+
+; FIXME: The change related to the fact that
+; DetectDeadLanes pass hit "Copy across incompatible class" SGPR -> VGPR in analysis
+; and hence it cannot derive the fact that the vector element is unused.
+; Such a copies appear because the float4 vectors and their elements in the test are uniform
+; but the PHI node in "ife" block is divergent because of the CF dependency (divergent branch in bb0)
+
+; CHECK-NEXT: v_mov_b32_e32 v0, s4
+; CHECK-NEXT: v_mov_b32_e32 v1, s5
+; CHECK-NEXT: v_mov_b32_e32 v2, s6
+; CHECK-NEXT: v_mov_b32_e32 v3, s7
+
+; CHECK-NEXT: s_and_saveexec_b64 s[6:7], vcc
+; CHECK-NEXT: ; mask branch BB0_2
+; CHECK-NEXT: BB0_1: ; %ift
+; CHECK-NEXT: s_mov_b32 s4, s5
+; CHECK-NEXT: v_mov_b32_e32 v0, s4
+; CHECK-NEXT: v_mov_b32_e32 v1, s5
+; CHECK-NEXT: v_mov_b32_e32 v2, s6
+; CHECK-NEXT: v_mov_b32_e32 v3, s7
+; CHECK-NEXT: BB0_2: ; %ife
+; CHECK-NEXT: s_or_b64 exec, exec, s[6:7]
+; CHECK-NEXT: s_mov_b32 s3, 0xf000
+; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], 0
+; CHECK-NEXT: s_endpgm
entry:
%v0 = insertelement <4 x float> undef, float %a0, i32 0
%tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
OpenPOWER on IntegriCloud