diff options
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll')
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll | 53 |
1 files changed, 34 insertions, 19 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll b/llvm/test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll index 80071e3407e..e7555a67033 100644 --- a/llvm/test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll +++ b/llvm/test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll @@ -1,28 +1,43 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -march=amdgcn -mcpu=tahiti -amdgpu-dce-in-ra=0 -o - %s | FileCheck %s ; Don't crash when the use of an undefined value is only detected by the ; register coalescer because it is hidden with subregister insert/extract. target triple="amdgcn--" -; CHECK-LABEL: foobar: -; CHECK: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; CHECK-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; CHECK-NEXT: v_mbcnt_lo_u32_b32_e64 -; CHECK-NEXT: s_mov_b32 s2, -1 -; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: v_mov_b32_e32 v1, s5 -; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc - -; CHECK: BB0_1: -; CHECK-NEXT: ; kill: def $vgpr0_vgpr1 killed $sgpr4_sgpr5 killed $exec -; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3 - -; CHECK: BB0_2: -; CHECK: s_or_b64 exec, exec, s[4:5] -; CHECK-NEXT: s_mov_b32 s3, 0xf000 -; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], 0 -; CHECK-NEXT: s_endpgm define amdgpu_kernel void @foobar(float %a0, float %a1, float addrspace(1)* %out) nounwind { +; CHECK-LABEL: foobar: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 +; CHECK-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; CHECK-NEXT: v_mbcnt_lo_u32_b32_e64 v0, -1, 0 +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; CHECK-NEXT: s_mov_b32 s2, -1 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) + +; FIXME: The change related to the fact that +; DetectDeadLanes pass hit "Copy across incompatible class" SGPR -> VGPR in analysis +; and hence it cannot derive the fact that the vector element is unused. +; Such a copies appear because the float4 vectors and their elements in the test are uniform +; but the PHI node in "ife" block is divergent because of the CF dependency (divergent branch in bb0) + +; CHECK-NEXT: v_mov_b32_e32 v0, s4 +; CHECK-NEXT: v_mov_b32_e32 v1, s5 +; CHECK-NEXT: v_mov_b32_e32 v2, s6 +; CHECK-NEXT: v_mov_b32_e32 v3, s7 + +; CHECK-NEXT: s_and_saveexec_b64 s[6:7], vcc +; CHECK-NEXT: ; mask branch BB0_2 +; CHECK-NEXT: BB0_1: ; %ift +; CHECK-NEXT: s_mov_b32 s4, s5 +; CHECK-NEXT: v_mov_b32_e32 v0, s4 +; CHECK-NEXT: v_mov_b32_e32 v1, s5 +; CHECK-NEXT: v_mov_b32_e32 v2, s6 +; CHECK-NEXT: v_mov_b32_e32 v3, s7 +; CHECK-NEXT: BB0_2: ; %ife +; CHECK-NEXT: s_or_b64 exec, exec, s[6:7] +; CHECK-NEXT: s_mov_b32 s3, 0xf000 +; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], 0 +; CHECK-NEXT: s_endpgm entry: %v0 = insertelement <4 x float> undef, float %a0, i32 0 %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0 |