diff options
author | Matthias Braun <matze@braunis.de> | 2017-03-02 00:35:08 +0000 |
---|---|---|
committer | Matthias Braun <matze@braunis.de> | 2017-03-02 00:35:08 +0000 |
commit | dbcf9e2ee4602006df8e472e5cd18a64cecbdced (patch) | |
tree | a424829a4f70f31140a3329e6b23ad1572e03e49 /llvm/test | |
parent | 3095856d80620ed8a39f92a76fa5d03c78610bee (diff) | |
download | bcm5719-llvm-dbcf9e2ee4602006df8e472e5cd18a64cecbdced.tar.gz bcm5719-llvm-dbcf9e2ee4602006df8e472e5cd18a64cecbdced.zip |
LiveRegMatrix: Fix some subreg interference checks
Surprisingly, one of the three interference checks in LiveRegMatrix was
using the main live range instead of the apropriate subregister range
resulting in unnecessarily conservative results.
llvm-svn: 296722
Diffstat (limited to 'llvm/test')
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/add.v2i16.ll | 4 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/br_cc.f16.ll | 15 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/fneg-combines.ll | 3 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll | 12 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/sub.v2i16.ll | 10 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/subreg_interference.mir | 24 |
6 files changed, 45 insertions, 23 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/add.v2i16.ll b/llvm/test/CodeGen/AMDGPU/add.v2i16.ll index 7431f141032..db3c88a1edc 100644 --- a/llvm/test/CodeGen/AMDGPU/add.v2i16.ll +++ b/llvm/test/CodeGen/AMDGPU/add.v2i16.ll @@ -191,10 +191,10 @@ define void @v_test_add_v2i16_zext_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i ; GFX9: flat_load_dword [[A:v[0-9]+]] ; GFX9: flat_load_dword [[B:v[0-9]+]] +; GFX9: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}} ; GFX9: v_pk_add_u16 [[ADD:v[0-9]+]], [[A]], [[B]] ; GFX9-DAG: v_and_b32_e32 v[[ELT0:[0-9]+]], 0xffff, [[ADD]] ; GFX9-DAG: v_lshrrev_b32_e32 v[[ELT1:[0-9]+]], 16, [[ADD]] -; GFX9-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}} ; GFX9: buffer_store_dwordx4 ; VI: flat_load_ushort v[[A_LO:[0-9]+]] @@ -203,9 +203,9 @@ define void @v_test_add_v2i16_zext_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i ; VI: flat_load_ushort v[[B_HI:[0-9]+]] ; VI: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}} +; VI: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}} ; VI: v_add_u16_e32 ; VI: v_add_u16_e32 -; VI: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}} ; VI: buffer_store_dwordx4 define void @v_test_add_v2i16_zext_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0, <2 x i16> addrspace(1)* %in1) #1 { diff --git a/llvm/test/CodeGen/AMDGPU/br_cc.f16.ll b/llvm/test/CodeGen/AMDGPU/br_cc.f16.ll index 4ae15e8ea45..3fe6f87c555 100644 --- a/llvm/test/CodeGen/AMDGPU/br_cc.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/br_cc.f16.ll @@ -5,20 +5,19 @@ ; GCN: buffer_load_ushort v[[A_F16:[0-9]+]] ; GCN: buffer_load_ushort v[[B_F16:[0-9]+]] -; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]] -; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]] -; SI: v_cmp_nlt_f32_e32 vcc, v[[A_F32]], v[[B_F32]] +; SI-DAG: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]] +; SI-DAG: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]] +; SI: v_cmp_nlt_f32_e32 vcc, v[[B_F32]], v[[A_F32]] ; VI: v_cmp_nlt_f16_e32 vcc, v[[A_F16]], v[[B_F16]] ; GCN: s_cbranch_vccnz ; GCN: one{{$}} -; SI: v_cvt_f16_f32_e32 v[[A_F16:[0-9]+]], v[[A_F32]] -; SI: s_branch -; VI: buffer_store_short -; VI: s_endpgm +; SI: v_cvt_f16_f32_e32 v[[A_F16:[0-9]+]], v[[B_F32]] +; GCN: buffer_store_short +; GCN: s_endpgm ; GCN: two{{$}} -; SI: v_cvt_f16_f32_e32 v[[B_F16:[0-9]+]], v[[B_F32]] +; SI: v_cvt_f16_f32_e32 v[[B_F16:[0-9]+]], v[[A_F32]] ; GCN: buffer_store_short v[[B_F16]] ; GCN: s_endpgm define void @br_cc_f16( diff --git a/llvm/test/CodeGen/AMDGPU/fneg-combines.ll b/llvm/test/CodeGen/AMDGPU/fneg-combines.ll index 41f31de18e6..acd4f7ee4a8 100644 --- a/llvm/test/CodeGen/AMDGPU/fneg-combines.ll +++ b/llvm/test/CodeGen/AMDGPU/fneg-combines.ll @@ -1195,9 +1195,8 @@ define void @v_fneg_fp_round_fneg_f64_to_f32(float addrspace(1)* %out, double ad ; GCN: {{buffer|flat}}_load_dwordx2 v{{\[}}[[A_LO:[0-9]+]]:[[A_HI:[0-9]+]]{{\]}} ; GCN-DAG: v_cvt_f32_f64_e32 [[RESULT:v[0-9]+]], v{{\[}}[[A_LO]]:[[A_HI]]{{\]}} ; GCN-DAG: v_xor_b32_e32 v[[NEG_A_HI:[0-9]+]], 0x80000000, v[[A_HI]] -; GCN-DAG: v_mov_b32_e32 v[[NEG_A_LO:[0-9]+]], v[[A_LO]] ; GCN: buffer_store_dword [[RESULT]] -; GCN: buffer_store_dwordx2 v{{\[}}[[NEG_A_LO]]:[[NEG_A_HI]]{{\]}} +; GCN: buffer_store_dwordx2 v{{\[}}[[A_LO]]:[[NEG_A_HI]]{{\]}} define void @v_fneg_fp_round_store_use_fneg_f64_to_f32(float addrspace(1)* %out, double addrspace(1)* %a.ptr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 diff --git a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll index 079a441f81c..e4e634d3395 100644 --- a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll +++ b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll @@ -417,10 +417,10 @@ define void @v_insertelement_v2i16_dynamic_sgpr(<2 x i16> addrspace(1)* %out, <2 ; GFX89-DAG: s_mov_b32 [[MASKK:s[0-9]+]], 0xffff{{$}} ; GFX89-DAG: v_lshlrev_b32_e32 [[SCALED_IDX:v[0-9]+]], 16, [[IDX]] -; GFX89: v_lshlrev_b32_e64 [[MASK:v[0-9]+]], [[SCALED_IDX]], [[MASKK]] +; GFX89-DAG: v_lshlrev_b32_e64 [[MASK:v[0-9]+]], [[SCALED_IDX]], [[MASKK]] -; CI: v_lshlrev_b32_e32 [[SCALED_IDX:v[0-9]+]], 16, [[IDX]] -; CI: v_lshl_b32_e32 [[MASK:v[0-9]+]], 0xffff, [[SCALED_IDX]] +; CI-DAG: v_lshlrev_b32_e32 [[SCALED_IDX:v[0-9]+]], 16, [[IDX]] +; CI-DAG: v_lshl_b32_e32 [[MASK:v[0-9]+]], 0xffff, [[SCALED_IDX]] ; GCN: v_bfi_b32 [[RESULT:v[0-9]+]], [[MASK]], [[K]], [[VEC]] ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] @@ -444,10 +444,10 @@ define void @v_insertelement_v2i16_dynamic_vgpr(<2 x i16> addrspace(1)* %out, <2 ; GFX89-DAG: s_mov_b32 [[MASKK:s[0-9]+]], 0xffff{{$}} ; GFX89-DAG: v_lshlrev_b32_e32 [[SCALED_IDX:v[0-9]+]], 16, [[IDX]] -; GFX89: v_lshlrev_b32_e64 [[MASK:v[0-9]+]], [[SCALED_IDX]], [[MASKK]] +; GFX89-DAG: v_lshlrev_b32_e64 [[MASK:v[0-9]+]], [[SCALED_IDX]], [[MASKK]] -; CI: v_lshlrev_b32_e32 [[SCALED_IDX:v[0-9]+]], 16, [[IDX]] -; CI: v_lshl_b32_e32 [[MASK:v[0-9]+]], 0xffff, [[SCALED_IDX]] +; CI-DAG: v_lshlrev_b32_e32 [[SCALED_IDX:v[0-9]+]], 16, [[IDX]] +; CI-DAG: v_lshl_b32_e32 [[MASK:v[0-9]+]], 0xffff, [[SCALED_IDX]] ; GCN: v_bfi_b32 [[RESULT:v[0-9]+]], [[MASK]], [[K]], [[VEC]] ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] diff --git a/llvm/test/CodeGen/AMDGPU/sub.v2i16.ll b/llvm/test/CodeGen/AMDGPU/sub.v2i16.ll index 96686cf01ab..b282b510806 100644 --- a/llvm/test/CodeGen/AMDGPU/sub.v2i16.ll +++ b/llvm/test/CodeGen/AMDGPU/sub.v2i16.ll @@ -188,10 +188,10 @@ define void @v_test_sub_v2i16_zext_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i ; GFX9: flat_load_dword [[A:v[0-9]+]] ; GFX9: flat_load_dword [[B:v[0-9]+]] +; GFX9: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}} ; GFX9: v_pk_sub_i16 [[ADD:v[0-9]+]], [[A]], [[B]] ; GFX9-DAG: v_and_b32_e32 v[[ELT0:[0-9]+]], 0xffff, [[ADD]] ; GFX9-DAG: v_lshrrev_b32_e32 v[[ELT1:[0-9]+]], 16, [[ADD]] -; GFX9-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}} ; GFX9: buffer_store_dwordx4 ; VI: flat_load_ushort v[[A_LO:[0-9]+]] @@ -199,10 +199,10 @@ define void @v_test_sub_v2i16_zext_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i ; VI: flat_load_ushort v[[B_LO:[0-9]+]] ; VI: flat_load_ushort v[[B_HI:[0-9]+]] -; VI: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}} -; VI: v_subrev_u16_e32 -; VI: v_subrev_u16_e32 -; VI: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}} +; VI-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}} +; VI-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}} +; VI-DAG: v_subrev_u16_e32 +; VI-DAG: v_subrev_u16_e32 ; VI: buffer_store_dwordx4 define void @v_test_sub_v2i16_zext_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0, <2 x i16> addrspace(1)* %in1) #1 { diff --git a/llvm/test/CodeGen/AMDGPU/subreg_interference.mir b/llvm/test/CodeGen/AMDGPU/subreg_interference.mir new file mode 100644 index 00000000000..24d06a576c2 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/subreg_interference.mir @@ -0,0 +1,24 @@ +# RUN: llc -o - %s -mtriple=amdgcn--amdhsa -verify-machineinstrs -run-pass=greedy,virtregrewriter | FileCheck %s +--- +# We should not detect any interference between v0/v1 here and only allocate +# sgpr0-sgpr3. +# +# CHECK-LABEL: func0 +# CHECK: S_NOP 0, implicit-def %sgpr0 +# CHECK: S_NOP 0, implicit-def %sgpr3 +# CHECK: S_NOP 0, implicit-def %sgpr1 +# CHECK: S_NOP 0, implicit-def %sgpr2 +# CHECK: S_NOP 0, implicit %sgpr0, implicit %sgpr3 +# CHECK: S_NOP 0, implicit %sgpr1, implicit %sgpr2 +name: func0 +body: | + bb.0: + S_NOP 0, implicit-def undef %0.sub0 : sreg_128 + S_NOP 0, implicit-def %0.sub3 + S_NOP 0, implicit-def undef %1.sub1 : sreg_128 + S_NOP 0, implicit-def %1.sub2 + + + S_NOP 0, implicit %0.sub0, implicit %0.sub3 + S_NOP 0, implicit %1.sub1, implicit %1.sub2 +... |