From dbcf9e2ee4602006df8e472e5cd18a64cecbdced Mon Sep 17 00:00:00 2001 From: Matthias Braun Date: Thu, 2 Mar 2017 00:35:08 +0000 Subject: LiveRegMatrix: Fix some subreg interference checks Surprisingly, one of the three interference checks in LiveRegMatrix was using the main live range instead of the appropriate subregister range resulting in unnecessarily conservative results. llvm-svn: 296722 --- llvm/test/CodeGen/AMDGPU/add.v2i16.ll | 4 ++-- llvm/test/CodeGen/AMDGPU/br_cc.f16.ll | 15 +++++++------- llvm/test/CodeGen/AMDGPU/fneg-combines.ll | 3 +-- .../test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll | 12 +++++------ llvm/test/CodeGen/AMDGPU/sub.v2i16.ll | 10 ++++----- llvm/test/CodeGen/AMDGPU/subreg_interference.mir | 24 ++++++++++++++++++++++ 6 files changed, 45 insertions(+), 23 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/subreg_interference.mir (limited to 'llvm/test') diff --git a/llvm/test/CodeGen/AMDGPU/add.v2i16.ll b/llvm/test/CodeGen/AMDGPU/add.v2i16.ll index 7431f141032..db3c88a1edc 100644 --- a/llvm/test/CodeGen/AMDGPU/add.v2i16.ll +++ b/llvm/test/CodeGen/AMDGPU/add.v2i16.ll @@ -191,10 +191,10 @@ define void @v_test_add_v2i16_zext_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i ; GFX9: flat_load_dword [[A:v[0-9]+]] ; GFX9: flat_load_dword [[B:v[0-9]+]] +; GFX9: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}} ; GFX9: v_pk_add_u16 [[ADD:v[0-9]+]], [[A]], [[B]] ; GFX9-DAG: v_and_b32_e32 v[[ELT0:[0-9]+]], 0xffff, [[ADD]] ; GFX9-DAG: v_lshrrev_b32_e32 v[[ELT1:[0-9]+]], 16, [[ADD]] -; GFX9-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}} ; GFX9: buffer_store_dwordx4 ; VI: flat_load_ushort v[[A_LO:[0-9]+]] @@ -202,10 +202,10 @@ define void @v_test_add_v2i16_zext_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i ; VI: flat_load_ushort v[[B_LO:[0-9]+]] ; VI: flat_load_ushort v[[B_HI:[0-9]+]] +; VI: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}} ; VI: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}} ; VI: v_add_u16_e32 ; VI: v_add_u16_e32 -; VI: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}} ; VI: buffer_store_dwordx4 
define void @v_test_add_v2i16_zext_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0, <2 x i16> addrspace(1)* %in1) #1 { diff --git a/llvm/test/CodeGen/AMDGPU/br_cc.f16.ll b/llvm/test/CodeGen/AMDGPU/br_cc.f16.ll index 4ae15e8ea45..3fe6f87c555 100644 --- a/llvm/test/CodeGen/AMDGPU/br_cc.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/br_cc.f16.ll @@ -5,20 +5,19 @@ ; GCN: buffer_load_ushort v[[A_F16:[0-9]+]] ; GCN: buffer_load_ushort v[[B_F16:[0-9]+]] -; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]] -; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]] -; SI: v_cmp_nlt_f32_e32 vcc, v[[A_F32]], v[[B_F32]] +; SI-DAG: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]] +; SI-DAG: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]] +; SI: v_cmp_nlt_f32_e32 vcc, v[[B_F32]], v[[A_F32]] ; VI: v_cmp_nlt_f16_e32 vcc, v[[A_F16]], v[[B_F16]] ; GCN: s_cbranch_vccnz ; GCN: one{{$}} -; SI: v_cvt_f16_f32_e32 v[[A_F16:[0-9]+]], v[[A_F32]] -; SI: s_branch -; VI: buffer_store_short -; VI: s_endpgm +; SI: v_cvt_f16_f32_e32 v[[A_F16:[0-9]+]], v[[B_F32]] +; GCN: buffer_store_short +; GCN: s_endpgm ; GCN: two{{$}} -; SI: v_cvt_f16_f32_e32 v[[B_F16:[0-9]+]], v[[B_F32]] +; SI: v_cvt_f16_f32_e32 v[[B_F16:[0-9]+]], v[[A_F32]] ; GCN: buffer_store_short v[[B_F16]] ; GCN: s_endpgm define void @br_cc_f16( diff --git a/llvm/test/CodeGen/AMDGPU/fneg-combines.ll b/llvm/test/CodeGen/AMDGPU/fneg-combines.ll index 41f31de18e6..acd4f7ee4a8 100644 --- a/llvm/test/CodeGen/AMDGPU/fneg-combines.ll +++ b/llvm/test/CodeGen/AMDGPU/fneg-combines.ll @@ -1195,9 +1195,8 @@ define void @v_fneg_fp_round_fneg_f64_to_f32(float addrspace(1)* %out, double ad ; GCN: {{buffer|flat}}_load_dwordx2 v{{\[}}[[A_LO:[0-9]+]]:[[A_HI:[0-9]+]]{{\]}} ; GCN-DAG: v_cvt_f32_f64_e32 [[RESULT:v[0-9]+]], v{{\[}}[[A_LO]]:[[A_HI]]{{\]}} ; GCN-DAG: v_xor_b32_e32 v[[NEG_A_HI:[0-9]+]], 0x80000000, v[[A_HI]] -; GCN-DAG: v_mov_b32_e32 v[[NEG_A_LO:[0-9]+]], v[[A_LO]] ; GCN: buffer_store_dword [[RESULT]] -; GCN: buffer_store_dwordx2 
v{{\[}}[[NEG_A_LO]]:[[NEG_A_HI]]{{\]}} +; GCN: buffer_store_dwordx2 v{{\[}}[[A_LO]]:[[NEG_A_HI]]{{\]}} define void @v_fneg_fp_round_store_use_fneg_f64_to_f32(float addrspace(1)* %out, double addrspace(1)* %a.ptr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 diff --git a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll index 079a441f81c..e4e634d3395 100644 --- a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll +++ b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll @@ -417,10 +417,10 @@ define void @v_insertelement_v2i16_dynamic_sgpr(<2 x i16> addrspace(1)* %out, <2 ; GFX89-DAG: s_mov_b32 [[MASKK:s[0-9]+]], 0xffff{{$}} ; GFX89-DAG: v_lshlrev_b32_e32 [[SCALED_IDX:v[0-9]+]], 16, [[IDX]] -; GFX89: v_lshlrev_b32_e64 [[MASK:v[0-9]+]], [[SCALED_IDX]], [[MASKK]] +; GFX89-DAG: v_lshlrev_b32_e64 [[MASK:v[0-9]+]], [[SCALED_IDX]], [[MASKK]] -; CI: v_lshlrev_b32_e32 [[SCALED_IDX:v[0-9]+]], 16, [[IDX]] -; CI: v_lshl_b32_e32 [[MASK:v[0-9]+]], 0xffff, [[SCALED_IDX]] +; CI-DAG: v_lshlrev_b32_e32 [[SCALED_IDX:v[0-9]+]], 16, [[IDX]] +; CI-DAG: v_lshl_b32_e32 [[MASK:v[0-9]+]], 0xffff, [[SCALED_IDX]] ; GCN: v_bfi_b32 [[RESULT:v[0-9]+]], [[MASK]], [[K]], [[VEC]] ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] @@ -444,10 +444,10 @@ define void @v_insertelement_v2i16_dynamic_vgpr(<2 x i16> addrspace(1)* %out, <2 ; GFX89-DAG: s_mov_b32 [[MASKK:s[0-9]+]], 0xffff{{$}} ; GFX89-DAG: v_lshlrev_b32_e32 [[SCALED_IDX:v[0-9]+]], 16, [[IDX]] -; GFX89: v_lshlrev_b32_e64 [[MASK:v[0-9]+]], [[SCALED_IDX]], [[MASKK]] +; GFX89-DAG: v_lshlrev_b32_e64 [[MASK:v[0-9]+]], [[SCALED_IDX]], [[MASKK]] -; CI: v_lshlrev_b32_e32 [[SCALED_IDX:v[0-9]+]], 16, [[IDX]] -; CI: v_lshl_b32_e32 [[MASK:v[0-9]+]], 0xffff, [[SCALED_IDX]] +; CI-DAG: v_lshlrev_b32_e32 [[SCALED_IDX:v[0-9]+]], 16, [[IDX]] +; CI-DAG: v_lshl_b32_e32 [[MASK:v[0-9]+]], 0xffff, [[SCALED_IDX]] ; GCN: v_bfi_b32 [[RESULT:v[0-9]+]], [[MASK]], [[K]], 
[[VEC]] ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] diff --git a/llvm/test/CodeGen/AMDGPU/sub.v2i16.ll b/llvm/test/CodeGen/AMDGPU/sub.v2i16.ll index 96686cf01ab..b282b510806 100644 --- a/llvm/test/CodeGen/AMDGPU/sub.v2i16.ll +++ b/llvm/test/CodeGen/AMDGPU/sub.v2i16.ll @@ -188,10 +188,10 @@ define void @v_test_sub_v2i16_zext_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i ; GFX9: flat_load_dword [[A:v[0-9]+]] ; GFX9: flat_load_dword [[B:v[0-9]+]] +; GFX9: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}} ; GFX9: v_pk_sub_i16 [[ADD:v[0-9]+]], [[A]], [[B]] ; GFX9-DAG: v_and_b32_e32 v[[ELT0:[0-9]+]], 0xffff, [[ADD]] ; GFX9-DAG: v_lshrrev_b32_e32 v[[ELT1:[0-9]+]], 16, [[ADD]] -; GFX9-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}} ; GFX9: buffer_store_dwordx4 ; VI: flat_load_ushort v[[A_LO:[0-9]+]] @@ -199,10 +199,10 @@ define void @v_test_sub_v2i16_zext_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i ; VI: flat_load_ushort v[[B_LO:[0-9]+]] ; VI: flat_load_ushort v[[B_HI:[0-9]+]] -; VI: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}} -; VI: v_subrev_u16_e32 -; VI: v_subrev_u16_e32 -; VI: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}} +; VI-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}} +; VI-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}} +; VI-DAG: v_subrev_u16_e32 +; VI-DAG: v_subrev_u16_e32 ; VI: buffer_store_dwordx4 define void @v_test_sub_v2i16_zext_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0, <2 x i16> addrspace(1)* %in1) #1 { diff --git a/llvm/test/CodeGen/AMDGPU/subreg_interference.mir b/llvm/test/CodeGen/AMDGPU/subreg_interference.mir new file mode 100644 index 00000000000..24d06a576c2 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/subreg_interference.mir @@ -0,0 +1,24 @@ +# RUN: llc -o - %s -mtriple=amdgcn--amdhsa -verify-machineinstrs -run-pass=greedy,virtregrewriter | FileCheck %s +--- +# We should not detect any interference between v0/v1 here and only allocate +# sgpr0-sgpr3. 
+# +# CHECK-LABEL: func0 +# CHECK: S_NOP 0, implicit-def %sgpr0 +# CHECK: S_NOP 0, implicit-def %sgpr3 +# CHECK: S_NOP 0, implicit-def %sgpr1 +# CHECK: S_NOP 0, implicit-def %sgpr2 +# CHECK: S_NOP 0, implicit %sgpr0, implicit %sgpr3 +# CHECK: S_NOP 0, implicit %sgpr1, implicit %sgpr2 +name: func0 +body: | + bb.0: + S_NOP 0, implicit-def undef %0.sub0 : sreg_128 + S_NOP 0, implicit-def %0.sub3 + S_NOP 0, implicit-def undef %1.sub1 : sreg_128 + S_NOP 0, implicit-def %1.sub2 + + + S_NOP 0, implicit %0.sub0, implicit %0.sub3 + S_NOP 0, implicit %1.sub1, implicit %1.sub2 +... -- cgit v1.2.3