diff options
author | Konstantin Zhuravlyov <kzhuravl_dev@outlook.com> | 2017-04-13 23:17:00 +0000 |
---|---|---|
committer | Konstantin Zhuravlyov <kzhuravl_dev@outlook.com> | 2017-04-13 23:17:00 +0000 |
commit | d24aeb20fc3ac57a2180380edbeb3c0fa0cd03b2 (patch) | |
tree | fb06f927219d033affee12be8f4da32198dd9289 /llvm/test/CodeGen | |
parent | f93c58b81bfbe1e4502d8dd0465797fdd8fb400b (diff) | |
download | bcm5719-llvm-d24aeb20fc3ac57a2180380edbeb3c0fa0cd03b2.tar.gz bcm5719-llvm-d24aeb20fc3ac57a2180380edbeb3c0fa0cd03b2.zip |
AMDGPU/GFX9: Do not use v_pack_b32_f16 when packing
Differential Revision: https://reviews.llvm.org/D31819
llvm-svn: 300275
Diffstat (limited to 'llvm/test/CodeGen')
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll | 15 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/llvm.rint.f16.ll | 3 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/llvm.round.ll | 3 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/pack.v2f16.ll | 44 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/pack.v2i16.ll | 29 |
5 files changed, 37 insertions, 57 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll b/llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll index cdcc7be8f2f..bc72f4424c9 100644 --- a/llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll @@ -1,7 +1,6 @@ ; RUN: llc -march=amdgcn -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SIVI %s ; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=SIVI %s -; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global,+fp64-fp16-denormals -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX9-DENORM %s -; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global,-fp64-fp16-denormals -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX9-FLUSH %s +; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global,-fp64-fp16-denormals -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s ; GCN-LABEL: {{^}}fptrunc_f32_to_f16: ; GCN: buffer_load_dword v[[A_F32:[0-9]+]] @@ -45,10 +44,8 @@ entry: ; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_1]], v[[R_F16_0]] ; GFX9-DAG: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[A_F32_1]] -; GFX9-FLUSH: v_and_b32_e32 v[[R_F16_LO:[0-9]+]], 0xffff, v[[R_F16_0]] -; GFX9-FLUSH: v_lshl_or_b32 v[[R_V2_F16:[0-9]+]], v[[R_F16_1]], 16, v[[R_F16_LO]] - -; GFX9-DENORM: v_pack_b32_f16 v[[R_V2_F16:[0-9]+]], v[[R_F16_0]], v[[R_F16_1]] +; GFX9: v_and_b32_e32 v[[R_F16_LO:[0-9]+]], 0xffff, v[[R_F16_0]] +; GFX9: v_lshl_or_b32 v[[R_V2_F16:[0-9]+]], v[[R_F16_1]], 16, v[[R_F16_LO]] ; GCN: buffer_store_dword v[[R_V2_F16]] ; GCN: s_endpgm @@ -74,10 +71,8 @@ entry: ; SIVI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]] ; GFX9-DAG: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[A_F32_1]] -; GFX9-FLUSH: v_and_b32_e32 v[[R_F16_LO:[0-9]+]], 0xffff, v[[R_F16_0]] -; GFX9-FLUSH: v_lshl_or_b32 v[[R_V2_F16:[0-9]+]], v[[R_F16_1]], 16, v[[R_F16_LO]] - -; GFX9-DENORM: v_pack_b32_f16 v[[R_V2_F16:[0-9]+]], v[[R_F16_0]], v[[R_F16_1]] +; GFX9: v_and_b32_e32 v[[R_F16_LO:[0-9]+]], 0xffff, v[[R_F16_0]] +; GFX9: v_lshl_or_b32 v[[R_V2_F16:[0-9]+]], v[[R_F16_1]], 16, v[[R_F16_LO]] ; GCN: buffer_store_dword v[[R_V2_F16]] diff --git a/llvm/test/CodeGen/AMDGPU/llvm.rint.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.rint.f16.ll index f56655630be..59e81a7acc0 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.rint.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.rint.f16.ll @@ -44,7 +44,8 @@ entry: ; GFX9: v_rndne_f16_e32 v[[R_F16_0:[0-9]+]], v[[A_V2_F16]] ; GFX9: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]] ; GFX9: v_rndne_f16_e32 v[[R_F16_1:[0-9]+]], v[[A_F16_1]] -; GFX9: v_pack_b32_f16 v[[R_V2_F16:[0-9]+]], v[[R_F16_0]], v[[R_F16_1]] +; GFX9: v_and_b32_e32 v[[R_F16_LO:[0-9]+]], 0xffff, v[[R_F16_0]] +; GFX9: v_lshl_or_b32 v[[R_V2_F16:[0-9]+]], v[[R_F16_1]], 16, v[[R_F16_LO]] ; GCN: buffer_store_dword v[[R_V2_F16]] ; GCN: s_endpgm diff --git a/llvm/test/CodeGen/AMDGPU/llvm.round.ll b/llvm/test/CodeGen/AMDGPU/llvm.round.ll index d211ad8ec9f..ffe87977870 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.round.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.round.ll @@ -87,7 +87,8 @@ define amdgpu_kernel void @round_f16(half addrspace(1)* %out, i32 %x.arg) #0 { ; GFX89: v_bfi_b32 [[COPYSIGN0:v[0-9]+]], [[K]], [[BFI_K]], ; GFX89: v_bfi_b32 [[COPYSIGN1:v[0-9]+]], [[K]], [[BFI_K]], -; GFX9: v_pack_b32_f16 +; GFX9: v_and_b32_e32 +; GFX9: v_lshl_or_b32 define amdgpu_kernel void @round_v2f16(<2 x half> addrspace(1)* %out, i32 %in.arg) #0 { %in = bitcast i32 %in.arg to <2 x half> %result = call <2 x half> @llvm.round.v2f16(<2 x half> %in) diff --git a/llvm/test/CodeGen/AMDGPU/pack.v2f16.ll b/llvm/test/CodeGen/AMDGPU/pack.v2f16.ll index b8621562713..5a07f7ca6ae 100644 --- a/llvm/test/CodeGen/AMDGPU/pack.v2f16.ll +++ b/llvm/test/CodeGen/AMDGPU/pack.v2f16.ll @@ -1,5 +1,4 @@ -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx901 -mattr=-flat-for-global,+fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX9-DENORM %s -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx901 -mattr=-flat-for-global,-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX9-FLUSH %s +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx901 -mattr=-flat-for-global,-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s @@ -59,10 +58,9 @@ define amdgpu_kernel void @s_pack_v2f16_imm_hi(i32 addrspace(2)* %in0) #0 { ; GCN-LABEL: {{^}}v_pack_v2f16: ; GFX9: flat_load_dword [[VAL0:v[0-9]+]] ; GFX9: flat_load_dword [[VAL1:v[0-9]+]] -; GFX9-DENORM: v_pack_b32_f16 [[PACKED:v[0-9]+]], [[VAL0]], [[VAL1]] -; GFX9-FLUSH: v_and_b32_e32 [[ELT0:v[0-9]+]], 0xffff, [[VAL0]] -; GFX9-FLUSH: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[VAL1]], 16, [[ELT0]] +; GFX9: v_and_b32_e32 [[ELT0:v[0-9]+]], 0xffff, [[VAL0]] +; GFX9: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[VAL1]], 16, [[ELT0]] ; GFX9: ; use [[PACKED]] define amdgpu_kernel void @v_pack_v2f16(i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -85,10 +83,9 @@ define amdgpu_kernel void @v_pack_v2f16(i32 addrspace(1)* %in0, i32 addrspace(1) ; GCN-LABEL: {{^}}v_pack_v2f16_user: ; GFX9: flat_load_dword [[VAL0:v[0-9]+]] ; GFX9: flat_load_dword [[VAL1:v[0-9]+]] -; GFX9-DENORM: v_pack_b32_f16 [[PACKED:v[0-9]+]], [[VAL0]], [[VAL1]] -; GFX9-FLUSH: v_and_b32_e32 [[ELT0:v[0-9]+]], 0xffff, [[VAL0]] -; GFX9-FLUSH: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[VAL1]], 16, [[ELT0]] +; GFX9: v_and_b32_e32 [[ELT0:v[0-9]+]], 0xffff, [[VAL0]] +; GFX9: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[VAL1]], 16, [[ELT0]] ; GFX9: v_add_i32_e32 v{{[0-9]+}}, vcc, 9, [[PACKED]] define amdgpu_kernel void @v_pack_v2f16_user(i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #0 { @@ -112,11 +109,9 @@ define amdgpu_kernel void @v_pack_v2f16_user(i32 addrspace(1)* %in0, i32 addrspa ; GCN-LABEL: {{^}}v_pack_v2f16_imm_lo: ; GFX9-DAG: flat_load_dword [[VAL1:v[0-9]+]] -; GFX9-DENORM-DAG: s_movk_i32 [[K:s[0-9]+]], 0x1234{{$}} -; GFX9-DENORM: v_pack_b32_f16 [[PACKED:v[0-9]+]], [[K]], [[VAL1]] -; GFX9-FLUSH-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x1234{{$}} -; GFX9-FLUSH: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[VAL1]], 16, [[K]] +; GFX9-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x1234{{$}} +; GFX9: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[VAL1]], 16, [[K]] ; GFX9: ; use [[PACKED]] define amdgpu_kernel void @v_pack_v2f16_imm_lo(i32 addrspace(1)* %in1) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -134,10 +129,9 @@ define amdgpu_kernel void @v_pack_v2f16_imm_lo(i32 addrspace(1)* %in1) #0 { ; GCN-LABEL: {{^}}v_pack_v2f16_inline_imm_lo: ; GFX9-DAG: flat_load_dword [[VAL1:v[0-9]+]] -; GFX9-DENORM: v_pack_b32_f16 [[PACKED:v[0-9]+]], 4.0, [[VAL1]] -; GFX9-FLUSH-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x4400{{$}} -; GFX9-FLUSH: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[VAL1]], 16, [[K]] +; GFX9-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x4400{{$}} +; GFX9: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[VAL1]], 16, [[K]] ; GFX9: ; use [[PACKED]] define amdgpu_kernel void @v_pack_v2f16_inline_imm_lo(i32 addrspace(1)* %in1) #0 { @@ -156,12 +150,10 @@ define amdgpu_kernel void @v_pack_v2f16_inline_imm_lo(i32 addrspace(1)* %in1) #0 ; GCN-LABEL: {{^}}v_pack_v2f16_imm_hi: ; GFX9-DAG: flat_load_dword [[VAL0:v[0-9]+]] -; GFX9-DENORM-DAG: s_movk_i32 [[K:s[0-9]+]], 0x1234 -; GFX9-DENORM: v_pack_b32_f16 [[PACKED:v[0-9]+]], [[VAL0]], [[K]] -; GFX9-FLUSH-DAG: s_movk_i32 [[K:s[0-9]+]], 0x1234 -; GFX9-FLUSH: v_and_b32_e32 [[MASKED:v[0-9]+]], 0xffff, [[VAL0]] -; GFX9-FLUSH: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[K]], 16, [[MASKED]] +; GFX9-DAG: s_movk_i32 [[K:s[0-9]+]], 0x1234 +; GFX9: v_and_b32_e32 [[MASKED:v[0-9]+]], 0xffff, [[VAL0]] +; GFX9: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[K]], 16, [[MASKED]] ; GFX9: ; use [[PACKED]] define amdgpu_kernel void @v_pack_v2f16_imm_hi(i32 addrspace(1)* %in0) #0 { @@ -180,11 +172,10 @@ define amdgpu_kernel void @v_pack_v2f16_imm_hi(i32 addrspace(1)* %in0) #0 { ; GCN-LABEL: {{^}}v_pack_v2f16_inline_f16imm_hi: ; GFX9-DAG: flat_load_dword [[VAL:v[0-9]+]] -; GFX9-DENORM: v_pack_b32_f16 [[PACKED:v[0-9]+]], [[VAL]], 1.0 -; GFX9-FLUSH-DAG: s_movk_i32 [[K:s[0-9]+]], 0x3c00 -; GFX9-FLUSH: v_and_b32_e32 [[MASKED:v[0-9]+]], 0xffff, [[VAL]] -; GFX9-FLUSH: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[K]], 16, [[MASKED]] +; GFX9-DAG: s_movk_i32 [[K:s[0-9]+]], 0x3c00 +; GFX9: v_and_b32_e32 [[MASKED:v[0-9]+]], 0xffff, [[VAL]] +; GFX9: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[K]], 16, [[MASKED]] ; GFX9: ; use [[PACKED]] define amdgpu_kernel void @v_pack_v2f16_inline_f16imm_hi(i32 addrspace(1)* %in0) #0 { @@ -203,10 +194,9 @@ define amdgpu_kernel void @v_pack_v2f16_inline_f16imm_hi(i32 addrspace(1)* %in0) ; GCN-LABEL: {{^}}v_pack_v2f16_inline_imm_hi: ; GFX9: flat_load_dword [[VAL:v[0-9]+]] -; GFX9-DENORM: v_pack_b32_f16 [[PACKED:v[0-9]+]], [[VAL]], 64 -; GFX9-FLUSH: v_and_b32_e32 [[MASKED:v[0-9]+]], 0xffff, [[VAL]] -; GFX9-FLUSH: v_lshl_or_b32 [[PACKED:v[0-9]+]], 64, 16, [[MASKED]] +; GFX9: v_and_b32_e32 [[MASKED:v[0-9]+]], 0xffff, [[VAL]] +; GFX9: v_lshl_or_b32 [[PACKED:v[0-9]+]], 64, 16, [[MASKED]] ; GFX9: ; use [[PACKED]] define amdgpu_kernel void @v_pack_v2f16_inline_imm_hi(i32 addrspace(1)* %in0) #0 { diff --git a/llvm/test/CodeGen/AMDGPU/pack.v2i16.ll b/llvm/test/CodeGen/AMDGPU/pack.v2i16.ll index 9ffd16754a1..8515fbc6dba 100644 --- a/llvm/test/CodeGen/AMDGPU/pack.v2i16.ll +++ b/llvm/test/CodeGen/AMDGPU/pack.v2i16.ll @@ -1,5 +1,4 @@ -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx901 -mattr=-flat-for-global,+fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX9-DENORM %s -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx901 -mattr=-flat-for-global,-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX9-FLUSH %s +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx901 -mattr=-flat-for-global,-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s @@ -55,10 +54,9 @@ define amdgpu_kernel void @s_pack_v2i16_imm_hi(i32 addrspace(2)* %in0) #0 { ; GCN-LABEL: {{^}}v_pack_v2i16: ; GFX9: flat_load_dword [[VAL0:v[0-9]+]] ; GFX9: flat_load_dword [[VAL1:v[0-9]+]] -; GFX9-DENORM: v_pack_b32_f16 [[PACKED:v[0-9]+]], [[VAL0]], [[VAL1]] -; GFX9-FLUSH: v_and_b32_e32 [[MASKED:v[0-9]+]], 0xffff, [[VAL0]] -; GFX9-FLUSH: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[VAL1]], 16, [[MASKED]] +; GFX9: v_and_b32_e32 [[MASKED:v[0-9]+]], 0xffff, [[VAL0]] +; GFX9: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[VAL1]], 16, [[MASKED]] ; GFX9: ; use [[PACKED]] define amdgpu_kernel void @v_pack_v2i16(i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -79,10 +77,9 @@ define amdgpu_kernel void @v_pack_v2i16(i32 addrspace(1)* %in0, i32 addrspace(1) ; GCN-LABEL: {{^}}v_pack_v2i16_user: ; GFX9: flat_load_dword [[VAL0:v[0-9]+]] ; GFX9: flat_load_dword [[VAL1:v[0-9]+]] -; GFX9-DENORM: v_pack_b32_f16 [[PACKED:v[0-9]+]], [[VAL0]], [[VAL1]] -; GFX9-FLUSH: v_and_b32_e32 [[MASKED:v[0-9]+]], 0xffff, [[VAL0]] -; GFX9-FLUSH: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[VAL1]], 16, [[MASKED]] +; GFX9: v_and_b32_e32 [[MASKED:v[0-9]+]], 0xffff, [[VAL0]] +; GFX9: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[VAL1]], 16, [[MASKED]] ; GFX9: v_add_i32_e32 v{{[0-9]+}}, vcc, 9, [[PACKED]] define amdgpu_kernel void @v_pack_v2i16_user(i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #0 { @@ -105,10 +102,9 @@ define amdgpu_kernel void @v_pack_v2i16_user(i32 addrspace(1)* %in0, i32 addrspa ; GCN-LABEL: {{^}}v_pack_v2i16_imm_lo: ; GFX9-DAG: flat_load_dword [[VAL1:v[0-9]+]] ; GFX9-DENORM-DAG: s_movk_i32 [[K:s[0-9]+]], 0x7b{{$}} -; GFX9-DENORM: v_pack_b32_f16 [[PACKED:v[0-9]+]], [[K]], [[VAL1]] -; GFX9-FLUSH-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x7b{{$}} -; GFX9-FLUSH: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[VAL1]], 16, [[K]] +; GFX9-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x7b{{$}} +; GFX9: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[VAL1]], 16, [[K]] ; GFX9: ; use [[PACKED]] define amdgpu_kernel void @v_pack_v2i16_imm_lo(i32 addrspace(1)* %in1) #0 { @@ -126,9 +122,8 @@ define amdgpu_kernel void @v_pack_v2i16_imm_lo(i32 addrspace(1)* %in1) #0 { ; GCN-LABEL: {{^}}v_pack_v2i16_inline_imm_lo: ; GFX9: flat_load_dword [[VAL1:v[0-9]+]] -; GFX9-DENORM: v_pack_b32_f16 [[PACKED:v[0-9]+]], 64, [[VAL1]] -; GFX9-FLUSH: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[VAL1]], 16, 64 +; GFX9: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[VAL1]], 16, 64 ; GFX9: ; use [[PACKED]] define amdgpu_kernel void @v_pack_v2i16_inline_imm_lo(i32 addrspace(1)* %in1) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -145,10 +140,9 @@ define amdgpu_kernel void @v_pack_v2i16_inline_imm_lo(i32 addrspace(1)* %in1) #0 ; GCN-LABEL: {{^}}v_pack_v2i16_imm_hi: ; GFX9-DAG: flat_load_dword [[VAL0:v[0-9]+]] -; GFX9-DENORM: v_pack_b32_f16 [[PACKED:v[0-9]+]], [[VAL0]], [[K]] -; GFX9-FLUSH-DAG: s_movk_i32 [[K:s[0-9]+]], 0x7b{{$}} -; GFX9-FLUSH: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[K]], 16, [[VAL0]] +; GFX9-DAG: s_movk_i32 [[K:s[0-9]+]], 0x7b{{$}} +; GFX9: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[K]], 16, [[VAL0]] ; GFX9: ; use [[PACKED]] define amdgpu_kernel void @v_pack_v2i16_imm_hi(i32 addrspace(1)* %in0) #0 { @@ -166,8 +160,7 @@ define amdgpu_kernel void @v_pack_v2i16_imm_hi(i32 addrspace(1)* %in0) #0 { ; GCN-LABEL: {{^}}v_pack_v2i16_inline_imm_hi: ; GFX9: flat_load_dword [[VAL:v[0-9]+]] -; GFX9-DENORM: v_pack_b32_f16 [[PACKED:v[0-9]+]], [[VAL]], 7 -; GFX9-FLUSH: v_lshl_or_b32 [[PACKED:v[0-9]+]], 7, 16, [[VAL0]] +; GFX9: v_lshl_or_b32 [[PACKED:v[0-9]+]], 7, 16, [[VAL0]] ; GFX9: ; use [[PACKED]] define amdgpu_kernel void @v_pack_v2i16_inline_imm_hi(i32 addrspace(1)* %in0) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() |