diff options
author | Tom Stellard <thomas.stellard@amd.com> | 2016-01-05 03:40:16 +0000 |
---|---|---|
committer | Tom Stellard <thomas.stellard@amd.com> | 2016-01-05 03:40:16 +0000 |
commit | 5cd09ade383557a2ff2ad828ad3e00e024cef131 (patch) | |
tree | b61caf66f3946dff75ea26d64c19b8c57a128f11 /llvm/test/CodeGen/AMDGPU/salu-to-valu.ll | |
parent | 0d808888c141971d8750cf151029db2131aca5a1 (diff) | |
download | bcm5719-llvm-5cd09ade383557a2ff2ad828ad3e00e024cef131.tar.gz bcm5719-llvm-5cd09ade383557a2ff2ad828ad3e00e024cef131.zip |
AMDGPU/SI: Select non-uniform constant addrspace loads to flat instructions for HSA
Summary: This fixes a regression caused by r256282.
Reviewers: arsenm, cfang
Subscribers: arsenm, llvm-commits
Differential Revision: http://reviews.llvm.org/D15736
llvm-svn: 256810
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/salu-to-valu.ll')
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/salu-to-valu.ll | 269 |
1 files changed, 151 insertions, 118 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/salu-to-valu.ll b/llvm/test/CodeGen/AMDGPU/salu-to-valu.ll index a30c25e700a..551f34339a1 100644 --- a/llvm/test/CodeGen/AMDGPU/salu-to-valu.ll +++ b/llvm/test/CodeGen/AMDGPU/salu-to-valu.ll @@ -1,5 +1,6 @@ -; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s +; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=CI %s +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI --check-prefix=GCN-HSA %s declare i32 @llvm.r600.read.tidig.x() #0 declare i32 @llvm.r600.read.tidig.y() #0 @@ -18,8 +19,10 @@ declare i32 @llvm.r600.read.tidig.y() #0 ; Make sure we aren't using VGPR's for the srsrc operand of BUFFER_LOAD_* ; instructions -; GCN: buffer_load_ubyte v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0 addr64 -; GCN: buffer_load_ubyte v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0 addr64 +; GCN-NOHSA: buffer_load_ubyte v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0 addr64 +; GCN-NOHSA: buffer_load_ubyte v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0 addr64 +; GCN-HSA: flat_load_ubyte v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}} +; GCN-HSA: flat_load_ubyte v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}} define void @mubuf(i32 addrspace(1)* %out, i8 addrspace(1)* %in) #1 { entry: @@ -50,8 +53,10 @@ done: ; preds = %loop ; Test moving an SMRD instruction to the VALU ; GCN-LABEL: {{^}}smrd_valu: +; FIXME: We should be using flat load for HSA. ; GCN: buffer_load_dword [[OUT:v[0-9]+]] -; GCN: buffer_store_dword [[OUT]] +; GCN-NOHSA: buffer_store_dword [[OUT]] +; GCN-HSA: flat_store_dword [[OUT]] define void @smrd_valu(i32 addrspace(2)* addrspace(1)* %in, i32 %a, i32 %b, i32 addrspace(1)* %out) #1 { entry: %tmp = icmp ne i32 %a, 0 @@ -77,8 +82,9 @@ endif: ; preds = %else, %if ; Test moving an SMRD with an immediate offset to the VALU ; GCN-LABEL: {{^}}smrd_valu2: -; GCN-NOT: v_add -; GCN: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], 0 addr64 offset:16{{$}} +; GCN-NOHSA-NOT: v_add +; GCN-NOHSA: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], 0 addr64 offset:16{{$}} +; GCN-HSA: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] define void @smrd_valu2(i32 addrspace(1)* %out, [8 x i32] addrspace(2)* %in) #1 { entry: %tmp = call i32 @llvm.r600.read.tidig.x() #0 @@ -91,12 +97,14 @@ entry: ; Use a big offset that will use the SMRD literal offset on CI ; GCN-LABEL: {{^}}smrd_valu_ci_offset: -; GCN-NOT: v_add -; GCN: s_movk_i32 [[OFFSET:s[0-9]+]], 0x4e20{{$}} -; GCN-NOT: v_add -; GCN: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET]] addr64{{$}} -; GCN: v_add_i32_e32 -; GCN: buffer_store_dword +; GCN-NOHSA-NOT: v_add +; GCN-NOHSA: s_movk_i32 [[OFFSET:s[0-9]+]], 0x4e20{{$}} +; GCN-NOHSA-NOT: v_add +; GCN-NOHSA: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET]] addr64{{$}} +; GCN-NOHSA: v_add_i32_e32 +; GCN-NOHSA: buffer_store_dword +; GCN-HSA: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] +; GCN-HSA: flat_store_dword v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] define void @smrd_valu_ci_offset(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %c) #1 { entry: %tmp = call i32 @llvm.r600.read.tidig.x() #0 @@ -109,13 +117,14 @@ entry: } ; GCN-LABEL: {{^}}smrd_valu_ci_offset_x2: -; GCN-NOT: v_add -; GCN: s_mov_b32 [[OFFSET:s[0-9]+]], 0x9c40{{$}} -; GCN-NOT: v_add -; GCN: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET]] addr64{{$}} -; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} -; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} -; GCN: buffer_store_dwordx2 +; GCN-NOHSA-NOT: v_add +; GCN-NOHSA: s_mov_b32 [[OFFSET:s[0-9]+]], 0x9c40{{$}} +; GCN-NOHSA-NOT: v_add +; GCN-NOHSA: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET]] addr64{{$}} +; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN-NOHSA: buffer_store_dwordx2 +; GCN-HSA: flat_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] define void @smrd_valu_ci_offset_x2(i64 addrspace(1)* %out, i64 addrspace(2)* %in, i64 %c) #1 { entry: %tmp = call i32 @llvm.r600.read.tidig.x() #0 @@ -128,15 +137,16 @@ entry: } ; GCN-LABEL: {{^}}smrd_valu_ci_offset_x4: -; GCN-NOT: v_add -; GCN: s_movk_i32 [[OFFSET:s[0-9]+]], 0x4d20{{$}} -; GCN-NOT: v_add -; GCN: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET]] addr64{{$}} -; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} -; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} -; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} -; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} -; GCN: buffer_store_dwordx4 +; GCN-NOHSA-NOT: v_add +; GCN-NOHSA: s_movk_i32 [[OFFSET:s[0-9]+]], 0x4d20{{$}} +; GCN-NOHSA-NOT: v_add +; GCN-NOHSA: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET]] addr64{{$}} +; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN-NOHSA: buffer_store_dwordx4 +; GCN-HSA: flat_load_dwordx4 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] define void @smrd_valu_ci_offset_x4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(2)* %in, <4 x i32> %c) #1 { entry: %tmp = call i32 @llvm.r600.read.tidig.x() #0 @@ -152,25 +162,27 @@ entry: ; CI. ; GCN-LABEL: {{^}}smrd_valu_ci_offset_x8: -; GCN-NOT: v_add -; GCN: s_mov_b32 [[OFFSET0:s[0-9]+]], 0x9a40{{$}} -; GCN-NOT: v_add -; GCN: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET0]] addr64{{$}} -; GCN-NOT: v_add -; GCN: s_mov_b32 [[OFFSET1:s[0-9]+]], 0x9a50{{$}} -; GCN-NOT: v_add -; GCN: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET1]] addr64{{$}} - -; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} -; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} -; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} -; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} -; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} -; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} -; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} -; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} -; GCN: buffer_store_dwordx4 -; GCN: buffer_store_dwordx4 +; GCN-NOHSA-NOT: v_add +; GCN-NOHSA: s_mov_b32 [[OFFSET0:s[0-9]+]], 0x9a40{{$}} +; GCN-NOHSA-NOT: v_add +; GCN-NOHSA: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET0]] addr64{{$}} +; GCN-NOHSA-NOT: v_add +; GCN-NOHSA: s_mov_b32 [[OFFSET1:s[0-9]+]], 0x9a50{{$}} +; GCN-NOHSA-NOT: v_add +; GCN-NOHSA: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET1]] addr64{{$}} + +; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN-NOHSA: buffer_store_dwordx4 +; GCN-NOHSA: buffer_store_dwordx4 +; GCN-HSA: flat_load_dwordx4 +; GCN-HSA: flat_load_dwordx4 define void @smrd_valu_ci_offset_x8(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(2)* %in, <8 x i32> %c) #1 { entry: %tmp = call i32 @llvm.r600.read.tidig.x() #0 @@ -184,35 +196,40 @@ entry: ; GCN-LABEL: {{^}}smrd_valu_ci_offset_x16: -; GCN-NOT: v_add -; GCN: s_mov_b32 [[OFFSET0:s[0-9]+]], 0x13480{{$}} -; GCN-NOT: v_add -; GCN: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET0]] addr64{{$}} -; GCN-NOT: v_add -; GCN: s_mov_b32 [[OFFSET1:s[0-9]+]], 0x13490{{$}} -; GCN-NOT: v_add -; GCN: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET1]] addr64{{$}} -; GCN-NOT: v_add -; GCN: s_mov_b32 [[OFFSET2:s[0-9]+]], 0x134a0{{$}} -; GCN-NOT: v_add -; GCN: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET2]] addr64{{$}} -; GCN-NOT: v_add -; GCN: s_mov_b32 [[OFFSET3:s[0-9]+]], 0x134b0{{$}} -; GCN-NOT: v_add -; GCN: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET3]] addr64{{$}} - -; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} -; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} -; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} -; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} -; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} -; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} -; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} -; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} -; GCN: buffer_store_dwordx4 -; GCN: buffer_store_dwordx4 -; GCN: buffer_store_dwordx4 -; GCN: buffer_store_dwordx4 +; GCN-NOHSA-NOT: v_add +; GCN-NOHSA: s_mov_b32 [[OFFSET0:s[0-9]+]], 0x13480{{$}} +; GCN-NOHSA-NOT: v_add +; GCN-NOHSA: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET0]] addr64{{$}} +; GCN-NOHSA-NOT: v_add +; GCN-NOHSA: s_mov_b32 [[OFFSET1:s[0-9]+]], 0x13490{{$}} +; GCN-NOHSA-NOT: v_add +; GCN-NOHSA: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET1]] addr64{{$}} +; GCN-NOHSA-NOT: v_add +; GCN-NOHSA: s_mov_b32 [[OFFSET2:s[0-9]+]], 0x134a0{{$}} +; GCN-NOHSA-NOT: v_add +; GCN-NOHSA: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET2]] addr64{{$}} +; GCN-NOHSA-NOT: v_add +; GCN-NOHSA: s_mov_b32 [[OFFSET3:s[0-9]+]], 0x134b0{{$}} +; GCN-NOHSA-NOT: v_add +; GCN-NOHSA: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET3]] addr64{{$}} + +; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN-NOHSA: buffer_store_dwordx4 +; GCN-NOHSA: buffer_store_dwordx4 +; GCN-NOHSA: buffer_store_dwordx4 +; GCN-NOHSA: buffer_store_dwordx4 + +; GCN-HSA: flat_load_dwordx4 +; GCN-HSA: flat_load_dwordx4 +; GCN-HSA: flat_load_dwordx4 +; GCN-HSA: flat_load_dwordx4 ; GCN: s_endpgm define void @smrd_valu_ci_offset_x16(<16 x i32> addrspace(1)* %out, <16 x i32> addrspace(2)* %in, <16 x i32> %c) #1 { @@ -227,9 +244,11 @@ entry: } ; GCN-LABEL: {{^}}smrd_valu2_salu_user: -; GCN: buffer_load_dword [[MOVED:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}} +; GCN-NOHSA: buffer_load_dword [[MOVED:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}} +; GCN-HSA: flat_load_dword [[MOVED:v[0-9]+]], v[{{[0-9+:[0-9]+}}] ; GCN: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, s{{[0-9]+}}, [[MOVED]] -; GCN: buffer_store_dword [[ADD]] +; GCN-NOHSA: buffer_store_dword [[ADD]] +; GCN-HSA: flat_store_dword [[ADD]] define void @smrd_valu2_salu_user(i32 addrspace(1)* %out, [8 x i32] addrspace(2)* %in, i32 %a) #1 { entry: %tmp = call i32 @llvm.r600.read.tidig.x() #0 @@ -242,7 +261,8 @@ entry: } ; GCN-LABEL: {{^}}smrd_valu2_max_smrd_offset: -; GCN: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:1020{{$}} +; GCN-NOHSA: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:1020{{$}} +; GCN-HSA flat_load_dword v{{[0-9]}}, v{{[0-9]+:[0-9]+}} define void @smrd_valu2_max_smrd_offset(i32 addrspace(1)* %out, [1024 x i32] addrspace(2)* %in) #1 { entry: %tmp = call i32 @llvm.r600.read.tidig.x() #0 @@ -254,8 +274,9 @@ entry: } ; GCN-LABEL: {{^}}smrd_valu2_mubuf_offset: -; GCN-NOT: v_add -; GCN: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:1024{{$}} +; GCN-NOHSA-NOT: v_add +; GCN-NOHSA: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:1024{{$}} +; GCN-HSA: flat_load_dword v{{[0-9]}}, v[{{[0-9]+:[0-9]+}}] define void @smrd_valu2_mubuf_offset(i32 addrspace(1)* %out, [1024 x i32] addrspace(2)* %in) #1 { entry: %tmp = call i32 @llvm.r600.read.tidig.x() #0 @@ -267,8 +288,10 @@ entry: } ; GCN-LABEL: {{^}}s_load_imm_v8i32: -; GCN: buffer_load_dwordx4 -; GCN: buffer_load_dwordx4 +; GCN-NOHSA: buffer_load_dwordx4 +; GCN-NOHSA: buffer_load_dwordx4 +; GCN-HSA: flat_load_dwordx4 +; GCN-HSA: flat_load_dwordx4 define void @s_load_imm_v8i32(<8 x i32> addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) #1 { entry: %tmp0 = tail call i32 @llvm.r600.read.tidig.x() @@ -280,16 +303,18 @@ entry: } ; GCN-LABEL: {{^}}s_load_imm_v8i32_salu_user: -; GCN: buffer_load_dwordx4 -; GCN: buffer_load_dwordx4 -; GCN: v_add_i32_e32 -; GCN: v_add_i32_e32 -; GCN: v_add_i32_e32 -; GCN: v_add_i32_e32 -; GCN: v_add_i32_e32 -; GCN: v_add_i32_e32 -; GCN: v_add_i32_e32 -; GCN: buffer_store_dword +; GCN-NOHSA: buffer_load_dwordx4 +; GCN-NOHSA: buffer_load_dwordx4 +; GCN-NOHSA: v_add_i32_e32 +; GCN-NOHSA: v_add_i32_e32 +; GCN-NOHSA: v_add_i32_e32 +; GCN-NOHSA: v_add_i32_e32 +; GCN-NOHSA: v_add_i32_e32 +; GCN-NOHSA: v_add_i32_e32 +; GCN-NOHSA: v_add_i32_e32 +; GCN-NOHSA: buffer_store_dword +; GCN-HSA: flat_load_dwordx4 +; GCN-HSA: flat_load_dwordx4 define void @s_load_imm_v8i32_salu_user(i32 addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) #1 { entry: %tmp0 = tail call i32 @llvm.r600.read.tidig.x() @@ -319,10 +344,14 @@ entry: } ; GCN-LABEL: {{^}}s_load_imm_v16i32: -; GCN: buffer_load_dwordx4 -; GCN: buffer_load_dwordx4 -; GCN: buffer_load_dwordx4 -; GCN: buffer_load_dwordx4 +; GCN-NOHSA: buffer_load_dwordx4 +; GCN-NOHSA: buffer_load_dwordx4 +; GCN-NOHSA: buffer_load_dwordx4 +; GCN-NOHSA: buffer_load_dwordx4 +; GCN-HSA: flat_load_dwordx4 +; GCN-HSA: flat_load_dwordx4 +; GCN-HSA: flat_load_dwordx4 +; GCN-HSA: flat_load_dwordx4 define void @s_load_imm_v16i32(<16 x i32> addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) #1 { entry: %tmp0 = tail call i32 @llvm.r600.read.tidig.x() #1 @@ -334,26 +363,30 @@ entry: } ; GCN-LABEL: {{^}}s_load_imm_v16i32_salu_user: -; GCN: buffer_load_dwordx4 -; GCN: buffer_load_dwordx4 -; GCN: buffer_load_dwordx4 -; GCN: buffer_load_dwordx4 -; GCN: v_add_i32_e32 -; GCN: v_add_i32_e32 -; GCN: v_add_i32_e32 -; GCN: v_add_i32_e32 -; GCN: v_add_i32_e32 -; GCN: v_add_i32_e32 -; GCN: v_add_i32_e32 -; GCN: v_add_i32_e32 -; GCN: v_add_i32_e32 -; GCN: v_add_i32_e32 -; GCN: v_add_i32_e32 -; GCN: v_add_i32_e32 -; GCN: v_add_i32_e32 -; GCN: v_add_i32_e32 -; GCN: v_add_i32_e32 -; GCN: buffer_store_dword +; GCN-NOHSA: buffer_load_dwordx4 +; GCN-NOHSA: buffer_load_dwordx4 +; GCN-NOHSA: buffer_load_dwordx4 +; GCN-NOHSA: buffer_load_dwordx4 +; GCN-NOHSA: v_add_i32_e32 +; GCN-NOHSA: v_add_i32_e32 +; GCN-NOHSA: v_add_i32_e32 +; GCN-NOHSA: v_add_i32_e32 +; GCN-NOHSA: v_add_i32_e32 +; GCN-NOHSA: v_add_i32_e32 +; GCN-NOHSA: v_add_i32_e32 +; GCN-NOHSA: v_add_i32_e32 +; GCN-NOHSA: v_add_i32_e32 +; GCN-NOHSA: v_add_i32_e32 +; GCN-NOHSA: v_add_i32_e32 +; GCN-NOHSA: v_add_i32_e32 +; GCN-NOHSA: v_add_i32_e32 +; GCN-NOHSA: v_add_i32_e32 +; GCN-NOHSA: v_add_i32_e32 +; GCN-NOHSA: buffer_store_dword +; GCN-HSA: flat_load_dwordx4 +; GCN-HSA: flat_load_dwordx4 +; GCN-HSA: flat_load_dwordx4 +; GCN-HSA: flat_load_dwordx4 define void @s_load_imm_v16i32_salu_user(i32 addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) #1 { entry: %tmp0 = tail call i32 @llvm.r600.read.tidig.x() #1 |