diff options
| author | Tom Stellard <thomas.stellard@amd.com> | 2016-02-20 00:37:25 +0000 |
|---|---|---|
| committer | Tom Stellard <thomas.stellard@amd.com> | 2016-02-20 00:37:25 +0000 |
| commit | 467b5b9024ec1e9348ad24ea736eb9a94c653bb0 (patch) | |
| tree | cebb53aa1ed4c8393b64f4e31218db3f58f91b52 /llvm/test/CodeGen | |
| parent | e611698e845c1d019960230930966632b911abe7 (diff) | |
| download | bcm5719-llvm-467b5b9024ec1e9348ad24ea736eb9a94c653bb0.tar.gz bcm5719-llvm-467b5b9024ec1e9348ad24ea736eb9a94c653bb0.zip | |
AMDGPU/SI: Use v_readfirstlane to legalize SMRD with VGPR base pointer
Summary:
Instead of trying to replace SMRD instructions with a VGPR base pointer
with an equivalent MUBUF instruction, we now copy the base pointer to
SGPRs using v_readfirstlane.
This is safe to do, because any load selected as an SMRD instruction
has been proven to have a uniform base pointer, so each thread in the
wave will have the same pointer value in VGPRs.
This will fix some errors on VI from trying to replace SMRD instructions
with addr64-enabled MUBUF instructions that don't exist.
Reviewers: arsenm, cfang, nhaehnle
Subscribers: arsenm, llvm-commits
Differential Revision: http://reviews.llvm.org/D17305
llvm-svn: 261385
Diffstat (limited to 'llvm/test/CodeGen')
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/missing-store.ll | 4 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/salu-to-valu.ll | 12 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll | 19 |
3 files changed, 21 insertions, 14 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/missing-store.ll b/llvm/test/CodeGen/AMDGPU/missing-store.ll index 4af9cdf1b96..c919b3b5819 100644 --- a/llvm/test/CodeGen/AMDGPU/missing-store.ll +++ b/llvm/test/CodeGen/AMDGPU/missing-store.ll @@ -8,7 +8,9 @@ ; FUNC-LABEL: {{^}}missing_store_reduced: ; SI: ds_read_b64 ; SI: buffer_store_dword -; SI: buffer_load_dword +; SI: v_readfirstlane_b32 s[[PTR_LO:[0-9]+]], v{{[0-9]+}} +; SI: v_readfirstlane_b32 s[[PTR_HI:[0-9]+]], v{{[0-9]+}} +; SI: s_load_dword s{{[0-9]+}}, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}} ; SI: buffer_store_dword ; SI: s_endpgm define void @missing_store_reduced(i32 addrspace(1)* %out, i32 addrspace(1)* %gptr) #0 { diff --git a/llvm/test/CodeGen/AMDGPU/salu-to-valu.ll b/llvm/test/CodeGen/AMDGPU/salu-to-valu.ll index eebd06e6ee9..abd6b7a2c21 100644 --- a/llvm/test/CodeGen/AMDGPU/salu-to-valu.ll +++ b/llvm/test/CodeGen/AMDGPU/salu-to-valu.ll @@ -53,10 +53,14 @@ done: ; preds = %loop ; Test moving an SMRD instruction to the VALU ; GCN-LABEL: {{^}}smrd_valu: -; FIXME: We should be using flat load for HSA. -; GCN: buffer_load_dword [[OUT:v[0-9]+]] -; GCN-NOHSA: buffer_store_dword [[OUT]] -; GCN-HSA: flat_store_dword {{.*}}, [[OUT]] +; SI: s_movk_i32 [[OFFSET:s[0-9]+]], 0x2ee0 +; GCN: v_readfirstlane_b32 s[[PTR_LO:[0-9]+]], v{{[0-9]+}} +; GCN: v_readfirstlane_b32 s[[PTR_HI:[0-9]+]], v{{[0-9]+}} +; SI: s_load_dword [[OUT:s[0-9]+]], s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, [[OFFSET]] +; CI: s_load_dword [[OUT:s[0-9]+]], s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, 0xbb8 +; GCN: v_mov_b32_e32 [[V_OUT:v[0-9]+]], [[OUT]] +; GCN-NOHSA: buffer_store_dword [[V_OUT]] +; GCN-HSA: flat_store_dword {{.*}}, [[V_OUT]] define void @smrd_valu(i32 addrspace(2)* addrspace(1)* %in, i32 %a, i32 %b, i32 addrspace(1)* %out) #1 { entry: %tmp = icmp ne i32 %a, 0 diff --git a/llvm/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll b/llvm/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll index 416f3969f6a..b1277da6156 100644 --- a/llvm/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll +++ b/llvm/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll @@ -70,15 +70,14 @@ define void @no_reorder_barrier_local_load_global_store_local_load(i32 addrspace ret void } -; Technically we could reorder these, but just comparing the -; instruction type of the load is insufficient. - -; FUNC-LABEL: @no_reorder_constant_load_global_store_constant_load -; CI: buffer_load_dword +; FUNC-LABEL: @reorder_constant_load_global_store_constant_load ; CI: buffer_store_dword -; CI: buffer_load_dword +; CI: v_readfirstlane_b32 s[[PTR_LO:[0-9]+]], v{{[0-9]+}} +; CI: v_readfirstlane_b32 s[[PTR_HI:[0-9]+]], v{{[0-9]+}} +; CI-DAG: s_load_dword s{{[0-9]+}}, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, 0x1 +; CI-DAG: s_load_dword s{{[0-9]+}}, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, 0x2 ; CI: buffer_store_dword -define void @no_reorder_constant_load_global_store_constant_load(i32 addrspace(1)* %out, i32 addrspace(1)* %gptr) #0 { +define void @reorder_constant_load_global_store_constant_load(i32 addrspace(1)* %out, i32 addrspace(1)* %gptr) #0 { %ptr0 = load i32 addrspace(2)*, i32 addrspace(2)* addrspace(3)* @stored_constant_ptr, align 8 %ptr1 = getelementptr inbounds i32, i32 addrspace(2)* %ptr0, i64 1 @@ -95,8 +94,10 @@ define void @no_reorder_constant_load_global_store_constant_load(i32 addrspace(1 } ; FUNC-LABEL: @reorder_constant_load_local_store_constant_load -; CI: buffer_load_dword -; CI: buffer_load_dword +; CI: v_readfirstlane_b32 s[[PTR_LO:[0-9]+]], v{{[0-9]+}} +; CI: v_readfirstlane_b32 s[[PTR_HI:[0-9]+]], v{{[0-9]+}} +; CI-DAG: s_load_dword s{{[0-9]+}}, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, 0x1 +; CI-DAG: s_load_dword s{{[0-9]+}}, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, 0x2 ; CI: ds_write_b32 ; CI: buffer_store_dword define void @reorder_constant_load_local_store_constant_load(i32 addrspace(1)* %out, i32 addrspace(3)* %lptr) #0 { |

