diff options
Diffstat (limited to 'llvm/test')
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/missing-store.ll | 4 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/salu-to-valu.ll | 12 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll | 19 |
3 files changed, 21 insertions, 14 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/missing-store.ll b/llvm/test/CodeGen/AMDGPU/missing-store.ll index 4af9cdf1b96..c919b3b5819 100644 --- a/llvm/test/CodeGen/AMDGPU/missing-store.ll +++ b/llvm/test/CodeGen/AMDGPU/missing-store.ll @@ -8,7 +8,9 @@ ; FUNC-LABEL: {{^}}missing_store_reduced: ; SI: ds_read_b64 ; SI: buffer_store_dword -; SI: buffer_load_dword +; SI: v_readfirstlane_b32 s[[PTR_LO:[0-9]+]], v{{[0-9]+}} +; SI: v_readfirstlane_b32 s[[PTR_HI:[0-9]+]], v{{[0-9]+}} +; SI: s_load_dword s{{[0-9]+}}, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}} ; SI: buffer_store_dword ; SI: s_endpgm define void @missing_store_reduced(i32 addrspace(1)* %out, i32 addrspace(1)* %gptr) #0 { diff --git a/llvm/test/CodeGen/AMDGPU/salu-to-valu.ll b/llvm/test/CodeGen/AMDGPU/salu-to-valu.ll index eebd06e6ee9..abd6b7a2c21 100644 --- a/llvm/test/CodeGen/AMDGPU/salu-to-valu.ll +++ b/llvm/test/CodeGen/AMDGPU/salu-to-valu.ll @@ -53,10 +53,14 @@ done: ; preds = %loop ; Test moving an SMRD instruction to the VALU ; GCN-LABEL: {{^}}smrd_valu: -; FIXME: We should be using flat load for HSA. -; GCN: buffer_load_dword [[OUT:v[0-9]+]] -; GCN-NOHSA: buffer_store_dword [[OUT]] -; GCN-HSA: flat_store_dword {{.*}}, [[OUT]] +; SI: s_movk_i32 [[OFFSET:s[0-9]+]], 0x2ee0 +; GCN: v_readfirstlane_b32 s[[PTR_LO:[0-9]+]], v{{[0-9]+}} +; GCN: v_readfirstlane_b32 s[[PTR_HI:[0-9]+]], v{{[0-9]+}} +; SI: s_load_dword [[OUT:s[0-9]+]], s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, [[OFFSET]] +; CI: s_load_dword [[OUT:s[0-9]+]], s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, 0xbb8 +; GCN: v_mov_b32_e32 [[V_OUT:v[0-9]+]], [[OUT]] +; GCN-NOHSA: buffer_store_dword [[V_OUT]] +; GCN-HSA: flat_store_dword {{.*}}, [[V_OUT]] define void @smrd_valu(i32 addrspace(2)* addrspace(1)* %in, i32 %a, i32 %b, i32 addrspace(1)* %out) #1 { entry: %tmp = icmp ne i32 %a, 0 diff --git a/llvm/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll b/llvm/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll index 416f3969f6a..b1277da6156 100644 --- a/llvm/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll +++ b/llvm/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll @@ -70,15 +70,14 @@ define void @no_reorder_barrier_local_load_global_store_local_load(i32 addrspace ret void } -; Technically we could reorder these, but just comparing the -; instruction type of the load is insufficient. - -; FUNC-LABEL: @no_reorder_constant_load_global_store_constant_load -; CI: buffer_load_dword +; FUNC-LABEL: @reorder_constant_load_global_store_constant_load ; CI: buffer_store_dword -; CI: buffer_load_dword +; CI: v_readfirstlane_b32 s[[PTR_LO:[0-9]+]], v{{[0-9]+}} +; CI: v_readfirstlane_b32 s[[PTR_HI:[0-9]+]], v{{[0-9]+}} +; CI-DAG: s_load_dword s{{[0-9]+}}, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, 0x1 +; CI-DAG: s_load_dword s{{[0-9]+}}, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, 0x2 ; CI: buffer_store_dword -define void @no_reorder_constant_load_global_store_constant_load(i32 addrspace(1)* %out, i32 addrspace(1)* %gptr) #0 { +define void @reorder_constant_load_global_store_constant_load(i32 addrspace(1)* %out, i32 addrspace(1)* %gptr) #0 { %ptr0 = load i32 addrspace(2)*, i32 addrspace(2)* addrspace(3)* @stored_constant_ptr, align 8 %ptr1 = getelementptr inbounds i32, i32 addrspace(2)* %ptr0, i64 1 @@ -95,8 +94,10 @@ define void @no_reorder_constant_load_global_store_constant_load(i32 addrspace(1 } ; FUNC-LABEL: @reorder_constant_load_local_store_constant_load -; CI: buffer_load_dword -; CI: buffer_load_dword +; CI: v_readfirstlane_b32 s[[PTR_LO:[0-9]+]], v{{[0-9]+}} +; CI: v_readfirstlane_b32 s[[PTR_HI:[0-9]+]], v{{[0-9]+}} +; CI-DAG: s_load_dword s{{[0-9]+}}, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, 0x1 +; CI-DAG: s_load_dword s{{[0-9]+}}, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, 0x2 ; CI: ds_write_b32 ; CI: buffer_store_dword define void @reorder_constant_load_local_store_constant_load(i32 addrspace(1)* %out, i32 addrspace(3)* %lptr) #0 { |

