diff options
| author | Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> | 2019-05-01 16:11:11 +0000 |
|---|---|---|
| committer | Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> | 2019-05-01 16:11:11 +0000 |
| commit | a224f68a10d6191c881dfd27796a2b363aa003d3 (patch) | |
| tree | f42b965dfd7968f261bb868c9c9e4d25e3edef44 /llvm/test/CodeGen/AMDGPU/lds-misaligned-bug.ll | |
| parent | 64d57512546e42f45e1f5498e443bc5c580f84f1 (diff) | |
| download | bcm5719-llvm-a224f68a10d6191c881dfd27796a2b363aa003d3.tar.gz bcm5719-llvm-a224f68a10d6191c881dfd27796a2b363aa003d3.zip | |
[AMDGPU] gfx1010 DS implementation
Differential Revision: https://reviews.llvm.org/D61332
llvm-svn: 359696
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/lds-misaligned-bug.ll')
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/lds-misaligned-bug.ll | 262 |
1 file changed, 262 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/lds-misaligned-bug.ll b/llvm/test/CodeGen/AMDGPU/lds-misaligned-bug.ll new file mode 100644 index 00000000000..49b1d642686 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/lds-misaligned-bug.ll @@ -0,0 +1,262 @@ +; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SPLIT %s +; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -mattr=+cumode < %s | FileCheck -check-prefixes=GCN,VECT %s + +; GCN-LABEL: test_local_misaligned_v2: +; GCN-DAG: ds_read2_b32 +; GCN-DAG: ds_write2_b32 +define amdgpu_kernel void @test_local_misaligned_v2(i32 addrspace(3)* %arg) { +bb: + %lid = tail call i32 @llvm.amdgcn.workitem.id.x() + %gep = getelementptr inbounds i32, i32 addrspace(3)* %arg, i32 %lid + %ptr = bitcast i32 addrspace(3)* %gep to <2 x i32> addrspace(3)* + %load = load <2 x i32>, <2 x i32> addrspace(3)* %ptr, align 4 + %v1 = extractelement <2 x i32> %load, i32 0 + %v2 = extractelement <2 x i32> %load, i32 1 + %v3 = insertelement <2 x i32> undef, i32 %v2, i32 0 + %v4 = insertelement <2 x i32> %v3, i32 %v1, i32 1 + store <2 x i32> %v4, <2 x i32> addrspace(3)* %ptr, align 4 + ret void +} + +; GCN-LABEL: test_local_misaligned_v4: +; GCN-DAG: ds_read2_b32 +; GCN-DAG: ds_read2_b32 +; GCN-DAG: ds_write2_b32 +; GCN-DAG: ds_write2_b32 +define amdgpu_kernel void @test_local_misaligned_v4(i32 addrspace(3)* %arg) { +bb: + %lid = tail call i32 @llvm.amdgcn.workitem.id.x() + %gep = getelementptr inbounds i32, i32 addrspace(3)* %arg, i32 %lid + %ptr = bitcast i32 addrspace(3)* %gep to <4 x i32> addrspace(3)* + %load = load <4 x i32>, <4 x i32> addrspace(3)* %ptr, align 4 + %v1 = extractelement <4 x i32> %load, i32 0 + %v2 = extractelement <4 x i32> %load, i32 1 + %v3 = extractelement <4 x i32> %load, i32 2 + %v4 = extractelement <4 x i32> %load, i32 3 + %v5 = insertelement <4 x i32> undef, i32 %v4, i32 0 + %v6 = insertelement <4 x i32> %v5, i32 %v3, i32 1 + %v7 = insertelement <4 x i32> %v6, i32 %v2, i32 
2 + %v8 = insertelement <4 x i32> %v7, i32 %v1, i32 3 + store <4 x i32> %v8, <4 x i32> addrspace(3)* %ptr, align 4 + ret void +} + +; GCN-LABEL: test_local_misaligned_v3: +; GCN-DAG: ds_read2_b32 +; GCN-DAG: ds_read_b32 +; GCN-DAG: ds_write2_b32 +; GCN-DAG: ds_write_b32 +define amdgpu_kernel void @test_local_misaligned_v3(i32 addrspace(3)* %arg) { +bb: + %lid = tail call i32 @llvm.amdgcn.workitem.id.x() + %gep = getelementptr inbounds i32, i32 addrspace(3)* %arg, i32 %lid + %ptr = bitcast i32 addrspace(3)* %gep to <3 x i32> addrspace(3)* + %load = load <3 x i32>, <3 x i32> addrspace(3)* %ptr, align 4 + %v1 = extractelement <3 x i32> %load, i32 0 + %v2 = extractelement <3 x i32> %load, i32 1 + %v3 = extractelement <3 x i32> %load, i32 2 + %v5 = insertelement <3 x i32> undef, i32 %v3, i32 0 + %v6 = insertelement <3 x i32> %v5, i32 %v1, i32 1 + %v7 = insertelement <3 x i32> %v6, i32 %v2, i32 2 + store <3 x i32> %v7, <3 x i32> addrspace(3)* %ptr, align 4 + ret void +} + +; GCN-LABEL: test_flat_misaligned_v2: +; VECT-DAG: flat_load_dwordx2 v +; VECT-DAG: flat_store_dwordx2 v +; SPLIT-DAG: flat_load_dword v +; SPLIT-DAG: flat_load_dword v +; SPLIT-DAG: flat_store_dword v +; SPLIT-DAG: flat_store_dword v +define amdgpu_kernel void @test_flat_misaligned_v2(i32* %arg) { +bb: + %lid = tail call i32 @llvm.amdgcn.workitem.id.x() + %gep = getelementptr inbounds i32, i32* %arg, i32 %lid + %ptr = bitcast i32* %gep to <2 x i32>* + %load = load <2 x i32>, <2 x i32>* %ptr, align 4 + %v1 = extractelement <2 x i32> %load, i32 0 + %v2 = extractelement <2 x i32> %load, i32 1 + %v3 = insertelement <2 x i32> undef, i32 %v2, i32 0 + %v4 = insertelement <2 x i32> %v3, i32 %v1, i32 1 + store <2 x i32> %v4, <2 x i32>* %ptr, align 4 + ret void +} + +; GCN-LABEL: test_flat_misaligned_v4: +; VECT-DAG: flat_load_dwordx4 v +; VECT-DAG: flat_store_dwordx4 v +; SPLIT-DAG: flat_load_dword v +; SPLIT-DAG: flat_load_dword v +; SPLIT-DAG: flat_load_dword v +; SPLIT-DAG: flat_load_dword v +; SPLIT-DAG: 
flat_store_dword v +; SPLIT-DAG: flat_store_dword v +; SPLIT-DAG: flat_store_dword v +; SPLIT-DAG: flat_store_dword v +define amdgpu_kernel void @test_flat_misaligned_v4(i32* %arg) { +bb: + %lid = tail call i32 @llvm.amdgcn.workitem.id.x() + %gep = getelementptr inbounds i32, i32* %arg, i32 %lid + %ptr = bitcast i32* %gep to <4 x i32>* + %load = load <4 x i32>, <4 x i32>* %ptr, align 4 + %v1 = extractelement <4 x i32> %load, i32 0 + %v2 = extractelement <4 x i32> %load, i32 1 + %v3 = extractelement <4 x i32> %load, i32 2 + %v4 = extractelement <4 x i32> %load, i32 3 + %v5 = insertelement <4 x i32> undef, i32 %v4, i32 0 + %v6 = insertelement <4 x i32> %v5, i32 %v3, i32 1 + %v7 = insertelement <4 x i32> %v6, i32 %v2, i32 2 + %v8 = insertelement <4 x i32> %v7, i32 %v1, i32 3 + store <4 x i32> %v8, <4 x i32>* %ptr, align 4 + ret void +} + +; GCN-LABEL: test_flat_misaligned_v3: +; VECT-DAG: flat_load_dwordx3 v +; VECT-DAG: flat_store_dwordx3 v +; SPLIT-DAG: flat_load_dword v +; SPLIT-DAG: flat_load_dword v +; SPLIT-DAG: flat_load_dword v +; SPLIT-DAG: flat_store_dword v +; SPLIT-DAG: flat_store_dword v +; SPLIT-DAG: flat_store_dword v +define amdgpu_kernel void @test_flat_misaligned_v3(i32* %arg) { +bb: + %lid = tail call i32 @llvm.amdgcn.workitem.id.x() + %gep = getelementptr inbounds i32, i32* %arg, i32 %lid + %ptr = bitcast i32* %gep to <3 x i32>* + %load = load <3 x i32>, <3 x i32>* %ptr, align 4 + %v1 = extractelement <3 x i32> %load, i32 0 + %v2 = extractelement <3 x i32> %load, i32 1 + %v3 = extractelement <3 x i32> %load, i32 2 + %v5 = insertelement <3 x i32> undef, i32 %v3, i32 0 + %v6 = insertelement <3 x i32> %v5, i32 %v1, i32 1 + %v7 = insertelement <3 x i32> %v6, i32 %v2, i32 2 + store <3 x i32> %v7, <3 x i32>* %ptr, align 4 + ret void +} + +; GCN-LABEL: test_local_aligned_v2: +; GCN-DAG: ds_read_b64 +; GCN-DAG: ds_write_b64 +define amdgpu_kernel void @test_local_aligned_v2(i32 addrspace(3)* %arg) { +bb: + %lid = tail call i32 @llvm.amdgcn.workitem.id.x() + 
%gep = getelementptr inbounds i32, i32 addrspace(3)* %arg, i32 %lid + %ptr = bitcast i32 addrspace(3)* %gep to <2 x i32> addrspace(3)* + %load = load <2 x i32>, <2 x i32> addrspace(3)* %ptr, align 8 + %v1 = extractelement <2 x i32> %load, i32 0 + %v2 = extractelement <2 x i32> %load, i32 1 + %v3 = insertelement <2 x i32> undef, i32 %v2, i32 0 + %v4 = insertelement <2 x i32> %v3, i32 %v1, i32 1 + store <2 x i32> %v4, <2 x i32> addrspace(3)* %ptr, align 8 + ret void +} + +; GCN-LABEL: test_local_aligned_v3: +; GCN-DAG: ds_read_b64 +; GCN-DAG: ds_read_b32 +; GCN-DAG: ds_write_b64 +; GCN-DAG: ds_write_b32 +define amdgpu_kernel void @test_local_aligned_v3(i32 addrspace(3)* %arg) { +bb: + %lid = tail call i32 @llvm.amdgcn.workitem.id.x() + %gep = getelementptr inbounds i32, i32 addrspace(3)* %arg, i32 %lid + %ptr = bitcast i32 addrspace(3)* %gep to <3 x i32> addrspace(3)* + %load = load <3 x i32>, <3 x i32> addrspace(3)* %ptr, align 16 + %v1 = extractelement <3 x i32> %load, i32 0 + %v2 = extractelement <3 x i32> %load, i32 1 + %v3 = extractelement <3 x i32> %load, i32 2 + %v5 = insertelement <3 x i32> undef, i32 %v3, i32 0 + %v6 = insertelement <3 x i32> %v5, i32 %v1, i32 1 + %v7 = insertelement <3 x i32> %v6, i32 %v2, i32 2 + store <3 x i32> %v7, <3 x i32> addrspace(3)* %ptr, align 16 + ret void +} + +; GCN-LABEL: test_flat_aligned_v2: +; GCN-DAG: flat_load_dwordx2 v +; GCN-DAG: flat_store_dwordx2 v +define amdgpu_kernel void @test_flat_aligned_v2(i32* %arg) { +bb: + %lid = tail call i32 @llvm.amdgcn.workitem.id.x() + %gep = getelementptr inbounds i32, i32* %arg, i32 %lid + %ptr = bitcast i32* %gep to <2 x i32>* + %load = load <2 x i32>, <2 x i32>* %ptr, align 8 + %v1 = extractelement <2 x i32> %load, i32 0 + %v2 = extractelement <2 x i32> %load, i32 1 + %v3 = insertelement <2 x i32> undef, i32 %v2, i32 0 + %v4 = insertelement <2 x i32> %v3, i32 %v1, i32 1 + store <2 x i32> %v4, <2 x i32>* %ptr, align 8 + ret void +} + +; GCN-LABEL: test_flat_aligned_v4: +; GCN-DAG: 
flat_load_dwordx4 v +; GCN-DAG: flat_store_dwordx4 v +define amdgpu_kernel void @test_flat_aligned_v4(i32* %arg) { +bb: + %lid = tail call i32 @llvm.amdgcn.workitem.id.x() + %gep = getelementptr inbounds i32, i32* %arg, i32 %lid + %ptr = bitcast i32* %gep to <4 x i32>* + %load = load <4 x i32>, <4 x i32>* %ptr, align 16 + %v1 = extractelement <4 x i32> %load, i32 0 + %v2 = extractelement <4 x i32> %load, i32 1 + %v3 = extractelement <4 x i32> %load, i32 2 + %v4 = extractelement <4 x i32> %load, i32 3 + %v5 = insertelement <4 x i32> undef, i32 %v4, i32 0 + %v6 = insertelement <4 x i32> %v5, i32 %v3, i32 1 + %v7 = insertelement <4 x i32> %v6, i32 %v2, i32 2 + %v8 = insertelement <4 x i32> %v7, i32 %v1, i32 3 + store <4 x i32> %v8, <4 x i32>* %ptr, align 16 + ret void +} + +; GCN-LABEL: test_local_v4_aligned8: +; GCN-DAG: ds_read2_b64 +; GCN-DAG: ds_write2_b64 +define amdgpu_kernel void @test_local_v4_aligned8(i32 addrspace(3)* %arg) { +bb: + %lid = tail call i32 @llvm.amdgcn.workitem.id.x() + %gep = getelementptr inbounds i32, i32 addrspace(3)* %arg, i32 %lid + %ptr = bitcast i32 addrspace(3)* %gep to <4 x i32> addrspace(3)* + %load = load <4 x i32>, <4 x i32> addrspace(3)* %ptr, align 8 + %v1 = extractelement <4 x i32> %load, i32 0 + %v2 = extractelement <4 x i32> %load, i32 1 + %v3 = extractelement <4 x i32> %load, i32 2 + %v4 = extractelement <4 x i32> %load, i32 3 + %v5 = insertelement <4 x i32> undef, i32 %v4, i32 0 + %v6 = insertelement <4 x i32> %v5, i32 %v3, i32 1 + %v7 = insertelement <4 x i32> %v6, i32 %v2, i32 2 + %v8 = insertelement <4 x i32> %v7, i32 %v1, i32 3 + store <4 x i32> %v8, <4 x i32> addrspace(3)* %ptr, align 8 + ret void +} + +; GCN-LABEL: test_flat_v4_aligned8: +; VECT-DAG: flat_load_dwordx4 v +; VECT-DAG: flat_store_dwordx4 v +; SPLIT-DAG: flat_load_dwordx2 v +; SPLIT-DAG: flat_load_dwordx2 v +; SPLIT-DAG: flat_store_dwordx2 v +; SPLIT-DAG: flat_store_dwordx2 v +define amdgpu_kernel void @test_flat_v4_aligned8(i32* %arg) { +bb: + %lid = tail 
call i32 @llvm.amdgcn.workitem.id.x() + %gep = getelementptr inbounds i32, i32* %arg, i32 %lid + %ptr = bitcast i32* %gep to <4 x i32>* + %load = load <4 x i32>, <4 x i32>* %ptr, align 8 + %v1 = extractelement <4 x i32> %load, i32 0 + %v2 = extractelement <4 x i32> %load, i32 1 + %v3 = extractelement <4 x i32> %load, i32 2 + %v4 = extractelement <4 x i32> %load, i32 3 + %v5 = insertelement <4 x i32> undef, i32 %v4, i32 0 + %v6 = insertelement <4 x i32> %v5, i32 %v3, i32 1 + %v7 = insertelement <4 x i32> %v6, i32 %v2, i32 2 + %v8 = insertelement <4 x i32> %v7, i32 %v1, i32 3 + store <4 x i32> %v8, <4 x i32>* %ptr, align 8 + ret void +} + +declare i32 @llvm.amdgcn.workitem.id.x() |

