diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-06-02 19:54:26 +0000 |
---|---|---|
committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-06-02 19:54:26 +0000 |
commit | d1097a38e2b754a98bd60a9581316f0ea9eae6bc (patch) | |
tree | 2e4ea47f0d21051928c068e7765aa5817605e5ce /llvm/test/CodeGen/AMDGPU/local-memory.ll | |
parent | f4e9c9ac08315bed9e5f8ffd0c1612540844370b (diff) | |
download | bcm5719-llvm-d1097a38e2b754a98bd60a9581316f0ea9eae6bc.tar.gz bcm5719-llvm-d1097a38e2b754a98bd60a9581316f0ea9eae6bc.zip |
AMDGPU: Cleanup load tests
There are a lot of different kinds of loads to test for,
and these were scattered around inconsistently with
some redundancy. Try to comprehensively test all loads
in a consistent way.
llvm-svn: 271571
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/local-memory.ll')
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/local-memory.ll | 35 |
1 files changed, 35 insertions, 0 deletions
```diff
diff --git a/llvm/test/CodeGen/AMDGPU/local-memory.ll b/llvm/test/CodeGen/AMDGPU/local-memory.ll
index 6ccfe737d27..8d48f594b23 100644
--- a/llvm/test/CodeGen/AMDGPU/local-memory.ll
+++ b/llvm/test/CodeGen/AMDGPU/local-memory.ll
@@ -43,6 +43,41 @@ entry:
   ret void
 }
 
+@lds = addrspace(3) global [512 x i32] undef, align 4
+
+; On SI we need to make sure that the base offset is a register and not
+; an immediate.
+; FUNC-LABEL: {{^}}load_i32_local_const_ptr:
+; GCN: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0
+; GCN: ds_read_b32 v{{[0-9]+}}, v[[ZERO]] offset:4
+; R600: LDS_READ_RET
+define void @load_i32_local_const_ptr(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
+entry:
+  %tmp0 = getelementptr [512 x i32], [512 x i32] addrspace(3)* @lds, i32 0, i32 1
+  %tmp1 = load i32, i32 addrspace(3)* %tmp0
+  %tmp2 = getelementptr i32, i32 addrspace(1)* %out, i32 1
+  store i32 %tmp1, i32 addrspace(1)* %tmp2
+  ret void
+}
+
+; Test loading a i32 and v2i32 value from the same base pointer.
+; FUNC-LABEL: {{^}}load_i32_v2i32_local:
+; R600: LDS_READ_RET
+; R600: LDS_READ_RET
+; R600: LDS_READ_RET
+; GCN-DAG: ds_read_b32
+; GCN-DAG: ds_read2_b32
+define void @load_i32_v2i32_local(<2 x i32> addrspace(1)* %out, i32 addrspace(3)* %in) {
+  %scalar = load i32, i32 addrspace(3)* %in
+  %tmp0 = bitcast i32 addrspace(3)* %in to <2 x i32> addrspace(3)*
+  %vec_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(3)* %tmp0, i32 2
+  %vec0 = load <2 x i32>, <2 x i32> addrspace(3)* %vec_ptr, align 4
+  %vec1 = insertelement <2 x i32> <i32 0, i32 0>, i32 %scalar, i32 0
+  %vec = add <2 x i32> %vec0, %vec1
+  store <2 x i32> %vec, <2 x i32> addrspace(1)* %out
+  ret void
+}
+
 declare i32 @llvm.r600.read.tidig.x() #0
 declare void @llvm.AMDGPU.barrier.local()
 
```