diff options
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/load-local-i32.ll')
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/load-local-i32.ll | 19 |
1 files changed, 19 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/load-local-i32.ll b/llvm/test/CodeGen/AMDGPU/load-local-i32.ll index c736586fa21..1dd7daf95ab 100644 --- a/llvm/test/CodeGen/AMDGPU/load-local-i32.ll +++ b/llvm/test/CodeGen/AMDGPU/load-local-i32.ll @@ -3,6 +3,11 @@ ; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI,FUNC %s ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; Testing for ds_read_b128 +; RUN: llc -march=amdgcn -mcpu=tahiti -amdgpu-ds128 < %s | FileCheck -check-prefixes=SI,FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -amdgpu-ds128 < %s | FileCheck -check-prefixes=CIVI,FUNC %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -amdgpu-ds128 < %s | FileCheck -check-prefixes=CIVI,FUNC %s + ; FUNC-LABEL: {{^}}local_load_i32: ; GCN-NOT: s_wqm_b64 ; SICIVI: s_mov_b32 m0, -1 @@ -175,6 +180,20 @@ define amdgpu_kernel void @local_sextload_v4i32_to_v4i64(<4 x i64> addrspace(3)* ret void } +; Tests if ds_read_b128 gets generated for the 16 byte aligned load. +; FUNC-LABEL: {{^}}local_v4i32_to_128: +; SI-NOT: ds_read_b128 +; CIVI: ds_read_b128 +; EG: LDS_READ_RET +; EG: LDS_READ_RET +; EG: LDS_READ_RET +; EG: LDS_READ_RET +define amdgpu_kernel void @local_v4i32_to_128(<4 x i32> addrspace(3)* %out, <4 x i32> addrspace(3)* %in) { + %ld = load <4 x i32>, <4 x i32> addrspace(3)* %in, align 16 + store <4 x i32> %ld, <4 x i32> addrspace(3)* %out + ret void +} + ; FUNC-LABEL: {{^}}local_zextload_v8i32_to_v8i64: ; SICIVI: s_mov_b32 m0, -1 ; GFX9-NOT: m0 |