diff options
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/private-memory.ll')
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/private-memory.ll | 36 |
1 files changed, 33 insertions, 3 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/private-memory.ll b/llvm/test/CodeGen/AMDGPU/private-memory.ll index 79778eebd80..4f52fde6054 100644 --- a/llvm/test/CodeGen/AMDGPU/private-memory.ll +++ b/llvm/test/CodeGen/AMDGPU/private-memory.ll @@ -1,4 +1,3 @@ -; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s -check-prefix=R600 -check-prefix=FUNC ; RUN: llc -show-mc-encoding -mattr=+promote-alloca -verify-machineinstrs -march=amdgcn -mcpu=SI < %s | FileCheck %s -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC ; RUN: llc -show-mc-encoding -mattr=+promote-alloca -verify-machineinstrs -mtriple=amdgcn--amdhsa -mcpu=kaveri < %s | FileCheck %s -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC -check-prefix=HSA-PROMOTE ; RUN: llc -show-mc-encoding -mattr=-promote-alloca -verify-machineinstrs -march=amdgcn -mcpu=SI < %s | FileCheck %s -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC @@ -6,7 +5,10 @@ ; RUN: llc -show-mc-encoding -mattr=+promote-alloca -verify-machineinstrs -march=amdgcn -mcpu=tonga < %s | FileCheck %s -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC ; RUN: llc -show-mc-encoding -mattr=-promote-alloca -verify-machineinstrs -march=amdgcn -mcpu=tonga < %s | FileCheck %s -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC -declare i32 @llvm.r600.read.tidig.x() nounwind readnone +; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -amdgpu-promote-alloca < %s | FileCheck -check-prefix=HSAOPT %s +; RUN: opt -S -mtriple=amdgcn-unknown-unknown -mcpu=kaveri -amdgpu-promote-alloca < %s | FileCheck -check-prefix=NOHSAOPT %s + +declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone ; FUNC-LABEL: {{^}}mova_same_clause: @@ -19,6 +21,10 @@ declare i32 @llvm.r600.read.tidig.x() nounwind readnone ; HSA-PROMOTE: workgroup_group_segment_byte_size = 5120 ; HSA-PROMOTE: .end_amd_kernel_code_t +; FIXME: These should be merged +; HSA-PROMOTE: s_load_dword s{{[0-9]+}}, s[4:5], 0x1 +; HSA-PROMOTE: s_load_dword s{{[0-9]+}}, s[4:5], 0x2 + ; SI-PROMOTE: ds_write_b32 ; SI-PROMOTE: ds_write_b32 ; SI-PROMOTE: ds_read_b32 @@ -32,6 +38,25 @@ declare i32 @llvm.r600.read.tidig.x() nounwind readnone ; SI-ALLOCA: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen ; encoding: [0x00,0x10,0x70,0xe0 ; SI-ALLOCA: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen ; encoding: [0x00,0x10,0x70,0xe0 + + +; HSAOPT: [[DISPATCH_PTR:%[0-9]+]] = call noalias nonnull dereferenceable(64) i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() +; HSAOPT: [[CAST_DISPATCH_PTR:%[0-9]+]] = bitcast i8 addrspace(2)* [[DISPATCH_PTR]] to i32 addrspace(2)* +; HSAOPT: [[GEP0:%[0-9]+]] = getelementptr inbounds i32, i32 addrspace(2)* [[CAST_DISPATCH_PTR]], i64 1 +; HSAOPT: [[LDXY:%[0-9]+]] = load i32, i32 addrspace(2)* [[GEP0]], align 4, !invariant.load !0 +; HSAOPT: [[GEP1:%[0-9]+]] = getelementptr inbounds i32, i32 addrspace(2)* [[CAST_DISPATCH_PTR]], i64 2 +; HSAOPT: [[LDZU:%[0-9]+]] = load i32, i32 addrspace(2)* [[GEP1]], align 4, !range !1, !invariant.load !0 +; HSAOPT: [[EXTRACTY:%[0-9]+]] = lshr i32 [[LDXY]], 16 + +; HSAOPT: call i32 @llvm.amdgcn.workitem.id.x(), !range !1 +; HSAOPT: call i32 @llvm.amdgcn.workitem.id.y(), !range !1 +; HSAOPT: call i32 @llvm.amdgcn.workitem.id.z(), !range !1 + +; NOHSAOPT: call i32 @llvm.r600.read.local.size.y(), !range !0 +; NOHSAOPT: call i32 @llvm.r600.read.local.size.z(), !range !0 +; NOHSAOPT: call i32 @llvm.amdgcn.workitem.id.x(), !range !0 +; NOHSAOPT: call i32 @llvm.amdgcn.workitem.id.y(), !range !0 +; NOHSAOPT: call i32 @llvm.amdgcn.workitem.id.z(), !range !0 define void @mova_same_clause(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) { entry: %stack = alloca [5 x i32], align 4 @@ -185,7 +210,7 @@ entry: store i32 1, i32* %2 %3 = getelementptr [2 x i32], [2 x i32]* %0, i32 0, i32 %in %4 = load i32, i32* %3 - %5 = call i32 @llvm.r600.read.tidig.x() + %5 = call i32 @llvm.amdgcn.workitem.id.x() %6 = add i32 %4, %5 store i32 %6, i32 addrspace(1)* %out ret void @@ -323,3 +348,8 @@ define void @ptrtoint(i32 addrspace(1)* %out, i32 %a, i32 %b) { store i32 %tmp5, i32 addrspace(1)* %out ret void } + +; HSAOPT: !0 = !{} +; HSAOPT: !1 = !{i32 0, i32 2048} + +; NOHSAOPT: !0 = !{i32 0, i32 2048} |