summaryrefslogtreecommitdiffstats
path: root/llvm/test/CodeGen/AMDGPU/captured-frame-index.ll
diff options
context:
space:
mode:
authorMatt Arsenault <Matthew.Arsenault@amd.com>2016-09-17 16:09:55 +0000
committerMatt Arsenault <Matthew.Arsenault@amd.com>2016-09-17 16:09:55 +0000
commitac0fc849cf53b41735526eba7931ee4f3508fb8f (patch)
treef4cf0befd7d3ba8e6255338d85188df82606185d /llvm/test/CodeGen/AMDGPU/captured-frame-index.ll
parentbcfd94c2982e6b8468596390234832653a56fb54 (diff)
downloadbcm5719-llvm-ac0fc849cf53b41735526eba7931ee4f3508fb8f.tar.gz
bcm5719-llvm-ac0fc849cf53b41735526eba7931ee4f3508fb8f.zip
AMDGPU: Fix broken FrameIndex handling
We were trying to avoid using a FrameIndex operand in non-pointer operands in a convoluted way, and would break because of using TargetFrameIndex. The TargetFrameIndex should only be used in the case where it makes sense to fold it as part of the addressing mode, otherwise it requires materialization like a normal constant. This wasn't working reliably and failed in the added testcase, hitting the assert when processing the frame index. The TargetFrameIndex was coming from trying to produce an AssertZext limiting the maximum stack size. I'm not sure this was correct to begin with, because it is apparently possible to have a single workitem dispatch that requires all 4G of private memory. llvm-svn: 281824
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/captured-frame-index.ll')
-rw-r--r--llvm/test/CodeGen/AMDGPU/captured-frame-index.ll50
1 files changed, 43 insertions, 7 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/captured-frame-index.ll b/llvm/test/CodeGen/AMDGPU/captured-frame-index.ll
index 161c46b486e..55b32305687 100644
--- a/llvm/test/CodeGen/AMDGPU/captured-frame-index.ll
+++ b/llvm/test/CodeGen/AMDGPU/captured-frame-index.ll
@@ -1,5 +1,17 @@
; RUN: llc -march=amdgcn -mattr=-promote-alloca -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; GCN-LABEL: {{^}}store_fi_lifetime:
+; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 0{{$}}
+; GCN: buffer_store_dword [[FI]]
+define void @store_fi_lifetime(i32 addrspace(1)* %out, i32 %in) #0 {
+entry:
+ %b = alloca i8
+ call void @llvm.lifetime.start(i64 1, i8* %b)
+ store volatile i8* %b, i8* addrspace(1)* undef
+ call void @llvm.lifetime.end(i64 1, i8* %b)
+ ret void
+}
+
; GCN-LABEL: {{^}}stored_fi_to_lds:
; GCN: s_load_dword [[LDSPTR:s[0-9]+]]
; GCN: v_mov_b32_e32 [[ZERO1:v[0-9]+]], 0{{$}}
@@ -140,17 +152,18 @@ define void @stored_fi_to_global_2_small_objects(float* addrspace(1)* %ptr) #0 {
}
; GCN-LABEL: {{^}}stored_fi_to_global_huge_frame_offset:
-; GCN: s_add_i32 [[BASE_1_OFF_0:s[0-9]+]], 0, 0x3ffc
+; GCN: v_mov_b32_e32 [[VAL_0:v[0-9]+]], 0{{$}}
; GCN: v_mov_b32_e32 [[BASE_0:v[0-9]+]], 0{{$}}
-; GCN: buffer_store_dword [[BASE_0]], v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen
+; GCN: buffer_store_dword [[VAL_0]], [[BASE_0]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen
+
+; GCN: v_mov_b32_e32 [[BASE_0_1:v[0-9]+]], 0{{$}}
+; GCN: v_add_i32_e32 [[BASE_1_OFF_0:v[0-9]+]], vcc, 0x3ffc, [[BASE_0_1]]
-; GCN: v_mov_b32_e32 [[V_BASE_1_OFF_0:v[0-9]+]], [[BASE_1_OFF_0]]
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7{{$}}
-; GCN: s_add_i32 [[BASE_1_OFF_1:s[0-9]+]], 0, 56
-; GCN: buffer_store_dword [[K]], [[V_BASE_1_OFF_0]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}}
+; GCN: v_add_i32_e32 [[BASE_1_OFF_1:v[0-9]+]], vcc, 56, [[BASE_0_1]]
+; GCN: buffer_store_dword [[K]], [[BASE_1_OFF_0]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}}
-; GCN: v_mov_b32_e32 [[V_BASE_1_OFF_1:v[0-9]+]], [[BASE_1_OFF_1]]
-; GCN: buffer_store_dword [[V_BASE_1_OFF_1]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
+; GCN: buffer_store_dword [[BASE_1_OFF_1]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
define void @stored_fi_to_global_huge_frame_offset(i32* addrspace(1)* %ptr) #0 {
%tmp0 = alloca [4096 x i32]
%tmp1 = alloca [4096 x i32]
@@ -163,4 +176,27 @@ define void @stored_fi_to_global_huge_frame_offset(i32* addrspace(1)* %ptr) #0 {
ret void
}
+@g1 = external addrspace(1) global i32*
+
+; This was leaving a dead node around resulting in failing to select
+; on the leftover AssertZext's ValueType operand.
+
+; GCN-LABEL: {{^}}cannot_select_assertzext_valuetype:
+; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, g1@GOTPCREL+4
+; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 0{{$}}
+; GCN: buffer_store_dword [[FI]]
+define void @cannot_select_assertzext_valuetype(i32 addrspace(1)* %out, i32 %idx) #0 {
+entry:
+ %b = alloca i32, align 4
+ %tmp1 = load volatile i32*, i32* addrspace(1)* @g1, align 4
+ %arrayidx = getelementptr inbounds i32, i32* %tmp1, i32 %idx
+ %tmp2 = load i32, i32* %arrayidx, align 4
+ store volatile i32* %b, i32* addrspace(1)* undef
+ ret void
+}
+
+declare void @llvm.lifetime.start(i64, i8* nocapture) #1
+declare void @llvm.lifetime.end(i64, i8* nocapture) #1
+
attributes #0 = { nounwind }
+attributes #1 = { argmemonly nounwind }
OpenPOWER on IntegriCloud