diff options
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU')
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/captured-frame-index.ll | 50 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/local-stack-slot-bug.ll | 6 |
2 files changed, 47 insertions, 9 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/captured-frame-index.ll b/llvm/test/CodeGen/AMDGPU/captured-frame-index.ll index 161c46b486e..55b32305687 100644 --- a/llvm/test/CodeGen/AMDGPU/captured-frame-index.ll +++ b/llvm/test/CodeGen/AMDGPU/captured-frame-index.ll @@ -1,5 +1,17 @@ ; RUN: llc -march=amdgcn -mattr=-promote-alloca -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; GCN-LABEL: {{^}}store_fi_lifetime: +; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 0{{$}} +; GCN: buffer_store_dword [[FI]] +define void @store_fi_lifetime(i32 addrspace(1)* %out, i32 %in) #0 { +entry: + %b = alloca i8 + call void @llvm.lifetime.start(i64 1, i8* %b) + store volatile i8* %b, i8* addrspace(1)* undef + call void @llvm.lifetime.end(i64 1, i8* %b) + ret void +} + ; GCN-LABEL: {{^}}stored_fi_to_lds: ; GCN: s_load_dword [[LDSPTR:s[0-9]+]] ; GCN: v_mov_b32_e32 [[ZERO1:v[0-9]+]], 0{{$}} @@ -140,17 +152,18 @@ define void @stored_fi_to_global_2_small_objects(float* addrspace(1)* %ptr) #0 { } ; GCN-LABEL: {{^}}stored_fi_to_global_huge_frame_offset: -; GCN: s_add_i32 [[BASE_1_OFF_0:s[0-9]+]], 0, 0x3ffc +; GCN: v_mov_b32_e32 [[VAL_0:v[0-9]+]], 0{{$}} ; GCN: v_mov_b32_e32 [[BASE_0:v[0-9]+]], 0{{$}} -; GCN: buffer_store_dword [[BASE_0]], v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen +; GCN: buffer_store_dword [[VAL_0]], [[BASE_0]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen + +; GCN: v_mov_b32_e32 [[BASE_0_1:v[0-9]+]], 0{{$}} +; GCN: v_add_i32_e32 [[BASE_1_OFF_0:v[0-9]+]], vcc, 0x3ffc, [[BASE_0_1]] -; GCN: v_mov_b32_e32 [[V_BASE_1_OFF_0:v[0-9]+]], [[BASE_1_OFF_0]] ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7{{$}} -; GCN: s_add_i32 [[BASE_1_OFF_1:s[0-9]+]], 0, 56 -; GCN: buffer_store_dword [[K]], [[V_BASE_1_OFF_0]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}} +; GCN: v_add_i32_e32 [[BASE_1_OFF_1:v[0-9]+]], vcc, 56, [[BASE_0_1]] +; GCN: buffer_store_dword [[K]], [[BASE_1_OFF_0]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}} -; GCN: v_mov_b32_e32 [[V_BASE_1_OFF_1:v[0-9]+]], [[BASE_1_OFF_1]] -; GCN: buffer_store_dword [[V_BASE_1_OFF_1]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} +; GCN: buffer_store_dword [[BASE_1_OFF_1]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} define void @stored_fi_to_global_huge_frame_offset(i32* addrspace(1)* %ptr) #0 { %tmp0 = alloca [4096 x i32] %tmp1 = alloca [4096 x i32] @@ -163,4 +176,27 @@ define void @stored_fi_to_global_huge_frame_offset(i32* addrspace(1)* %ptr) #0 { ret void } +@g1 = external addrspace(1) global i32* + +; This was leaving a dead node around resulting in failing to select +; on the leftover AssertZext's ValueType operand. + +; GCN-LABEL: {{^}}cannot_select_assertzext_valuetype: +; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, g1@GOTPCREL+4 +; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 0{{$}} +; GCN: buffer_store_dword [[FI]] +define void @cannot_select_assertzext_valuetype(i32 addrspace(1)* %out, i32 %idx) #0 { +entry: + %b = alloca i32, align 4 + %tmp1 = load volatile i32*, i32* addrspace(1)* @g1, align 4 + %arrayidx = getelementptr inbounds i32, i32* %tmp1, i32 %idx + %tmp2 = load i32, i32* %arrayidx, align 4 + store volatile i32* %b, i32* addrspace(1)* undef + ret void +} + +declare void @llvm.lifetime.start(i64, i8* nocapture) #1 +declare void @llvm.lifetime.end(i64, i8* nocapture) #1 + attributes #0 = { nounwind } +attributes #1 = { argmemonly nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/local-stack-slot-bug.ll b/llvm/test/CodeGen/AMDGPU/local-stack-slot-bug.ll index eb554e21730..105d20511b8 100644 --- a/llvm/test/CodeGen/AMDGPU/local-stack-slot-bug.ll +++ b/llvm/test/CodeGen/AMDGPU/local-stack-slot-bug.ll @@ -7,8 +7,10 @@ ; ; CHECK-LABEL: {{^}}main: ; CHECK: v_lshlrev_b32_e32 [[BYTES:v[0-9]+]], 2, v0 -; CHECK: v_add_i32_e32 [[HI_OFF:v[0-9]+]], vcc, 0x200, [[BYTES]] -; CHECK: v_add_i32_e32 [[LO_OFF:v[0-9]+]], vcc, 0, [[BYTES]] +; CHECK-DAG: v_mov_b32_e32 [[ZERO_BASE_FI:v[0-9]+]], 0{{$}} +; CHECK-DAG: v_add_i32_e32 [[HI_OFF:v[0-9]+]], vcc, 0x200, [[BYTES]] +; CHECK-DAG: v_add_i32_e32 [[LO_OFF:v[0-9]+]], vcc, 0, [[BYTES]] + ; CHECK: buffer_load_dword {{v[0-9]+}}, [[LO_OFF]], {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen ; CHECK: buffer_load_dword {{v[0-9]+}}, [[HI_OFF]], {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen define amdgpu_ps float @main(i32 %idx) { |