diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-04-16 02:13:37 +0000 |
---|---|---|
committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-04-16 02:13:37 +0000 |
commit | c10783c42d0f86d46b56c70d0eb27b577051e117 (patch) | |
tree | 9a3ad2fdced0978037ded5e4e00e69da804e7274 /llvm/test | |
parent | 6fe1ff260b0461437523ac731008a24c9b63b1bc (diff) | |
download | bcm5719-llvm-c10783c42d0f86d46b56c70d0eb27b577051e117.tar.gz bcm5719-llvm-c10783c42d0f86d46b56c70d0eb27b577051e117.zip |
AMDGPU: Enable LocalStackSlotAllocation pass
This resolves more frame indexes early and folds
the immediate offsets into the scratch mubuf instructions.
This cleans up a lot of the mess that's currently emitted,
such as adds of 0 and repeated initialization of the same
register to 0 when spilling.
llvm-svn: 266508
Diffstat (limited to 'llvm/test')
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/array-ptr-calc-i32.ll | 5 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/captured-frame-index.ll | 87 |
2 files changed, 72 insertions, 20 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/array-ptr-calc-i32.ll b/llvm/test/CodeGen/AMDGPU/array-ptr-calc-i32.ll index 9ff9d57d174..64533720ac9 100644 --- a/llvm/test/CodeGen/AMDGPU/array-ptr-calc-i32.ll +++ b/llvm/test/CodeGen/AMDGPU/array-ptr-calc-i32.ll @@ -15,8 +15,11 @@ declare void @llvm.amdgcn.s.barrier() #2 ; FIXME: We end up with zero argument for ADD, because ; SIRegisterInfo::eliminateFrameIndex() blindly replaces the frame index ; with the appropriate offset. We should fold this into the store. + ; SI-ALLOCA: v_add_i32_e32 [[PTRREG:v[0-9]+]], vcc, 0, v{{[0-9]+}} -; SI-ALLOCA: buffer_store_dword {{v[0-9]+}}, [[PTRREG]], s[{{[0-9]+:[0-9]+}}] +; SI-ALLOCA: buffer_store_dword {{v[0-9]+}}, [[PTRREG]], s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen offset:16 +; SI-ALLOCA: s_barrier +; SI-ALLOCA: buffer_load_dword {{v[0-9]+}}, [[PTRREG]], s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen offset:16 ; ; FIXME: The AMDGPUPromoteAlloca pass should be able to convert this ; alloca to a vector. It currently fails because it does not know how diff --git a/llvm/test/CodeGen/AMDGPU/captured-frame-index.ll b/llvm/test/CodeGen/AMDGPU/captured-frame-index.ll index 21c8af4fafa..92dd7baa26c 100644 --- a/llvm/test/CodeGen/AMDGPU/captured-frame-index.ll +++ b/llvm/test/CodeGen/AMDGPU/captured-frame-index.ll @@ -16,25 +16,22 @@ define void @stored_fi_to_lds(float* addrspace(3)* %ptr) #0 { ; Offset is applied ; GCN-LABEL: {{^}}stored_fi_to_lds_2_small_objects: -; GCN: s_load_dword [[LDSPTR:s[0-9]+]] -; GCN: v_mov_b32_e32 [[ZERO1:v[0-9]+]], 0{{$}} -; GCN: buffer_store_dword v{{[0-9]+}}, [[ZERO1]] - -; GCN: v_mov_b32_e32 [[FI1:v[0-9]+]], 4{{$}} -; GCN: buffer_store_dword v{{[0-9]+}}, [[FI1]] +; GCN: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}} +; GCN: buffer_store_dword v{{[0-9]+}}, [[ZERO]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}} +; GCN: buffer_store_dword v{{[0-9]+}}, [[ZERO]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen offset:4{{$}} +; GCN: s_load_dword [[LDSPTR:s[0-9]+]] -; GCN-DAG: 
v_mov_b32_e32 [[ZERO0:v[0-9]+]], 0{{$}} ; GCN-DAG: v_mov_b32_e32 [[VLDSPTR:v[0-9]+]], [[LDSPTR]] -; GCN: ds_write_b32 [[VLDSPTR]], [[ZERO0]] +; GCN: ds_write_b32 [[VLDSPTR]], [[ZERO]] ; GCN-DAG: v_mov_b32_e32 [[FI1:v[0-9]+]], 4{{$}} ; GCN: ds_write_b32 [[VLDSPTR]], [[FI1]] define void @stored_fi_to_lds_2_small_objects(float* addrspace(3)* %ptr) #0 { %tmp0 = alloca float %tmp1 = alloca float - store float 4.0, float *%tmp0 - store float 4.0, float *%tmp1 + store float 4.0, float* %tmp0 + store float 4.0, float* %tmp1 store volatile float* %tmp0, float* addrspace(3)* %ptr store volatile float* %tmp1, float* addrspace(3)* %ptr ret void @@ -42,6 +39,10 @@ define void @stored_fi_to_lds_2_small_objects(float* addrspace(3)* %ptr) #0 { ; Same frame index is used multiple times in the store ; GCN-LABEL: {{^}}stored_fi_to_self: +; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x4d2{{$}} +; GCN-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}} +; GCN: buffer_store_dword [[K]], [[ZERO]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}} +; GCN: buffer_store_dword [[ZERO]], [[ZERO]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}} define void @stored_fi_to_self() #0 { %tmp = alloca i32* @@ -52,18 +53,42 @@ define void @stored_fi_to_self() #0 { ret void } +; GCN-LABEL: {{^}}stored_fi_to_self_offset: +; GCN-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}} +; GCN-DAG: v_mov_b32_e32 [[K0:v[0-9]+]], 32{{$}} +; GCN: buffer_store_dword [[K0]], [[ZERO]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}} + +; GCN-DAG: v_mov_b32_e32 [[K1:v[0-9]+]], 0x4d2{{$}} +; GCN: buffer_store_dword [[K1]], [[ZERO]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen offset:2048{{$}} + +; GCN: v_mov_b32_e32 [[OFFSETK:v[0-9]+]], 0x800{{$}} +; GCN: buffer_store_dword [[OFFSETK]], [[ZERO]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen offset:2048{{$}} +define void @stored_fi_to_self_offset() #0 { + %tmp0 = alloca [512 x i32] + %tmp1 = alloca i32* + + ; Avoid optimizing everything out + %tmp0.cast = bitcast [512 x i32]* %tmp0 to i32* + store 
volatile i32 32, i32* %tmp0.cast + + store volatile i32* inttoptr (i32 1234 to i32*), i32** %tmp1 + + %bitcast = bitcast i32** %tmp1 to i32* + store volatile i32* %bitcast, i32** %tmp1 + ret void +} + ; GCN-LABEL: {{^}}stored_fi_to_fi: -; GCN: buffer_store_dword -; GCN: buffer_store_dword -; GCN: buffer_store_dword +; GCN: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}} +; GCN: buffer_store_dword v{{[0-9]+}}, [[ZERO]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}} +; GCN: buffer_store_dword v{{[0-9]+}}, [[ZERO]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen offset:4{{$}} +; GCN: buffer_store_dword v{{[0-9]+}}, [[ZERO]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen offset:8{{$}} -; GCN-DAG: v_mov_b32_e32 [[FI1:v[0-9]+]], 4{{$}} -; GCN-DAG: v_mov_b32_e32 [[FI2:v[0-9]+]], 8{{$}} -; GCN: buffer_store_dword [[FI1]], [[FI2]] +; GCN: v_mov_b32_e32 [[FI1:v[0-9]+]], 4{{$}} +; GCN: buffer_store_dword [[FI1]], [[ZERO]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen offset:8{{$}} -; GCN-DAG: v_mov_b32_e32 [[FI1:v[0-9]+]], 4{{$}} -; GCN-DAG: v_mov_b32_e32 [[FI2:v[0-9]+]], 8{{$}} -; GCN: buffer_store_dword [[FI2]], [[FI1]] +; GCN: v_mov_b32_e32 [[FI2:v[0-9]+]], 8{{$}} +; GCN: buffer_store_dword [[FI2]], [[ZERO]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen offset:4{{$}} define void @stored_fi_to_fi() #0 { %tmp0 = alloca i32* %tmp1 = alloca i32* @@ -114,4 +139,28 @@ define void @stored_fi_to_global_2_small_objects(float* addrspace(1)* %ptr) #0 { ret void } +; GCN-LABEL: {{^}}stored_fi_to_global_huge_frame_offset: +; GCN: v_mov_b32_e32 [[BASE_0:v[0-9]+]], 0{{$}} +; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen + +; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7{{$}} +; GCN-DAG: s_add_i32 [[BASE_1_OFF_0:s[0-9]+]], 0, 0x3ffc +; GCN-DAG: v_mov_b32_e32 [[V_BASE_1_OFF_0:v[0-9]+]], [[BASE_1_OFF_0]] +; GCN: buffer_store_dword [[K]], [[V_BASE_1_OFF_0]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}} + +; GCN-DAG: s_add_i32 [[BASE_1_OFF_1:s[0-9]+]], 0, 56 +; GCN-DAG: 
v_mov_b32_e32 [[V_BASE_1_OFF_1:v[0-9]+]], [[BASE_1_OFF_1]] +; GCN: buffer_store_dword [[V_BASE_1_OFF_1]], s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} +define void @stored_fi_to_global_huge_frame_offset(i32* addrspace(1)* %ptr) #0 { + %tmp0 = alloca [4096 x i32] + %tmp1 = alloca [4096 x i32] + %gep0.tmp0 = getelementptr [4096 x i32], [4096 x i32]* %tmp0, i32 0, i32 0 + store volatile i32 0, i32* %gep0.tmp0 + %gep1.tmp0 = getelementptr [4096 x i32], [4096 x i32]* %tmp0, i32 0, i32 4095 + store volatile i32 999, i32* %gep1.tmp0 + %gep0.tmp1 = getelementptr [4096 x i32], [4096 x i32]* %tmp0, i32 0, i32 14 + store i32* %gep0.tmp1, i32* addrspace(1)* %ptr + ret void +} + attributes #0 = { nounwind } |