diff options
| author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-04-16 02:13:37 +0000 | 
|---|---|---|
| committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-04-16 02:13:37 +0000 | 
| commit | c10783c42d0f86d46b56c70d0eb27b577051e117 (patch) | |
| tree | 9a3ad2fdced0978037ded5e4e00e69da804e7274 /llvm/test/CodeGen | |
| parent | 6fe1ff260b0461437523ac731008a24c9b63b1bc (diff) | |
| download | bcm5719-llvm-c10783c42d0f86d46b56c70d0eb27b577051e117.tar.gz bcm5719-llvm-c10783c42d0f86d46b56c70d0eb27b577051e117.zip | |
AMDGPU: Enable LocalStackSlotAllocation pass
This resolves more frame indexes early and folds
the immediate offsets into the scratch mubuf instructions.
This cleans up a lot of the mess that's currently emitted,
such as emitting add 0s and repeatedly initializing the same
register to 0 when spilling.
llvm-svn: 266508
Diffstat (limited to 'llvm/test/CodeGen')
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/array-ptr-calc-i32.ll | 5 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/captured-frame-index.ll | 87 | 
2 files changed, 72 insertions, 20 deletions
| diff --git a/llvm/test/CodeGen/AMDGPU/array-ptr-calc-i32.ll b/llvm/test/CodeGen/AMDGPU/array-ptr-calc-i32.ll index 9ff9d57d174..64533720ac9 100644 --- a/llvm/test/CodeGen/AMDGPU/array-ptr-calc-i32.ll +++ b/llvm/test/CodeGen/AMDGPU/array-ptr-calc-i32.ll @@ -15,8 +15,11 @@ declare void @llvm.amdgcn.s.barrier() #2  ; FIXME: We end up with zero argument for ADD, because  ; SIRegisterInfo::eliminateFrameIndex() blindly replaces the frame index  ; with the appropriate offset.  We should fold this into the store. +  ; SI-ALLOCA: v_add_i32_e32 [[PTRREG:v[0-9]+]], vcc, 0, v{{[0-9]+}} -; SI-ALLOCA: buffer_store_dword {{v[0-9]+}}, [[PTRREG]], s[{{[0-9]+:[0-9]+}}] +; SI-ALLOCA: buffer_store_dword {{v[0-9]+}}, [[PTRREG]], s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen offset:16 +; SI-ALLOCA: s_barrier +; SI-ALLOCA: buffer_load_dword {{v[0-9]+}}, [[PTRREG]], s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen offset:16  ;  ; FIXME: The AMDGPUPromoteAlloca pass should be able to convert this  ; alloca to a vector.  It currently fails because it does not know how diff --git a/llvm/test/CodeGen/AMDGPU/captured-frame-index.ll b/llvm/test/CodeGen/AMDGPU/captured-frame-index.ll index 21c8af4fafa..92dd7baa26c 100644 --- a/llvm/test/CodeGen/AMDGPU/captured-frame-index.ll +++ b/llvm/test/CodeGen/AMDGPU/captured-frame-index.ll @@ -16,25 +16,22 @@ define void @stored_fi_to_lds(float* addrspace(3)* %ptr) #0 {  ; Offset is applied  ; GCN-LABEL: {{^}}stored_fi_to_lds_2_small_objects: -; GCN: s_load_dword [[LDSPTR:s[0-9]+]] -; GCN: v_mov_b32_e32 [[ZERO1:v[0-9]+]], 0{{$}} -; GCN: buffer_store_dword v{{[0-9]+}}, [[ZERO1]] - -; GCN: v_mov_b32_e32 [[FI1:v[0-9]+]], 4{{$}} -; GCN: buffer_store_dword v{{[0-9]+}}, [[FI1]] +; GCN: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}} +; GCN: buffer_store_dword v{{[0-9]+}}, [[ZERO]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}} +; GCN: buffer_store_dword v{{[0-9]+}}, [[ZERO]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen offset:4{{$}} +; GCN: s_load_dword [[LDSPTR:s[0-9]+]] -; GCN-DAG: v_mov_b32_e32 [[ZERO0:v[0-9]+]], 0{{$}}  ; GCN-DAG: v_mov_b32_e32 [[VLDSPTR:v[0-9]+]], [[LDSPTR]] -; GCN: ds_write_b32  [[VLDSPTR]], [[ZERO0]] +; GCN: ds_write_b32  [[VLDSPTR]], [[ZERO]]  ; GCN-DAG: v_mov_b32_e32 [[FI1:v[0-9]+]], 4{{$}}  ; GCN: ds_write_b32  [[VLDSPTR]], [[FI1]]  define void @stored_fi_to_lds_2_small_objects(float* addrspace(3)* %ptr) #0 {    %tmp0 = alloca float    %tmp1 = alloca float -  store float 4.0, float *%tmp0 -  store float 4.0, float *%tmp1 +  store float 4.0, float* %tmp0 +  store float 4.0, float* %tmp1    store volatile float* %tmp0, float* addrspace(3)* %ptr    store volatile float* %tmp1, float* addrspace(3)* %ptr    ret void @@ -42,6 +39,10 @@ define void @stored_fi_to_lds_2_small_objects(float* addrspace(3)* %ptr) #0 {  ; Same frame index is used multiple times in the store  ; GCN-LABEL: {{^}}stored_fi_to_self: +; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x4d2{{$}} +; GCN-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}} +; GCN: buffer_store_dword [[K]], [[ZERO]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}} +; GCN: buffer_store_dword [[ZERO]], [[ZERO]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}}  define void @stored_fi_to_self() #0 {    %tmp = alloca i32* @@ -52,18 +53,42 @@ define void @stored_fi_to_self() #0 {    ret void  } +; GCN-LABEL: {{^}}stored_fi_to_self_offset: +; GCN-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}} +; GCN-DAG: v_mov_b32_e32 [[K0:v[0-9]+]], 32{{$}} +; GCN: buffer_store_dword [[K0]], [[ZERO]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}} + +; GCN-DAG: v_mov_b32_e32 [[K1:v[0-9]+]], 0x4d2{{$}} +; GCN: buffer_store_dword [[K1]], [[ZERO]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen offset:2048{{$}} + +; GCN: v_mov_b32_e32 [[OFFSETK:v[0-9]+]], 0x800{{$}} +; GCN: buffer_store_dword [[OFFSETK]], [[ZERO]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen offset:2048{{$}} +define void @stored_fi_to_self_offset() #0 { +  %tmp0 = alloca [512 x i32] +  %tmp1 = alloca i32* + +  ; Avoid optimizing everything out +  %tmp0.cast = bitcast [512 x i32]* %tmp0 to i32* +  store volatile i32 32, i32* %tmp0.cast + +  store volatile i32* inttoptr (i32 1234 to i32*), i32** %tmp1 + +  %bitcast = bitcast i32** %tmp1 to i32* +  store volatile i32* %bitcast, i32** %tmp1 +  ret void +} +  ; GCN-LABEL: {{^}}stored_fi_to_fi: -; GCN: buffer_store_dword -; GCN: buffer_store_dword -; GCN: buffer_store_dword +; GCN: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}} +; GCN: buffer_store_dword v{{[0-9]+}}, [[ZERO]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}} +; GCN: buffer_store_dword v{{[0-9]+}}, [[ZERO]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen offset:4{{$}} +; GCN: buffer_store_dword v{{[0-9]+}}, [[ZERO]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen offset:8{{$}} -; GCN-DAG: v_mov_b32_e32 [[FI1:v[0-9]+]], 4{{$}} -; GCN-DAG: v_mov_b32_e32 [[FI2:v[0-9]+]], 8{{$}} -; GCN: buffer_store_dword [[FI1]], [[FI2]] +; GCN: v_mov_b32_e32 [[FI1:v[0-9]+]], 4{{$}} +; GCN: buffer_store_dword [[FI1]], [[ZERO]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen offset:8{{$}} -; GCN-DAG: v_mov_b32_e32 [[FI1:v[0-9]+]], 4{{$}} -; GCN-DAG: v_mov_b32_e32 [[FI2:v[0-9]+]], 8{{$}} -; GCN: buffer_store_dword [[FI2]], [[FI1]] +; GCN: v_mov_b32_e32 [[FI2:v[0-9]+]], 8{{$}} +; GCN: buffer_store_dword [[FI2]], [[ZERO]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen offset:4{{$}}  define void @stored_fi_to_fi() #0 {    %tmp0 = alloca i32*    %tmp1 = alloca i32* @@ -114,4 +139,28 @@ define void @stored_fi_to_global_2_small_objects(float* addrspace(1)* %ptr) #0 {    ret void  } +; GCN-LABEL: {{^}}stored_fi_to_global_huge_frame_offset: +; GCN: v_mov_b32_e32 [[BASE_0:v[0-9]+]], 0{{$}} +; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen + +; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7{{$}} +; GCN-DAG: s_add_i32 [[BASE_1_OFF_0:s[0-9]+]], 0, 0x3ffc +; GCN-DAG: v_mov_b32_e32 [[V_BASE_1_OFF_0:v[0-9]+]], [[BASE_1_OFF_0]] +; GCN: buffer_store_dword [[K]], [[V_BASE_1_OFF_0]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}} + +; GCN-DAG: s_add_i32 [[BASE_1_OFF_1:s[0-9]+]], 0, 56 +; GCN-DAG: v_mov_b32_e32 [[V_BASE_1_OFF_1:v[0-9]+]], [[BASE_1_OFF_1]] +; GCN: buffer_store_dword [[V_BASE_1_OFF_1]], s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} +define void @stored_fi_to_global_huge_frame_offset(i32* addrspace(1)* %ptr) #0 { +  %tmp0 = alloca [4096 x i32] +  %tmp1 = alloca [4096 x i32] +  %gep0.tmp0 = getelementptr [4096 x i32], [4096 x i32]* %tmp0, i32 0, i32 0 +  store volatile i32 0, i32* %gep0.tmp0 +  %gep1.tmp0 = getelementptr [4096 x i32], [4096 x i32]* %tmp0, i32 0, i32 4095 +  store volatile i32 999, i32* %gep1.tmp0 +  %gep0.tmp1 = getelementptr [4096 x i32], [4096 x i32]* %tmp0, i32 0, i32 14 +  store i32* %gep0.tmp1, i32* addrspace(1)* %ptr +  ret void +} +  attributes #0 = { nounwind } | 

